Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleOCR
提交
0663b039
P
PaddleOCR
项目概览
PaddlePaddle
/
PaddleOCR
大约 1 年 前同步成功
通知
1528
Star
32962
Fork
6643
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
108
列表
看板
标记
里程碑
合并请求
7
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleOCR
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
108
Issue
108
列表
看板
标记
里程碑
合并请求
7
合并请求
7
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
0663b039
编写于
9月 20, 2022
作者:
Z
zhoujun
提交者:
GitHub
9月 20, 2022
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #7648 from WenmuZhou/cpp_infer
add layout cpp infer
上级
0a59848d
dd9f48da
变更
25
隐藏空白更改
内联
并排
Showing
25 changed file
with
1045 addition
and
472 deletion
+1045
-472
deploy/cpp_infer/include/args.h
deploy/cpp_infer/include/args.h
+7
-1
deploy/cpp_infer/include/ocr_cls.h
deploy/cpp_infer/include/ocr_cls.h
+1
-15
deploy/cpp_infer/include/ocr_det.h
deploy/cpp_infer/include/ocr_det.h
+3
-17
deploy/cpp_infer/include/ocr_rec.h
deploy/cpp_infer/include/ocr_rec.h
+2
-17
deploy/cpp_infer/include/paddleocr.h
deploy/cpp_infer/include/paddleocr.h
+17
-30
deploy/cpp_infer/include/paddlestructure.h
deploy/cpp_infer/include/paddlestructure.h
+19
-29
deploy/cpp_infer/include/postprocess_op.h
deploy/cpp_infer/include/postprocess_op.h
+23
-15
deploy/cpp_infer/include/preprocess_op.h
deploy/cpp_infer/include/preprocess_op.h
+12
-15
deploy/cpp_infer/include/structure_layout.h
deploy/cpp_infer/include/structure_layout.h
+78
-0
deploy/cpp_infer/include/structure_table.h
deploy/cpp_infer/include/structure_table.h
+2
-17
deploy/cpp_infer/include/utility.h
deploy/cpp_infer/include/utility.h
+10
-2
deploy/cpp_infer/readme.md
deploy/cpp_infer/readme.md
+79
-4
deploy/cpp_infer/readme_ch.md
deploy/cpp_infer/readme_ch.md
+81
-5
deploy/cpp_infer/src/args.cpp
deploy/cpp_infer/src/args.cpp
+9
-1
deploy/cpp_infer/src/main.cpp
deploy/cpp_infer/src/main.cpp
+89
-54
deploy/cpp_infer/src/ocr_cls.cpp
deploy/cpp_infer/src/ocr_cls.cpp
+5
-5
deploy/cpp_infer/src/ocr_det.cpp
deploy/cpp_infer/src/ocr_det.cpp
+3
-4
deploy/cpp_infer/src/ocr_rec.cpp
deploy/cpp_infer/src/ocr_rec.cpp
+7
-9
deploy/cpp_infer/src/paddleocr.cpp
deploy/cpp_infer/src/paddleocr.cpp
+93
-114
deploy/cpp_infer/src/paddlestructure.cpp
deploy/cpp_infer/src/paddlestructure.cpp
+114
-84
deploy/cpp_infer/src/postprocess_op.cpp
deploy/cpp_infer/src/postprocess_op.cpp
+134
-2
deploy/cpp_infer/src/preprocess_op.cpp
deploy/cpp_infer/src/preprocess_op.cpp
+11
-21
deploy/cpp_infer/src/structure_layout.cpp
deploy/cpp_infer/src/structure_layout.cpp
+149
-0
deploy/cpp_infer/src/structure_table.cpp
deploy/cpp_infer/src/structure_table.cpp
+8
-3
deploy/cpp_infer/src/utility.cpp
deploy/cpp_infer/src/utility.cpp
+89
-8
未找到文件。
deploy/cpp_infer/include/args.h
浏览文件 @
0663b039
...
...
@@ -49,6 +49,11 @@ DECLARE_int32(rec_batch_num);
DECLARE_string
(
rec_char_dict_path
);
DECLARE_int32
(
rec_img_h
);
DECLARE_int32
(
rec_img_w
);
// layout model related
DECLARE_string
(
layout_model_dir
);
DECLARE_string
(
layout_dict_path
);
DECLARE_double
(
layout_score_threshold
);
DECLARE_double
(
layout_nms_threshold
);
// structure model related
DECLARE_string
(
table_model_dir
);
DECLARE_int32
(
table_max_len
);
...
...
@@ -59,4 +64,5 @@ DECLARE_bool(merge_no_span_structure);
DECLARE_bool
(
det
);
DECLARE_bool
(
rec
);
DECLARE_bool
(
cls
);
DECLARE_bool
(
table
);
\ No newline at end of file
DECLARE_bool
(
table
);
DECLARE_bool
(
layout
);
\ No newline at end of file
deploy/cpp_infer/include/ocr_cls.h
浏览文件 @
0663b039
...
...
@@ -14,26 +14,12 @@
#pragma once
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include "paddle_api.h"
#include "paddle_inference_api.h"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
#include <include/preprocess_op.h>
#include <include/utility.h>
using
namespace
paddle_infer
;
namespace
PaddleOCR
{
class
Classifier
{
...
...
@@ -66,7 +52,7 @@ public:
std
::
vector
<
float
>
&
cls_scores
,
std
::
vector
<
double
>
&
times
);
private:
std
::
shared_ptr
<
Predictor
>
predictor_
;
std
::
shared_ptr
<
paddle_infer
::
Predictor
>
predictor_
;
bool
use_gpu_
=
false
;
int
gpu_id_
=
0
;
...
...
deploy/cpp_infer/include/ocr_det.h
浏览文件 @
0663b039
...
...
@@ -14,26 +14,12 @@
#pragma once
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include "paddle_api.h"
#include "paddle_inference_api.h"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
#include <include/postprocess_op.h>
#include <include/preprocess_op.h>
using
namespace
paddle_infer
;
namespace
PaddleOCR
{
class
DBDetector
{
...
...
@@ -41,7 +27,7 @@ public:
explicit
DBDetector
(
const
std
::
string
&
model_dir
,
const
bool
&
use_gpu
,
const
int
&
gpu_id
,
const
int
&
gpu_mem
,
const
int
&
cpu_math_library_num_threads
,
const
bool
&
use_mkldnn
,
const
string
&
limit_type
,
const
bool
&
use_mkldnn
,
const
st
d
::
st
ring
&
limit_type
,
const
int
&
limit_side_len
,
const
double
&
det_db_thresh
,
const
double
&
det_db_box_thresh
,
const
double
&
det_db_unclip_ratio
,
...
...
@@ -77,7 +63,7 @@ public:
std
::
vector
<
double
>
&
times
);
private:
std
::
shared_ptr
<
Predictor
>
predictor_
;
std
::
shared_ptr
<
paddle_infer
::
Predictor
>
predictor_
;
bool
use_gpu_
=
false
;
int
gpu_id_
=
0
;
...
...
@@ -85,7 +71,7 @@ private:
int
cpu_math_library_num_threads_
=
4
;
bool
use_mkldnn_
=
false
;
string
limit_type_
=
"max"
;
st
d
::
st
ring
limit_type_
=
"max"
;
int
limit_side_len_
=
960
;
double
det_db_thresh_
=
0.3
;
...
...
deploy/cpp_infer/include/ocr_rec.h
浏览文件 @
0663b039
...
...
@@ -14,27 +14,12 @@
#pragma once
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include "paddle_api.h"
#include "paddle_inference_api.h"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
#include <include/ocr_cls.h>
#include <include/preprocess_op.h>
#include <include/utility.h>
using
namespace
paddle_infer
;
namespace
PaddleOCR
{
class
CRNNRecognizer
{
...
...
@@ -42,7 +27,7 @@ public:
explicit
CRNNRecognizer
(
const
std
::
string
&
model_dir
,
const
bool
&
use_gpu
,
const
int
&
gpu_id
,
const
int
&
gpu_mem
,
const
int
&
cpu_math_library_num_threads
,
const
bool
&
use_mkldnn
,
const
string
&
label_path
,
const
bool
&
use_mkldnn
,
const
st
d
::
st
ring
&
label_path
,
const
bool
&
use_tensorrt
,
const
std
::
string
&
precision
,
const
int
&
rec_batch_num
,
const
int
&
rec_img_h
,
...
...
@@ -75,7 +60,7 @@ public:
std
::
vector
<
float
>
&
rec_text_scores
,
std
::
vector
<
double
>
&
times
);
private:
std
::
shared_ptr
<
Predictor
>
predictor_
;
std
::
shared_ptr
<
paddle_infer
::
Predictor
>
predictor_
;
bool
use_gpu_
=
false
;
int
gpu_id_
=
0
;
...
...
deploy/cpp_infer/include/paddleocr.h
浏览文件 @
0663b039
...
...
@@ -14,28 +14,9 @@
#pragma once
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include "paddle_api.h"
#include "paddle_inference_api.h"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
#include <include/ocr_cls.h>
#include <include/ocr_det.h>
#include <include/ocr_rec.h>
#include <include/preprocess_op.h>
#include <include/utility.h>
using
namespace
paddle_infer
;
namespace
PaddleOCR
{
...
...
@@ -43,21 +24,27 @@ class PPOCR {
public:
explicit
PPOCR
();
~
PPOCR
();
std
::
vector
<
std
::
vector
<
OCRPredictResult
>>
ocr
(
std
::
vector
<
cv
::
String
>
cv_all_img_names
,
bool
det
=
true
,
bool
rec
=
true
,
bool
cls
=
true
);
std
::
vector
<
std
::
vector
<
OCRPredictResult
>>
ocr
(
std
::
vector
<
cv
::
Mat
>
img_list
,
bool
det
=
true
,
bool
rec
=
true
,
bool
cls
=
true
);
std
::
vector
<
OCRPredictResult
>
ocr
(
cv
::
Mat
img
,
bool
det
=
true
,
bool
rec
=
true
,
bool
cls
=
true
);
void
reset_timer
();
void
benchmark_log
(
int
img_num
);
protected:
void
det
(
cv
::
Mat
img
,
std
::
vector
<
OCRPredictResult
>
&
ocr_results
,
std
::
vector
<
double
>
&
times
);
std
::
vector
<
double
>
time_info_det
=
{
0
,
0
,
0
};
std
::
vector
<
double
>
time_info_rec
=
{
0
,
0
,
0
};
std
::
vector
<
double
>
time_info_cls
=
{
0
,
0
,
0
};
void
det
(
cv
::
Mat
img
,
std
::
vector
<
OCRPredictResult
>
&
ocr_results
);
void
rec
(
std
::
vector
<
cv
::
Mat
>
img_list
,
std
::
vector
<
OCRPredictResult
>
&
ocr_results
,
std
::
vector
<
double
>
&
times
);
std
::
vector
<
OCRPredictResult
>
&
ocr_results
);
void
cls
(
std
::
vector
<
cv
::
Mat
>
img_list
,
std
::
vector
<
OCRPredictResult
>
&
ocr_results
,
std
::
vector
<
double
>
&
times
);
void
log
(
std
::
vector
<
double
>
&
det_times
,
std
::
vector
<
double
>
&
rec_times
,
std
::
vector
<
double
>
&
cls_times
,
int
img_num
);
std
::
vector
<
OCRPredictResult
>
&
ocr_results
);
private:
DBDetector
*
detector_
=
nullptr
;
...
...
deploy/cpp_infer/include/paddlestructure.h
浏览文件 @
0663b039
...
...
@@ -14,27 +14,9 @@
#pragma once
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include "paddle_api.h"
#include "paddle_inference_api.h"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
#include <include/paddleocr.h>
#include <include/
preprocess_op
.h>
#include <include/
structure_layout
.h>
#include <include/structure_table.h>
#include <include/utility.h>
using
namespace
paddle_infer
;
namespace
PaddleOCR
{
...
...
@@ -42,23 +24,31 @@ class PaddleStructure : public PPOCR {
public:
explicit
PaddleStructure
();
~
PaddleStructure
();
std
::
vector
<
std
::
vector
<
StructurePredictResult
>>
structure
(
std
::
vector
<
cv
::
String
>
cv_all_img_names
,
bool
layout
=
false
,
bool
table
=
true
);
std
::
vector
<
StructurePredictResult
>
structure
(
cv
::
Mat
img
,
bool
layout
=
false
,
bool
table
=
true
,
bool
ocr
=
false
);
void
reset_timer
();
void
benchmark_log
(
int
img_num
);
private:
StructureTableRecognizer
*
recognizer_
=
nullptr
;
std
::
vector
<
double
>
time_info_table
=
{
0
,
0
,
0
};
std
::
vector
<
double
>
time_info_layout
=
{
0
,
0
,
0
};
StructureTableRecognizer
*
table_model_
=
nullptr
;
StructureLayoutRecognizer
*
layout_model_
=
nullptr
;
void
layout
(
cv
::
Mat
img
,
std
::
vector
<
StructurePredictResult
>
&
structure_result
);
void
table
(
cv
::
Mat
img
,
StructurePredictResult
&
structure_result
);
void
table
(
cv
::
Mat
img
,
StructurePredictResult
&
structure_result
,
std
::
vector
<
double
>
&
time_info_table
,
std
::
vector
<
double
>
&
time_info_det
,
std
::
vector
<
double
>
&
time_info_rec
,
std
::
vector
<
double
>
&
time_info_cls
);
std
::
string
rebuild_table
(
std
::
vector
<
std
::
string
>
rec_html_tags
,
std
::
vector
<
std
::
vector
<
int
>>
rec_boxes
,
std
::
vector
<
OCRPredictResult
>
&
ocr_result
);
float
iou
(
std
::
vector
<
int
>
&
box1
,
std
::
vector
<
int
>
&
box2
);
float
dis
(
std
::
vector
<
int
>
&
box1
,
std
::
vector
<
int
>
&
box2
);
static
bool
comparison_dis
(
const
std
::
vector
<
float
>
&
dis1
,
...
...
deploy/cpp_infer/include/postprocess_op.h
浏览文件 @
0663b039
...
...
@@ -14,24 +14,9 @@
#pragma once
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
#include "include/clipper.h"
#include "include/utility.h"
using
namespace
std
;
namespace
PaddleOCR
{
class
DBPostProcessor
{
...
...
@@ -106,4 +91,27 @@ private:
std
::
string
beg
=
"sos"
;
};
class
PicodetPostProcessor
{
public:
void
init
(
std
::
string
label_path
,
const
double
score_threshold
=
0.4
,
const
double
nms_threshold
=
0.5
,
const
std
::
vector
<
int
>
&
fpn_stride
=
{
8
,
16
,
32
,
64
});
void
Run
(
std
::
vector
<
StructurePredictResult
>
&
results
,
std
::
vector
<
std
::
vector
<
float
>>
outs
,
std
::
vector
<
int
>
ori_shape
,
std
::
vector
<
int
>
resize_shape
,
int
eg_max
);
std
::
vector
<
int
>
fpn_stride_
=
{
8
,
16
,
32
,
64
};
private:
StructurePredictResult
disPred2Bbox
(
std
::
vector
<
float
>
bbox_pred
,
int
label
,
float
score
,
int
x
,
int
y
,
int
stride
,
std
::
vector
<
int
>
im_shape
,
int
reg_max
);
void
nms
(
std
::
vector
<
StructurePredictResult
>
&
input_boxes
,
float
nms_threshold
);
std
::
vector
<
std
::
string
>
label_list_
;
double
score_threshold_
=
0.4
;
double
nms_threshold_
=
0.5
;
int
num_class_
=
5
;
};
}
// namespace PaddleOCR
deploy/cpp_infer/include/preprocess_op.h
浏览文件 @
0663b039
...
...
@@ -14,21 +14,12 @@
#pragma once
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
using
namespace
std
;
using
namespace
paddle
;
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
namespace
PaddleOCR
{
...
...
@@ -51,9 +42,9 @@ public:
class
ResizeImgType0
{
public:
virtual
void
Run
(
const
cv
::
Mat
&
img
,
cv
::
Mat
&
resize_img
,
string
limit_type
,
int
limit_side_len
,
float
&
ratio_h
,
float
&
ratio_w
,
bool
use_tensorrt
);
virtual
void
Run
(
const
cv
::
Mat
&
img
,
cv
::
Mat
&
resize_img
,
std
::
string
limit_type
,
int
limit_side_len
,
float
&
ratio_h
,
float
&
ratio_w
,
bool
use_tensorrt
);
};
class
CrnnResizeImg
{
...
...
@@ -82,4 +73,10 @@ public:
const
int
max_len
=
488
);
};
class
Resize
{
public:
virtual
void
Run
(
const
cv
::
Mat
&
img
,
cv
::
Mat
&
resize_img
,
const
int
h
,
const
int
w
);
};
}
// namespace PaddleOCR
\ No newline at end of file
deploy/cpp_infer/include/structure_layout.h
0 → 100644
浏览文件 @
0663b039
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle_api.h"
#include "paddle_inference_api.h"
#include <include/postprocess_op.h>
#include <include/preprocess_op.h>
namespace
PaddleOCR
{
class
StructureLayoutRecognizer
{
public:
explicit
StructureLayoutRecognizer
(
const
std
::
string
&
model_dir
,
const
bool
&
use_gpu
,
const
int
&
gpu_id
,
const
int
&
gpu_mem
,
const
int
&
cpu_math_library_num_threads
,
const
bool
&
use_mkldnn
,
const
std
::
string
&
label_path
,
const
bool
&
use_tensorrt
,
const
std
::
string
&
precision
,
const
double
&
layout_score_threshold
,
const
double
&
layout_nms_threshold
)
{
this
->
use_gpu_
=
use_gpu
;
this
->
gpu_id_
=
gpu_id
;
this
->
gpu_mem_
=
gpu_mem
;
this
->
cpu_math_library_num_threads_
=
cpu_math_library_num_threads
;
this
->
use_mkldnn_
=
use_mkldnn
;
this
->
use_tensorrt_
=
use_tensorrt
;
this
->
precision_
=
precision
;
this
->
post_processor_
.
init
(
label_path
,
layout_score_threshold
,
layout_nms_threshold
);
LoadModel
(
model_dir
);
}
// Load Paddle inference model
void
LoadModel
(
const
std
::
string
&
model_dir
);
void
Run
(
cv
::
Mat
img
,
std
::
vector
<
StructurePredictResult
>
&
result
,
std
::
vector
<
double
>
&
times
);
private:
std
::
shared_ptr
<
paddle_infer
::
Predictor
>
predictor_
;
bool
use_gpu_
=
false
;
int
gpu_id_
=
0
;
int
gpu_mem_
=
4000
;
int
cpu_math_library_num_threads_
=
4
;
bool
use_mkldnn_
=
false
;
std
::
vector
<
float
>
mean_
=
{
0.485
f
,
0.456
f
,
0.406
f
};
std
::
vector
<
float
>
scale_
=
{
1
/
0.229
f
,
1
/
0.224
f
,
1
/
0.225
f
};
bool
is_scale_
=
true
;
bool
use_tensorrt_
=
false
;
std
::
string
precision_
=
"fp32"
;
// pre-process
Resize
resize_op_
;
Normalize
normalize_op_
;
Permute
permute_op_
;
// post-process
PicodetPostProcessor
post_processor_
;
};
}
// namespace PaddleOCR
\ No newline at end of file
deploy/cpp_infer/include/structure_table.h
浏览文件 @
0663b039
...
...
@@ -14,26 +14,11 @@
#pragma once
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include "paddle_api.h"
#include "paddle_inference_api.h"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
#include <include/postprocess_op.h>
#include <include/preprocess_op.h>
#include <include/utility.h>
using
namespace
paddle_infer
;
namespace
PaddleOCR
{
...
...
@@ -42,7 +27,7 @@ public:
explicit
StructureTableRecognizer
(
const
std
::
string
&
model_dir
,
const
bool
&
use_gpu
,
const
int
&
gpu_id
,
const
int
&
gpu_mem
,
const
int
&
cpu_math_library_num_threads
,
const
bool
&
use_mkldnn
,
const
string
&
label_path
,
const
bool
&
use_mkldnn
,
const
st
d
::
st
ring
&
label_path
,
const
bool
&
use_tensorrt
,
const
std
::
string
&
precision
,
const
int
&
table_batch_num
,
const
int
&
table_max_len
,
const
bool
&
merge_no_span_structure
)
{
...
...
@@ -70,7 +55,7 @@ public:
std
::
vector
<
double
>
&
times
);
private:
std
::
shared_ptr
<
Predictor
>
predictor_
;
std
::
shared_ptr
<
paddle_infer
::
Predictor
>
predictor_
;
bool
use_gpu_
=
false
;
int
gpu_id_
=
0
;
...
...
deploy/cpp_infer/include/utility.h
浏览文件 @
0663b039
...
...
@@ -41,12 +41,13 @@ struct OCRPredictResult {
};
struct
StructurePredictResult
{
std
::
vector
<
in
t
>
box
;
std
::
vector
<
floa
t
>
box
;
std
::
vector
<
std
::
vector
<
int
>>
cell_box
;
std
::
string
type
;
std
::
vector
<
OCRPredictResult
>
text_res
;
std
::
string
html
;
float
html_score
=
-
1
;
float
confidence
;
};
class
Utility
{
...
...
@@ -82,13 +83,20 @@ public:
static
void
print_result
(
const
std
::
vector
<
OCRPredictResult
>
&
ocr_result
);
static
cv
::
Mat
crop_image
(
cv
::
Mat
&
img
,
std
::
vector
<
int
>
&
area
);
static
cv
::
Mat
crop_image
(
cv
::
Mat
&
img
,
const
std
::
vector
<
int
>
&
area
);
static
cv
::
Mat
crop_image
(
cv
::
Mat
&
img
,
const
std
::
vector
<
float
>
&
area
);
static
void
sorted_boxes
(
std
::
vector
<
OCRPredictResult
>
&
ocr_result
);
static
std
::
vector
<
int
>
xyxyxyxy2xyxy
(
std
::
vector
<
std
::
vector
<
int
>>
&
box
);
static
std
::
vector
<
int
>
xyxyxyxy2xyxy
(
std
::
vector
<
int
>
&
box
);
static
float
fast_exp
(
float
x
);
static
std
::
vector
<
float
>
activation_function_softmax
(
std
::
vector
<
float
>
&
src
);
static
float
iou
(
std
::
vector
<
int
>
&
box1
,
std
::
vector
<
int
>
&
box2
);
static
float
iou
(
std
::
vector
<
float
>
&
box1
,
std
::
vector
<
float
>
&
box2
);
private:
static
bool
comparison_box
(
const
OCRPredictResult
&
result1
,
const
OCRPredictResult
&
result2
)
{
...
...
deploy/cpp_infer/readme.md
浏览文件 @
0663b039
...
...
@@ -174,6 +174,9 @@ inference/
|-- table
| |--inference.pdiparams
| |--inference.pdmodel
|-- layout
| |--inference.pdiparams
| |--inference.pdmodel
```
...
...
@@ -278,8 +281,30 @@ Specifically,
--cls
=
true
\
```
##### 7. layout+table
```
shell
./build/ppocr
--det_model_dir
=
inference/det_db
\
--rec_model_dir
=
inference/rec_rcnn
\
--table_model_dir
=
inference/table
\
--image_dir
=
../../ppstructure/docs/table/table.jpg
\
--layout_model_dir
=
inference/layout
\
--type
=
structure
\
--table
=
true
\
--layout
=
true
```
##### 8. layout
```
shell
./build/ppocr
--layout_model_dir
=
inference/layout
\
--image_dir
=
../../ppstructure/docs/table/1.png
\
--type
=
structure
\
--table
=
false
\
--layout
=
true
\
--det
=
false
\
--rec
=
false
```
#####
7
. table
#####
9
. table
```
shell
./build/ppocr
--det_model_dir
=
inference/det_db
\
--rec_model_dir
=
inference/rec_rcnn
\
...
...
@@ -343,6 +368,16 @@ More parameters are as follows,
|rec_img_h|int|48|image height of recognition|
|rec_img_w|int|320|image width of recognition|
-
Layout related parameters
|parameter|data type|default|meaning|
| :---: | :---: | :---: | :---: |
|layout_model_dir|string|-| Address of layout inference model|
|layout_dict_path|string|../../ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt|dictionary file|
|layout_score_threshold|float|0.5|Threshold of score.|
|layout_nms_threshold|float|0.5|Threshold of nms.|
-
Table recognition related parameters
|parameter|data type|default|meaning|
...
...
@@ -368,11 +403,51 @@ predict img: ../../doc/imgs/12.jpg
The detection visualized image saved
in
./output//12.jpg
```
-
table
-
layout+
table
```
bash
predict img: ../../ppstructure/docs/table/table.jpg
0
type
: table, region:
[
0,0,371,293], res: <html><body><table><thead><
tr
>
<td>Methods</td><td>R</td><td>P</td><td>F</td><td>FPS</td></tr></thead><tbody><
tr
>
<td>SegLink
[
26]</td><td>70.0</td><td>86.0</td><td>77.0</td><td>8.9</td></tr><
tr
>
<td>PixelLink
[
4]</td><td>73.2</td><td>83.0</td><td>77.8</td><td>-</td></tr><
tr
>
<td>TextSnake
[
18]</td><td>73.9</td><td>83.2</td><td>78.3</td><td>1.1</td></tr><
tr
>
<td>TextField
[
37]</td><td>75.9</td><td>87.4</td><td>81.3</td><td>5.2 </td></tr><
tr
>
<td>MSR[38]</td><td>76.7</td><td>87.4</td><td>81.7</td><td>-</td></tr><
tr
>
<td>FTSN
[
3]</td><td>77.1</td><td>87.6</td><td>82.0</td><td>-</td></tr><
tr
>
<td>LSE[30]</td><td>81.7</td><td>84.2</td><td>82.9</td><td>-</td></tr><
tr
>
<td>CRAFT
[
2]</td><td>78.2</td><td>88.2</td><td>82.9</td><td>8.6</td></tr><
tr
>
<td>MCN
[
16]</td><td>79</td><td>88</td><td>83</td><td>-</td></tr><
tr
>
<td>ATRR[35]</td><td>82.1</td><td>85.2</td><td>83.6</td><td>-</td></tr><
tr
>
<td>PAN
[
34]</td><td>83.8</td><td>84.4</td><td>84.1</td><td>30.2</td></tr><
tr
>
<td>DB[12]</td><td>79.2</td><td>91.5</td><td>84.9</td><td>32.0</td></tr><
tr
>
<td>DRRG
[
41]</td><td>82.30</td><td>88.05</td><td>85.08</td><td>-</td></tr><
tr
>
<td>Ours
(
SynText
)
</td><td>80.68</td><td>85.40</td><td>82.97</td><td>12.68</td></tr><
tr
>
<td>Ours
(
MLT-17
)
</td><td>84.54</td><td>86.62</td><td>85.57</td><td>12.31</td></tr></tbody></table></body></html>
predict img: ../../ppstructure/docs/table/1.png
0
type
: text, region:
[
12,729,410,848], score: 0.781044, res: count of ocr result is : 7
**********
print ocr result
**********
0 det boxes:
[[
4,1],[79,1],[79,12],[4,12]] rec text: CTW1500. rec score: 0.769472
...
6 det boxes:
[[
4,99],[391,99],[391,112],[4,112]] rec text: sate-of-the-artmethods[12.34.36l.ourapproachachieves rec score: 0.90414
**********
end print ocr result
**********
1
type
: text, region:
[
69,342,342,359], score: 0.703666, res: count of ocr result is : 1
**********
print ocr result
**********
0 det boxes:
[[
8,2],[269,2],[269,13],[8,13]] rec text: Table6.Experimentalresults on CTW-1500 rec score: 0.890454
**********
end print ocr result
**********
2
type
: text, region:
[
70,316,706,332], score: 0.659738, res: count of ocr result is : 2
**********
print ocr result
**********
0 det boxes:
[[
373,2],[630,2],[630,11],[373,11]] rec text: oroposals.andthegreencontoursarefinal rec score: 0.919729
1 det boxes:
[[
8,3],[357,3],[357,11],[8,11]] rec text: Visualexperimentalresultshebluecontoursareboundar rec score: 0.915963
**********
end print ocr result
**********
3
type
: text, region:
[
489,342,789,359], score: 0.630538, res: count of ocr result is : 1
**********
print ocr result
**********
0 det boxes:
[[
8,2],[294,2],[294,14],[8,14]] rec text: Table7.Experimentalresults onMSRA-TD500 rec score: 0.942251
**********
end print ocr result
**********
4
type
: text, region:
[
444,751,841,848], score: 0.607345, res: count of ocr result is : 5
**********
print ocr result
**********
0 det boxes:
[[
19,3],[389,3],[389,17],[19,17]] rec text: Inthispaper,weproposeanovel adaptivebound rec score: 0.941031
1 det boxes:
[[
4,22],[390,22],[390,36],[4,36]] rec text: aryproposalnetworkforarbitraryshapetextdetection rec score: 0.960172
2 det boxes:
[[
4,42],[392,42],[392,56],[4,56]] rec text: whichadoptanboundaryproposalmodeltogeneratecoarse rec score: 0.934647
3 det boxes:
[[
4,61],[389,61],[389,75],[4,75]] rec text: ooundaryproposals,andthenadoptanadaptiveboundary rec score: 0.946296
4 det boxes:
[[
5,80],[387,80],[387,93],[5,93]] rec text: leformationmodelcombinedwithGCNandRNNtoper rec score: 0.952401
**********
end print ocr result
**********
5
type
: title, region:
[
444,705,564,724], score: 0.785429, res: count of ocr result is : 1
**********
print ocr result
**********
0 det boxes:
[[
6,2],[113,2],[113,14],[6,14]] rec text: 5.Conclusion rec score: 0.856903
**********
end print ocr result
**********
6
type
: table, region:
[
14,360,402,711], score: 0.963643, res: <html><body><table><thead><
tr
>
<td>Methods</td><td>Ext</td><td>R</td><td>P</td><td>F</td><td>FPS</td></tr></thead><tbody><
tr
>
<td>TextSnake
[
18]</td><td>Syn</td><td>85.3</td><td>67.9</td><td>75.6</td><td></td></tr><
tr
>
<td>CSE
[
17]</td><td>MiLT</td><td>76.1</td><td>78.7</td><td>77.4</td><td>0.38</td></tr><
tr
>
<td>LOMO[40]</td><td>Syn</td><td>76.5</td><td>85.7</td><td>80.8</td><td>4.4</td></tr><
tr
>
<td>ATRR[35]</td><td>Sy-</td><td>80.2</td><td>80.1</td><td>80.1</td><td>-</td></tr><
tr
>
<td>SegLink++
[
28]</td><td>Syn</td><td>79.8</td><td>82.8</td><td>81.3</td><td>-</td></tr><
tr
>
<td>TextField
[
37]</td><td>Syn</td><td>79.8</td><td>83.0</td><td>81.4</td><td>6.0</td></tr><
tr
>
<td>MSR[38]</td><td>Syn</td><td>79.0</td><td>84.1</td><td>81.5</td><td>4.3</td></tr><
tr
>
<td>PSENet-1s
[
33]</td><td>MLT</td><td>79.7</td><td>84.8</td><td>82.2</td><td>3.9</td></tr><
tr
>
<td>DB
[
12]</td><td>Syn</td><td>80.2</td><td>86.9</td><td>83.4</td><td>22.0</td></tr><
tr
>
<td>CRAFT
[
2]</td><td>Syn</td><td>81.1</td><td>86.0</td><td>83.5</td><td>-</td></tr><
tr
>
<td>TextDragon
[
5]</td><td>MLT+</td><td>82.8</td><td>84.5</td><td>83.6</td><td></td></tr><
tr
>
<td>PAN
[
34]</td><td>Syn</td><td>81.2</td><td>86.4</td><td>83.7</td><td>39.8</td></tr><
tr
>
<td>ContourNet
[
36]</td><td></td><td>84.1</td><td>83.7</td><td>83.9</td><td>4.5</td></tr><
tr
>
<td>DRRG
[
41]</td><td>MLT</td><td>83.02</td><td>85.93</td><td>84.45</td><td>-</td></tr><
tr
>
<td>TextPerception[23]</td><td>Syn</td><td>81.9</td><td>87.5</td><td>84.6</td><td></td></tr><
tr
>
<td>Ours</td><td> Syn</td><td>80.57</td><td>87.66</td><td>83.97</td><td>12.08</td></tr><
tr
>
<td>Ours</td><td></td><td>81.45</td><td>87.81</td><td>84.51</td><td>12.15</td></tr><
tr
>
<td>Ours</td><td>MLT</td><td>83.60</td><td>86.45</td><td>85.00</td><td>12.21</td></tr></tbody></table></body></html>
The table visualized image saved
in
./output//6_1.png
7
type
: table, region:
[
462,359,820,657], score: 0.953917, res: <html><body><table><thead><
tr
>
<td>Methods</td><td>R</td><td>P</td><td>F</td><td>FPS</td></tr></thead><tbody><
tr
>
<td>SegLink
[
26]</td><td>70.0</td><td>86.0</td><td>77.0</td><td>8.9</td></tr><
tr
>
<td>PixelLink
[
4]</td><td>73.2</td><td>83.0</td><td>77.8</td><td>-</td></tr><
tr
>
<td>TextSnake
[
18]</td><td>73.9</td><td>83.2</td><td>78.3</td><td>1.1</td></tr><
tr
>
<td>TextField
[
37]</td><td>75.9</td><td>87.4</td><td>81.3</td><td>5.2 </td></tr><
tr
>
<td>MSR[38]</td><td>76.7</td><td>87.4</td><td>81.7</td><td>-</td></tr><
tr
>
<td>FTSN[3]</td><td>77.1</td><td>87.6</td><td>82.0</td><td>:</td></tr><
tr
>
<td>LSE[30]</td><td>81.7</td><td>84.2</td><td>82.9</td><td></td></tr><
tr
>
<td>CRAFT
[
2]</td><td>78.2</td><td>88.2</td><td>82.9</td><td>8.6</td></tr><
tr
>
<td>MCN
[
16]</td><td>79</td><td>88</td><td>83</td><td>-</td></tr><
tr
>
<td>ATRR[35]</td><td>82.1</td><td>85.2</td><td>83.6</td><td>-</td></tr><
tr
>
<td>PAN
[
34]</td><td>83.8</td><td>84.4</td><td>84.1</td><td>30.2</td></tr><
tr
>
<td>DB[12]</td><td>79.2</td><td>91.5</td><td>84.9</td><td>32.0</td></tr><
tr
>
<td>DRRG
[
41]</td><td>82.30</td><td>88.05</td><td>85.08</td><td>-</td></tr><
tr
>
<td>Ours
(
SynText
)
</td><td>80.68</td><td>85.40</td><td>82.97</td><td>12.68</td></tr><
tr
>
<td>Ours
(
MLT-17
)
</td><td>84.54</td><td>86.62</td><td>85.57</td><td>12.31</td></tr></tbody></table></body></html>
The table visualized image saved
in
./output//7_1.png
8
type
: figure, region:
[
14,3,836,310], score: 0.969443, res: count of ocr result is : 26
**********
print ocr result
**********
0 det boxes:
[[
506,14],[539,15],[539,22],[506,21]] rec text: E rec score: 0.318073
...
25 det boxes:
[[
680,290],[759,288],[759,303],[680,305]] rec text:
(
d
)
CTW1500 rec score: 0.95911
**********
end print ocr result
**********
```
<a
name=
"3"
></a>
...
...
deploy/cpp_infer/readme_ch.md
浏览文件 @
0663b039
...
...
@@ -184,6 +184,9 @@ inference/
|-- table
| |--inference.pdiparams
| |--inference.pdmodel
|-- layout
| |--inference.pdiparams
| |--inference.pdmodel
```
<a
name=
"22"
></a>
...
...
@@ -288,7 +291,30 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
--cls
=
true
\
```
##### 7. 表格识别
##### 7. 版面分析+表格识别
```
shell
./build/ppocr
--det_model_dir
=
inference/det_db
\
--rec_model_dir
=
inference/rec_rcnn
\
--table_model_dir
=
inference/table
\
--image_dir
=
../../ppstructure/docs/table/table.jpg
\
--layout_model_dir
=
inference/layout
\
--type
=
structure
\
--table
=
true
\
--layout
=
true
```
##### 8. 版面分析
```
shell
./build/ppocr
--layout_model_dir
=
inference/layout
\
--image_dir
=
../../ppstructure/docs/table/1.png
\
--type
=
structure
\
--table
=
false
\
--layout
=
true
\
--det
=
false
\
--rec
=
false
```
##### 9. 表格识别
```
shell
./build/ppocr
--det_model_dir
=
inference/det_db
\
--rec_model_dir
=
inference/rec_rcnn
\
...
...
@@ -352,12 +378,22 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
|rec_img_w|int|320|文字识别模型输入图像宽度|
-
版面分析模型相关
|参数名称|类型|默认参数|意义|
| :---: | :---: | :---: | :---: |
|layout_model_dir|string|-|版面分析模型inference model地址|
|layout_dict_path|string|../../ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt|字典文件|
|layout_score_threshold|float|0.5|检测框的分数阈值|
|layout_nms_threshold|float|0.5|nms的阈值|
-
表格识别模型相关
|参数名称|类型|默认参数|意义|
| :---: | :---: | :---: | :---: |
|table_model_dir|string|-|表格识别模型inference model地址|
|table_char_dict_path|string|../../ppocr/utils/dict/table_structure_dict.txt|字典文件|
|table_char_dict_path|string|../../ppocr/utils/dict/table_structure_dict
_ch
.txt|字典文件|
|table_max_len|int|488|表格识别模型输入图像长边大小,最终网络输入图像大小为(table_max_len,table_max_len)|
|merge_no_span_structure|bool|true|是否合并
<td>
和
</td>
为
<td></td>
|
...
...
@@ -378,11 +414,51 @@ predict img: ../../doc/imgs/12.jpg
The detection visualized image saved
in
./output//12.jpg
```
-
table
-
layout+
table
```
bash
predict img: ../../ppstructure/docs/table/table.jpg
0
type
: table, region:
[
0,0,371,293], res: <html><body><table><thead><
tr
>
<td>Methods</td><td>R</td><td>P</td><td>F</td><td>FPS</td></tr></thead><tbody><
tr
>
<td>SegLink
[
26]</td><td>70.0</td><td>86.0</td><td>77.0</td><td>8.9</td></tr><
tr
>
<td>PixelLink
[
4]</td><td>73.2</td><td>83.0</td><td>77.8</td><td>-</td></tr><
tr
>
<td>TextSnake
[
18]</td><td>73.9</td><td>83.2</td><td>78.3</td><td>1.1</td></tr><
tr
>
<td>TextField
[
37]</td><td>75.9</td><td>87.4</td><td>81.3</td><td>5.2 </td></tr><
tr
>
<td>MSR[38]</td><td>76.7</td><td>87.4</td><td>81.7</td><td>-</td></tr><
tr
>
<td>FTSN
[
3]</td><td>77.1</td><td>87.6</td><td>82.0</td><td>-</td></tr><
tr
>
<td>LSE[30]</td><td>81.7</td><td>84.2</td><td>82.9</td><td>-</td></tr><
tr
>
<td>CRAFT
[
2]</td><td>78.2</td><td>88.2</td><td>82.9</td><td>8.6</td></tr><
tr
>
<td>MCN
[
16]</td><td>79</td><td>88</td><td>83</td><td>-</td></tr><
tr
>
<td>ATRR[35]</td><td>82.1</td><td>85.2</td><td>83.6</td><td>-</td></tr><
tr
>
<td>PAN
[
34]</td><td>83.8</td><td>84.4</td><td>84.1</td><td>30.2</td></tr><
tr
>
<td>DB[12]</td><td>79.2</td><td>91.5</td><td>84.9</td><td>32.0</td></tr><
tr
>
<td>DRRG
[
41]</td><td>82.30</td><td>88.05</td><td>85.08</td><td>-</td></tr><
tr
>
<td>Ours
(
SynText
)
</td><td>80.68</td><td>85.40</td><td>82.97</td><td>12.68</td></tr><
tr
>
<td>Ours
(
MLT-17
)
</td><td>84.54</td><td>86.62</td><td>85.57</td><td>12.31</td></tr></tbody></table></body></html>
predict img: ../../ppstructure/docs/table/1.png
0
type
: text, region:
[
12,729,410,848], score: 0.781044, res: count of ocr result is : 7
**********
print ocr result
**********
0 det boxes:
[[
4,1],[79,1],[79,12],[4,12]] rec text: CTW1500. rec score: 0.769472
...
6 det boxes:
[[
4,99],[391,99],[391,112],[4,112]] rec text: sate-of-the-artmethods[12.34.36l.ourapproachachieves rec score: 0.90414
**********
end print ocr result
**********
1
type
: text, region:
[
69,342,342,359], score: 0.703666, res: count of ocr result is : 1
**********
print ocr result
**********
0 det boxes:
[[
8,2],[269,2],[269,13],[8,13]] rec text: Table6.Experimentalresults on CTW-1500 rec score: 0.890454
**********
end print ocr result
**********
2
type
: text, region:
[
70,316,706,332], score: 0.659738, res: count of ocr result is : 2
**********
print ocr result
**********
0 det boxes:
[[
373,2],[630,2],[630,11],[373,11]] rec text: oroposals.andthegreencontoursarefinal rec score: 0.919729
1 det boxes:
[[
8,3],[357,3],[357,11],[8,11]] rec text: Visualexperimentalresultshebluecontoursareboundar rec score: 0.915963
**********
end print ocr result
**********
3
type
: text, region:
[
489,342,789,359], score: 0.630538, res: count of ocr result is : 1
**********
print ocr result
**********
0 det boxes:
[[
8,2],[294,2],[294,14],[8,14]] rec text: Table7.Experimentalresults onMSRA-TD500 rec score: 0.942251
**********
end print ocr result
**********
4
type
: text, region:
[
444,751,841,848], score: 0.607345, res: count of ocr result is : 5
**********
print ocr result
**********
0 det boxes:
[[
19,3],[389,3],[389,17],[19,17]] rec text: Inthispaper,weproposeanovel adaptivebound rec score: 0.941031
1 det boxes:
[[
4,22],[390,22],[390,36],[4,36]] rec text: aryproposalnetworkforarbitraryshapetextdetection rec score: 0.960172
2 det boxes:
[[
4,42],[392,42],[392,56],[4,56]] rec text: whichadoptanboundaryproposalmodeltogeneratecoarse rec score: 0.934647
3 det boxes:
[[
4,61],[389,61],[389,75],[4,75]] rec text: ooundaryproposals,andthenadoptanadaptiveboundary rec score: 0.946296
4 det boxes:
[[
5,80],[387,80],[387,93],[5,93]] rec text: leformationmodelcombinedwithGCNandRNNtoper rec score: 0.952401
**********
end print ocr result
**********
5
type
: title, region:
[
444,705,564,724], score: 0.785429, res: count of ocr result is : 1
**********
print ocr result
**********
0 det boxes:
[[
6,2],[113,2],[113,14],[6,14]] rec text: 5.Conclusion rec score: 0.856903
**********
end print ocr result
**********
6
type
: table, region:
[
14,360,402,711], score: 0.963643, res: <html><body><table><thead><
tr
>
<td>Methods</td><td>Ext</td><td>R</td><td>P</td><td>F</td><td>FPS</td></tr></thead><tbody><
tr
>
<td>TextSnake
[
18]</td><td>Syn</td><td>85.3</td><td>67.9</td><td>75.6</td><td></td></tr><
tr
>
<td>CSE
[
17]</td><td>MiLT</td><td>76.1</td><td>78.7</td><td>77.4</td><td>0.38</td></tr><
tr
>
<td>LOMO[40]</td><td>Syn</td><td>76.5</td><td>85.7</td><td>80.8</td><td>4.4</td></tr><
tr
>
<td>ATRR[35]</td><td>Sy-</td><td>80.2</td><td>80.1</td><td>80.1</td><td>-</td></tr><
tr
>
<td>SegLink++
[
28]</td><td>Syn</td><td>79.8</td><td>82.8</td><td>81.3</td><td>-</td></tr><
tr
>
<td>TextField
[
37]</td><td>Syn</td><td>79.8</td><td>83.0</td><td>81.4</td><td>6.0</td></tr><
tr
>
<td>MSR[38]</td><td>Syn</td><td>79.0</td><td>84.1</td><td>81.5</td><td>4.3</td></tr><
tr
>
<td>PSENet-1s
[
33]</td><td>MLT</td><td>79.7</td><td>84.8</td><td>82.2</td><td>3.9</td></tr><
tr
>
<td>DB
[
12]</td><td>Syn</td><td>80.2</td><td>86.9</td><td>83.4</td><td>22.0</td></tr><
tr
>
<td>CRAFT
[
2]</td><td>Syn</td><td>81.1</td><td>86.0</td><td>83.5</td><td>-</td></tr><
tr
>
<td>TextDragon
[
5]</td><td>MLT+</td><td>82.8</td><td>84.5</td><td>83.6</td><td></td></tr><
tr
>
<td>PAN
[
34]</td><td>Syn</td><td>81.2</td><td>86.4</td><td>83.7</td><td>39.8</td></tr><
tr
>
<td>ContourNet
[
36]</td><td></td><td>84.1</td><td>83.7</td><td>83.9</td><td>4.5</td></tr><
tr
>
<td>DRRG
[
41]</td><td>MLT</td><td>83.02</td><td>85.93</td><td>84.45</td><td>-</td></tr><
tr
>
<td>TextPerception[23]</td><td>Syn</td><td>81.9</td><td>87.5</td><td>84.6</td><td></td></tr><
tr
>
<td>Ours</td><td> Syn</td><td>80.57</td><td>87.66</td><td>83.97</td><td>12.08</td></tr><
tr
>
<td>Ours</td><td></td><td>81.45</td><td>87.81</td><td>84.51</td><td>12.15</td></tr><
tr
>
<td>Ours</td><td>MLT</td><td>83.60</td><td>86.45</td><td>85.00</td><td>12.21</td></tr></tbody></table></body></html>
The table visualized image saved
in
./output//6_1.png
7
type
: table, region:
[
462,359,820,657], score: 0.953917, res: <html><body><table><thead><
tr
>
<td>Methods</td><td>R</td><td>P</td><td>F</td><td>FPS</td></tr></thead><tbody><
tr
>
<td>SegLink
[
26]</td><td>70.0</td><td>86.0</td><td>77.0</td><td>8.9</td></tr><
tr
>
<td>PixelLink
[
4]</td><td>73.2</td><td>83.0</td><td>77.8</td><td>-</td></tr><
tr
>
<td>TextSnake
[
18]</td><td>73.9</td><td>83.2</td><td>78.3</td><td>1.1</td></tr><
tr
>
<td>TextField
[
37]</td><td>75.9</td><td>87.4</td><td>81.3</td><td>5.2 </td></tr><
tr
>
<td>MSR[38]</td><td>76.7</td><td>87.4</td><td>81.7</td><td>-</td></tr><
tr
>
<td>FTSN[3]</td><td>77.1</td><td>87.6</td><td>82.0</td><td>:</td></tr><
tr
>
<td>LSE[30]</td><td>81.7</td><td>84.2</td><td>82.9</td><td></td></tr><
tr
>
<td>CRAFT
[
2]</td><td>78.2</td><td>88.2</td><td>82.9</td><td>8.6</td></tr><
tr
>
<td>MCN
[
16]</td><td>79</td><td>88</td><td>83</td><td>-</td></tr><
tr
>
<td>ATRR[35]</td><td>82.1</td><td>85.2</td><td>83.6</td><td>-</td></tr><
tr
>
<td>PAN
[
34]</td><td>83.8</td><td>84.4</td><td>84.1</td><td>30.2</td></tr><
tr
>
<td>DB[12]</td><td>79.2</td><td>91.5</td><td>84.9</td><td>32.0</td></tr><
tr
>
<td>DRRG
[
41]</td><td>82.30</td><td>88.05</td><td>85.08</td><td>-</td></tr><
tr
>
<td>Ours
(
SynText
)
</td><td>80.68</td><td>85.40</td><td>82.97</td><td>12.68</td></tr><
tr
>
<td>Ours
(
MLT-17
)
</td><td>84.54</td><td>86.62</td><td>85.57</td><td>12.31</td></tr></tbody></table></body></html>
The table visualized image saved
in
./output//7_1.png
8
type
: figure, region:
[
14,3,836,310], score: 0.969443, res: count of ocr result is : 26
**********
print ocr result
**********
0 det boxes:
[[
506,14],[539,15],[539,22],[506,21]] rec text: E rec score: 0.318073
...
25 det boxes:
[[
680,290],[759,288],[759,303],[680,305]] rec text:
(
d
)
CTW1500 rec score: 0.95911
**********
end print ocr result
**********
```
<a
name=
"3"
></a>
...
...
deploy/cpp_infer/src/args.cpp
浏览文件 @
0663b039
...
...
@@ -51,6 +51,13 @@ DEFINE_string(rec_char_dict_path, "../../ppocr/utils/ppocr_keys_v1.txt",
DEFINE_int32
(
rec_img_h
,
48
,
"rec image height"
);
DEFINE_int32
(
rec_img_w
,
320
,
"rec image width"
);
// layout model related
// Command-line flags consumed by the layout-analysis stage.
DEFINE_string(layout_model_dir, "",
              "Path of layout analysis inference model.");
DEFINE_string(layout_dict_path,
              "../../ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt",
              "Path of dictionary.");
DEFINE_double(layout_score_threshold, 0.5, "Threshold of score.");
DEFINE_double(layout_nms_threshold, 0.5, "Threshold of nms.");
// structure model related
// Command-line flags consumed by the table-structure stage.
DEFINE_string(table_model_dir, "",
              "Path of table structure inference model.");
DEFINE_int32(table_max_len, 488, "max len size of input image.");
...
...
@@ -65,4 +72,5 @@ DEFINE_string(table_char_dict_path,
DEFINE_bool
(
det
,
true
,
"Whether use det in forward."
);
DEFINE_bool
(
rec
,
true
,
"Whether use rec in forward."
);
DEFINE_bool
(
cls
,
false
,
"Whether use cls in forward."
);
DEFINE_bool
(
table
,
false
,
"Whether use table structure in forward."
);
\ No newline at end of file
DEFINE_bool
(
table
,
false
,
"Whether use table structure in forward."
);
DEFINE_bool
(
layout
,
false
,
"Whether use layout analysis in forward."
);
\ No newline at end of file
deploy/cpp_infer/src/main.cpp
浏览文件 @
0663b039
...
...
@@ -65,9 +65,18 @@ void check_params() {
exit
(
1
);
}
}
if
(
FLAGS_layout
)
{
if
(
FLAGS_layout_model_dir
.
empty
()
||
FLAGS_image_dir
.
empty
())
{
std
::
cout
<<
"Usage[layout]: ./ppocr "
<<
"--layout_model_dir=/PATH/TO/LAYOUT_INFERENCE_MODEL/ "
<<
"--image_dir=/PATH/TO/INPUT/IMAGE/"
<<
std
::
endl
;
exit
(
1
);
}
}
if
(
FLAGS_precision
!=
"fp32"
&&
FLAGS_precision
!=
"fp16"
&&
FLAGS_precision
!=
"int8"
)
{
cout
<<
"precison should be 'fp32'(default), 'fp16' or 'int8'. "
<<
endl
;
std
::
cout
<<
"precison should be 'fp32'(default), 'fp16' or 'int8'. "
<<
std
::
endl
;
exit
(
1
);
}
}
...
...
@@ -75,71 +84,94 @@ void check_params() {
void
ocr
(
std
::
vector
<
cv
::
String
>
&
cv_all_img_names
)
{
PPOCR
ocr
=
PPOCR
();
std
::
vector
<
std
::
vector
<
OCRPredictResult
>>
ocr_results
=
ocr
.
ocr
(
cv_all_img_names
,
FLAGS_det
,
FLAGS_rec
,
FLAGS_cls
);
if
(
FLAGS_benchmark
)
{
ocr
.
reset_timer
();
}
std
::
vector
<
cv
::
Mat
>
img_list
;
std
::
vector
<
cv
::
String
>
img_names
;
for
(
int
i
=
0
;
i
<
cv_all_img_names
.
size
();
++
i
)
{
if
(
FLAGS_benchmark
)
{
cout
<<
cv_all_img_names
[
i
]
<<
'\t'
;
if
(
FLAGS_rec
&&
FLAGS_det
)
{
Utility
::
print_result
(
ocr_results
[
i
]);
}
else
if
(
FLAGS_det
)
{
for
(
int
n
=
0
;
n
<
ocr_results
[
i
].
size
();
n
++
)
{
for
(
int
m
=
0
;
m
<
ocr_results
[
i
][
n
].
box
.
size
();
m
++
)
{
cout
<<
ocr_results
[
i
][
n
].
box
[
m
][
0
]
<<
' '
<<
ocr_results
[
i
][
n
].
box
[
m
][
1
]
<<
' '
;
}
}
cout
<<
endl
;
}
else
{
Utility
::
print_result
(
ocr_results
[
i
]);
}
}
else
{
cout
<<
cv_all_img_names
[
i
]
<<
"
\n
"
;
Utility
::
print_result
(
ocr_results
[
i
]);
if
(
FLAGS_visualize
&&
FLAGS_det
)
{
cv
::
Mat
srcimg
=
cv
::
imread
(
cv_all_img_names
[
i
],
cv
::
IMREAD_COLOR
);
if
(
!
srcimg
.
data
)
{
std
::
cerr
<<
"[ERROR] image read failed! image path: "
<<
cv_all_img_names
[
i
]
<<
endl
;
exit
(
1
);
}
std
::
string
file_name
=
Utility
::
basename
(
cv_all_img_names
[
i
]);
cv
::
Mat
img
=
cv
::
imread
(
cv_all_img_names
[
i
],
cv
::
IMREAD_COLOR
);
if
(
!
img
.
data
)
{
std
::
cerr
<<
"[ERROR] image read failed! image path: "
<<
cv_all_img_names
[
i
]
<<
std
::
endl
;
continue
;
}
img_list
.
push_back
(
img
);
img_names
.
push_back
(
cv_all_img_names
[
i
]);
}
Utility
::
VisualizeBboxes
(
srcimg
,
ocr_results
[
i
],
FLAGS_output
+
"/"
+
file_name
);
}
cout
<<
"***************************"
<<
endl
;
std
::
vector
<
std
::
vector
<
OCRPredictResult
>>
ocr_results
=
ocr
.
ocr
(
img_list
,
FLAGS_det
,
FLAGS_rec
,
FLAGS_cls
);
for
(
int
i
=
0
;
i
<
img_names
.
size
();
++
i
)
{
std
::
cout
<<
"predict img: "
<<
cv_all_img_names
[
i
]
<<
std
::
endl
;
Utility
::
print_result
(
ocr_results
[
i
]);
if
(
FLAGS_visualize
&&
FLAGS_det
)
{
std
::
string
file_name
=
Utility
::
basename
(
img_names
[
i
]);
cv
::
Mat
srcimg
=
img_list
[
i
];
Utility
::
VisualizeBboxes
(
srcimg
,
ocr_results
[
i
],
FLAGS_output
+
"/"
+
file_name
);
}
}
if
(
FLAGS_benchmark
)
{
ocr
.
benchmark_log
(
cv_all_img_names
.
size
());
}
}
void
structure
(
std
::
vector
<
cv
::
String
>
&
cv_all_img_names
)
{
PaddleOCR
::
PaddleStructure
engine
=
PaddleOCR
::
PaddleStructure
();
std
::
vector
<
std
::
vector
<
StructurePredictResult
>>
structure_results
=
engine
.
structure
(
cv_all_img_names
,
false
,
FLAGS_table
);
if
(
FLAGS_benchmark
)
{
engine
.
reset_timer
();
}
for
(
int
i
=
0
;
i
<
cv_all_img_names
.
size
();
i
++
)
{
cout
<<
"predict img: "
<<
cv_all_img_names
[
i
]
<<
endl
;
cv
::
Mat
srcimg
=
cv
::
imread
(
cv_all_img_names
[
i
],
cv
::
IMREAD_COLOR
);
for
(
int
j
=
0
;
j
<
structure_results
[
i
].
size
();
j
++
)
{
std
::
cout
<<
j
<<
"
\t
type: "
<<
structure_results
[
i
][
j
].
type
std
::
cout
<<
"predict img: "
<<
cv_all_img_names
[
i
]
<<
std
::
endl
;
cv
::
Mat
img
=
cv
::
imread
(
cv_all_img_names
[
i
],
cv
::
IMREAD_COLOR
);
if
(
!
img
.
data
)
{
std
::
cerr
<<
"[ERROR] image read failed! image path: "
<<
cv_all_img_names
[
i
]
<<
std
::
endl
;
continue
;
}
std
::
vector
<
StructurePredictResult
>
structure_results
=
engine
.
structure
(
img
,
FLAGS_layout
,
FLAGS_table
,
FLAGS_det
&&
FLAGS_rec
);
for
(
int
j
=
0
;
j
<
structure_results
.
size
();
j
++
)
{
std
::
cout
<<
j
<<
"
\t
type: "
<<
structure_results
[
j
].
type
<<
", region: ["
;
std
::
cout
<<
structure_results
[
i
][
j
].
box
[
0
]
<<
","
<<
structure_results
[
i
][
j
].
box
[
1
]
<<
","
<<
structure_results
[
i
][
j
].
box
[
2
]
<<
","
<<
structure_results
[
i
][
j
].
box
[
3
]
<<
"], res: "
;
if
(
structure_results
[
i
][
j
].
type
==
"table"
)
{
std
::
cout
<<
structure_results
[
i
][
j
].
html
<<
std
::
endl
;
std
::
string
file_name
=
Utility
::
basename
(
cv_all_img_names
[
i
]);
Utility
::
VisualizeBboxes
(
srcimg
,
structure_results
[
i
][
j
],
FLAGS_output
+
"/"
+
std
::
to_string
(
j
)
+
"_"
+
file_name
);
std
::
cout
<<
structure_results
[
j
].
box
[
0
]
<<
","
<<
structure_results
[
j
].
box
[
1
]
<<
","
<<
structure_results
[
j
].
box
[
2
]
<<
","
<<
structure_results
[
j
].
box
[
3
]
<<
"], score: "
;
std
::
cout
<<
structure_results
[
j
].
confidence
<<
", res: "
;
if
(
structure_results
[
j
].
type
==
"table"
)
{
std
::
cout
<<
structure_results
[
j
].
html
<<
std
::
endl
;
if
(
structure_results
[
j
].
cell_box
.
size
()
>
0
&&
FLAGS_visualize
)
{
std
::
string
file_name
=
Utility
::
basename
(
cv_all_img_names
[
i
]);
Utility
::
VisualizeBboxes
(
img
,
structure_results
[
j
],
FLAGS_output
+
"/"
+
std
::
to_string
(
j
)
+
"_"
+
file_name
);
}
}
else
{
Utility
::
print_result
(
structure_results
[
i
][
j
].
text_res
);
std
::
cout
<<
"count of ocr result is : "
<<
structure_results
[
j
].
text_res
.
size
()
<<
std
::
endl
;
if
(
structure_results
[
j
].
text_res
.
size
()
>
0
)
{
std
::
cout
<<
"********** print ocr result "
<<
"**********"
<<
std
::
endl
;
Utility
::
print_result
(
structure_results
[
j
].
text_res
);
std
::
cout
<<
"********** end print ocr result "
<<
"**********"
<<
std
::
endl
;
}
}
}
}
if
(
FLAGS_benchmark
)
{
engine
.
benchmark_log
(
cv_all_img_names
.
size
());
}
}
int
main
(
int
argc
,
char
**
argv
)
{
...
...
@@ -149,19 +181,22 @@ int main(int argc, char **argv) {
if
(
!
Utility
::
PathExists
(
FLAGS_image_dir
))
{
std
::
cerr
<<
"[ERROR] image path not exist! image_dir: "
<<
FLAGS_image_dir
<<
endl
;
<<
std
::
endl
;
exit
(
1
);
}
std
::
vector
<
cv
::
String
>
cv_all_img_names
;
cv
::
glob
(
FLAGS_image_dir
,
cv_all_img_names
);
std
::
cout
<<
"total images num: "
<<
cv_all_img_names
.
size
()
<<
endl
;
std
::
cout
<<
"total images num: "
<<
cv_all_img_names
.
size
()
<<
std
::
endl
;
if
(
!
Utility
::
PathExists
(
FLAGS_output
))
{
Utility
::
CreateDir
(
FLAGS_output
);
}
if
(
FLAGS_type
==
"ocr"
)
{
ocr
(
cv_all_img_names
);
}
else
if
(
FLAGS_type
==
"structure"
)
{
structure
(
cv_all_img_names
);
}
else
{
std
::
cout
<<
"only value in ['ocr','structure'] is supported"
<<
endl
;
std
::
cout
<<
"only value in ['ocr','structure'] is supported"
<<
std
::
endl
;
}
}
deploy/cpp_infer/src/ocr_cls.cpp
浏览文件 @
0663b039
...
...
@@ -32,7 +32,7 @@ void Classifier::Run(std::vector<cv::Mat> img_list,
for
(
int
beg_img_no
=
0
;
beg_img_no
<
img_num
;
beg_img_no
+=
this
->
cls_batch_num_
)
{
auto
preprocess_start
=
std
::
chrono
::
steady_clock
::
now
();
int
end_img_no
=
min
(
img_num
,
beg_img_no
+
this
->
cls_batch_num_
);
int
end_img_no
=
std
::
min
(
img_num
,
beg_img_no
+
this
->
cls_batch_num_
);
int
batch_num
=
end_img_no
-
beg_img_no
;
// preprocess
std
::
vector
<
cv
::
Mat
>
norm_img_batch
;
...
...
@@ -97,7 +97,7 @@ void Classifier::Run(std::vector<cv::Mat> img_list,
}
void
Classifier
::
LoadModel
(
const
std
::
string
&
model_dir
)
{
Analysis
Config
config
;
paddle_infer
::
Config
config
;
config
.
SetModel
(
model_dir
+
"/inference.pdmodel"
,
model_dir
+
"/inference.pdiparams"
);
...
...
@@ -112,9 +112,9 @@ void Classifier::LoadModel(const std::string &model_dir) {
precision
=
paddle_infer
::
Config
::
Precision
::
kInt8
;
}
config
.
EnableTensorRtEngine
(
1
<<
20
,
10
,
3
,
precision
,
false
,
false
);
if
(
!
Utility
::
PathExists
(
"./trt_cls_shape.txt"
)){
if
(
!
Utility
::
PathExists
(
"./trt_cls_shape.txt"
))
{
config
.
CollectShapeRangeInfo
(
"./trt_cls_shape.txt"
);
}
else
{
}
else
{
config
.
EnableTunedTensorRtDynamicShape
(
"./trt_cls_shape.txt"
,
true
);
}
}
...
...
@@ -136,6 +136,6 @@ void Classifier::LoadModel(const std::string &model_dir) {
config
.
EnableMemoryOptim
();
config
.
DisableGlogInfo
();
this
->
predictor_
=
CreatePredictor
(
config
);
this
->
predictor_
=
paddle_infer
::
CreatePredictor
(
config
);
}
}
// namespace PaddleOCR
deploy/cpp_infer/src/ocr_det.cpp
浏览文件 @
0663b039
...
...
@@ -33,12 +33,11 @@ void DBDetector::LoadModel(const std::string &model_dir) {
precision
=
paddle_infer
::
Config
::
Precision
::
kInt8
;
}
config
.
EnableTensorRtEngine
(
1
<<
30
,
1
,
20
,
precision
,
false
,
false
);
if
(
!
Utility
::
PathExists
(
"./trt_det_shape.txt"
)){
if
(
!
Utility
::
PathExists
(
"./trt_det_shape.txt"
))
{
config
.
CollectShapeRangeInfo
(
"./trt_det_shape.txt"
);
}
else
{
}
else
{
config
.
EnableTunedTensorRtDynamicShape
(
"./trt_det_shape.txt"
,
true
);
}
}
}
else
{
config
.
DisableGpu
();
...
...
@@ -59,7 +58,7 @@ void DBDetector::LoadModel(const std::string &model_dir) {
config
.
EnableMemoryOptim
();
// config.DisableGlogInfo();
this
->
predictor_
=
CreatePredictor
(
config
);
this
->
predictor_
=
paddle_infer
::
CreatePredictor
(
config
);
}
void
DBDetector
::
Run
(
cv
::
Mat
&
img
,
...
...
deploy/cpp_infer/src/ocr_rec.cpp
浏览文件 @
0663b039
...
...
@@ -37,7 +37,7 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list,
for
(
int
beg_img_no
=
0
;
beg_img_no
<
img_num
;
beg_img_no
+=
this
->
rec_batch_num_
)
{
auto
preprocess_start
=
std
::
chrono
::
steady_clock
::
now
();
int
end_img_no
=
min
(
img_num
,
beg_img_no
+
this
->
rec_batch_num_
);
int
end_img_no
=
std
::
min
(
img_num
,
beg_img_no
+
this
->
rec_batch_num_
);
int
batch_num
=
end_img_no
-
beg_img_no
;
int
imgH
=
this
->
rec_image_shape_
[
1
];
int
imgW
=
this
->
rec_image_shape_
[
2
];
...
...
@@ -46,7 +46,7 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list,
int
h
=
img_list
[
indices
[
ino
]].
rows
;
int
w
=
img_list
[
indices
[
ino
]].
cols
;
float
wh_ratio
=
w
*
1.0
/
h
;
max_wh_ratio
=
max
(
max_wh_ratio
,
wh_ratio
);
max_wh_ratio
=
std
::
max
(
max_wh_ratio
,
wh_ratio
);
}
int
batch_width
=
imgW
;
...
...
@@ -60,7 +60,7 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list,
this
->
normalize_op_
.
Run
(
&
resize_img
,
this
->
mean_
,
this
->
scale_
,
this
->
is_scale_
);
norm_img_batch
.
push_back
(
resize_img
);
batch_width
=
max
(
resize_img
.
cols
,
batch_width
);
batch_width
=
std
::
max
(
resize_img
.
cols
,
batch_width
);
}
std
::
vector
<
float
>
input
(
batch_num
*
3
*
imgH
*
batch_width
,
0.0
f
);
...
...
@@ -115,7 +115,7 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list,
last_index
=
argmax_idx
;
}
score
/=
count
;
if
(
isnan
(
score
))
{
if
(
std
::
isnan
(
score
))
{
continue
;
}
rec_texts
[
indices
[
beg_img_no
+
m
]]
=
str_res
;
...
...
@@ -130,7 +130,6 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list,
}
void
CRNNRecognizer
::
LoadModel
(
const
std
::
string
&
model_dir
)
{
// AnalysisConfig config;
paddle_infer
::
Config
config
;
config
.
SetModel
(
model_dir
+
"/inference.pdmodel"
,
model_dir
+
"/inference.pdiparams"
);
...
...
@@ -147,12 +146,11 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) {
if
(
this
->
precision_
==
"int8"
)
{
precision
=
paddle_infer
::
Config
::
Precision
::
kInt8
;
}
if
(
!
Utility
::
PathExists
(
"./trt_rec_shape.txt"
)){
if
(
!
Utility
::
PathExists
(
"./trt_rec_shape.txt"
))
{
config
.
CollectShapeRangeInfo
(
"./trt_rec_shape.txt"
);
}
else
{
}
else
{
config
.
EnableTunedTensorRtDynamicShape
(
"./trt_rec_shape.txt"
,
true
);
}
}
}
else
{
config
.
DisableGpu
();
...
...
@@ -177,7 +175,7 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) {
config
.
EnableMemoryOptim
();
// config.DisableGlogInfo();
this
->
predictor_
=
CreatePredictor
(
config
);
this
->
predictor_
=
paddle_infer
::
CreatePredictor
(
config
);
}
}
// namespace PaddleOCR
deploy/cpp_infer/src/paddleocr.cpp
浏览文件 @
0663b039
...
...
@@ -16,7 +16,7 @@
#include <include/paddleocr.h>
#include "auto_log/autolog.h"
#include <numeric>
namespace
PaddleOCR
{
PPOCR
::
PPOCR
()
{
...
...
@@ -44,8 +44,71 @@ PPOCR::PPOCR() {
}
};
void
PPOCR
::
det
(
cv
::
Mat
img
,
std
::
vector
<
OCRPredictResult
>
&
ocr_results
,
std
::
vector
<
double
>
&
times
)
{
// Runs the OCR pipeline over a batch of already-decoded images and returns
// one result list per input image, in input order.
//
// img_list: images to process (taken by value).
// det:      when true, every image is forwarded to the single-image
//           overload, which performs text detection first. When false, each
//           image is treated as exactly one text region.
// rec:      when true, text recognition is run on the regions.
// cls:      when true and a classifier is loaded, the angle classifier is
//           run and regions flagged as flipped are rotated before
//           recognition.
//
// Returns a vector with one entry per image; with det == false each entry
// holds exactly one OCRPredictResult.
std::vector<std::vector<OCRPredictResult>>
PPOCR::ocr(std::vector<cv::Mat> img_list, bool det, bool rec, bool cls) {
  std::vector<std::vector<OCRPredictResult>> ocr_results;
  ocr_results.reserve(img_list.size());

  if (det) {
    // Detection path: each image goes through the full per-image pipeline.
    for (cv::Mat &img : img_list) {
      ocr_results.push_back(this->ocr(img, true, rec, cls));
    }
    return ocr_results;
  }

  // No detection: one result slot per input image.
  std::vector<OCRPredictResult> ocr_result(img_list.size());

  if (cls && this->classifier_ != nullptr) {
    this->cls(img_list, ocr_result);
    for (std::size_t i = 0; i < img_list.size(); ++i) {
      const bool flipped =
          ocr_result[i].cls_label % 2 == 1 &&
          ocr_result[i].cls_score > this->classifier_->cls_thresh;
      if (flipped) {
        // cv::ROTATE_180 is the named form of the rotate code 1.
        cv::rotate(img_list[i], img_list[i], cv::ROTATE_180);
      }
    }
  }

  if (rec) {
    this->rec(img_list, ocr_result);
  }

  // Wrap every per-image result in its own single-element list so the
  // return shape matches the detection path.
  for (const OCRPredictResult &res : ocr_result) {
    ocr_results.emplace_back(std::size_t{1}, res);
  }
  return ocr_results;
}
// Runs detection, optional angle classification and optional recognition
// on a single image.
//
// img: image to process.
// det: accepted for signature symmetry with the batch overload but not
//      consulted here; detection is always performed.
// rec: when true, text recognition is run on every detected region.
// cls: when true and a classifier is loaded, the angle classifier is run
//      and regions flagged as flipped are rotated before recognition.
//
// Returns one OCRPredictResult per detected text region.
std::vector<OCRPredictResult> PPOCR::ocr(cv::Mat img, bool det, bool rec,
                                         bool cls) {
  std::vector<OCRPredictResult> ocr_result;

  // det
  this->det(img, ocr_result);

  // crop image: one crop per detected box, in the same order as ocr_result
  std::vector<cv::Mat> img_list;
  img_list.reserve(ocr_result.size());
  for (OCRPredictResult &region : ocr_result) {
    img_list.push_back(Utility::GetRotateCropImage(img, region.box));
  }

  // cls
  if (cls && this->classifier_ != nullptr) {
    this->cls(img_list, ocr_result);
    for (std::size_t i = 0; i < img_list.size(); ++i) {
      const bool flipped =
          ocr_result[i].cls_label % 2 == 1 &&
          ocr_result[i].cls_score > this->classifier_->cls_thresh;
      if (flipped) {
        // cv::ROTATE_180 is the named form of the rotate code 1.
        cv::rotate(img_list[i], img_list[i], cv::ROTATE_180);
      }
    }
  }

  // rec
  if (rec) {
    this->rec(img_list, ocr_result);
  }
  return ocr_result;
}
void
PPOCR
::
det
(
cv
::
Mat
img
,
std
::
vector
<
OCRPredictResult
>
&
ocr_results
)
{
std
::
vector
<
std
::
vector
<
std
::
vector
<
int
>>>
boxes
;
std
::
vector
<
double
>
det_times
;
...
...
@@ -58,14 +121,13 @@ void PPOCR::det(cv::Mat img, std::vector<OCRPredictResult> &ocr_results,
}
// sort boxes from top to bottom, from left to right
Utility
::
sorted_boxes
(
ocr_results
);
t
imes
[
0
]
+=
det_times
[
0
];
t
imes
[
1
]
+=
det_times
[
1
];
t
imes
[
2
]
+=
det_times
[
2
];
t
his
->
time_info_det
[
0
]
+=
det_times
[
0
];
t
his
->
time_info_det
[
1
]
+=
det_times
[
1
];
t
his
->
time_info_det
[
2
]
+=
det_times
[
2
];
}
void
PPOCR
::
rec
(
std
::
vector
<
cv
::
Mat
>
img_list
,
std
::
vector
<
OCRPredictResult
>
&
ocr_results
,
std
::
vector
<
double
>
&
times
)
{
std
::
vector
<
OCRPredictResult
>
&
ocr_results
)
{
std
::
vector
<
std
::
string
>
rec_texts
(
img_list
.
size
(),
""
);
std
::
vector
<
float
>
rec_text_scores
(
img_list
.
size
(),
0
);
std
::
vector
<
double
>
rec_times
;
...
...
@@ -75,14 +137,13 @@ void PPOCR::rec(std::vector<cv::Mat> img_list,
ocr_results
[
i
].
text
=
rec_texts
[
i
];
ocr_results
[
i
].
score
=
rec_text_scores
[
i
];
}
t
imes
[
0
]
+=
rec_times
[
0
];
t
imes
[
1
]
+=
rec_times
[
1
];
t
imes
[
2
]
+=
rec_times
[
2
];
t
his
->
time_info_rec
[
0
]
+=
rec_times
[
0
];
t
his
->
time_info_rec
[
1
]
+=
rec_times
[
1
];
t
his
->
time_info_rec
[
2
]
+=
rec_times
[
2
];
}
void
PPOCR
::
cls
(
std
::
vector
<
cv
::
Mat
>
img_list
,
std
::
vector
<
OCRPredictResult
>
&
ocr_results
,
std
::
vector
<
double
>
&
times
)
{
std
::
vector
<
OCRPredictResult
>
&
ocr_results
)
{
std
::
vector
<
int
>
cls_labels
(
img_list
.
size
(),
0
);
std
::
vector
<
float
>
cls_scores
(
img_list
.
size
(),
0
);
std
::
vector
<
double
>
cls_times
;
...
...
@@ -92,125 +153,43 @@ void PPOCR::cls(std::vector<cv::Mat> img_list,
ocr_results
[
i
].
cls_label
=
cls_labels
[
i
];
ocr_results
[
i
].
cls_score
=
cls_scores
[
i
];
}
t
ime
s
[
0
]
+=
cls_times
[
0
];
t
ime
s
[
1
]
+=
cls_times
[
1
];
t
ime
s
[
2
]
+=
cls_times
[
2
];
t
his
->
time_info_cl
s
[
0
]
+=
cls_times
[
0
];
t
his
->
time_info_cl
s
[
1
]
+=
cls_times
[
1
];
t
his
->
time_info_cl
s
[
2
]
+=
cls_times
[
2
];
}
std
::
vector
<
std
::
vector
<
OCRPredictResult
>>
PPOCR
::
ocr
(
std
::
vector
<
cv
::
String
>
cv_all_img_names
,
bool
det
,
bool
rec
,
bool
cls
)
{
std
::
vector
<
double
>
time_info_det
=
{
0
,
0
,
0
};
std
::
vector
<
double
>
time_info_rec
=
{
0
,
0
,
0
};
std
::
vector
<
double
>
time_info_cls
=
{
0
,
0
,
0
};
std
::
vector
<
std
::
vector
<
OCRPredictResult
>>
ocr_results
;
if
(
!
det
)
{
std
::
vector
<
OCRPredictResult
>
ocr_result
;
// read image
std
::
vector
<
cv
::
Mat
>
img_list
;
for
(
int
i
=
0
;
i
<
cv_all_img_names
.
size
();
++
i
)
{
cv
::
Mat
srcimg
=
cv
::
imread
(
cv_all_img_names
[
i
],
cv
::
IMREAD_COLOR
);
if
(
!
srcimg
.
data
)
{
std
::
cerr
<<
"[ERROR] image read failed! image path: "
<<
cv_all_img_names
[
i
]
<<
endl
;
exit
(
1
);
}
img_list
.
push_back
(
srcimg
);
OCRPredictResult
res
;
ocr_result
.
push_back
(
res
);
}
if
(
cls
&&
this
->
classifier_
!=
nullptr
)
{
this
->
cls
(
img_list
,
ocr_result
,
time_info_cls
);
for
(
int
i
=
0
;
i
<
img_list
.
size
();
i
++
)
{
if
(
ocr_result
[
i
].
cls_label
%
2
==
1
&&
ocr_result
[
i
].
cls_score
>
this
->
classifier_
->
cls_thresh
)
{
cv
::
rotate
(
img_list
[
i
],
img_list
[
i
],
1
);
}
}
}
if
(
rec
)
{
this
->
rec
(
img_list
,
ocr_result
,
time_info_rec
);
}
for
(
int
i
=
0
;
i
<
cv_all_img_names
.
size
();
++
i
)
{
std
::
vector
<
OCRPredictResult
>
ocr_result_tmp
;
ocr_result_tmp
.
push_back
(
ocr_result
[
i
]);
ocr_results
.
push_back
(
ocr_result_tmp
);
}
}
else
{
if
(
!
Utility
::
PathExists
(
FLAGS_output
)
&&
FLAGS_det
)
{
Utility
::
CreateDir
(
FLAGS_output
);
}
for
(
int
i
=
0
;
i
<
cv_all_img_names
.
size
();
++
i
)
{
std
::
vector
<
OCRPredictResult
>
ocr_result
;
if
(
!
FLAGS_benchmark
)
{
cout
<<
"predict img: "
<<
cv_all_img_names
[
i
]
<<
endl
;
}
cv
::
Mat
srcimg
=
cv
::
imread
(
cv_all_img_names
[
i
],
cv
::
IMREAD_COLOR
);
if
(
!
srcimg
.
data
)
{
std
::
cerr
<<
"[ERROR] image read failed! image path: "
<<
cv_all_img_names
[
i
]
<<
endl
;
exit
(
1
);
}
// det
this
->
det
(
srcimg
,
ocr_result
,
time_info_det
);
// crop image
std
::
vector
<
cv
::
Mat
>
img_list
;
for
(
int
j
=
0
;
j
<
ocr_result
.
size
();
j
++
)
{
cv
::
Mat
crop_img
;
crop_img
=
Utility
::
GetRotateCropImage
(
srcimg
,
ocr_result
[
j
].
box
);
img_list
.
push_back
(
crop_img
);
}
// cls
if
(
cls
&&
this
->
classifier_
!=
nullptr
)
{
this
->
cls
(
img_list
,
ocr_result
,
time_info_cls
);
for
(
int
i
=
0
;
i
<
img_list
.
size
();
i
++
)
{
if
(
ocr_result
[
i
].
cls_label
%
2
==
1
&&
ocr_result
[
i
].
cls_score
>
this
->
classifier_
->
cls_thresh
)
{
cv
::
rotate
(
img_list
[
i
],
img_list
[
i
],
1
);
}
}
}
// rec
if
(
rec
)
{
this
->
rec
(
img_list
,
ocr_result
,
time_info_rec
);
}
ocr_results
.
push_back
(
ocr_result
);
}
}
if
(
FLAGS_benchmark
)
{
this
->
log
(
time_info_det
,
time_info_rec
,
time_info_cls
,
cv_all_img_names
.
size
());
}
return
ocr_results
;
}
// namespace PaddleOCR
void
PPOCR
::
reset_timer
()
{
this
->
time_info_det
=
{
0
,
0
,
0
};
this
->
time_info_rec
=
{
0
,
0
,
0
};
this
->
time_info_cls
=
{
0
,
0
,
0
};
}
void
PPOCR
::
log
(
std
::
vector
<
double
>
&
det_times
,
std
::
vector
<
double
>
&
rec_times
,
std
::
vector
<
double
>
&
cls_times
,
int
img_num
)
{
if
(
det_times
[
0
]
+
det_times
[
1
]
+
det_times
[
2
]
>
0
)
{
void
PPOCR
::
benchmark_log
(
int
img_num
)
{
if
(
this
->
time_info_det
[
0
]
+
this
->
time_info_det
[
1
]
+
this
->
time_info_det
[
2
]
>
0
)
{
AutoLogger
autolog_det
(
"ocr_det"
,
FLAGS_use_gpu
,
FLAGS_use_tensorrt
,
FLAGS_enable_mkldnn
,
FLAGS_cpu_threads
,
1
,
"dynamic"
,
FLAGS_precision
,
det_times
,
img_num
);
FLAGS_precision
,
this
->
time_info_det
,
img_num
);
autolog_det
.
report
();
}
if
(
rec_times
[
0
]
+
rec_times
[
1
]
+
rec_times
[
2
]
>
0
)
{
if
(
this
->
time_info_rec
[
0
]
+
this
->
time_info_rec
[
1
]
+
this
->
time_info_rec
[
2
]
>
0
)
{
AutoLogger
autolog_rec
(
"ocr_rec"
,
FLAGS_use_gpu
,
FLAGS_use_tensorrt
,
FLAGS_enable_mkldnn
,
FLAGS_cpu_threads
,
FLAGS_rec_batch_num
,
"dynamic"
,
FLAGS_precision
,
rec_times
,
img_num
);
this
->
time_info_rec
,
img_num
);
autolog_rec
.
report
();
}
if
(
cls_times
[
0
]
+
cls_times
[
1
]
+
cls_times
[
2
]
>
0
)
{
if
(
this
->
time_info_cls
[
0
]
+
this
->
time_info_cls
[
1
]
+
this
->
time_info_cls
[
2
]
>
0
)
{
AutoLogger
autolog_cls
(
"ocr_cls"
,
FLAGS_use_gpu
,
FLAGS_use_tensorrt
,
FLAGS_enable_mkldnn
,
FLAGS_cpu_threads
,
FLAGS_cls_batch_num
,
"dynamic"
,
FLAGS_precision
,
cls_time
s
,
img_num
);
this
->
time_info_cl
s
,
img_num
);
autolog_cls
.
report
();
}
}
PPOCR
::~
PPOCR
()
{
if
(
this
->
detector_
!=
nullptr
)
{
delete
this
->
detector_
;
...
...
deploy/cpp_infer/src/paddlestructure.cpp
浏览文件 @
0663b039
...
...
@@ -16,14 +16,19 @@
#include <include/paddlestructure.h>
#include "auto_log/autolog.h"
#include <numeric>
#include <sys/stat.h>
namespace
PaddleOCR
{
PaddleStructure
::
PaddleStructure
()
{
if
(
FLAGS_layout
)
{
this
->
layout_model_
=
new
StructureLayoutRecognizer
(
FLAGS_layout_model_dir
,
FLAGS_use_gpu
,
FLAGS_gpu_id
,
FLAGS_gpu_mem
,
FLAGS_cpu_threads
,
FLAGS_enable_mkldnn
,
FLAGS_layout_dict_path
,
FLAGS_use_tensorrt
,
FLAGS_precision
,
FLAGS_layout_score_threshold
,
FLAGS_layout_nms_threshold
);
}
if
(
FLAGS_table
)
{
this
->
recognizer
_
=
new
StructureTableRecognizer
(
this
->
table_model
_
=
new
StructureTableRecognizer
(
FLAGS_table_model_dir
,
FLAGS_use_gpu
,
FLAGS_gpu_id
,
FLAGS_gpu_mem
,
FLAGS_cpu_threads
,
FLAGS_enable_mkldnn
,
FLAGS_table_char_dict_path
,
FLAGS_use_tensorrt
,
FLAGS_precision
,
FLAGS_table_batch_num
,
...
...
@@ -31,68 +36,63 @@ PaddleStructure::PaddleStructure() {
}
};
std
::
vector
<
std
::
vector
<
StructurePredictResult
>>
PaddleStructure
::
structure
(
std
::
vector
<
cv
::
String
>
cv_all_img_names
,
bool
layout
,
bool
table
)
{
std
::
vector
<
double
>
time_info_det
=
{
0
,
0
,
0
};
std
::
vector
<
double
>
time_info_rec
=
{
0
,
0
,
0
};
std
::
vector
<
double
>
time_info_cls
=
{
0
,
0
,
0
};
std
::
vector
<
double
>
time_info_table
=
{
0
,
0
,
0
};
std
::
vector
<
StructurePredictResult
>
PaddleStructure
::
structure
(
cv
::
Mat
srcimg
,
bool
layout
,
bool
table
,
bool
ocr
)
{
cv
::
Mat
img
;
srcimg
.
copyTo
(
img
);
std
::
vector
<
std
::
vector
<
StructurePredictResult
>
>
structure_results
;
std
::
vector
<
StructurePredictResult
>
structure_results
;
if
(
!
Utility
::
PathExists
(
FLAGS_output
)
&&
FLAGS_det
)
{
Utility
::
CreateDir
(
FLAGS_output
);
if
(
layout
)
{
this
->
layout
(
img
,
structure_results
);
}
else
{
StructurePredictResult
res
;
res
.
type
=
"table"
;
res
.
box
=
std
::
vector
<
float
>
(
4
,
0.0
);
res
.
box
[
2
]
=
img
.
cols
;
res
.
box
[
3
]
=
img
.
rows
;
structure_results
.
push_back
(
res
);
}
for
(
int
i
=
0
;
i
<
cv_all_img_names
.
size
();
++
i
)
{
std
::
vector
<
StructurePredictResult
>
structure_result
;
cv
::
Mat
srcimg
=
cv
::
imread
(
cv_all_img_names
[
i
],
cv
::
IMREAD_COLOR
);
if
(
!
srcimg
.
data
)
{
std
::
cerr
<<
"[ERROR] image read failed! image path: "
<<
cv_all_img_names
[
i
]
<<
endl
;
exit
(
1
);
}
if
(
layout
)
{
}
else
{
StructurePredictResult
res
;
res
.
type
=
"table"
;
res
.
box
=
std
::
vector
<
int
>
(
4
,
0
);
res
.
box
[
2
]
=
srcimg
.
cols
;
res
.
box
[
3
]
=
srcimg
.
rows
;
structure_result
.
push_back
(
res
);
}
cv
::
Mat
roi_img
;
for
(
int
i
=
0
;
i
<
structure_result
.
size
();
i
++
)
{
// crop image
roi_img
=
Utility
::
crop_image
(
srcimg
,
structure_result
[
i
].
box
);
if
(
structure_result
[
i
].
type
==
"table"
)
{
this
->
table
(
roi_img
,
structure_result
[
i
],
time_info_table
,
time_info_det
,
time_info_rec
,
time_info_cls
);
}
cv
::
Mat
roi_img
;
for
(
int
i
=
0
;
i
<
structure_results
.
size
();
i
++
)
{
// crop image
roi_img
=
Utility
::
crop_image
(
img
,
structure_results
[
i
].
box
);
if
(
structure_results
[
i
].
type
==
"table"
&&
table
)
{
this
->
table
(
roi_img
,
structure_results
[
i
]);
}
else
if
(
ocr
)
{
structure_results
[
i
].
text_res
=
this
->
ocr
(
roi_img
,
true
,
true
,
false
);
}
structure_results
.
push_back
(
structure_result
);
}
return
structure_results
;
};
// Runs the layout model on `img`, letting it fill `structure_result`, and
// adds the three timing values it reports to the running layout totals.
void PaddleStructure::layout(
    cv::Mat img, std::vector<StructurePredictResult> &structure_result) {
  std::vector<double> elapsed;
  this->layout_model_->Run(img, structure_result, elapsed);

  // The model reports exactly three stage timings; accumulate each slot.
  for (int stage = 0; stage < 3; ++stage) {
    this->time_info_layout[stage] += elapsed[stage];
  }
}
void
PaddleStructure
::
table
(
cv
::
Mat
img
,
StructurePredictResult
&
structure_result
,
std
::
vector
<
double
>
&
time_info_table
,
std
::
vector
<
double
>
&
time_info_det
,
std
::
vector
<
double
>
&
time_info_rec
,
std
::
vector
<
double
>
&
time_info_cls
)
{
StructurePredictResult
&
structure_result
)
{
// predict structure
std
::
vector
<
std
::
vector
<
std
::
string
>>
structure_html_tags
;
std
::
vector
<
float
>
structure_scores
(
1
,
0
);
std
::
vector
<
std
::
vector
<
std
::
vector
<
int
>>>
structure_boxes
;
std
::
vector
<
double
>
structure_imes
;
std
::
vector
<
double
>
structure_
t
imes
;
std
::
vector
<
cv
::
Mat
>
img_list
;
img_list
.
push_back
(
img
);
this
->
recognizer_
->
Run
(
img_list
,
structure_html_tags
,
structure_scores
,
structure_boxes
,
structure_imes
);
time_info_table
[
0
]
+=
structure_imes
[
0
];
time_info_table
[
1
]
+=
structure_imes
[
1
];
time_info_table
[
2
]
+=
structure_imes
[
2
];
this
->
table_model_
->
Run
(
img_list
,
structure_html_tags
,
structure_scores
,
structure_boxes
,
structure_times
);
this
->
time_info_table
[
0
]
+=
structure_times
[
0
];
this
->
time_info_table
[
1
]
+=
structure_times
[
1
];
this
->
time_info_table
[
2
]
+=
structure_times
[
2
];
std
::
vector
<
OCRPredictResult
>
ocr_result
;
std
::
string
html
;
...
...
@@ -100,22 +100,22 @@ void PaddleStructure::table(cv::Mat img,
for
(
int
i
=
0
;
i
<
img_list
.
size
();
i
++
)
{
// det
this
->
det
(
img_list
[
i
],
ocr_result
,
time_info_det
);
this
->
det
(
img_list
[
i
],
ocr_result
);
// crop image
std
::
vector
<
cv
::
Mat
>
rec_img_list
;
std
::
vector
<
int
>
ocr_box
;
for
(
int
j
=
0
;
j
<
ocr_result
.
size
();
j
++
)
{
ocr_box
=
Utility
::
xyxyxyxy2xyxy
(
ocr_result
[
j
].
box
);
ocr_box
[
0
]
=
max
(
0
,
ocr_box
[
0
]
-
expand_pixel
);
ocr_box
[
1
]
=
max
(
0
,
ocr_box
[
1
]
-
expand_pixel
),
ocr_box
[
2
]
=
min
(
img_list
[
i
].
cols
,
ocr_box
[
2
]
+
expand_pixel
);
ocr_box
[
3
]
=
min
(
img_list
[
i
].
rows
,
ocr_box
[
3
]
+
expand_pixel
);
ocr_box
[
0
]
=
std
::
max
(
0
,
ocr_box
[
0
]
-
expand_pixel
);
ocr_box
[
1
]
=
std
::
max
(
0
,
ocr_box
[
1
]
-
expand_pixel
),
ocr_box
[
2
]
=
std
::
min
(
img_list
[
i
].
cols
,
ocr_box
[
2
]
+
expand_pixel
);
ocr_box
[
3
]
=
std
::
min
(
img_list
[
i
].
rows
,
ocr_box
[
3
]
+
expand_pixel
);
cv
::
Mat
crop_img
=
Utility
::
crop_image
(
img_list
[
i
],
ocr_box
);
rec_img_list
.
push_back
(
crop_img
);
}
// rec
this
->
rec
(
rec_img_list
,
ocr_result
,
time_info_rec
);
this
->
rec
(
rec_img_list
,
ocr_result
);
// rebuild table
html
=
this
->
rebuild_table
(
structure_html_tags
[
i
],
structure_boxes
[
i
],
ocr_result
);
...
...
@@ -130,8 +130,8 @@ PaddleStructure::rebuild_table(std::vector<std::string> structure_html_tags,
std
::
vector
<
std
::
vector
<
int
>>
structure_boxes
,
std
::
vector
<
OCRPredictResult
>
&
ocr_result
)
{
// match text in same cell
std
::
vector
<
std
::
vector
<
string
>>
matched
(
structure_boxes
.
size
(),
std
::
vector
<
std
::
string
>
());
std
::
vector
<
std
::
vector
<
st
d
::
st
ring
>>
matched
(
structure_boxes
.
size
(),
std
::
vector
<
std
::
string
>
());
std
::
vector
<
int
>
ocr_box
;
std
::
vector
<
int
>
structure_box
;
...
...
@@ -150,7 +150,7 @@ PaddleStructure::rebuild_table(std::vector<std::string> structure_html_tags,
structure_box
=
structure_boxes
[
j
];
}
dis_list
[
j
][
0
]
=
this
->
dis
(
ocr_box
,
structure_box
);
dis_list
[
j
][
1
]
=
1
-
this
->
iou
(
ocr_box
,
structure_box
);
dis_list
[
j
][
1
]
=
1
-
Utility
::
iou
(
ocr_box
,
structure_box
);
dis_list
[
j
][
2
]
=
j
;
}
// find min dis idx
...
...
@@ -216,28 +216,6 @@ PaddleStructure::rebuild_table(std::vector<std::string> structure_html_tags,
return
html_str
;
}
// Intersection-over-union of two integer boxes laid out as [x1, y1, x2, y2].
// Returns 0 when the boxes do not overlap.
float PaddleStructure::iou(std::vector<int> &box1, std::vector<int> &box2) {
  // Individual areas, with degenerate (negative) extents clamped to zero.
  const int first_area = max(0, box1[2] - box1[0]) * max(0, box1[3] - box1[1]);
  const int second_area =
      max(0, box2[2] - box2[0]) * max(0, box2[3] - box2[1]);
  const int combined_area = first_area + second_area;

  // Corners of the candidate intersection rectangle.
  const int left = max(box1[0], box2[0]);
  const int top = max(box1[1], box2[1]);
  const int right = min(box1[2], box2[2]);
  const int bottom = min(box1[3], box2[3]);

  const bool overlapping = top < bottom && left < right;
  if (!overlapping) {
    return 0.0;
  }

  const int overlap_area = (right - left) * (bottom - top);
  // The small epsilon keeps the division well-defined.
  return overlap_area / (combined_area - overlap_area + 0.00000001);
}
float
PaddleStructure
::
dis
(
std
::
vector
<
int
>
&
box1
,
std
::
vector
<
int
>
&
box2
)
{
int
x1_1
=
box1
[
0
];
int
y1_1
=
box1
[
1
];
...
...
@@ -253,12 +231,64 @@ float PaddleStructure::dis(std::vector<int> &box1, std::vector<int> &box2) {
abs
(
x1_2
-
x1_1
)
+
abs
(
y1_2
-
y1_1
)
+
abs
(
x2_2
-
x2_1
)
+
abs
(
y2_2
-
y2_1
);
float
dis_2
=
abs
(
x1_2
-
x1_1
)
+
abs
(
y1_2
-
y1_1
);
float
dis_3
=
abs
(
x2_2
-
x2_1
)
+
abs
(
y2_2
-
y2_1
);
return
dis
+
min
(
dis_2
,
dis_3
);
return
dis
+
std
::
min
(
dis_2
,
dis_3
);
}
void
PaddleStructure
::
reset_timer
()
{
this
->
time_info_det
=
{
0
,
0
,
0
};
this
->
time_info_rec
=
{
0
,
0
,
0
};
this
->
time_info_cls
=
{
0
,
0
,
0
};
this
->
time_info_table
=
{
0
,
0
,
0
};
this
->
time_info_layout
=
{
0
,
0
,
0
};
}
// Emits one AutoLogger report per pipeline stage that has accumulated any
// time (det, rec, cls, table, layout).
//
// img_num: number of images the accumulated timings cover.
//
// The sixth AutoLogger argument is the batch size shown in the report. Each
// stage passes its own value: det and layout handle one image per call, while
// rec / cls / table use their respective batch-size flags.
void PaddleStructure::benchmark_log(int img_num) {
  if (this->time_info_det[0] + this->time_info_det[1] +
          this->time_info_det[2] >
      0) {
    AutoLogger autolog_det("ocr_det", FLAGS_use_gpu, FLAGS_use_tensorrt,
                           FLAGS_enable_mkldnn, FLAGS_cpu_threads, 1,
                           "dynamic", FLAGS_precision, this->time_info_det,
                           img_num);
    autolog_det.report();
  }
  if (this->time_info_rec[0] + this->time_info_rec[1] +
          this->time_info_rec[2] >
      0) {
    AutoLogger autolog_rec("ocr_rec", FLAGS_use_gpu, FLAGS_use_tensorrt,
                           FLAGS_enable_mkldnn, FLAGS_cpu_threads,
                           FLAGS_rec_batch_num, "dynamic", FLAGS_precision,
                           this->time_info_rec, img_num);
    autolog_rec.report();
  }
  if (this->time_info_cls[0] + this->time_info_cls[1] +
          this->time_info_cls[2] >
      0) {
    AutoLogger autolog_cls("ocr_cls", FLAGS_use_gpu, FLAGS_use_tensorrt,
                           FLAGS_enable_mkldnn, FLAGS_cpu_threads,
                           FLAGS_cls_batch_num, "dynamic", FLAGS_precision,
                           this->time_info_cls, img_num);
    autolog_cls.report();
  }
  if (this->time_info_table[0] + this->time_info_table[1] +
          this->time_info_table[2] >
      0) {
    // The table model is configured with FLAGS_table_batch_num, so report
    // that rather than the classifier's batch size.
    AutoLogger autolog_table("table", FLAGS_use_gpu, FLAGS_use_tensorrt,
                             FLAGS_enable_mkldnn, FLAGS_cpu_threads,
                             FLAGS_table_batch_num, "dynamic", FLAGS_precision,
                             this->time_info_table, img_num);
    autolog_table.report();
  }
  if (this->time_info_layout[0] + this->time_info_layout[1] +
          this->time_info_layout[2] >
      0) {
    // Layout inference runs on a single image per call, so its batch size
    // is 1.
    AutoLogger autolog_layout("layout", FLAGS_use_gpu, FLAGS_use_tensorrt,
                              FLAGS_enable_mkldnn, FLAGS_cpu_threads, 1,
                              "dynamic", FLAGS_precision,
                              this->time_info_layout, img_num);
    autolog_layout.report();
  }
}
PaddleStructure
::~
PaddleStructure
()
{
if
(
this
->
recognizer
_
!=
nullptr
)
{
delete
this
->
recognizer
_
;
if
(
this
->
table_model
_
!=
nullptr
)
{
delete
this
->
table_model
_
;
}
};
...
...
deploy/cpp_infer/src/postprocess_op.cpp
浏览文件 @
0663b039
...
...
@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <include/clipper.h>
#include <include/postprocess_op.h>
namespace
PaddleOCR
{
...
...
@@ -431,7 +430,7 @@ void TablePostProcessor::Run(
}
}
score
/=
count
;
if
(
isnan
(
score
)
||
rec_boxes
.
size
()
==
0
)
{
if
(
std
::
isnan
(
score
)
||
rec_boxes
.
size
()
==
0
)
{
score
=
-
1
;
}
rec_scores
.
push_back
(
score
);
...
...
@@ -440,4 +439,137 @@ void TablePostProcessor::Run(
}
}
// Configures the post-processor: stores the thresholds and FPN strides and
// loads the class labels from `label_path`. The class count is taken from
// the number of labels read.
void PicodetPostProcessor::init(std::string label_path,
                                const double score_threshold,
                                const double nms_threshold,
                                const std::vector<int> &fpn_stride) {
  this->score_threshold_ = score_threshold;
  this->nms_threshold_ = nms_threshold;
  this->fpn_stride_ = fpn_stride;

  this->label_list_ = Utility::ReadDict(label_path);
  this->num_class_ = this->label_list_.size();
}
void
PicodetPostProcessor
::
Run
(
std
::
vector
<
StructurePredictResult
>
&
results
,
std
::
vector
<
std
::
vector
<
float
>>
outs
,
std
::
vector
<
int
>
ori_shape
,
std
::
vector
<
int
>
resize_shape
,
int
reg_max
)
{
int
in_h
=
resize_shape
[
0
];
int
in_w
=
resize_shape
[
1
];
float
scale_factor_h
=
resize_shape
[
0
]
/
float
(
ori_shape
[
0
]);
float
scale_factor_w
=
resize_shape
[
1
]
/
float
(
ori_shape
[
1
]);
std
::
vector
<
std
::
vector
<
StructurePredictResult
>>
bbox_results
;
bbox_results
.
resize
(
this
->
num_class_
);
for
(
int
i
=
0
;
i
<
this
->
fpn_stride_
.
size
();
++
i
)
{
int
feature_h
=
std
::
ceil
((
float
)
in_h
/
this
->
fpn_stride_
[
i
]);
int
feature_w
=
std
::
ceil
((
float
)
in_w
/
this
->
fpn_stride_
[
i
]);
for
(
int
idx
=
0
;
idx
<
feature_h
*
feature_w
;
idx
++
)
{
// score and label
float
score
=
0
;
int
cur_label
=
0
;
for
(
int
label
=
0
;
label
<
this
->
num_class_
;
label
++
)
{
if
(
outs
[
i
][
idx
*
this
->
num_class_
+
label
]
>
score
)
{
score
=
outs
[
i
][
idx
*
this
->
num_class_
+
label
];
cur_label
=
label
;
}
}
// bbox
if
(
score
>
this
->
score_threshold_
)
{
int
row
=
idx
/
feature_w
;
int
col
=
idx
%
feature_w
;
std
::
vector
<
float
>
bbox_pred
(
outs
[
i
+
this
->
fpn_stride_
.
size
()].
begin
()
+
idx
*
4
*
reg_max
,
outs
[
i
+
this
->
fpn_stride_
.
size
()].
begin
()
+
(
idx
+
1
)
*
4
*
reg_max
);
bbox_results
[
cur_label
].
push_back
(
this
->
disPred2Bbox
(
bbox_pred
,
cur_label
,
score
,
col
,
row
,
this
->
fpn_stride_
[
i
],
resize_shape
,
reg_max
));
}
}
}
for
(
int
i
=
0
;
i
<
bbox_results
.
size
();
i
++
)
{
bool
flag
=
bbox_results
[
i
].
size
()
<=
0
;
}
for
(
int
i
=
0
;
i
<
bbox_results
.
size
();
i
++
)
{
bool
flag
=
bbox_results
[
i
].
size
()
<=
0
;
if
(
bbox_results
[
i
].
size
()
<=
0
)
{
continue
;
}
this
->
nms
(
bbox_results
[
i
],
this
->
nms_threshold_
);
for
(
auto
box
:
bbox_results
[
i
])
{
box
.
box
[
0
]
=
box
.
box
[
0
]
/
scale_factor_w
;
box
.
box
[
2
]
=
box
.
box
[
2
]
/
scale_factor_w
;
box
.
box
[
1
]
=
box
.
box
[
1
]
/
scale_factor_h
;
box
.
box
[
3
]
=
box
.
box
[
3
]
/
scale_factor_h
;
results
.
push_back
(
box
);
}
}
}
// Turns the distance distribution predicted at feature-map cell (x, y) into
// a box clipped to the image, labelled with `label` and `score`.
//
// bbox_pred holds 4 * reg_max values: reg_max bins for each of the four
// sides. Each side's distance is the softmax-weighted mean bin index times
// `stride`. im_shape is {rows, cols} of the image the box is clipped to.
StructurePredictResult PicodetPostProcessor::disPred2Bbox(
    std::vector<float> bbox_pred, int label, float score, int x, int y,
    int stride, std::vector<int> im_shape, int reg_max) {
  // Centre of the cell in image coordinates.
  const float center_x = (x + 0.5) * stride;
  const float center_y = (y + 0.5) * stride;

  std::vector<float> side_dist(4);
  for (int side = 0; side < 4; ++side) {
    auto first_bin = bbox_pred.begin() + side * reg_max;
    std::vector<float> side_logits(first_bin, first_bin + reg_max);
    std::vector<float> side_probs =
        Utility::activation_function_softmax(side_logits);

    float expected_bin = 0;
    for (int bin = 0; bin < reg_max; ++bin) {
      expected_bin += bin * side_probs[bin];
    }
    expected_bin *= stride;
    side_dist[side] = expected_bin;
  }

  StructurePredictResult result_item;
  result_item.box = {
      (std::max)(center_x - side_dist[0], .0f),
      (std::max)(center_y - side_dist[1], .0f),
      (std::min)(center_x + side_dist[2], (float)im_shape[1]),
      (std::min)(center_y + side_dist[3], (float)im_shape[0])};
  result_item.type = this->label_list_[label];
  result_item.confidence = score;
  return result_item;
}
void
PicodetPostProcessor
::
nms
(
std
::
vector
<
StructurePredictResult
>
&
input_boxes
,
float
nms_threshold
)
{
std
::
sort
(
input_boxes
.
begin
(),
input_boxes
.
end
(),
[](
StructurePredictResult
a
,
StructurePredictResult
b
)
{
return
a
.
confidence
>
b
.
confidence
;
});
std
::
vector
<
int
>
picked
(
input_boxes
.
size
(),
1
);
for
(
int
i
=
0
;
i
<
input_boxes
.
size
();
++
i
)
{
if
(
picked
[
i
]
==
0
)
{
continue
;
}
for
(
int
j
=
i
+
1
;
j
<
input_boxes
.
size
();
++
j
)
{
if
(
picked
[
j
]
==
0
)
{
continue
;
}
float
iou
=
Utility
::
iou
(
input_boxes
[
i
].
box
,
input_boxes
[
j
].
box
);
if
(
iou
>
nms_threshold
)
{
picked
[
j
]
=
0
;
}
}
}
std
::
vector
<
StructurePredictResult
>
input_boxes_nms
;
for
(
int
i
=
0
;
i
<
input_boxes
.
size
();
++
i
)
{
if
(
picked
[
i
]
==
1
)
{
input_boxes_nms
.
push_back
(
input_boxes
[
i
]);
}
}
input_boxes
=
input_boxes_nms
;
}
}
// namespace PaddleOCR
deploy/cpp_infer/src/preprocess_op.cpp
浏览文件 @
0663b039
...
...
@@ -12,21 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include "paddle_api.h"
#include "paddle_inference_api.h"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
#include <include/preprocess_op.h>
namespace
PaddleOCR
{
...
...
@@ -69,13 +54,13 @@ void Normalize::Run(cv::Mat *im, const std::vector<float> &mean,
}
void
ResizeImgType0
::
Run
(
const
cv
::
Mat
&
img
,
cv
::
Mat
&
resize_img
,
st
ring
limit_type
,
int
limit_side_len
,
float
&
ratio_h
,
float
&
ratio_w
,
bool
use_tensorrt
)
{
st
d
::
string
limit_type
,
int
limit_side_len
,
float
&
ratio_
h
,
float
&
ratio_
w
,
bool
use_tensorrt
)
{
int
w
=
img
.
cols
;
int
h
=
img
.
rows
;
float
ratio
=
1.
f
;
if
(
limit_type
==
"min"
)
{
int
min_wh
=
min
(
h
,
w
);
int
min_wh
=
std
::
min
(
h
,
w
);
if
(
min_wh
<
limit_side_len
)
{
if
(
h
<
w
)
{
ratio
=
float
(
limit_side_len
)
/
float
(
h
);
...
...
@@ -84,7 +69,7 @@ void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img,
}
}
}
else
{
int
max_wh
=
max
(
h
,
w
);
int
max_wh
=
std
::
max
(
h
,
w
);
if
(
max_wh
>
limit_side_len
)
{
if
(
h
>
w
)
{
ratio
=
float
(
limit_side_len
)
/
float
(
h
);
...
...
@@ -97,8 +82,8 @@ void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img,
int
resize_h
=
int
(
float
(
h
)
*
ratio
);
int
resize_w
=
int
(
float
(
w
)
*
ratio
);
resize_h
=
max
(
int
(
round
(
float
(
resize_h
)
/
32
)
*
32
),
32
);
resize_w
=
max
(
int
(
round
(
float
(
resize_w
)
/
32
)
*
32
),
32
);
resize_h
=
std
::
max
(
int
(
round
(
float
(
resize_h
)
/
32
)
*
32
),
32
);
resize_w
=
std
::
max
(
int
(
round
(
float
(
resize_w
)
/
32
)
*
32
),
32
);
cv
::
resize
(
img
,
resize_img
,
cv
::
Size
(
resize_w
,
resize_h
));
ratio_h
=
float
(
resize_h
)
/
float
(
h
);
...
...
@@ -175,4 +160,9 @@ void TablePadImg::Run(const cv::Mat &img, cv::Mat &resize_img,
cv
::
BORDER_CONSTANT
,
cv
::
Scalar
(
0
,
0
,
0
));
}
// Resizes `img` into `resize_img` so the result is `h` rows by `w` columns.
void Resize::Run(const cv::Mat &img, cv::Mat &resize_img, const int h,
                 const int w) {
  // cv::Size takes (width, height), hence the swapped argument order.
  const cv::Size target_size(w, h);
  cv::resize(img, resize_img, target_size);
}
}
// namespace PaddleOCR
deploy/cpp_infer/src/structure_layout.cpp
0 → 100644
浏览文件 @
0663b039
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <include/structure_layout.h>
namespace
PaddleOCR
{
// Runs layout detection on one image.
//
// img:    input image; it is copied, so the caller's data is not modified.
// result: output; detected regions are appended by the post-processor.
// times:  output; three values are appended, in milliseconds: preprocess,
//         inference (including output copy-back) and postprocess time.
void StructureLayoutRecognizer::Run(cv::Mat img,
                                    std::vector<StructurePredictResult> &result,
                                    std::vector<double> &times) {
  // preprocess
  auto preprocess_start = std::chrono::steady_clock::now();

  cv::Mat srcimg;
  img.copyTo(srcimg);
  cv::Mat resize_img;
  // Fixed network input size: 800 rows by 608 columns.
  this->resize_op_.Run(srcimg, resize_img, 800, 608);
  this->normalize_op_.Run(&resize_img, this->mean_, this->scale_,
                          this->is_scale_);

  std::vector<float> input(1 * 3 * resize_img.rows * resize_img.cols, 0.0f);
  this->permute_op_.Run(&resize_img, input.data());
  auto preprocess_end = std::chrono::steady_clock::now();
  const std::chrono::duration<float> preprocess_diff =
      preprocess_end - preprocess_start;

  // inference.
  auto input_names = this->predictor_->GetInputNames();
  auto input_t = this->predictor_->GetInputHandle(input_names[0]);
  input_t->Reshape({1, 3, resize_img.rows, resize_img.cols});
  auto inference_start = std::chrono::steady_clock::now();
  input_t->CopyFromCpu(input.data());

  this->predictor_->Run();

  // Get output tensor
  std::vector<std::vector<float>> out_tensor_list;
  std::vector<std::vector<int>> output_shape_list;
  auto output_names = this->predictor_->GetOutputNames();
  for (int j = 0; j < output_names.size(); j++) {
    auto output_tensor = this->predictor_->GetOutputHandle(output_names[j]);
    std::vector<int> output_shape = output_tensor->shape();
    int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
                                  std::multiplies<int>());
    output_shape_list.push_back(output_shape);

    // Copy straight into the list entry to avoid duplicating the tensor.
    out_tensor_list.emplace_back(out_num);
    output_tensor->CopyToCpu(out_tensor_list.back().data());
  }
  auto inference_end = std::chrono::steady_clock::now();
  const std::chrono::duration<float> inference_diff =
      inference_end - inference_start;

  // postprocess
  auto postprocess_start = std::chrono::steady_clock::now();

  // The output at index fpn_stride_.size() is the first box head; its third
  // dimension holds 4 * reg_max values. reg_max stays 0 when that output is
  // missing or has too few dimensions.
  int reg_max = 0;
  const auto first_box_output = this->post_processor_.fpn_stride_.size();
  if (first_box_output < output_shape_list.size() &&
      output_shape_list[first_box_output].size() > 2) {
    reg_max = output_shape_list[first_box_output][2] / 4;
  }

  std::vector<int> ori_shape = {srcimg.rows, srcimg.cols};
  std::vector<int> resize_shape = {resize_img.rows, resize_img.cols};
  this->post_processor_.Run(result, out_tensor_list, ori_shape, resize_shape,
                            reg_max);
  auto postprocess_end = std::chrono::steady_clock::now();
  const std::chrono::duration<float> postprocess_diff =
      postprocess_end - postprocess_start;

  // Durations are in seconds; report milliseconds.
  times.push_back(double(preprocess_diff.count() * 1000));
  times.push_back(double(inference_diff.count() * 1000));
  times.push_back(double(postprocess_diff.count() * 1000));
}
// Builds the Paddle inference predictor for the layout model in `model_dir`.
//
// Looks for inference.pdmodel / inference.pdiparams first, then
// model.pdmodel / model.pdiparams; prints an error and exits with status 1
// when neither pair exists. Device, TensorRT and MKLDNN settings come from
// the members set at construction.
void StructureLayoutRecognizer::LoadModel(const std::string &model_dir) {
  paddle_infer::Config config;

  const std::string inference_model = model_dir + "/inference.pdmodel";
  const std::string inference_params = model_dir + "/inference.pdiparams";
  const std::string plain_model = model_dir + "/model.pdmodel";
  const std::string plain_params = model_dir + "/model.pdiparams";

  if (Utility::PathExists(inference_model) &&
      Utility::PathExists(inference_params)) {
    config.SetModel(inference_model, inference_params);
  } else if (Utility::PathExists(plain_model) &&
             Utility::PathExists(plain_params)) {
    config.SetModel(plain_model, plain_params);
  } else {
    std::cerr << "[ERROR] not find model.pdiparams or inference.pdiparams in "
              << model_dir << std::endl;
    exit(1);
  }

  if (!this->use_gpu_) {
    // CPU path.
    config.DisableGpu();
    if (this->use_mkldnn_) {
      config.EnableMKLDNN();
    }
    config.SetCpuMathLibraryNumThreads(this->cpu_math_library_num_threads_);
  } else {
    // GPU path, optionally with TensorRT.
    config.EnableUseGpu(this->gpu_mem_, this->gpu_id_);
    if (this->use_tensorrt_) {
      auto precision = paddle_infer::Config::Precision::kFloat32;
      if (this->precision_ == "fp16") {
        precision = paddle_infer::Config::Precision::kHalf;
      }
      if (this->precision_ == "int8") {
        precision = paddle_infer::Config::Precision::kInt8;
      }
      config.EnableTensorRtEngine(1 << 20, 10, 3, precision, false, false);

      // First run records shape ranges to the file; later runs reuse it.
      const std::string shape_file = "./trt_layout_shape.txt";
      if (Utility::PathExists(shape_file)) {
        config.EnableTunedTensorRtDynamicShape(shape_file, true);
      } else {
        config.CollectShapeRangeInfo(shape_file);
      }
    }
  }

  // false for zero copy tensor
  config.SwitchUseFeedFetchOps(false);
  // true for multiple input
  config.SwitchSpecifyInputNames(true);

  config.SwitchIrOptim(true);

  config.EnableMemoryOptim();
  config.DisableGlogInfo();

  this->predictor_ = paddle_infer::CreatePredictor(config);
}
}
// namespace PaddleOCR
deploy/cpp_infer/src/structure_table.cpp
浏览文件 @
0663b039
...
...
@@ -34,7 +34,7 @@ void StructureTableRecognizer::Run(
beg_img_no
+=
this
->
table_batch_num_
)
{
// preprocess
auto
preprocess_start
=
std
::
chrono
::
steady_clock
::
now
();
int
end_img_no
=
min
(
img_num
,
beg_img_no
+
this
->
table_batch_num_
);
int
end_img_no
=
std
::
min
(
img_num
,
beg_img_no
+
this
->
table_batch_num_
);
int
batch_num
=
end_img_no
-
beg_img_no
;
std
::
vector
<
cv
::
Mat
>
norm_img_batch
;
std
::
vector
<
int
>
width_list
;
...
...
@@ -118,7 +118,7 @@ void StructureTableRecognizer::Run(
}
void
StructureTableRecognizer
::
LoadModel
(
const
std
::
string
&
model_dir
)
{
Analysis
Config
config
;
paddle_infer
::
Config
config
;
config
.
SetModel
(
model_dir
+
"/inference.pdmodel"
,
model_dir
+
"/inference.pdiparams"
);
...
...
@@ -133,6 +133,11 @@ void StructureTableRecognizer::LoadModel(const std::string &model_dir) {
precision
=
paddle_infer
::
Config
::
Precision
::
kInt8
;
}
config
.
EnableTensorRtEngine
(
1
<<
20
,
10
,
3
,
precision
,
false
,
false
);
if
(
!
Utility
::
PathExists
(
"./trt_table_shape.txt"
))
{
config
.
CollectShapeRangeInfo
(
"./trt_table_shape.txt"
);
}
else
{
config
.
EnableTunedTensorRtDynamicShape
(
"./trt_table_shape.txt"
,
true
);
}
}
}
else
{
config
.
DisableGpu
();
...
...
@@ -152,6 +157,6 @@ void StructureTableRecognizer::LoadModel(const std::string &model_dir) {
config
.
EnableMemoryOptim
();
config
.
DisableGlogInfo
();
this
->
predictor_
=
CreatePredictor
(
config
);
this
->
predictor_
=
paddle_infer
::
CreatePredictor
(
config
);
}
}
// namespace PaddleOCR
deploy/cpp_infer/src/utility.cpp
浏览文件 @
0663b039
...
...
@@ -70,6 +70,7 @@ void Utility::VisualizeBboxes(const cv::Mat &srcimg,
const
std
::
string
&
save_path
)
{
cv
::
Mat
img_vis
;
srcimg
.
copyTo
(
img_vis
);
img_vis
=
crop_image
(
img_vis
,
structure_result
.
box
);
for
(
int
n
=
0
;
n
<
structure_result
.
cell_box
.
size
();
n
++
)
{
if
(
structure_result
.
cell_box
[
n
].
size
()
==
8
)
{
cv
::
Point
rook_points
[
4
];
...
...
@@ -280,23 +281,29 @@ void Utility::print_result(const std::vector<OCRPredictResult> &ocr_result) {
}
}
cv
::
Mat
Utility
::
crop_image
(
cv
::
Mat
&
img
,
std
::
vector
<
int
>
&
area
)
{
cv
::
Mat
Utility
::
crop_image
(
cv
::
Mat
&
img
,
const
std
::
vector
<
int
>
&
box
)
{
cv
::
Mat
crop_im
;
int
crop_x1
=
std
::
max
(
0
,
area
[
0
]);
int
crop_y1
=
std
::
max
(
0
,
area
[
1
]);
int
crop_x2
=
std
::
min
(
img
.
cols
-
1
,
area
[
2
]
-
1
);
int
crop_y2
=
std
::
min
(
img
.
rows
-
1
,
area
[
3
]
-
1
);
int
crop_x1
=
std
::
max
(
0
,
box
[
0
]);
int
crop_y1
=
std
::
max
(
0
,
box
[
1
]);
int
crop_x2
=
std
::
min
(
img
.
cols
-
1
,
box
[
2
]
-
1
);
int
crop_y2
=
std
::
min
(
img
.
rows
-
1
,
box
[
3
]
-
1
);
crop_im
=
cv
::
Mat
::
zeros
(
area
[
3
]
-
area
[
1
],
area
[
2
]
-
area
[
0
],
16
);
crop_im
=
cv
::
Mat
::
zeros
(
box
[
3
]
-
box
[
1
],
box
[
2
]
-
box
[
0
],
16
);
cv
::
Mat
crop_im_window
=
crop_im
(
cv
::
Range
(
crop_y1
-
area
[
1
],
crop_y2
+
1
-
area
[
1
]),
cv
::
Range
(
crop_x1
-
area
[
0
],
crop_x2
+
1
-
area
[
0
]));
crop_im
(
cv
::
Range
(
crop_y1
-
box
[
1
],
crop_y2
+
1
-
box
[
1
]),
cv
::
Range
(
crop_x1
-
box
[
0
],
crop_x2
+
1
-
box
[
0
]));
cv
::
Mat
roi_img
=
img
(
cv
::
Range
(
crop_y1
,
crop_y2
+
1
),
cv
::
Range
(
crop_x1
,
crop_x2
+
1
));
crop_im_window
+=
roi_img
;
return
crop_im
;
}
// Float-box overload: truncates the four box coordinates to int and
// forwards to the integer-box crop_image.
cv::Mat Utility::crop_image(cv::Mat &img, const std::vector<float> &box) {
  std::vector<int> box_int(4);
  for (int k = 0; k < 4; ++k) {
    box_int[k] = static_cast<int>(box[k]);
  }
  return crop_image(img, box_int);
}
void
Utility
::
sorted_boxes
(
std
::
vector
<
OCRPredictResult
>
&
ocr_result
)
{
std
::
sort
(
ocr_result
.
begin
(),
ocr_result
.
end
(),
Utility
::
comparison_box
);
if
(
ocr_result
.
size
()
>
0
)
{
...
...
@@ -341,4 +348,78 @@ std::vector<int> Utility::xyxyxyxy2xyxy(std::vector<int> &box) {
return
box1
;
}
// Fast approximation of exp(x), built by writing a scaled, biased copy of
// x * log2(e) directly into the bit pattern of a float.
//
// Inputs whose bit pattern would fall outside the finite float range are
// saturated instead of being converted out of range (which is undefined
// behaviour): very negative x yields 0, very large x yields +infinity, and
// NaN is returned unchanged.
float Utility::fast_exp(float x) {
  union {
    uint32_t i;
    float f;
  } v{};

  if (x != x) {
    return x; // NaN in, NaN out
  }

  const double bits = (1 << 23) * (1.4426950409 * x + 126.93490512f);
  if (bits < 1.0) {
    // Truncation would give 0 (or be negative): the result underflows to 0.
    return 0.0f;
  }
  if (bits >= 2139095040.0) {
    // 0x7F800000 is the bit pattern of +infinity; anything at or above it
    // would not be a finite float.
    v.i = 0x7F800000u;
    return v.f;
  }

  v.i = static_cast<uint32_t>(bits);
  return v.f;
}
// Softmax over `src` using fast_exp.
//
// The maximum element is subtracted before exponentiation so every exponent
// is <= 0. Returns a vector of the same length as `src`; an empty input
// yields an empty result.
std::vector<float>
Utility::activation_function_softmax(std::vector<float> &src) {
  int length = src.size();
  std::vector<float> dst;
  dst.resize(length);
  if (length == 0) {
    // Nothing to normalise, and max_element on an empty range must not be
    // dereferenced.
    return dst;
  }

  const float alpha = float(*std::max_element(src.begin(), src.end()));
  float denominator{0};

  for (int i = 0; i < length; ++i) {
    dst[i] = fast_exp(src[i] - alpha);
    denominator += dst[i];
  }

  for (int i = 0; i < length; ++i) {
    dst[i] /= denominator;
  }
  return dst;
}
// Intersection-over-union of two integer boxes laid out as [x1, y1, x2, y2].
// Returns 0 when the boxes do not overlap.
float Utility::iou(std::vector<int> &box1, std::vector<int> &box2) {
  // Individual areas, with degenerate (negative) extents clamped to zero.
  const int first_area =
      std::max(0, box1[2] - box1[0]) * std::max(0, box1[3] - box1[1]);
  const int second_area =
      std::max(0, box2[2] - box2[0]) * std::max(0, box2[3] - box2[1]);
  const int combined_area = first_area + second_area;

  // Corners of the candidate intersection rectangle.
  const int left = std::max(box1[0], box2[0]);
  const int top = std::max(box1[1], box2[1]);
  const int right = std::min(box1[2], box2[2]);
  const int bottom = std::min(box1[3], box2[3]);

  const bool overlapping = top < bottom && left < right;
  if (!overlapping) {
    return 0.0;
  }

  const int overlap_area = (right - left) * (bottom - top);
  // The small epsilon keeps the division well-defined.
  return overlap_area / (combined_area - overlap_area + 0.00000001);
}
// Intersection-over-union of two float boxes laid out as [x1, y1, x2, y2].
// Returns 0 when the boxes do not overlap.
float Utility::iou(std::vector<float> &box1, std::vector<float> &box2) {
  // Individual areas, with degenerate (negative) extents clamped to zero.
  const float first_area =
      std::max(0.0f, box1[2] - box1[0]) * std::max(0.0f, box1[3] - box1[1]);
  const float second_area =
      std::max(0.0f, box2[2] - box2[0]) * std::max(0.0f, box2[3] - box2[1]);
  const float combined_area = first_area + second_area;

  // Corners of the candidate intersection rectangle.
  const float left = std::max(box1[0], box2[0]);
  const float top = std::max(box1[1], box2[1]);
  const float right = std::min(box1[2], box2[2]);
  const float bottom = std::min(box1[3], box2[3]);

  // No overlap: the rectangle has no positive extent on some axis.
  if (top >= bottom || left >= right) {
    return 0.0;
  }

  const float overlap_area = (right - left) * (bottom - top);
  // The small epsilon keeps the division well-defined.
  return overlap_area / (combined_area - overlap_area + 0.00000001);
}
}
// namespace PaddleOCR
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录