Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
weixin_41840029
PaddleOCR
提交
fb9be201
P
PaddleOCR
项目概览
weixin_41840029
/
PaddleOCR
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleOCR
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleOCR
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
fb9be201
编写于
8月 09, 2022
作者:
文幕地方
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
rm layout files
上级
9277f8c4
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
0 addition
and
382 deletion
+0
-382
ppstructure/layout/picodet_postprocess.py
ppstructure/layout/picodet_postprocess.py
+0
-227
ppstructure/layout/predict_layout.py
ppstructure/layout/predict_layout.py
+0
-155
未找到文件。
ppstructure/layout/picodet_postprocess.py
已删除
100644 → 0
浏览文件 @
9277f8c4
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
numpy
as
np
from
scipy.special
import
softmax
def
hard_nms
(
box_scores
,
iou_threshold
,
top_k
=-
1
,
candidate_size
=
200
):
"""
Args:
box_scores (N, 5): boxes in corner-form and probabilities.
iou_threshold: intersection over union threshold.
top_k: keep top_k results. If k <= 0, keep all the results.
candidate_size: only consider the candidates with the highest scores.
Returns:
picked: a list of indexes of the kept boxes
"""
scores
=
box_scores
[:,
-
1
]
boxes
=
box_scores
[:,
:
-
1
]
picked
=
[]
indexes
=
np
.
argsort
(
scores
)
indexes
=
indexes
[
-
candidate_size
:]
while
len
(
indexes
)
>
0
:
current
=
indexes
[
-
1
]
picked
.
append
(
current
)
if
0
<
top_k
==
len
(
picked
)
or
len
(
indexes
)
==
1
:
break
current_box
=
boxes
[
current
,
:]
indexes
=
indexes
[:
-
1
]
rest_boxes
=
boxes
[
indexes
,
:]
iou
=
iou_of
(
rest_boxes
,
np
.
expand_dims
(
current_box
,
axis
=
0
),
)
indexes
=
indexes
[
iou
<=
iou_threshold
]
return
box_scores
[
picked
,
:]
def
iou_of
(
boxes0
,
boxes1
,
eps
=
1e-5
):
"""Return intersection-over-union (Jaccard index) of boxes.
Args:
boxes0 (N, 4): ground truth boxes.
boxes1 (N or 1, 4): predicted boxes.
eps: a small number to avoid 0 as denominator.
Returns:
iou (N): IoU values.
"""
overlap_left_top
=
np
.
maximum
(
boxes0
[...,
:
2
],
boxes1
[...,
:
2
])
overlap_right_bottom
=
np
.
minimum
(
boxes0
[...,
2
:],
boxes1
[...,
2
:])
overlap_area
=
area_of
(
overlap_left_top
,
overlap_right_bottom
)
area0
=
area_of
(
boxes0
[...,
:
2
],
boxes0
[...,
2
:])
area1
=
area_of
(
boxes1
[...,
:
2
],
boxes1
[...,
2
:])
return
overlap_area
/
(
area0
+
area1
-
overlap_area
+
eps
)
def
area_of
(
left_top
,
right_bottom
):
"""Compute the areas of rectangles given two corners.
Args:
left_top (N, 2): left top corner.
right_bottom (N, 2): right bottom corner.
Returns:
area (N): return the area.
"""
hw
=
np
.
clip
(
right_bottom
-
left_top
,
0.0
,
None
)
return
hw
[...,
0
]
*
hw
[...,
1
]
class
PicoDetPostProcess
(
object
):
"""
Args:
input_shape (int): network input image size
ori_shape (int): ori image shape of before padding
scale_factor (float): scale factor of ori image
enable_mkldnn (bool): whether to open MKLDNN
"""
def
__init__
(
self
,
input_shape
,
ori_shape
,
scale_factor
,
strides
=
[
8
,
16
,
32
,
64
],
score_threshold
=
0.4
,
nms_threshold
=
0.5
,
nms_top_k
=
1000
,
keep_top_k
=
100
):
self
.
ori_shape
=
ori_shape
self
.
input_shape
=
input_shape
self
.
scale_factor
=
scale_factor
self
.
strides
=
strides
self
.
score_threshold
=
score_threshold
self
.
nms_threshold
=
nms_threshold
self
.
nms_top_k
=
nms_top_k
self
.
keep_top_k
=
keep_top_k
def
warp_boxes
(
self
,
boxes
,
ori_shape
):
"""Apply transform to boxes
"""
width
,
height
=
ori_shape
[
1
],
ori_shape
[
0
]
n
=
len
(
boxes
)
if
n
:
# warp points
xy
=
np
.
ones
((
n
*
4
,
3
))
xy
[:,
:
2
]
=
boxes
[:,
[
0
,
1
,
2
,
3
,
0
,
3
,
2
,
1
]].
reshape
(
n
*
4
,
2
)
# x1y1, x2y2, x1y2, x2y1
# xy = xy @ M.T # transform
xy
=
(
xy
[:,
:
2
]
/
xy
[:,
2
:
3
]).
reshape
(
n
,
8
)
# rescale
# create new boxes
x
=
xy
[:,
[
0
,
2
,
4
,
6
]]
y
=
xy
[:,
[
1
,
3
,
5
,
7
]]
xy
=
np
.
concatenate
(
(
x
.
min
(
1
),
y
.
min
(
1
),
x
.
max
(
1
),
y
.
max
(
1
))).
reshape
(
4
,
n
).
T
# clip boxes
xy
[:,
[
0
,
2
]]
=
xy
[:,
[
0
,
2
]].
clip
(
0
,
width
)
xy
[:,
[
1
,
3
]]
=
xy
[:,
[
1
,
3
]].
clip
(
0
,
height
)
return
xy
.
astype
(
np
.
float32
)
else
:
return
boxes
def
__call__
(
self
,
scores
,
raw_boxes
):
batch_size
=
raw_boxes
[
0
].
shape
[
0
]
reg_max
=
int
(
raw_boxes
[
0
].
shape
[
-
1
]
/
4
-
1
)
out_boxes_num
=
[]
out_boxes_list
=
[]
for
batch_id
in
range
(
batch_size
):
# generate centers
decode_boxes
=
[]
select_scores
=
[]
for
stride
,
box_distribute
,
score
in
zip
(
self
.
strides
,
raw_boxes
,
scores
):
box_distribute
=
box_distribute
[
batch_id
]
score
=
score
[
batch_id
]
# centers
fm_h
=
self
.
input_shape
[
0
]
/
stride
fm_w
=
self
.
input_shape
[
1
]
/
stride
h_range
=
np
.
arange
(
fm_h
)
w_range
=
np
.
arange
(
fm_w
)
ww
,
hh
=
np
.
meshgrid
(
w_range
,
h_range
)
ct_row
=
(
hh
.
flatten
()
+
0.5
)
*
stride
ct_col
=
(
ww
.
flatten
()
+
0.5
)
*
stride
center
=
np
.
stack
((
ct_col
,
ct_row
,
ct_col
,
ct_row
),
axis
=
1
)
# box distribution to distance
reg_range
=
np
.
arange
(
reg_max
+
1
)
box_distance
=
box_distribute
.
reshape
((
-
1
,
reg_max
+
1
))
box_distance
=
softmax
(
box_distance
,
axis
=
1
)
box_distance
=
box_distance
*
np
.
expand_dims
(
reg_range
,
axis
=
0
)
box_distance
=
np
.
sum
(
box_distance
,
axis
=
1
).
reshape
((
-
1
,
4
))
box_distance
=
box_distance
*
stride
# top K candidate
topk_idx
=
np
.
argsort
(
score
.
max
(
axis
=
1
))[::
-
1
]
topk_idx
=
topk_idx
[:
self
.
nms_top_k
]
center
=
center
[
topk_idx
]
score
=
score
[
topk_idx
]
box_distance
=
box_distance
[
topk_idx
]
# decode box
decode_box
=
center
+
[
-
1
,
-
1
,
1
,
1
]
*
box_distance
select_scores
.
append
(
score
)
decode_boxes
.
append
(
decode_box
)
# nms
bboxes
=
np
.
concatenate
(
decode_boxes
,
axis
=
0
)
confidences
=
np
.
concatenate
(
select_scores
,
axis
=
0
)
picked_box_probs
=
[]
picked_labels
=
[]
for
class_index
in
range
(
0
,
confidences
.
shape
[
1
]):
probs
=
confidences
[:,
class_index
]
mask
=
probs
>
self
.
score_threshold
probs
=
probs
[
mask
]
if
probs
.
shape
[
0
]
==
0
:
continue
subset_boxes
=
bboxes
[
mask
,
:]
box_probs
=
np
.
concatenate
(
[
subset_boxes
,
probs
.
reshape
(
-
1
,
1
)],
axis
=
1
)
box_probs
=
hard_nms
(
box_probs
,
iou_threshold
=
self
.
nms_threshold
,
top_k
=
self
.
keep_top_k
,
)
picked_box_probs
.
append
(
box_probs
)
picked_labels
.
extend
([
class_index
]
*
box_probs
.
shape
[
0
])
if
len
(
picked_box_probs
)
==
0
:
out_boxes_list
.
append
(
np
.
empty
((
0
,
4
)))
out_boxes_num
.
append
(
0
)
else
:
picked_box_probs
=
np
.
concatenate
(
picked_box_probs
)
# resize output boxes
picked_box_probs
[:,
:
4
]
=
self
.
warp_boxes
(
picked_box_probs
[:,
:
4
],
self
.
ori_shape
[
batch_id
])
im_scale
=
np
.
concatenate
([
self
.
scale_factor
[
batch_id
][::
-
1
],
self
.
scale_factor
[
batch_id
][::
-
1
]
])
picked_box_probs
[:,
:
4
]
/=
im_scale
# clas score box
out_boxes_list
.
append
(
np
.
concatenate
(
[
np
.
expand_dims
(
np
.
array
(
picked_labels
),
axis
=-
1
),
np
.
expand_dims
(
picked_box_probs
[:,
4
],
axis
=-
1
),
picked_box_probs
[:,
:
4
]
],
axis
=
1
))
out_boxes_num
.
append
(
len
(
picked_labels
))
out_boxes_list
=
np
.
concatenate
(
out_boxes_list
,
axis
=
0
)
out_boxes_num
=
np
.
asarray
(
out_boxes_num
).
astype
(
np
.
int32
)
return
out_boxes_list
,
out_boxes_num
ppstructure/layout/predict_layout.py
已删除
100644 → 0
浏览文件 @
9277f8c4
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
sys
__dir__
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
sys
.
path
.
append
(
__dir__
)
sys
.
path
.
insert
(
0
,
os
.
path
.
abspath
(
os
.
path
.
join
(
__dir__
,
'../..'
)))
os
.
environ
[
"FLAGS_allocator_strategy"
]
=
'auto_growth'
import
cv2
import
numpy
as
np
import
time
import
tools.infer.utility
as
utility
from
ppocr.data
import
create_operators
,
transform
from
ppocr.postprocess
import
build_post_process
from
ppocr.utils.logging
import
get_logger
from
ppocr.utils.utility
import
get_image_file_list
,
check_and_read_gif
from
ppstructure.utility
import
parse_args
from
picodet_postprocess
import
PicoDetPostProcess
logger
=
get_logger
()
class
LayoutPredictor
(
object
):
def
__init__
(
self
,
args
):
pre_process_list
=
[{
'Resize'
:
{
'size'
:
[
800
,
608
]
}
},
{
'NormalizeImage'
:
{
'std'
:
[
0.229
,
0.224
,
0.225
],
'mean'
:
[
0.485
,
0.456
,
0.406
],
'scale'
:
'1./255.'
,
'order'
:
'hwc'
}
},
{
'ToCHWImage'
:
None
},
{
'KeepKeys'
:
{
'keep_keys'
:
[
'image'
]
}
}]
# postprocess_params = {
# 'name': 'LayoutPostProcess',
# "character_dict_path": args.layout_dict_path,
# }
self
.
preprocess_op
=
create_operators
(
pre_process_list
)
# self.postprocess_op = build_post_process(postprocess_params)
self
.
predictor
,
self
.
input_tensor
,
self
.
output_tensors
,
self
.
config
=
\
utility
.
create_predictor
(
args
,
'layout'
,
logger
)
def
__call__
(
self
,
img
):
ori_im
=
img
.
copy
()
data
=
{
'image'
:
img
}
data
=
transform
(
data
,
self
.
preprocess_op
)
img
=
data
[
0
]
if
img
is
None
:
return
None
,
0
img
=
np
.
expand_dims
(
img
,
axis
=
0
)
img
=
img
.
copy
()
preds
,
elapse
=
0
,
1
starttime
=
time
.
time
()
self
.
input_tensor
.
copy_from_cpu
(
img
)
self
.
predictor
.
run
()
# outputs = []
# for output_tensor in self.output_tensors:
# output = output_tensor.copy_to_cpu()
# outputs.append(output)
np_score_list
,
np_boxes_list
=
[],
[]
output_names
=
self
.
predictor
.
get_output_names
()
num_outs
=
int
(
len
(
output_names
)
/
2
)
for
out_idx
in
range
(
num_outs
):
np_score_list
.
append
(
self
.
predictor
.
get_output_handle
(
output_names
[
out_idx
])
.
copy_to_cpu
())
np_boxes_list
.
append
(
self
.
predictor
.
get_output_handle
(
output_names
[
out_idx
+
num_outs
]).
copy_to_cpu
())
# result = dict(boxes=np_score_list, boxes_num=np_boxes_list)
postprocessor
=
PicoDetPostProcess
(
(
800
,
608
),
[[
800.
,
608.
]],
np
.
array
([[
1.010101
,
0.99346405
]]),
strides
=
[
8
,
16
,
32
,
64
],
nms_threshold
=
0.5
)
np_boxes
,
np_boxes_num
=
postprocessor
(
np_score_list
,
np_boxes_list
)
result
=
dict
(
boxes
=
np_boxes
,
boxes_num
=
np_boxes_num
)
# print(result)
im_bboxes_num
=
result
[
'boxes_num'
][
0
]
# print('im_bboxes_num:',im_bboxes_num)
bboxs
=
result
[
'boxes'
][
0
:
0
+
im_bboxes_num
,
:]
threshold
=
0.5
expect_boxes
=
(
np_boxes
[:,
1
]
>
threshold
)
&
(
np_boxes
[:,
0
]
>
-
1
)
np_boxes
=
np_boxes
[
expect_boxes
,
:]
preds
=
[]
id2label
=
{
1
:
'text'
,
2
:
'title'
,
3
:
'list'
,
4
:
'table'
,
5
:
'figure'
}
for
dt
in
np_boxes
:
clsid
,
bbox
,
score
=
int
(
dt
[
0
]),
dt
[
2
:],
dt
[
1
]
label
=
id2label
[
clsid
+
1
]
result_di
=
{
'bbox'
:
bbox
,
'label'
:
label
}
preds
.
append
(
result_di
)
# print('result_di',result_di)
# print('clsid, bbox, score:',clsid, bbox, score)
elapse
=
time
.
time
()
-
starttime
return
preds
,
elapse
def
main
(
args
):
image_file_list
=
get_image_file_list
(
args
.
image_dir
)
layout_predictor
=
LayoutPredictor
(
args
)
count
=
0
total_time
=
0
for
image_file
in
image_file_list
:
img
,
flag
=
check_and_read_gif
(
image_file
)
if
not
flag
:
img
=
cv2
.
imread
(
image_file
)
if
img
is
None
:
logger
.
info
(
"error in loading image:{}"
.
format
(
image_file
))
continue
layout_res
,
elapse
=
layout_predictor
(
img
)
logger
.
info
(
"result: {}"
.
format
(
layout_res
))
if
count
>
0
:
total_time
+=
elapse
count
+=
1
logger
.
info
(
"Predict time of {}: {}"
.
format
(
image_file
,
elapse
))
if
__name__
==
"__main__"
:
main
(
parse_args
())
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录