Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
weixin_41840029
PaddleOCR
提交
92f85521
P
PaddleOCR
项目概览
weixin_41840029
/
PaddleOCR
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleOCR
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleOCR
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
92f85521
编写于
8月 09, 2022
作者:
文幕地方
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add "<td></td>" to dict when "<td></td>" not in file
上级
fb9be201
变更
4
隐藏空白更改
内联
并排
Showing
4 changed files
with
42 additions
and
9 deletions
+42
-9
configs/table/SLANet.yml
configs/table/SLANet.yml
+3
-2
configs/table/table_master.yml
configs/table/table_master.yml
+4
-3
ppocr/data/imaug/label_ops.py
ppocr/data/imaug/label_ops.py
+6
-0
ppocr/postprocess/table_postprocess.py
ppocr/postprocess/table_postprocess.py
+29
-4
未找到文件。
configs/table/SLANet.yml
浏览文件 @
92f85521
...
...
@@ -58,6 +58,7 @@ Loss:
PostProcess
:
name
:
TableLabelDecode
merge_no_span_structure
:
&merge_no_span_structure
False
Metric
:
name
:
TableMetric
...
...
@@ -77,7 +78,7 @@ Train:
channel_first
:
False
-
TableLabelEncode
:
learn_empty_box
:
False
merge_no_span_structure
:
False
merge_no_span_structure
:
*merge_no_span_structure
replace_empty_cell_token
:
False
loc_reg_num
:
*loc_reg_num
max_text_length
:
*max_text_length
...
...
@@ -112,7 +113,7 @@ Eval:
channel_first
:
False
-
TableLabelEncode
:
learn_empty_box
:
False
merge_no_span_structure
:
False
merge_no_span_structure
:
*merge_no_span_structure
replace_empty_cell_token
:
False
loc_reg_num
:
*loc_reg_num
max_text_length
:
*max_text_length
...
...
configs/table/table_master.yml
浏览文件 @
92f85521
...
...
@@ -8,7 +8,7 @@ Global:
eval_batch_step
:
[
0
,
6259
]
cal_metric_during_train
:
true
pretrained_model
:
null
checkpoints
:
checkpoints
:
save_inference_dir
:
output/table_master/infer
use_visualdl
:
false
infer_img
:
ppstructure/docs/table/table.jpg
...
...
@@ -61,6 +61,7 @@ Loss:
PostProcess
:
name
:
TableMasterLabelDecode
box_shape
:
pad
merge_no_span_structure
:
&merge_no_span_structure
True
Metric
:
name
:
TableMetric
...
...
@@ -79,7 +80,7 @@ Train:
channel_first
:
False
-
TableMasterLabelEncode
:
learn_empty_box
:
False
merge_no_span_structure
:
True
merge_no_span_structure
:
*merge_no_span_structure
replace_empty_cell_token
:
True
loc_reg_num
:
*loc_reg_num
max_text_length
:
*max_text_length
...
...
@@ -115,7 +116,7 @@ Eval:
channel_first
:
False
-
TableMasterLabelEncode
:
learn_empty_box
:
False
merge_no_span_structure
:
True
merge_no_span_structure
:
*merge_no_span_structure
replace_empty_cell_token
:
True
loc_reg_num
:
*loc_reg_num
max_text_length
:
*max_text_length
...
...
ppocr/data/imaug/label_ops.py
浏览文件 @
92f85521
...
...
@@ -587,6 +587,12 @@ class TableLabelEncode(AttnLabelEncode):
line
=
line
.
decode
(
'utf-8'
).
strip
(
"
\n
"
).
strip
(
"
\r\n
"
)
dict_character
.
append
(
line
)
if
self
.
merge_no_span_structure
:
if
"<td></td>"
not
in
dict_character
:
dict_character
.
append
(
"<td></td>"
)
if
"<td>"
in
dict_character
:
dict_character
.
remove
(
"<td>"
)
dict_character
=
self
.
add_special_char
(
dict_character
)
self
.
dict
=
{}
for
i
,
char
in
enumerate
(
dict_character
):
...
...
ppocr/postprocess/table_postprocess.py
浏览文件 @
92f85521
...
...
@@ -21,8 +21,28 @@ from .rec_postprocess import AttnLabelDecode
class
TableLabelDecode
(
AttnLabelDecode
):
""" """
def
__init__
(
self
,
character_dict_path
,
**
kwargs
):
super
(
TableLabelDecode
,
self
).
__init__
(
character_dict_path
)
def
__init__
(
self
,
character_dict_path
,
merge_no_span_structure
=
False
,
**
kwargs
):
dict_character
=
[]
with
open
(
character_dict_path
,
"rb"
)
as
fin
:
lines
=
fin
.
readlines
()
for
line
in
lines
:
line
=
line
.
decode
(
'utf-8'
).
strip
(
"
\n
"
).
strip
(
"
\r\n
"
)
dict_character
.
append
(
line
)
if
merge_no_span_structure
:
if
"<td></td>"
not
in
dict_character
:
dict_character
.
append
(
"<td></td>"
)
if
"<td>"
in
dict_character
:
dict_character
.
remove
(
"<td>"
)
dict_character
=
self
.
add_special_char
(
dict_character
)
self
.
dict
=
{}
for
i
,
char
in
enumerate
(
dict_character
):
self
.
dict
[
char
]
=
i
self
.
character
=
dict_character
self
.
td_token
=
[
'<td>'
,
'<td'
,
'<td></td>'
]
def
__call__
(
self
,
preds
,
batch
=
None
):
...
...
@@ -122,8 +142,13 @@ class TableLabelDecode(AttnLabelDecode):
class
TableMasterLabelDecode
(
TableLabelDecode
):
""" """
def
__init__
(
self
,
character_dict_path
,
box_shape
=
'ori'
,
**
kwargs
):
super
(
TableMasterLabelDecode
,
self
).
__init__
(
character_dict_path
)
def
__init__
(
self
,
character_dict_path
,
box_shape
=
'ori'
,
merge_no_span_structure
=
True
,
**
kwargs
):
super
(
TableMasterLabelDecode
,
self
).
__init__
(
character_dict_path
,
merge_no_span_structure
)
self
.
box_shape
=
box_shape
assert
box_shape
in
[
'ori'
,
'pad'
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录