Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleOCR
提交
dc51469b
P
PaddleOCR
项目概览
PaddlePaddle
/
PaddleOCR
1 年多前同步成功
通知
1534
Star
32963
Fork
6643
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
108
列表
看板
标记
里程碑
合并请求
7
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleOCR
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
108
Issue
108
列表
看板
标记
里程碑
合并请求
7
合并请求
7
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
dc51469b
编写于
12月 19, 2021
作者:
文幕地方
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add encoding='utf-8'
上级
3ffaf7f2
变更
9
显示空白变更内容
内联
并排
Showing
9 changed files
with
27 additions
and
13 deletions
+27
-13
ppstructure/vqa/eval_ser.py
ppstructure/vqa/eval_ser.py
+6
-2
ppstructure/vqa/helper/eval_with_label_end2end.py
ppstructure/vqa/helper/eval_with_label_end2end.py
+1
-1
ppstructure/vqa/helper/trans_xfun_data.py
ppstructure/vqa/helper/trans_xfun_data.py
+2
-2
ppstructure/vqa/infer_re.py
ppstructure/vqa/infer_re.py
+1
-1
ppstructure/vqa/infer_ser.py
ppstructure/vqa/infer_ser.py
+7
-3
ppstructure/vqa/infer_ser_e2e.py
ppstructure/vqa/infer_ser_e2e.py
+4
-1
ppstructure/vqa/infer_ser_re_e2e.py
ppstructure/vqa/infer_ser_re_e2e.py
+4
-1
ppstructure/vqa/utils.py
ppstructure/vqa/utils.py
+1
-1
ppstructure/vqa/xfun.py
ppstructure/vqa/xfun.py
+1
-1
未找到文件。
ppstructure/vqa/eval_ser.py
浏览文件 @
dc51469b
...
...
@@ -128,12 +128,16 @@ def evaluate(args,
"f1"
:
f1_score
(
out_label_list
,
preds_list
),
}
with
open
(
os
.
path
.
join
(
args
.
output_dir
,
"test_gt.txt"
),
"w"
)
as
fout
:
with
open
(
os
.
path
.
join
(
args
.
output_dir
,
"test_gt.txt"
),
"w"
,
encoding
=
'utf-8'
)
as
fout
:
for
lbl
in
out_label_list
:
for
l
in
lbl
:
fout
.
write
(
l
+
"
\t
"
)
fout
.
write
(
"
\n
"
)
with
open
(
os
.
path
.
join
(
args
.
output_dir
,
"test_pred.txt"
),
"w"
)
as
fout
:
with
open
(
os
.
path
.
join
(
args
.
output_dir
,
"test_pred.txt"
),
"w"
,
encoding
=
'utf-8'
)
as
fout
:
for
lbl
in
preds_list
:
for
l
in
lbl
:
fout
.
write
(
l
+
"
\t
"
)
...
...
ppstructure/vqa/helper/eval_with_label_end2end.py
浏览文件 @
dc51469b
...
...
@@ -37,7 +37,7 @@ def parse_ser_results_fp(fp, fp_type="gt", ignore_background=True):
assert
fp_type
in
[
"gt"
,
"pred"
]
key
=
"label"
if
fp_type
==
"gt"
else
"pred"
res_dict
=
dict
()
with
open
(
fp
,
"r"
)
as
fin
:
with
open
(
fp
,
"r"
,
encoding
=
'utf-8'
)
as
fin
:
lines
=
fin
.
readlines
()
for
_
,
line
in
enumerate
(
lines
):
...
...
ppstructure/vqa/helper/trans_xfun_data.py
浏览文件 @
dc51469b
...
...
@@ -16,13 +16,13 @@ import json
def
transfer_xfun_data
(
json_path
=
None
,
output_file
=
None
):
with
open
(
json_path
,
"r"
)
as
fin
:
with
open
(
json_path
,
"r"
,
encoding
=
'utf-8'
)
as
fin
:
lines
=
fin
.
readlines
()
json_info
=
json
.
loads
(
lines
[
0
])
documents
=
json_info
[
"documents"
]
label_info
=
{}
with
open
(
output_file
,
"w"
)
as
fout
:
with
open
(
output_file
,
"w"
,
encoding
=
'utf-8'
)
as
fout
:
for
idx
,
document
in
enumerate
(
documents
):
img_info
=
document
[
"img"
]
document
=
document
[
"document"
]
...
...
ppstructure/vqa/infer_re.py
浏览文件 @
dc51469b
...
...
@@ -92,7 +92,7 @@ def infer(args):
def
load_ocr
(
img_folder
,
json_path
):
import
json
d
=
[]
with
open
(
json_path
,
"r"
)
as
fin
:
with
open
(
json_path
,
"r"
,
encoding
=
'utf-8'
)
as
fin
:
lines
=
fin
.
readlines
()
for
line
in
lines
:
image_name
,
info_str
=
line
.
split
(
"
\t
"
)
...
...
ppstructure/vqa/infer_ser.py
浏览文件 @
dc51469b
...
...
@@ -59,7 +59,8 @@ def pad_sentences(tokenizer,
encoded_inputs
[
"bbox"
]
=
encoded_inputs
[
"bbox"
]
+
[[
0
,
0
,
0
,
0
]
]
*
difference
else
:
assert
False
,
f
"padding_side of tokenizer just supports [
\"
right
\"
] but got
{
tokenizer
.
padding_side
}
"
assert
False
,
"padding_side of tokenizer just supports [
\"
right
\"
] but got {}"
.
format
(
tokenizer
.
padding_side
)
else
:
if
return_attention_mask
:
encoded_inputs
[
"attention_mask"
]
=
[
1
]
*
len
(
encoded_inputs
[
...
...
@@ -224,7 +225,7 @@ def infer(args):
# load ocr results json
ocr_results
=
dict
()
with
open
(
args
.
ocr_json_path
,
"r"
)
as
fin
:
with
open
(
args
.
ocr_json_path
,
"r"
,
encoding
=
'utf-8'
)
as
fin
:
lines
=
fin
.
readlines
()
for
line
in
lines
:
img_name
,
json_info
=
line
.
split
(
"
\t
"
)
...
...
@@ -234,7 +235,10 @@ def infer(args):
infer_imgs
=
get_image_file_list
(
args
.
infer_imgs
)
# loop for infer
with
open
(
os
.
path
.
join
(
args
.
output_dir
,
"infer_results.txt"
),
"w"
)
as
fout
:
with
open
(
os
.
path
.
join
(
args
.
output_dir
,
"infer_results.txt"
),
"w"
,
encoding
=
'utf-8'
)
as
fout
:
for
idx
,
img_path
in
enumerate
(
infer_imgs
):
print
(
"process: [{}/{}]"
.
format
(
idx
,
len
(
infer_imgs
),
img_path
))
...
...
ppstructure/vqa/infer_ser_e2e.py
浏览文件 @
dc51469b
...
...
@@ -113,7 +113,10 @@ if __name__ == "__main__":
# loop for infer
ser_engine
=
SerPredictor
(
args
)
with
open
(
os
.
path
.
join
(
args
.
output_dir
,
"infer_results.txt"
),
"w"
)
as
fout
:
with
open
(
os
.
path
.
join
(
args
.
output_dir
,
"infer_results.txt"
),
"w"
,
encoding
=
'utf-8'
)
as
fout
:
for
idx
,
img_path
in
enumerate
(
infer_imgs
):
print
(
"process: [{}/{}], {}"
.
format
(
idx
,
len
(
infer_imgs
),
img_path
))
...
...
ppstructure/vqa/infer_ser_re_e2e.py
浏览文件 @
dc51469b
...
...
@@ -112,7 +112,10 @@ if __name__ == "__main__":
# loop for infer
ser_re_engine
=
SerReSystem
(
args
)
with
open
(
os
.
path
.
join
(
args
.
output_dir
,
"infer_results.txt"
),
"w"
)
as
fout
:
with
open
(
os
.
path
.
join
(
args
.
output_dir
,
"infer_results.txt"
),
"w"
,
encoding
=
'utf-8'
)
as
fout
:
for
idx
,
img_path
in
enumerate
(
infer_imgs
):
print
(
"process: [{}/{}], {}"
.
format
(
idx
,
len
(
infer_imgs
),
img_path
))
...
...
ppstructure/vqa/utils.py
浏览文件 @
dc51469b
...
...
@@ -32,7 +32,7 @@ def set_seed(seed):
def
get_bio_label_maps
(
label_map_path
):
with
open
(
label_map_path
,
"r"
)
as
fin
:
with
open
(
label_map_path
,
"r"
,
encoding
=
'utf-8'
)
as
fin
:
lines
=
fin
.
readlines
()
lines
=
[
line
.
strip
()
for
line
in
lines
]
if
"O"
not
in
lines
:
...
...
ppstructure/vqa/xfun.py
浏览文件 @
dc51469b
...
...
@@ -162,7 +162,7 @@ class XFUNDataset(Dataset):
return
encoded_inputs
def
read_all_lines
(
self
,
):
with
open
(
self
.
label_path
,
"r"
)
as
fin
:
with
open
(
self
.
label_path
,
"r"
,
encoding
=
'utf-8'
)
as
fin
:
lines
=
fin
.
readlines
()
return
lines
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录