Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
weixin_41840029
PaddleOCR
提交
9131c4a7
P
PaddleOCR
项目概览
weixin_41840029
/
PaddleOCR
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleOCR
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleOCR
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
9131c4a7
编写于
12月 20, 2021
作者:
文幕地方
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add LayoutLM ser
上级
f01dbb56
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
79 addition
and
17 deletion
+79
-17
ppstructure/vqa/README.md
ppstructure/vqa/README.md
+5
-5
ppstructure/vqa/infer.sh
ppstructure/vqa/infer.sh
+61
-0
ppstructure/vqa/infer_re.py
ppstructure/vqa/infer_re.py
+5
-5
ppstructure/vqa/infer_ser_e2e.py
ppstructure/vqa/infer_ser_e2e.py
+2
-2
ppstructure/vqa/infer_ser_re_e2e.py
ppstructure/vqa/infer_ser_re_e2e.py
+6
-5
未找到文件。
ppstructure/vqa/README.md
浏览文件 @
9131c4a7
...
...
@@ -195,7 +195,7 @@ export CUDA_VISIBLE_DEVICES=0
python3.7 infer_ser.py
\
--model_name_or_path
"./PP-Layout_v1.0_ser_pretrained/"
\
--ser_model_type
"LayoutXLM"
\
--output_dir
"output
_res
/"
\
--output_dir
"output
/ser
/"
\
--infer_imgs
"XFUND/zh_val/image/"
\
--ocr_json_path
"XFUND/zh_val/xfun_normalize_val.json"
```
...
...
@@ -210,7 +210,7 @@ python3.7 infer_ser_e2e.py \
--model_name_or_path
"./output/PP-Layout_v1.0_ser_pretrained/"
\
--ser_model_type
"LayoutXLM"
\
--max_seq_length
512
\
--output_dir
"output
_res
_e2e/"
\
--output_dir
"output
/ser
_e2e/"
\
--infer_imgs
"images/input/zh_val_0.jpg"
```
...
...
@@ -284,7 +284,7 @@ python3 eval_re.py \
--eval_data_dir
"XFUND/zh_val/image"
\
--eval_label_path
"XFUND/zh_val/xfun_normalize_val.json"
\
--label_map_path
'labels/labels_ser.txt'
\
--output_dir
"output/re
_test
/"
\
--output_dir
"output/re/"
\
--per_gpu_eval_batch_size
8
\
--num_workers
8
\
--seed
2048
...
...
@@ -302,7 +302,7 @@ python3 infer_re.py \
--eval_data_dir
"XFUND/zh_val/image"
\
--eval_label_path
"XFUND/zh_val/xfun_normalize_val.json"
\
--label_map_path
'labels/labels_ser.txt'
\
--output_dir
"output
_res
"
\
--output_dir
"output
/re/
"
\
--per_gpu_eval_batch_size
1
\
--seed
2048
```
...
...
@@ -317,7 +317,7 @@ python3.7 infer_ser_re_e2e.py \
--model_name_or_path
"./PP-Layout_v1.0_ser_pretrained/"
\
--re_model_name_or_path
"./PP-Layout_v1.0_re_pretrained/"
\
--max_seq_length
512
\
--output_dir
"output
_ser_re_e2e_train
/"
\
--output_dir
"output
/ser_re_e2e
/"
\
--infer_imgs
"images/input/zh_val_21.jpg"
```
...
...
ppstructure/vqa/infer.sh
0 → 100644
浏览文件 @
9131c4a7
export
CUDA_VISIBLE_DEVICES
=
6
# python3.7 infer_ser_e2e.py \
# --model_name_or_path "output/ser_distributed/best_model" \
# --max_seq_length 512 \
# --output_dir "output_res_e2e/" \
# --infer_imgs "/ssd1/zhoujun20/VQA/data/XFUN_v1.0_data/zh.val/zh_val_0.jpg"
# python3.7 infer_ser_re_e2e.py \
# --model_name_or_path "output/ser_distributed/best_model" \
# --re_model_name_or_path "output/re_test/best_model" \
# --max_seq_length 512 \
# --output_dir "output_ser_re_e2e_train/" \
# --infer_imgs "images/input/zh_val_21.jpg"
# python3.7 infer_ser.py \
# --model_name_or_path "output/ser_LayoutLM/best_model" \
# --ser_model_type "LayoutLM" \
# --output_dir "ser_LayoutLM/" \
# --infer_imgs "images/input/zh_val_21.jpg" \
# --ocr_json_path "/ssd1/zhoujun20/VQA/data/XFUN_v1.0_data/xfun_normalize_val.json"
python3.7 infer_ser.py
\
--model_name_or_path
"output/ser_new/best_model"
\
--ser_model_type
"LayoutXLM"
\
--output_dir
"ser_new/"
\
--infer_imgs
"images/input/zh_val_21.jpg"
\
--ocr_json_path
"/ssd1/zhoujun20/VQA/data/XFUN_v1.0_data/xfun_normalize_val.json"
# python3.7 infer_ser_e2e.py \
# --model_name_or_path "output/ser_new/best_model" \
# --ser_model_type "LayoutXLM" \
# --max_seq_length 512 \
# --output_dir "output/ser_new/" \
# --infer_imgs "images/input/zh_val_0.jpg"
# python3.7 infer_ser_e2e.py \
# --model_name_or_path "output/ser_LayoutLM/best_model" \
# --ser_model_type "LayoutLM" \
# --max_seq_length 512 \
# --output_dir "output/ser_LayoutLM/" \
# --infer_imgs "images/input/zh_val_0.jpg"
# python3 infer_re.py \
# --model_name_or_path "/ssd1/zhoujun20/VQA/PaddleOCR/ppstructure/vqa/output/re_test/best_model/" \
# --max_seq_length 512 \
# --eval_data_dir "/ssd1/zhoujun20/VQA/data/XFUN_v1.0_data/zh.val" \
# --eval_label_path "/ssd1/zhoujun20/VQA/data/XFUN_v1.0_data/xfun_normalize_val.json" \
# --label_map_path 'labels/labels_ser.txt' \
# --output_dir "output_res" \
# --per_gpu_eval_batch_size 1 \
# --seed 2048
# python3.7 infer_ser_re_e2e.py \
# --model_name_or_path "output/ser_LayoutLM/best_model" \
# --ser_model_type "LayoutLM" \
# --re_model_name_or_path "output/re_new/best_model" \
# --max_seq_length 512 \
# --output_dir "output_ser_re_e2e/" \
# --infer_imgs "images/input/zh_val_21.jpg"
\ No newline at end of file
ppstructure/vqa/infer_re.py
浏览文件 @
9131c4a7
...
...
@@ -56,19 +56,19 @@ def infer(args):
ocr_info_list
=
load_ocr
(
args
.
eval_data_dir
,
args
.
eval_label_path
)
for
idx
,
batch
in
enumerate
(
eval_dataloader
):
ocr_info
=
ocr_info_list
[
idx
]
image_path
=
ocr_info
[
'image_path'
]
ocr_info
=
ocr_info
[
'ocr_info'
]
save_img_path
=
os
.
path
.
join
(
args
.
output_dir
,
os
.
path
.
splitext
(
os
.
path
.
basename
(
im
g
_path
))[
0
]
+
"_re.jpg"
)
os
.
path
.
splitext
(
os
.
path
.
basename
(
im
age
_path
))[
0
]
+
"_re.jpg"
)
logger
.
info
(
"[Infer] process: {}/{}, save_result to {}"
.
format
(
idx
,
len
(
eval_dataloader
),
save_img_path
))
with
paddle
.
no_grad
():
outputs
=
model
(
**
batch
)
pred_relations
=
outputs
[
'pred_relations'
]
ocr_info
=
ocr_info_list
[
idx
]
image_path
=
ocr_info
[
'image_path'
]
ocr_info
=
ocr_info
[
'ocr_info'
]
# 根据entity里的信息,做token解码后去过滤不要的ocr_info
ocr_info
=
filter_bg_by_txt
(
ocr_info
,
batch
,
tokenizer
)
...
...
ppstructure/vqa/infer_ser_e2e.py
浏览文件 @
9131c4a7
...
...
@@ -98,13 +98,13 @@ class SerPredictor(object):
ocr_info
=
ocr_info
,
max_seq_len
=
self
.
max_seq_length
)
if
args
.
ser_model_type
==
'LayoutLM'
:
if
self
.
args
.
ser_model_type
==
'LayoutLM'
:
preds
=
self
.
model
(
input_ids
=
inputs
[
"input_ids"
],
bbox
=
inputs
[
"bbox"
],
token_type_ids
=
inputs
[
"token_type_ids"
],
attention_mask
=
inputs
[
"attention_mask"
])
elif
args
.
ser_model_type
==
'LayoutXLM'
:
elif
self
.
args
.
ser_model_type
==
'LayoutXLM'
:
preds
=
self
.
model
(
input_ids
=
inputs
[
"input_ids"
],
bbox
=
inputs
[
"bbox"
],
...
...
ppstructure/vqa/infer_ser_re_e2e.py
浏览文件 @
9131c4a7
...
...
@@ -117,7 +117,11 @@ if __name__ == "__main__":
"w"
,
encoding
=
'utf-8'
)
as
fout
:
for
idx
,
img_path
in
enumerate
(
infer_imgs
):
print
(
"process: [{}/{}], {}"
.
format
(
idx
,
len
(
infer_imgs
),
img_path
))
save_img_path
=
os
.
path
.
join
(
args
.
output_dir
,
os
.
path
.
splitext
(
os
.
path
.
basename
(
img_path
))[
0
]
+
"_re.jpg"
)
print
(
"process: [{}/{}], save_result to {}"
.
format
(
idx
,
len
(
infer_imgs
),
save_img_path
))
img
=
cv2
.
imread
(
img_path
)
...
...
@@ -128,7 +132,4 @@ if __name__ == "__main__":
},
ensure_ascii
=
False
)
+
"
\n
"
)
img_res
=
draw_re_results
(
img
,
result
)
cv2
.
imwrite
(
os
.
path
.
join
(
args
.
output_dir
,
os
.
path
.
splitext
(
os
.
path
.
basename
(
img_path
))[
0
]
+
"_re.jpg"
),
img_res
)
cv2
.
imwrite
(
save_img_path
,
img_res
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录