提交 d8571bdb 编写于 作者: 文幕地方's avatar 文幕地方

move imgs to doc

上级 0d7ee968
...@@ -6,12 +6,12 @@ Global: ...@@ -6,12 +6,12 @@ Global:
save_model_dir: ./output/re_layoutxlm/ save_model_dir: ./output/re_layoutxlm/
save_epoch_step: 2000 save_epoch_step: 2000
# evaluation is run every 38 iterations after the 0th iteration # evaluation is run every 19 iterations after the 0th iteration
eval_batch_step: [ 0, 38 ] eval_batch_step: [ 0, 19 ]
cal_metric_during_train: False cal_metric_during_train: False
pretrained_model: &pretrained_model layoutxlm-base-uncased pretrained_model: &pretrained_model layoutxlm-base-uncased # This field can only be changed by modifying the configuration file
save_inference_dir: save_inference_dir:
use_visualdl: False use_visualdl: False
infer_img: ppstructure/vqa/images/input/zh_val_21.jpg infer_img: doc/vqa/input/zh_val_21.jpg
save_res_path: ./output/re/ save_res_path: ./output/re/
Architecture: Architecture:
......
...@@ -8,10 +8,10 @@ Global: ...@@ -8,10 +8,10 @@ Global:
# evaluation is run every 19 iterations after the 0th iteration # evaluation is run every 19 iterations after the 0th iteration
eval_batch_step: [ 0, 19 ] eval_batch_step: [ 0, 19 ]
cal_metric_during_train: False cal_metric_during_train: False
pretrained_model: &pretrained_model layoutlm-base-uncased pretrained_model: &pretrained_model layoutlm-base-uncased # This field can only be changed by modifying the configuration file
save_inference_dir: save_inference_dir:
use_visualdl: False use_visualdl: False
infer_img: ppstructure/vqa/images/input/zh_val_0.jpg infer_img: doc/vqa/input/zh_val_0.jpg
save_res_path: ./output/ser/predicts_layoutlm.txt save_res_path: ./output/ser/predicts_layoutlm.txt
Architecture: Architecture:
......
...@@ -8,10 +8,10 @@ Global: ...@@ -8,10 +8,10 @@ Global:
# evaluation is run every 19 iterations after the 0th iteration # evaluation is run every 19 iterations after the 0th iteration
eval_batch_step: [ 0, 19 ] eval_batch_step: [ 0, 19 ]
cal_metric_during_train: False cal_metric_during_train: False
pretrained_model: &pretrained_model layoutxlm-base-uncased pretrained_model: &pretrained_model layoutxlm-base-uncased # This field can only be changed by modifying the configuration file
save_inference_dir: save_inference_dir:
use_visualdl: False use_visualdl: False
infer_img: ppstructure/vqa/images/input/zh_val_42.jpg infer_img: doc/vqa/input/zh_val_42.jpg
save_res_path: ./output/ser save_res_path: ./output/ser
Architecture: Architecture:
......
...@@ -787,7 +787,7 @@ class SARLabelEncode(BaseRecLabelEncode): ...@@ -787,7 +787,7 @@ class SARLabelEncode(BaseRecLabelEncode):
class VQATokenLabelEncode(object): class VQATokenLabelEncode(object):
""" """
基于NLP的标签编码 Label encode for NLP VQA methods
""" """
def __init__(self, def __init__(self,
......
...@@ -122,7 +122,7 @@ class SimpleDataSet(Dataset): ...@@ -122,7 +122,7 @@ class SimpleDataSet(Dataset):
self.logger.error( self.logger.error(
"When parsing line {}, error happened with msg: {}".format( "When parsing line {}, error happened with msg: {}".format(
data_line, traceback.format_exc())) data_line, traceback.format_exc()))
# outs = None outs = None
if outs is None: if outs is None:
# during evaluation, we should fix the idx to get same results for many times of evaluation. # during evaluation, we should fix the idx to get same results for many times of evaluation.
rnd_idx = np.random.randint(self.__len__( rnd_idx = np.random.randint(self.__len__(
......
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
......
...@@ -34,7 +34,7 @@ class VQAReTokenLayoutLMPostProcess(object): ...@@ -34,7 +34,7 @@ class VQAReTokenLayoutLMPostProcess(object):
entity_idx_dict_batch = kwargs['entity_idx_dict_batch'] entity_idx_dict_batch = kwargs['entity_idx_dict_batch']
pred_relations = preds['pred_relations'] pred_relations = preds['pred_relations']
# 进行 relations 到 ocr信息的转换 # merge relations and ocr info
results = [] results = []
for pred_relation, ser_result, entity_idx_dict in zip( for pred_relation, ser_result, entity_idx_dict in zip(
pred_relations, ser_results, entity_idx_dict_batch): pred_relations, ser_results, entity_idx_dict_batch):
......
...@@ -34,7 +34,7 @@ PP-Structure 里的 DOC-VQA算法基于PaddleNLP自然语言处理算法库进 ...@@ -34,7 +34,7 @@ PP-Structure 里的 DOC-VQA算法基于PaddleNLP自然语言处理算法库进
### 2.1 SER ### 2.1 SER
![](./images/result_ser/zh_val_0_ser.jpg) | ![](./images/result_ser/zh_val_42_ser.jpg) ![](../../doc/vqa/result_ser/zh_val_0_ser.jpg) | ![](../../doc/vqa/result_ser/zh_val_42_ser.jpg)
---|--- ---|---
图中不同颜色的框表示不同的类别,对于XFUN数据集,有`QUESTION`, `ANSWER`, `HEADER` 3种类别 图中不同颜色的框表示不同的类别,对于XFUN数据集,有`QUESTION`, `ANSWER`, `HEADER` 3种类别
...@@ -48,7 +48,7 @@ PP-Structure 里的 DOC-VQA算法基于PaddleNLP自然语言处理算法库进 ...@@ -48,7 +48,7 @@ PP-Structure 里的 DOC-VQA算法基于PaddleNLP自然语言处理算法库进
### 2.2 RE ### 2.2 RE
![](./images/result_re/zh_val_21_re.jpg) | ![](./images/result_re/zh_val_40_re.jpg) ![](../../doc/vqa/result_re/zh_val_21_re.jpg) | ![](../../doc/vqa/result_re/zh_val_40_re.jpg)
---|--- ---|---
...@@ -164,7 +164,7 @@ CUDA_VISIBLE_DEVICES=0 python3 tools/eval.py -c configs/vqa/ser/layoutxlm.yml -o ...@@ -164,7 +164,7 @@ CUDA_VISIBLE_DEVICES=0 python3 tools/eval.py -c configs/vqa/ser/layoutxlm.yml -o
使用如下命令即可完成`OCR引擎 + SER`的串联预测 使用如下命令即可完成`OCR引擎 + SER`的串联预测
```shell ```shell
CUDA_VISIBLE_DEVICES=0 python3 tools/infer_vqa_token_ser.py -c configs/vqa/ser/layoutxlm.yml -o Architecture.Backbone.checkpoints=PP-Layout_v1.0_ser_pretrained/ Global.infer_img=ppstructure/vqa/images/input/zh_val_42.jpg CUDA_VISIBLE_DEVICES=0 python3 tools/infer_vqa_token_ser.py -c configs/vqa/ser/layoutxlm.yml -o Architecture.Backbone.checkpoints=PP-Layout_v1.0_ser_pretrained/ Global.infer_img=doc/vqa/input/zh_val_42.jpg
``` ```
最终会在`config.Global.save_res_path`字段所配置的目录下保存预测结果可视化图像以及预测结果文本文件,预测结果文本文件名为`infer_results.txt` 最终会在`config.Global.save_res_path`字段所配置的目录下保存预测结果可视化图像以及预测结果文本文件,预测结果文本文件名为`infer_results.txt`
...@@ -219,7 +219,7 @@ CUDA_VISIBLE_DEVICES=0 python3 tools/eval.py -c configs/vqa/re/layoutxlm.yml -o ...@@ -219,7 +219,7 @@ CUDA_VISIBLE_DEVICES=0 python3 tools/eval.py -c configs/vqa/re/layoutxlm.yml -o
使用如下命令即可完成`OCR引擎 + SER + RE`的串联预测 使用如下命令即可完成`OCR引擎 + SER + RE`的串联预测
```shell ```shell
export CUDA_VISIBLE_DEVICES=0 export CUDA_VISIBLE_DEVICES=0
python3 tools/infer_vqa_token_ser_re.py -c configs/vqa/re/layoutxlm.yml -o Architecture.Backbone.checkpoints=PP-Layout_v1.0_re_pretrained/ Global.infer_img=ppstructure/vqa/images/input/zh_val_21.jpg -c_ser configs/vqa/ser/layoutxlm.yml -o_ser Architecture.Backbone.checkpoints=PP-Layout_v1.0_ser_pretrained/ python3 tools/infer_vqa_token_ser_re.py -c configs/vqa/re/layoutxlm.yml -o Architecture.Backbone.checkpoints=PP-Layout_v1.0_re_pretrained/ Global.infer_img=doc/vqa/input/zh_val_21.jpg -c_ser configs/vqa/ser/layoutxlm.yml -o_ser Architecture.Backbone.checkpoints=PP-Layout_v1.0_ser_pretrained/
``` ```
最终会在`config.Global.save_res_path`字段所配置的目录下保存预测结果可视化图像以及预测结果文本文件,预测结果文本文件名为`infer_results.txt` 最终会在`config.Global.save_res_path`字段所配置的目录下保存预测结果可视化图像以及预测结果文本文件,预测结果文本文件名为`infer_results.txt`
......
...@@ -104,7 +104,7 @@ def make_input(ser_inputs, ser_results): ...@@ -104,7 +104,7 @@ def make_input(ser_inputs, ser_results):
ser_inputs[8] = entities_batch ser_inputs[8] = entities_batch
ser_inputs.append(relations_batch) ser_inputs.append(relations_batch)
# remove ocr_info, segment_offset_id and label from the ser input
ser_inputs.pop(7) ser_inputs.pop(7)
ser_inputs.pop(6) ser_inputs.pop(6)
ser_inputs.pop(1) ser_inputs.pop(1)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册