From d8571bdb2ae5551466b1e56782580f0a304dd9cd Mon Sep 17 00:00:00 2001 From: WenmuZhou <572459439@qq.com> Date: Wed, 5 Jan 2022 14:35:21 +0000 Subject: [PATCH] move imgs to doc --- configs/vqa/re/layoutxlm.yml | 6 +++--- configs/vqa/ser/layoutlm.yml | 4 ++-- configs/vqa/ser/layoutxlm.yml | 4 ++-- .../vqa/images => doc/vqa}/input/zh_val_0.jpg | Bin .../vqa/images => doc/vqa}/input/zh_val_21.jpg | Bin .../vqa/images => doc/vqa}/input/zh_val_40.jpg | Bin .../vqa/images => doc/vqa}/input/zh_val_42.jpg | Bin .../images => doc/vqa}/result_re/zh_val_21_re.jpg | Bin .../images => doc/vqa}/result_re/zh_val_40_re.jpg | Bin .../images => doc/vqa}/result_ser/zh_val_0_ser.jpg | Bin .../images => doc/vqa}/result_ser/zh_val_42_ser.jpg | Bin ppocr/data/imaug/label_ops.py | 2 +- ppocr/data/simple_dataset.py | 2 +- ppocr/losses/vqa_token_layoutlm_loss.py | 2 +- .../vqa_token_re_layoutlm_postprocess.py | 2 +- ppstructure/vqa/README.md | 8 ++++---- tools/infer_vqa_token_ser_re.py | 2 +- 17 files changed, 16 insertions(+), 16 deletions(-) rename {ppstructure/vqa/images => doc/vqa}/input/zh_val_0.jpg (100%) rename {ppstructure/vqa/images => doc/vqa}/input/zh_val_21.jpg (100%) rename {ppstructure/vqa/images => doc/vqa}/input/zh_val_40.jpg (100%) rename {ppstructure/vqa/images => doc/vqa}/input/zh_val_42.jpg (100%) rename {ppstructure/vqa/images => doc/vqa}/result_re/zh_val_21_re.jpg (100%) rename {ppstructure/vqa/images => doc/vqa}/result_re/zh_val_40_re.jpg (100%) rename {ppstructure/vqa/images => doc/vqa}/result_ser/zh_val_0_ser.jpg (100%) rename {ppstructure/vqa/images => doc/vqa}/result_ser/zh_val_42_ser.jpg (100%) diff --git a/configs/vqa/re/layoutxlm.yml b/configs/vqa/re/layoutxlm.yml index 06f4bf97..bb367f0e 100644 --- a/configs/vqa/re/layoutxlm.yml +++ b/configs/vqa/re/layoutxlm.yml @@ -6,12 +6,12 @@ Global: save_model_dir: ./output/re_layoutxlm/ save_epoch_step: 2000 # evaluation is run every 10 iterations after the 0th iteration - eval_batch_step: [ 0, 38 ] + eval_batch_step: [ 0, 19 ] cal_metric_during_train: False - pretrained_model: &pretrained_model layoutxlm-base-uncased + pretrained_model: &pretrained_model layoutxlm-base-uncased # This field can only be changed by modifying the configuration file save_inference_dir: use_visualdl: False - infer_img: ppstructure/vqa/images/input/zh_val_21.jpg + infer_img: doc/vqa/input/zh_val_21.jpg save_res_path: ./output/re/ Architecture: diff --git a/configs/vqa/ser/layoutlm.yml b/configs/vqa/ser/layoutlm.yml index e33a6a23..a635fc7d 100644 --- a/configs/vqa/ser/layoutlm.yml +++ b/configs/vqa/ser/layoutlm.yml @@ -8,10 +8,10 @@ Global: # evaluation is run every 10 iterations after the 0th iteration eval_batch_step: [ 0, 19 ] cal_metric_during_train: False - pretrained_model: &pretrained_model layoutlm-base-uncased + pretrained_model: &pretrained_model layoutlm-base-uncased # This field can only be changed by modifying the configuration file save_inference_dir: use_visualdl: False - infer_img: ppstructure/vqa/images/input/zh_val_0.jpg + infer_img: doc/vqa/input/zh_val_0.jpg save_res_path: ./output/ser/predicts_layoutlm.txt Architecture: diff --git a/configs/vqa/ser/layoutxlm.yml b/configs/vqa/ser/layoutxlm.yml index 1197e0ae..1c1eac22 100644 --- a/configs/vqa/ser/layoutxlm.yml +++ b/configs/vqa/ser/layoutxlm.yml @@ -8,10 +8,10 @@ Global: # evaluation is run every 10 iterations after the 0th iteration eval_batch_step: [ 0, 19 ] cal_metric_during_train: False - pretrained_model: &pretrained_model layoutxlm-base-uncased + pretrained_model: &pretrained_model layoutxlm-base-uncased # This field can only be changed by modifying the configuration file save_inference_dir: use_visualdl: False - infer_img: ppstructure/vqa/images/input/zh_val_42.jpg + infer_img: doc/vqa/input/zh_val_42.jpg save_res_path: ./output/ser Architecture: diff --git a/ppstructure/vqa/images/input/zh_val_0.jpg b/doc/vqa/input/zh_val_0.jpg similarity index 100% rename from ppstructure/vqa/images/input/zh_val_0.jpg rename to doc/vqa/input/zh_val_0.jpg diff --git a/ppstructure/vqa/images/input/zh_val_21.jpg b/doc/vqa/input/zh_val_21.jpg similarity index 100% rename from ppstructure/vqa/images/input/zh_val_21.jpg rename to doc/vqa/input/zh_val_21.jpg diff --git a/ppstructure/vqa/images/input/zh_val_40.jpg b/doc/vqa/input/zh_val_40.jpg similarity index 100% rename from ppstructure/vqa/images/input/zh_val_40.jpg rename to doc/vqa/input/zh_val_40.jpg diff --git a/ppstructure/vqa/images/input/zh_val_42.jpg b/doc/vqa/input/zh_val_42.jpg similarity index 100% rename from ppstructure/vqa/images/input/zh_val_42.jpg rename to doc/vqa/input/zh_val_42.jpg diff --git a/ppstructure/vqa/images/result_re/zh_val_21_re.jpg b/doc/vqa/result_re/zh_val_21_re.jpg similarity index 100% rename from ppstructure/vqa/images/result_re/zh_val_21_re.jpg rename to doc/vqa/result_re/zh_val_21_re.jpg diff --git a/ppstructure/vqa/images/result_re/zh_val_40_re.jpg b/doc/vqa/result_re/zh_val_40_re.jpg similarity index 100% rename from ppstructure/vqa/images/result_re/zh_val_40_re.jpg rename to doc/vqa/result_re/zh_val_40_re.jpg diff --git a/ppstructure/vqa/images/result_ser/zh_val_0_ser.jpg b/doc/vqa/result_ser/zh_val_0_ser.jpg similarity index 100% rename from ppstructure/vqa/images/result_ser/zh_val_0_ser.jpg rename to doc/vqa/result_ser/zh_val_0_ser.jpg diff --git a/ppstructure/vqa/images/result_ser/zh_val_42_ser.jpg b/doc/vqa/result_ser/zh_val_42_ser.jpg similarity index 100% rename from ppstructure/vqa/images/result_ser/zh_val_42_ser.jpg rename to doc/vqa/result_ser/zh_val_42_ser.jpg diff --git a/ppocr/data/imaug/label_ops.py b/ppocr/data/imaug/label_ops.py index d80f4ec7..4cbd7900 100644 --- a/ppocr/data/imaug/label_ops.py +++ b/ppocr/data/imaug/label_ops.py @@ -787,7 +787,7 @@ class SARLabelEncode(BaseRecLabelEncode): class VQATokenLabelEncode(object): """ - 基于NLP的标签编码 + Label encode for NLP VQA methods """ def __init__(self, diff --git a/ppocr/data/simple_dataset.py b/ppocr/data/simple_dataset.py index 1dc86d6d..08b00d09 100644 --- a/ppocr/data/simple_dataset.py +++ b/ppocr/data/simple_dataset.py @@ -122,7 +122,7 @@ class SimpleDataSet(Dataset): self.logger.error( "When parsing line {}, error happened with msg: {}".format( data_line, traceback.format_exc())) - # outs = None + outs = None if outs is None: # during evaluation, we should fix the idx to get same results for many times of evaluation. rnd_idx = np.random.randint(self.__len__( diff --git a/ppocr/losses/vqa_token_layoutlm_loss.py b/ppocr/losses/vqa_token_layoutlm_loss.py index 7ad311f5..244893d9 100755 --- a/ppocr/losses/vqa_token_layoutlm_loss.py +++ b/ppocr/losses/vqa_token_layoutlm_loss.py @@ -1,4 +1,4 @@ -# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/ppocr/postprocess/vqa_token_re_layoutlm_postprocess.py b/ppocr/postprocess/vqa_token_re_layoutlm_postprocess.py index 1fbea0fa..1d55d13d 100644 --- a/ppocr/postprocess/vqa_token_re_layoutlm_postprocess.py +++ b/ppocr/postprocess/vqa_token_re_layoutlm_postprocess.py @@ -34,7 +34,7 @@ class VQAReTokenLayoutLMPostProcess(object): entity_idx_dict_batch = kwargs['entity_idx_dict_batch'] pred_relations = preds['pred_relations'] - # 进行 relations 到 ocr信息的转换 + # merge relations and ocr info results = [] for pred_relation, ser_result, entity_idx_dict in zip( pred_relations, ser_results, entity_idx_dict_batch): diff --git a/ppstructure/vqa/README.md b/ppstructure/vqa/README.md index 58095665..ca3e2bcc 100644 --- a/ppstructure/vqa/README.md +++ b/ppstructure/vqa/README.md @@ -34,7 +34,7 @@ PP-Structure 里的 DOC-VQA算法基于PaddleNLP自然语言处理算法库进 ### 2.1 SER -![](./images/result_ser/zh_val_0_ser.jpg) | ![](./images/result_ser/zh_val_42_ser.jpg) +![](../../doc/vqa/result_ser/zh_val_0_ser.jpg) | ![](../../doc/vqa/result_ser/zh_val_42_ser.jpg) ---|--- 图中不同颜色的框表示不同的类别,对于XFUN数据集,有`QUESTION`, `ANSWER`, `HEADER` 3种类别 @@ -48,7 +48,7 @@ PP-Structure 里的 DOC-VQA算法基于PaddleNLP自然语言处理算法库进 ### 2.2 RE -![](./images/result_re/zh_val_21_re.jpg) | ![](./images/result_re/zh_val_40_re.jpg) +![](../../doc/vqa/result_re/zh_val_21_re.jpg) | ![](../../doc/vqa/result_re/zh_val_40_re.jpg) ---|--- @@ -164,7 +164,7 @@ CUDA_VISIBLE_DEVICES=0 python3 tools/eval.py -c configs/vqa/ser/layoutxlm.yml -o 使用如下命令即可完成`OCR引擎 + SER`的串联预测 ```shell -CUDA_VISIBLE_DEVICES=0 python3 tools/infer_vqa_token_ser.py -c configs/vqa/ser/layoutxlm.yml -o Architecture.Backbone.checkpoints=PP-Layout_v1.0_ser_pretrained/ Global.infer_img=ppstructure/vqa/images/input/zh_val_42.jpg +CUDA_VISIBLE_DEVICES=0 python3 tools/infer_vqa_token_ser.py -c configs/vqa/ser/layoutxlm.yml -o Architecture.Backbone.checkpoints=PP-Layout_v1.0_ser_pretrained/ Global.infer_img=doc/vqa/input/zh_val_42.jpg ``` 最终会在`config.Global.save_res_path`字段所配置的目录下保存预测结果可视化图像以及预测结果文本文件,预测结果文本文件名为`infer_results.txt`。 @@ -219,7 +219,7 @@ CUDA_VISIBLE_DEVICES=0 python3 tools/eval.py -c configs/vqa/re/layoutxlm.yml -o 使用如下命令即可完成`OCR引擎 + SER + RE`的串联预测 ```shell export CUDA_VISIBLE_DEVICES=0 -python3 tools/infer_vqa_token_ser_re.py -c configs/vqa/re/layoutxlm.yml -o Architecture.Backbone.checkpoints=PP-Layout_v1.0_re_pretrained/ Global.infer_img=ppstructure/vqa/images/input/zh_val_21.jpg -c_ser configs/vqa/ser/layoutxlm.yml -o_ser Architecture.Backbone.checkpoints=PP-Layout_v1.0_ser_pretrained/ +python3 tools/infer_vqa_token_ser_re.py -c configs/vqa/re/layoutxlm.yml -o Architecture.Backbone.checkpoints=PP-Layout_v1.0_re_pretrained/ Global.infer_img=doc/vqa/input/zh_val_21.jpg -c_ser configs/vqa/ser/layoutxlm.yml -o_ser Architecture.Backbone.checkpoints=PP-Layout_v1.0_ser_pretrained/ ``` 最终会在`config.Global.save_res_path`字段所配置的目录下保存预测结果可视化图像以及预测结果文本文件,预测结果文本文件名为`infer_results.txt`。 diff --git a/tools/infer_vqa_token_ser_re.py b/tools/infer_vqa_token_ser_re.py index ce9a36c5..fd62ace8 100755 --- a/tools/infer_vqa_token_ser_re.py +++ b/tools/infer_vqa_token_ser_re.py @@ -104,7 +104,7 @@ def make_input(ser_inputs, ser_results): ser_inputs[8] = entities_batch ser_inputs.append(relations_batch) - + # remove ocr_info segment_offset_id and label in ser input ser_inputs.pop(7) ser_inputs.pop(6) ser_inputs.pop(1) -- GitLab