diff --git a/ppstructure/docs/quickstart.md b/ppstructure/docs/quickstart.md index 7016f0fcb6c10176cf6f9d30457a5ff98d2b06e1..52e0c77dd1d9716827e06819cc957e36f02ee1f8 100644 --- a/ppstructure/docs/quickstart.md +++ b/ppstructure/docs/quickstart.md @@ -100,7 +100,9 @@ dict 里各个字段说明如下 | output | excel和识别结果保存的地址 | ./output/table | | table_max_len | 表格结构模型预测时,图像的长边resize尺度 | 488 | | table_model_dir | 表格结构模型 inference 模型地址 | None | -| table_char_type | 表格结构模型所用字典地址 | ../ppocr/utils/dict/table_structure_dict.txt | +| table_char_dict_path | 表格结构模型所用字典地址 | ../ppocr/utils/dict/table_structure_dict.txt | +| layout_path_model | 版面分析模型地址,可以为在线地址或者本地地址,当为本地地址时,需要指定 layout_label_map, 命令行模式下可通过--layout_label_map='{0: "Text", 1: "Title", 2: "List", 3:"Table", 4:"Figure"}' 指定 | lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config | +| layout_label_map | 版面分析模型label映射字典 | None | | model_name_or_path | VQA SER模型地址 | None | | max_seq_length | VQA SER模型最大支持token长度 | 512 | | label_map_path | VQA SER 标签文件地址 | ./vqa/labels/labels_ser.txt | diff --git a/ppstructure/layout/README.md b/ppstructure/layout/README.md index 0931702a7cf411e6589a1375e014a7374442f9f0..3a4f5291763e34c8aec2c5b327d40a459bb4be1e 100644 --- a/ppstructure/layout/README.md +++ b/ppstructure/layout/README.md @@ -52,7 +52,7 @@ The following figure shows the result, with different colored detection boxes re | threshold | threshold of prediction score | 0.5 | \ | | input_shape | picture size of reshape | [3,640,640] | \ | | batch_size | testing batch size | 1 | \ | -| label_map | category mapping table | None | Setting config_ path, it can be none, and the label is automatically obtained according to the dataset name_ map | +| label_map | category mapping table | None | When config_path is set, it can be None and the label_map is obtained automatically from the dataset name; it must be specified manually when model_path is set | | enforce_cpu | whether to use CPU | False | False to use GPU, and True to force 
the use of CPU | | enforce_mkldnn | whether mkldnn acceleration is enabled in CPU prediction | True | \ | | thread_num | the number of CPU threads | 10 | \ | diff --git a/ppstructure/layout/README_ch.md b/ppstructure/layout/README_ch.md index 6fec748b7683264f5b4a7d29c0e51c84773425ba..825ff62b116171fda277528017292434bd75b941 100644 --- a/ppstructure/layout/README_ch.md +++ b/ppstructure/layout/README_ch.md @@ -52,7 +52,7 @@ show_img.show() | threshold | 预测得分的阈值 | 0.5 | \ | | input_shape | reshape之后图片尺寸 | [3,640,640] | \ | | batch_size | 测试batch size | 1 | \ | -| label_map | 类别映射表 | None | 设置config_path时,可以为None,根据数据集名称自动获取label_map | +| label_map | 类别映射表 | None | 设置config_path时,可以为None,根据数据集名称自动获取label_map,设置model_path时需要手动指定 | | enforce_cpu | 代码是否使用CPU运行 | False | 设置为False表示使用GPU,True表示强制使用CPU | | enforce_mkldnn | CPU预测中是否开启MKLDNN加速 | True | \ | | thread_num | 设置CPU线程数 | 10 | \ | diff --git a/ppstructure/predict_system.py b/ppstructure/predict_system.py index 3f3dc65875a20b3f66403afecfd60f04e3d83d61..3ae52fdd703670c4250f1b4a440004fa8b9082ad 100644 --- a/ppstructure/predict_system.py +++ b/ppstructure/predict_system.py @@ -58,6 +58,7 @@ class OCRSystem(object): self.table_layout = lp.PaddleDetectionLayoutModel( config_path=config_path, model_path=model_path, + label_map=args.layout_label_map, threshold=0.5, enable_mkldnn=args.enable_mkldnn, enforce_cpu=not args.use_gpu, diff --git a/ppstructure/utility.py b/ppstructure/utility.py index ce7a801b1bb4094d3f4d2ba467332c6763ad6287..43cb0b0873812baf3ce2dc689fb62f1d0ca2c551 100644 --- a/ppstructure/utility.py +++ b/ppstructure/utility.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import ast from PIL import Image import numpy as np from tools.infer.utility import draw_ocr_box_txt, init_args as infer_args @@ -34,7 +35,11 @@ def init_args(): "--layout_path_model", type=str, default="lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config") - + parser.add_argument( + "--layout_label_map", + type=ast.literal_eval, + default=None, + help='label map according to ppstructure/layout/README_ch.md') # params for ser parser.add_argument("--model_name_or_path", type=str) parser.add_argument("--max_seq_length", type=int, default=512)