# PP-Structure-layout-table.yml

# Global runtime settings shared by the pipeline.
# NOTE(review): the `trt_*` and `min_subgraph_size` keys are presumably
# TensorRT-related, and `save_img` / `save_res` / `return_res` presumably
# control how results are written or returned -- confirm with the consumer.
# Booleans use canonical lowercase `true` / `false`, matching the op sections.
ENV:
  min_subgraph_size: 3
  trt_calib_mode: false
  cpu_threads: 1
  trt_use_static: false
  save_img: true
  save_res: true
  return_res: true

# Sequence of pipeline ops. `Inputs` entries appear to follow `<source>.<key>`,
# where the source is `input` or the `name` of another op in this list.
MODEL:
  # `layout`: detection op that loads the ch_PP-StructureV2 PicoDet layout
  # model and reads `input.image`.
  - DetectionOp:
      name: layout
      model_path: paddlecv://models/ch_PP-StructureV2_picodet_lcnet_x1_0_fgd_layout_infer/inference.pdmodel
      param_path: paddlecv://models/ch_PP-StructureV2_picodet_lcnet_x1_0_fgd_layout_infer/inference.pdiparams
      batch_size: 1
      image_shape: [3, 800, 608]
      Inputs:
        - input.image
      PreProcess:
        # Resize to the fixed 800 x 608 target without keeping aspect ratio.
        - Resize:
            target_size: [800, 608]
            keep_ratio: false
            interp: 2
        - NormalizeImage:
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            is_scale: true
        # These two steps take no parameters.
        - RGB2BGR: null
        - Permute: null
      PostProcess:
        - ParserDetResults:
            threshold: 0.5
            label_list: paddlecv://dict/ocr/layout_publaynet_dict.txt
  # `bbox_crop`: wired to the input image and the `layout` op's `dt_bboxes`.
  # Other ops in this file consume its `crop_image` output.
  - BboxCropOp:
      name: bbox_crop
      Inputs:
        - input.image
        - layout.dt_bboxes
  # `det`: text-detection op that loads the ch_PP-OCRv3 det model and runs on
  # the crops produced by `bbox_crop`. Other ops consume its `dt_polys` output.
  - OcrDbDetOp:
      name: det
      param_path: paddlecv://models/ch_PP-OCRv3_det_infer/inference.pdiparams
      model_path: paddlecv://models/ch_PP-OCRv3_det_infer/inference.pdmodel
      batch_size: 1
      PreProcess:
        - RGB2BGR: null
        - DetResizeForTest:
            limit_side_len: 960
            limit_type: "max"
        - NormalizeImage:
            std: [0.229, 0.224, 0.225]
            mean: [0.485, 0.456, 0.406]
            # Quoted so it stays a string; presumably evaluated by the
            # consumer -- confirm.
            scale: '1./255.'
            order: 'hwc'
        - ToCHWImage: null
        - ExpandDim:
            axis: 0
        - KeepKeys:
            keep_keys: ['image', 'shape']
      PostProcess:
        - DBPostProcess:
            thresh: 0.3
            box_thresh: 0.6
            max_candidates: 1000
            unclip_ratio: 1.5
            # Canonical lowercase boolean, consistent with the rest of the file.
            use_dilation: false
            score_mode: "fast"
            box_type: "quad"
      Inputs:
        - bbox_crop.crop_image
  # `crop`: wired to the `bbox_crop` crops and the `det` polygons. The `rec`
  # op consumes its `crop_image` output.
  - PolyCropOp:
      name: crop
      Inputs:
        - bbox_crop.crop_image
        - det.dt_polys
  # `rec`: text-recognition op that loads the ch_PP-OCRv3 rec model and reads
  # the crops emitted by `crop`. Other ops consume its `rec_text` output.
  - OcrCrnnRecOp:
      name: rec
      model_path: paddlecv://models/ch_PP-OCRv3_rec_infer/inference.pdmodel
      param_path: paddlecv://models/ch_PP-OCRv3_rec_infer/inference.pdiparams
      batch_size: 6
      Inputs:
        - crop.crop_image
      PreProcess:
        - RGB2BGR: null
        # NOTE(review): `ReisizeNormImg` looks like a misspelling of "Resize".
        # It is kept verbatim because the consumer presumably looks the step
        # up by this exact name -- confirm before renaming.
        - ReisizeNormImg:
            rec_image_shape: [3, 48, 320]
      PostProcess:
        - CTCLabelDecode:
            use_space_char: true
            character_dict_path: paddlecv://dict/ocr/ch_dict.txt
  # `filter_table`: filter op configured with `keep_keys: [table]`.
  # NOTE(review): presumably keeps only regions whose layout class name is
  # listed in `keep_keys` -- confirm against the op implementation.
  - PPStructureFilterOp:
      name: filter_table
      keep_keys: [table]
      Inputs:
        - layout.dt_cls_names
        - bbox_crop.crop_image
        - det.dt_polys
        - rec.rec_text
  # `filter_txts`: same op type and inputs as `filter_table`, configured with
  # the text, title, list and figure keys.
  # NOTE(review): presumably keeps only regions whose layout class name is
  # listed in `keep_keys` -- confirm against the op implementation.
  - PPStructureFilterOp:
      name: filter_txts
      keep_keys: [text, title, list, figure]
      Inputs:
        - layout.dt_cls_names
        - bbox_crop.crop_image
        - det.dt_polys
        - rec.rec_text
  # `table`: table-structure op that loads the ch_PP-StructureV2 SLANet model
  # and reads `filter_table.image`. Other ops consume its `dt_bboxes` and
  # `structures` outputs.
  - PPStructureTableStructureOp:
      name: table
      param_path: paddlecv://models/ch_PP-StructureV2_SLANet_infer/inference.pdiparams
      model_path: paddlecv://models/ch_PP-StructureV2_SLANet_infer/inference.pdmodel
      batch_size: 1
      PreProcess:
        - RGB2BGR: null
        - ResizeTableImage:
            max_len: 488
        - NormalizeImage:
            # Quoted so it is unambiguously a string, matching the `det` op's
            # `scale` value; presumably evaluated by the consumer -- confirm.
            scale: '1./255.'
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: 'hwc'
        # Padding size matches `max_len` of the resize step above (488).
        - PaddingTableImage:
            size: [488, 488]
        - ToCHWImage: null
        - ExpandDim:
            axis: 0
      PostProcess:
        - TableLabelDecode:
            character_dict_path: paddlecv://dict/ocr/table_structure_dict_ch.txt
            merge_no_span_structure: true
      Inputs:
        - filter_table.image
  # `Matcher`: wired to the `table` op's boxes and structures plus the
  # table-filtered polygons and recognized text. The `concat` op consumes its
  # `html` output.
  - TableMatcherOp:
      name: Matcher
      Inputs:
        - table.dt_bboxes
        - table.structures
        - filter_table.dt_polys
        - filter_table.rec_text
  # `concat`: takes outputs of the `table`, `Matcher`, `layout`, `filter_table`
  # and `filter_txts` ops. The `vis` op reads its outputs.
  # NOTE(review): presumably merges the table and text branches into one
  # result set -- confirm against the op implementation.
  - PPStructureResultConcatOp:
      name: concat
      Inputs:
        - table.structures
        - Matcher.html
        - layout.dt_bboxes
        - table.dt_bboxes
        - filter_table.dt_polys
        - filter_table.rec_text
        - filter_txts.dt_polys
        - filter_txts.rec_text
  # `vis`: output op wired to the input file name, the input image and the
  # `concat` results.
  # NOTE(review): presumably saves or returns results according to the ENV
  # `save_*` / `return_res` flags -- confirm against the op implementation.
  - PPStructureOutput:
      name: vis
      Inputs:
        - input.fn
        - input.image
        - concat.dt_polys
        - concat.rec_text
        - concat.dt_bboxes
        - concat.html
        - concat.cell_bbox
        - concat.structures