# PP-Structure-layout-table.yml
# Committed by wangguanzhong.
# Runtime settings for the pipeline defined under MODEL.
# NOTE(review): the exact meaning of each key is defined by the consuming
# runner, which is not visible in this file — confirm before tuning.
ENV:
  min_subgraph_size: 3
  # trt_* keys presumably configure TensorRT — TODO confirm.
  trt_calib_mode: false
  cpu_threads: 1
  trt_use_static: false
  # Booleans are written in canonical lowercase, matching the rest of the file.
  save_img: true
  save_res: true
  return_res: true

# List of ops. Each op's Inputs entries are either `input.<key>` or
# `<name of another op>.<output key>`.
MODEL:
  # Op named `layout`: runs on the input image. Its dt_bboxes and
  # dt_cls_names outputs are referenced by bbox_crop, the filter ops and concat.
  - DetectionOp:
      name: layout
      param_path: paddlecv://models/ch_PP-StructureV2_picodet_lcnet_x1_0_fgd_layout_infer/inference.pdiparams
      model_path: paddlecv://models/ch_PP-StructureV2_picodet_lcnet_x1_0_fgd_layout_infer/inference.pdmodel
      batch_size: 1
      image_shape: [3, 800, 608]
      PreProcess:
        - Resize:
            interp: 2
            keep_ratio: false
            # Same values as the last two entries of image_shape.
            target_size: [800, 608]
        - NormalizeImage:
            is_scale: true
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
        # The next two steps are configured without parameters.
        - RGB2BGR:
        - Permute:
      PostProcess:
        - ParserDetResults:
            label_list: paddlecv://dict/ocr/layout_publaynet_dict.txt
            threshold: 0.5
      Inputs:
        - input.image
  # Op named `bbox_crop`: takes the input image and the layout op's dt_bboxes.
  # Its crop_image output is referenced by det, crop and both filter ops.
  - BboxCropOp:
      name: bbox_crop
      Inputs:
        - input.image
        - layout.dt_bboxes
  # Op named `det`: runs the PP-OCRv3 det model on bbox_crop.crop_image.
  # Its dt_polys output is referenced by crop and both filter ops.
  - OcrDbDetOp:
      name: det
      param_path: paddlecv://models/ch_PP-OCRv3_det_infer/inference.pdiparams
      model_path: paddlecv://models/ch_PP-OCRv3_det_infer/inference.pdmodel
      batch_size: 1
      PreProcess:
        - RGB2BGR:
        - DetResizeForTest:
            limit_side_len: 960
            limit_type: 'max'
        - NormalizeImage:
            std: [0.229, 0.224, 0.225]
            mean: [0.485, 0.456, 0.406]
            # Quoted so it is always loaded as a string; it is not a YAML number.
            # Presumably evaluated by the consumer — TODO confirm.
            scale: '1./255.'
            order: 'hwc'
        - ToCHWImage:
        - ExpandDim:
            axis: 0
        - KeepKeys:
            keep_keys: ['image', 'shape']
      PostProcess:
        - DBPostProcess:
            thresh: 0.3
            box_thresh: 0.6
            max_candidates: 1000
            unclip_ratio: 1.5
            # Canonical lowercase boolean, matching the rest of the file.
            use_dilation: false
            score_mode: 'fast'
            box_type: 'quad'
      Inputs:
        - bbox_crop.crop_image
  # Op named `crop`: takes the bbox_crop crops and det's dt_polys.
  # Its crop_image output is the input of the rec op.
  - PolyCropOp:
      name: crop
      Inputs:
        - bbox_crop.crop_image
        - det.dt_polys
  # Op named `rec`: runs the PP-OCRv3 rec model on crop.crop_image.
  # Its rec_text output is referenced by both filter ops.
  - OcrCrnnRecOp:
      name: rec
      param_path: paddlecv://models/ch_PP-OCRv3_rec_infer/inference.pdiparams
      model_path: paddlecv://models/ch_PP-OCRv3_rec_infer/inference.pdmodel
      batch_size: 6
      PreProcess:
        - RGB2BGR:
        # NOTE(review): "ReisizeNormImg" looks like a misspelling of
        # "ResizeNormImg", but this key presumably has to match the name the
        # consumer registers — confirm there before renaming.
        - ReisizeNormImg:
            rec_image_shape: [3, 48, 320]
      PostProcess:
        - CTCLabelDecode:
            character_dict_path: paddlecv://dict/ocr/ch_dict.txt
            use_space_char: true
      Inputs:
        - crop.crop_image
  # Op named `filter_table`: its image, dt_polys and rec_text outputs are
  # referenced by the table, Matcher and concat ops.
  # Presumably keeps only results whose layout class is in keep_keys — confirm.
  - PPStructureFilterOp:
      name: filter_table
      keep_keys: [table]
      Inputs:
        - layout.dt_cls_names
        - bbox_crop.crop_image
        - det.dt_polys
        - rec.rec_text
  # Op named `filter_txts`: same inputs as filter_table with a different
  # keep_keys list; its dt_polys and rec_text outputs are referenced by concat.
  # Presumably keeps only results whose layout class is in keep_keys — confirm.
  - PPStructureFilterOp:
      name: filter_txts
      keep_keys: [text, title, list, figure]
      Inputs:
        - layout.dt_cls_names
        - bbox_crop.crop_image
        - det.dt_polys
        - rec.rec_text
  # Op named `table`: runs the SLANet model on filter_table.image.
  # Its dt_bboxes and structures outputs are referenced by Matcher and concat.
  - PPStructureTableStructureOp:
      name: table
      param_path: paddlecv://models/ch_PP-StructureV2_SLANet_infer/inference.pdiparams
      model_path: paddlecv://models/ch_PP-StructureV2_SLANet_infer/inference.pdmodel
      batch_size: 1
      PreProcess:
        - RGB2BGR:
        - ResizeTableImage:
            max_len: 488
        - NormalizeImage:
            # Quoted to match the det op's NormalizeImage and to make it
            # explicit that this number-looking value is a string.
            scale: '1./255.'
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: 'hwc'
        - PaddingTableImage:
            # Both entries equal ResizeTableImage.max_len above.
            size: [488, 488]
        - ToCHWImage:
        - ExpandDim:
            axis: 0
      PostProcess:
        - TableLabelDecode:
            character_dict_path: paddlecv://dict/ocr/table_structure_dict_ch.txt
            merge_no_span_structure: true
      Inputs:
        - filter_table.image
  # Op named `Matcher`: takes the table op's dt_bboxes and structures plus the
  # dt_polys and rec_text from filter_table.
  # The concat op refers to this op's html output as `Matcher.html`, so the
  # capitalised name must stay in sync with that reference.
  - TableMatcherOp:
      name: Matcher
      Inputs:
        - table.dt_bboxes
        - table.structures
        - filter_table.dt_polys
        - filter_table.rec_text
  # Op named `concat`: takes the table outputs, Matcher's html, the layout
  # boxes and the dt_polys/rec_text of both filter ops.
  # The vis op reads dt_polys, rec_text, dt_bboxes, html, cell_bbox and
  # structures from this op.
  - PPStructureResultConcatOp:
      name: concat
      Inputs:
        - table.structures
        - Matcher.html
        - layout.dt_bboxes
        - table.dt_bboxes
        - filter_table.dt_polys
        - filter_table.rec_text
        - filter_txts.dt_polys
        - filter_txts.rec_text
  # Op named `vis`, the last op in the list: takes input.fn (presumably the
  # source file name — confirm), the input image and the concat op's outputs.
  - PPStructureOutput:
      name: vis
      Inputs:
        - input.fn
        - input.image
        - concat.dt_polys
        - concat.rec_text
        - concat.dt_bboxes
        - concat.html
        - concat.cell_bbox
        - concat.structures