paddleocr.py 30.4 KB
Newer Older
W
WenmuZhou 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import sys
文幕地方's avatar
文幕地方 已提交
17
import importlib
W
WenmuZhou 已提交
18 19

__dir__ = os.path.dirname(__file__)
Z
zhoujun 已提交
20 21 22

import paddle

W
WenmuZhou 已提交
23 24 25
sys.path.append(os.path.join(__dir__, ''))

import cv2
W
WenmuZhou 已提交
26
import logging
W
WenmuZhou 已提交
27 28
import numpy as np
from pathlib import Path
A
andyj 已提交
29 30 31
import base64
from io import BytesIO
from PIL import Image
W
WenmuZhou 已提交
32

文幕地方's avatar
文幕地方 已提交
33 34 35 36
tools = importlib.import_module('.', 'tools')
ppocr = importlib.import_module('.', 'ppocr')
ppstructure = importlib.import_module('.', 'ppstructure')

W
WenmuZhou 已提交
37
from tools.infer import predict_system
W
WenmuZhou 已提交
38
from ppocr.utils.logging import get_logger
W
WenmuZhou 已提交
39

W
WenmuZhou 已提交
40
logger = get_logger()
41
from ppocr.utils.utility import check_and_read, get_image_file_list
42
from ppocr.utils.network import maybe_download, download_with_progressbar, is_link, confirm_model_dir_url
Z
zhoujun 已提交
43
from tools.infer.utility import draw_ocr, str2bool, check_gpu
44
from ppstructure.utility import init_args, draw_structure_result
qq_25193841's avatar
qq_25193841 已提交
45
from ppstructure.predict_system import StructureSystem, save_structure_res, to_excel
W
WenmuZhou 已提交
46

文幕地方's avatar
文幕地方 已提交
47 48
__all__ = [
    'PaddleOCR', 'PPStructure', 'draw_ocr', 'draw_structure_result',
qq_25193841's avatar
qq_25193841 已提交
49
    'save_structure_res', 'download_with_progressbar', 'to_excel'
文幕地方's avatar
文幕地方 已提交
50 51 52
]

SUPPORT_DET_MODEL = ['DB']
53
VERSION = '2.6.1.0'
A
andyjpaddle 已提交
54
SUPPORT_REC_MODEL = ['CRNN', 'SVTR_LCNet']
文幕地方's avatar
文幕地方 已提交
55 56
BASE_DIR = os.path.expanduser("~/.paddleocr/")

57 58
DEFAULT_OCR_MODEL_VERSION = 'PP-OCRv3'
SUPPORT_OCR_MODEL_VERSION = ['PP-OCR', 'PP-OCRv2', 'PP-OCRv3']
文幕地方's avatar
文幕地方 已提交
59 60
DEFAULT_STRUCTURE_MODEL_VERSION = 'PP-StructureV2'
SUPPORT_STRUCTURE_MODEL_VERSION = ['PP-Structure', 'PP-StructureV2']
文幕地方's avatar
文幕地方 已提交
61
MODEL_URLS = {
Z
zhoujun 已提交
62
    'OCR': {
63 64 65 66 67 68 69 70 71 72
        'PP-OCRv3': {
            'det': {
                'ch': {
                    'url':
                    'https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar',
                },
                'en': {
                    'url':
                    'https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar',
                },
A
andyjpaddle 已提交
73 74 75 76
                'ml': {
                    'url':
                    'https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/Multilingual_PP-OCRv3_det_infer.tar'
                }
77 78 79 80 81 82 83 84 85 86 87 88
            },
            'rec': {
                'ch': {
                    'url':
                    'https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar',
                    'dict_path': './ppocr/utils/ppocr_keys_v1.txt'
                },
                'en': {
                    'url':
                    'https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tar',
                    'dict_path': './ppocr/utils/en_dict.txt'
                },
A
andyjpaddle 已提交
89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
                'korean': {
                    'url':
                    'https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/korean_PP-OCRv3_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/korean_dict.txt'
                },
                'japan': {
                    'url':
                    'https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/japan_PP-OCRv3_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/japan_dict.txt'
                },
                'chinese_cht': {
                    'url':
                    'https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/chinese_cht_PP-OCRv3_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/chinese_cht_dict.txt'
                },
                'ta': {
                    'url':
                    'https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/ta_PP-OCRv3_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/ta_dict.txt'
                },
                'te': {
                    'url':
                    'https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/te_PP-OCRv3_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/te_dict.txt'
                },
                'ka': {
                    'url':
                    'https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/ka_PP-OCRv3_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/ka_dict.txt'
                },
                'latin': {
                    'url':
A
andyjpaddle 已提交
121
                    'https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/latin_PP-OCRv3_rec_infer.tar',
A
andyjpaddle 已提交
122 123 124 125
                    'dict_path': './ppocr/utils/dict/latin_dict.txt'
                },
                'arabic': {
                    'url':
A
andyjpaddle 已提交
126
                    'https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/arabic_PP-OCRv3_rec_infer.tar',
A
andyjpaddle 已提交
127 128 129 130
                    'dict_path': './ppocr/utils/dict/arabic_dict.txt'
                },
                'cyrillic': {
                    'url':
A
andyjpaddle 已提交
131
                    'https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/cyrillic_PP-OCRv3_rec_infer.tar',
A
andyjpaddle 已提交
132 133 134 135
                    'dict_path': './ppocr/utils/dict/cyrillic_dict.txt'
                },
                'devanagari': {
                    'url':
A
andyjpaddle 已提交
136
                    'https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/devanagari_PP-OCRv3_rec_infer.tar',
A
andyjpaddle 已提交
137 138
                    'dict_path': './ppocr/utils/dict/devanagari_dict.txt'
                },
139 140 141 142 143 144 145 146
            },
            'cls': {
                'ch': {
                    'url':
                    'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar',
                }
            },
        },
Z
zhoujun 已提交
147 148 149 150 151 152
        'PP-OCRv2': {
            'det': {
                'ch': {
                    'url':
                    'https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar',
                },
文幕地方's avatar
文幕地方 已提交
153
            },
Z
zhoujun 已提交
154 155 156 157 158 159
            'rec': {
                'ch': {
                    'url':
                    'https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar',
                    'dict_path': './ppocr/utils/ppocr_keys_v1.txt'
                }
A
andyjpaddle 已提交
160 161 162 163 164 165 166
            },
            'cls': {
                'ch': {
                    'url':
                    'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar',
                }
            },
W
WenmuZhou 已提交
167
        },
168
        'PP-OCR': {
Z
zhoujun 已提交
169 170 171 172 173 174 175 176 177 178 179 180 181
            'det': {
                'ch': {
                    'url':
                    'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar',
                },
                'en': {
                    'url':
                    'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_ppocr_mobile_v2.0_det_infer.tar',
                },
                'structure': {
                    'url':
                    'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar'
                }
文幕地方's avatar
文幕地方 已提交
182
            },
Z
zhoujun 已提交
183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258
            'rec': {
                'ch': {
                    'url':
                    'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar',
                    'dict_path': './ppocr/utils/ppocr_keys_v1.txt'
                },
                'en': {
                    'url':
                    'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_infer.tar',
                    'dict_path': './ppocr/utils/en_dict.txt'
                },
                'french': {
                    'url':
                    'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/french_mobile_v2.0_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/french_dict.txt'
                },
                'german': {
                    'url':
                    'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/german_dict.txt'
                },
                'korean': {
                    'url':
                    'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/korean_dict.txt'
                },
                'japan': {
                    'url':
                    'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/japan_dict.txt'
                },
                'chinese_cht': {
                    'url':
                    'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/chinese_cht_dict.txt'
                },
                'ta': {
                    'url':
                    'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/ta_dict.txt'
                },
                'te': {
                    'url':
                    'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/te_dict.txt'
                },
                'ka': {
                    'url':
                    'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/ka_dict.txt'
                },
                'latin': {
                    'url':
                    'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/latin_ppocr_mobile_v2.0_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/latin_dict.txt'
                },
                'arabic': {
                    'url':
                    'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/arabic_ppocr_mobile_v2.0_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/arabic_dict.txt'
                },
                'cyrillic': {
                    'url':
                    'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/cyrillic_dict.txt'
                },
                'devanagari': {
                    'url':
                    'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/devanagari_dict.txt'
                },
                'structure': {
                    'url':
                    'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar',
                    'dict_path': 'ppocr/utils/dict/table_dict.txt'
                }
文幕地方's avatar
文幕地方 已提交
259
            },
Z
zhoujun 已提交
260 261 262 263 264
            'cls': {
                'ch': {
                    'url':
                    'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar',
                }
文幕地方's avatar
文幕地方 已提交
265
            },
Z
zhoujun 已提交
266 267 268
        }
    },
    'STRUCTURE': {
文幕地方's avatar
文幕地方 已提交
269
        'PP-Structure': {
Z
zhoujun 已提交
270 271 272 273 274 275
            'table': {
                'en': {
                    'url':
                    'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar',
                    'dict_path': 'ppocr/utils/dict/table_structure_dict.txt'
                }
文幕地方's avatar
文幕地方 已提交
276
            }
文幕地方's avatar
文幕地方 已提交
277
        },
文幕地方's avatar
文幕地方 已提交
278
        'PP-StructureV2': {
文幕地方's avatar
文幕地方 已提交
279 280
            'table': {
                'en': {
文幕地方's avatar
文幕地方 已提交
281 282
                    'url':
                    'https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tar',
文幕地方's avatar
文幕地方 已提交
283 284 285
                    'dict_path': 'ppocr/utils/dict/table_structure_dict.txt'
                },
                'ch': {
文幕地方's avatar
文幕地方 已提交
286 287 288
                    'url':
                    'https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar',
                    'dict_path': 'ppocr/utils/dict/table_structure_dict_ch.txt'
文幕地方's avatar
文幕地方 已提交
289 290 291
                }
            },
            'layout': {
A
an1018 已提交
292
                'en': {
文幕地方's avatar
文幕地方 已提交
293
                    'url':
A
an1018 已提交
294
                    'https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_infer.tar',
Z
zhoujun 已提交
295 296
                    'dict_path':
                    'ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt'
A
an1018 已提交
297 298 299 300 301 302
                },
                'ch': {
                    'url':
                    'https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_cdla_infer.tar',
                    'dict_path':
                    'ppocr/utils/dict/layout_dict/layout_cdla_dict.txt'
文幕地方's avatar
文幕地方 已提交
303 304
                }
            }
W
WenmuZhou 已提交
305
        }
306
    }
W
WenmuZhou 已提交
307 308 309
}


W
WenmuZhou 已提交
310
def parse_args(mMain=True):
    """Build the paddleocr option set on top of the parser from ``init_args``.

    Args:
        mMain: When True the command line is parsed and the parsed namespace
            is returned. When False the command line is ignored and a
            namespace holding every registered option's default value is
            returned (this is how ``PaddleOCR`` / ``PPStructure`` obtain
            their baseline parameters before applying keyword overrides).

    Returns:
        argparse.Namespace: one attribute per registered option.
    """
    import argparse

    parser = init_args()
    parser.add_help = mMain
    parser.add_argument("--lang", type=str, default='ch')
    parser.add_argument("--det", type=str2bool, default=True)
    parser.add_argument("--rec", type=str2bool, default=True)
    parser.add_argument("--type", type=str, default='ocr')
    parser.add_argument(
        "--ocr_version",
        type=str,
        choices=SUPPORT_OCR_MODEL_VERSION,
        default=DEFAULT_OCR_MODEL_VERSION,
        help='OCR Model version, the current model support list is as follows: '
        '1. PP-OCRv3 Support Chinese and English detection and recognition model, and direction classifier model. '
        '2. PP-OCRv2 Support Chinese detection and recognition model. '
        '3. PP-OCR support Chinese detection, recognition and direction classifier and multilingual recognition model.'
    )
    parser.add_argument(
        "--structure_version",
        type=str,
        choices=SUPPORT_STRUCTURE_MODEL_VERSION,
        default=DEFAULT_STRUCTURE_MODEL_VERSION,
        help='Model version, the current model support list is as follows:'
        ' 1. PP-Structure Support en table structure model.'
        ' 2. PP-StructureV2 Support ch and en table structure model.')

    # Clear the dictionary-path defaults so that the engine constructors can
    # detect "not supplied" (None) and substitute the dictionary that belongs
    # to the selected model.
    dict_path_options = (
        'rec_char_dict_path', 'table_char_dict_path', 'layout_dict_path')
    for action in parser._actions:
        if action.dest in dict_path_options:
            action.default = None

    if mMain:
        return parser.parse_args()

    # Library use: never look at sys.argv, just expose the defaults.
    defaults = {action.dest: action.default for action in parser._actions}
    return argparse.Namespace(**defaults)
W
WenmuZhou 已提交
349 350


W
WenmuZhou 已提交
351 352
def parse_lang(lang):
    """Normalise a language code and pick the matching detection language.

    Individual language codes that share a recognition model are first
    collapsed onto their script family (``latin``, ``arabic``, ``cyrillic``
    or ``devanagari``). The result must be one of the recognition languages
    listed for the default OCR model version in ``MODEL_URLS``.

    Args:
        lang: Language code as given by the user, e.g. ``'ch'``, ``'en'``,
            ``'fr'``, ``'korean'``.

    Returns:
        tuple: ``(lang, det_lang)`` where ``lang`` is the normalised
        recognition language and ``det_lang`` is ``'ch'``, ``'structure'``,
        ``'en'`` or ``'ml'`` (multilingual).

    Raises:
        AssertionError: If the normalised language has no recognition model
            in the default OCR model version.
    """
    script_families = {
        'latin': (
            'af', 'az', 'bs', 'cs', 'cy', 'da', 'de', 'es', 'et', 'fr', 'ga',
            'hr', 'hu', 'id', 'is', 'it', 'ku', 'la', 'lt', 'lv', 'mi', 'ms',
            'mt', 'nl', 'no', 'oc', 'pi', 'pl', 'pt', 'ro', 'rs_latin', 'sk',
            'sl', 'sq', 'sv', 'sw', 'tl', 'tr', 'uz', 'vi', 'french', 'german',
        ),
        'arabic': ('ar', 'fa', 'ug', 'ur'),
        'cyrillic': (
            'ru', 'rs_cyrillic', 'be', 'bg', 'uk', 'mn', 'abq', 'ady', 'kbd',
            'ava', 'dar', 'inh', 'che', 'lbe', 'lez', 'tab',
        ),
        'devanagari': (
            'hi', 'mr', 'ne', 'bh', 'mai', 'ang', 'bho', 'mah', 'sck', 'new',
            'gom', 'sa', 'bgc',
        ),
    }
    for family, members in script_families.items():
        if lang in members:
            lang = family
            break

    supported = MODEL_URLS['OCR'][DEFAULT_OCR_MODEL_VERSION]['rec']
    if lang not in supported:
        # Raised explicitly (rather than via an ``assert`` statement) so the
        # check is still enforced when Python runs with -O; the exception
        # type is kept for callers that already catch AssertionError.
        raise AssertionError('param lang must in {}, but got {}'.format(
            supported.keys(), lang))

    if lang == "ch":
        det_lang = "ch"
    elif lang == 'structure':
        det_lang = 'structure'
    elif lang in ["en", "latin"]:
        det_lang = "en"
    else:
        det_lang = "ml"
    return lang, det_lang


Z
zhoujun 已提交
389 390 391 392 393 394 395
def get_model_config(type, version, model_type, lang):
    """Look up a model entry in ``MODEL_URLS`` with fallback to the default version.

    The lookup is ``MODEL_URLS[type][version][model_type][lang]``. If the
    requested version is unknown, or does not provide the requested model
    type or language, the default version for ``type`` is used instead when
    it provides them.

    Args:
        type: Model family, ``'OCR'`` or ``'STRUCTURE'``.
        version: Requested model version key.
        model_type: Model kind inside the version (e.g. ``'det'``, ``'rec'``,
            ``'cls'``, ``'table'``, ``'layout'``).
        lang: Language key inside the model kind.

    Returns:
        dict: The matching entry (contains ``'url'`` and, for some models,
        ``'dict_path'``).

    Raises:
        NotImplementedError: If ``type`` is neither ``'OCR'`` nor
            ``'STRUCTURE'``.
        SystemExit: With status -1 (after logging an error) if neither the
            requested nor the default version provides the model type or
            language.
    """
    if type == 'OCR':
        default_version = DEFAULT_OCR_MODEL_VERSION
    elif type == 'STRUCTURE':
        default_version = DEFAULT_STRUCTURE_MODEL_VERSION
    else:
        raise NotImplementedError(
            "type must be 'OCR' or 'STRUCTURE', but got {!r}".format(type))

    model_urls = MODEL_URLS[type]
    if version not in model_urls:
        version = default_version

    if model_type not in model_urls[version]:
        if model_type in model_urls[default_version]:
            version = default_version
        else:
            logger.error('{} models is not support, we only support {}'.format(
                model_type, model_urls[default_version].keys()))
            sys.exit(-1)

    if lang not in model_urls[version][model_type]:
        if lang in model_urls[default_version][model_type]:
            version = default_version
        else:
            logger.error(
                'lang {} is not support, we only support {} for {} models'.
                format(lang, model_urls[default_version][model_type].keys(),
                       model_type))
            sys.exit(-1)
    return model_urls[version][model_type][lang]
文幕地方's avatar
文幕地方 已提交
418 419


V
vivien 已提交
420 421 422 423 424 425 426 427 428 429 430 431 432 433
def img_decode(content: bytes):
    """Decode an encoded image held in memory.

    The bytes are viewed as a uint8 buffer and handed to ``cv2.imdecode``
    with ``cv2.IMREAD_COLOR``; the function returns whatever that call
    returns (callers in this file treat ``None`` as "could not decode").
    """
    return cv2.imdecode(
        np.frombuffer(content, dtype=np.uint8), cv2.IMREAD_COLOR)


def _decode_with_pil(img_bytes):
    """Re-encode *img_bytes* as JPEG through Pillow, then decode with OpenCV."""
    jpeg_buf = BytesIO()
    with Image.open(BytesIO(img_bytes)) as im:
        im.convert('RGB').save(jpeg_buf, 'jpeg')
    return img_decode(jpeg_buf.getvalue())


def check_img(img):
    """Turn the supported image inputs into something the predictors accept.

    Args:
        img: One of
            * ``bytes`` - an encoded image, decoded with ``img_decode``;
            * ``str`` - a local path or a link. Links are downloaded to
              ``tmp.jpg`` in the current directory first. The file is read
              with ``check_and_read``; when that reports neither GIF nor PDF
              the file is decoded with OpenCV, falling back to Pillow if
              OpenCV cannot decode it;
            * anything else (e.g. ``np.ndarray`` or ``list``) - passed through.

    Returns:
        The loaded image data, or ``None`` when a path/link could not be
        loaded (an error is logged in that case). A 2-D ndarray is expanded
        to 3 channels with ``cv2.COLOR_GRAY2BGR``.
    """
    if isinstance(img, bytes):
        img = img_decode(img)
    if isinstance(img, str):
        # download net image
        if is_link(img):
            download_with_progressbar(img, 'tmp.jpg')
            img = 'tmp.jpg'
        image_file = img
        img, flag_gif, flag_pdf = check_and_read(image_file)
        if not flag_gif and not flag_pdf:
            with open(image_file, 'rb') as f:
                img_str = f.read()
            img = img_decode(img_str)
            if img is None:
                # OpenCV could not decode the file; let Pillow try.
                try:
                    img = _decode_with_pil(img_str)
                except Exception:
                    logger.error("error in loading image:{}".format(image_file))
                    return None
        if img is None:
            logger.error("error in loading image:{}".format(image_file))
            return None
    if isinstance(img, np.ndarray) and len(img.shape) == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

    return img


W
WenmuZhou 已提交
465
class PaddleOCR(predict_system.TextSystem):
    """Text detection, angle classification and recognition behind one object.

    The constructor resolves model directories and download links from
    ``MODEL_URLS``, downloads missing models (unless ONNX is used) and then
    initialises the underlying ``TextSystem``.
    """

    def __init__(self, **kwargs):
        """
        paddleocr package
        args:
            **kwargs: other params show in paddleocr --help
        """
        params = parse_args(mMain=False)
        params.__dict__.update(**kwargs)
        assert params.ocr_version in SUPPORT_OCR_MODEL_VERSION, "ocr_version must in {}, but get {}".format(
            SUPPORT_OCR_MODEL_VERSION, params.ocr_version)
        params.use_gpu = check_gpu(params.use_gpu)

        if not params.show_log:
            logger.setLevel(logging.INFO)
        self.use_angle_cls = params.use_angle_cls
        lang, det_lang = parse_lang(params.lang)

        # init model dir
        det_model_config = get_model_config('OCR', params.ocr_version, 'det',
                                            det_lang)
        params.det_model_dir, det_url = confirm_model_dir_url(
            params.det_model_dir,
            os.path.join(BASE_DIR, 'whl', 'det', det_lang),
            det_model_config['url'])
        rec_model_config = get_model_config('OCR', params.ocr_version, 'rec',
                                            lang)
        params.rec_model_dir, rec_url = confirm_model_dir_url(
            params.rec_model_dir,
            os.path.join(BASE_DIR, 'whl', 'rec', lang), rec_model_config['url'])
        cls_model_config = get_model_config('OCR', params.ocr_version, 'cls',
                                            'ch')
        params.cls_model_dir, cls_url = confirm_model_dir_url(
            params.cls_model_dir,
            os.path.join(BASE_DIR, 'whl', 'cls'), cls_model_config['url'])
        if params.ocr_version == 'PP-OCRv3':
            params.rec_image_shape = "3, 48, 320"
        else:
            params.rec_image_shape = "3, 32, 320"
        # download model if using paddle infer
        if not params.use_onnx:
            maybe_download(params.det_model_dir, det_url)
            maybe_download(params.rec_model_dir, rec_url)
            maybe_download(params.cls_model_dir, cls_url)

        # Unsupported algorithms are fatal; exit with a non-zero status
        # (same status get_model_config uses) so callers can see the failure.
        if params.det_algorithm not in SUPPORT_DET_MODEL:
            logger.error('det_algorithm must in {}'.format(SUPPORT_DET_MODEL))
            sys.exit(-1)
        if params.rec_algorithm not in SUPPORT_REC_MODEL:
            logger.error('rec_algorithm must in {}'.format(SUPPORT_REC_MODEL))
            sys.exit(-1)

        if params.rec_char_dict_path is None:
            # No dictionary supplied: use the one that ships with the model.
            params.rec_char_dict_path = str(
                Path(__file__).parent / rec_model_config['dict_path'])

        logger.debug(params)
        # init det_model and rec_model
        super().__init__(params)
        self.page_num = params.page_num

    def ocr(self, img, det=True, rec=True, cls=True):
        """
        ocr with paddleocr
        args:
            img: img for ocr, support ndarray, img_path (or link), bytes and list of ndarray
            det: use text detection or not. If false, only rec will be exec. Default is True
            rec: use text recognition or not. If false, only det will be exec. Default is True
            cls: use angle classifier or not. Default is True. If true, the text with rotation of 180 degrees can be recognized. If no text is rotated by 180 degrees, use cls=False to get better performance. Text with rotation of 90 or 270 degrees can be recognized even if cls=False.
        returns:
            One entry per processed image/page:
              det and rec: list of [box, recognition result] pairs
              det only:    list of boxes
              rec only:    the recognizer output
              neither:     the angle classifier output (only when the
                           classifier is enabled and cls is true)
            None if the image could not be loaded.
        """
        assert isinstance(img, (np.ndarray, list, str, bytes))
        if isinstance(img, list) and det:
            logger.error('When input a list of images, det must be false')
            # sys.exit instead of the site-provided exit(), which is not
            # guaranteed to exist; non-zero status because this is an error.
            sys.exit(-1)
        if cls and not self.use_angle_cls:
            logger.warning(
                'Since the angle classifier is not initialized, the angle classifier will not be uesd during the forward process'
            )

        img = check_img(img)
        if img is None:
            # check_img already logged the failure; nothing to run on.
            return None

        # for infer pdf file
        if isinstance(img, list):
            # Keep the limit local: writing it back to self.page_num would
            # make a short document cap every later, longer document.
            page_limit = self.page_num
            if page_limit > len(img) or page_limit == 0:
                page_limit = len(img)
            imgs = img[:page_limit]
        else:
            imgs = [img]

        if det and rec:
            ocr_res = []
            for page in imgs:
                dt_boxes, rec_res, _ = self.__call__(page, cls)
                ocr_res.append([[box.tolist(), res]
                                for box, res in zip(dt_boxes, rec_res)])
            return ocr_res

        if det:
            ocr_res = []
            for page in imgs:
                dt_boxes, _ = self.text_detector(page)
                ocr_res.append([box.tolist() for box in dt_boxes])
            return ocr_res

        # Detection disabled: every input is treated as an already-cropped
        # text image (or a list of them).
        ocr_res = []
        cls_res = []
        for page in imgs:
            batch = page if isinstance(page, list) else [page]
            if self.use_angle_cls and cls:
                batch, cls_res_tmp, _ = self.text_classifier(batch)
                if not rec:
                    cls_res.append(cls_res_tmp)
            if rec:
                # Only run the recognizer when its output is returned.
                rec_res, _ = self.text_recognizer(batch)
                ocr_res.append(rec_res)
        return ocr_res if rec else cls_res
582 583


584
class PPStructure(StructureSystem):
    """Document structure analysis (layout, table, OCR) behind one object.

    The constructor resolves model directories and download links from
    ``MODEL_URLS``, downloads missing models and then initialises the
    underlying ``StructureSystem``.
    """

    def __init__(self, **kwargs):
        """
        args:
            **kwargs: overrides for the default parameters built by
                parse_args (same names as the command line options)
        """
        params = parse_args(mMain=False)
        params.__dict__.update(**kwargs)
        assert params.structure_version in SUPPORT_STRUCTURE_MODEL_VERSION, "structure_version must in {}, but get {}".format(
            SUPPORT_STRUCTURE_MODEL_VERSION, params.structure_version)
        params.use_gpu = check_gpu(params.use_gpu)
        params.mode = 'structure'

        if not params.show_log:
            logger.setLevel(logging.INFO)
        lang, det_lang = parse_lang(params.lang)
        # Table and layout models are only listed for 'ch' and 'en' in
        # MODEL_URLS; every other recognition language uses the 'en' models.
        # Looking the layout model up with the raw recognition language
        # would make get_model_config exit for e.g. 'latin' or 'korean'.
        if lang == 'ch':
            structure_lang = 'ch'
        else:
            structure_lang = 'en'
        if params.structure_version == 'PP-Structure':
            params.merge_no_span_structure = False

        # init model dir
        det_model_config = get_model_config('OCR', params.ocr_version, 'det',
                                            det_lang)
        params.det_model_dir, det_url = confirm_model_dir_url(
            params.det_model_dir,
            os.path.join(BASE_DIR, 'whl', 'det', det_lang),
            det_model_config['url'])
        rec_model_config = get_model_config('OCR', params.ocr_version, 'rec',
                                            lang)
        params.rec_model_dir, rec_url = confirm_model_dir_url(
            params.rec_model_dir,
            os.path.join(BASE_DIR, 'whl', 'rec', lang), rec_model_config['url'])
        table_model_config = get_model_config(
            'STRUCTURE', params.structure_version, 'table', structure_lang)
        params.table_model_dir, table_url = confirm_model_dir_url(
            params.table_model_dir,
            os.path.join(BASE_DIR, 'whl', 'table'), table_model_config['url'])
        layout_model_config = get_model_config(
            'STRUCTURE', params.structure_version, 'layout', structure_lang)
        params.layout_model_dir, layout_url = confirm_model_dir_url(
            params.layout_model_dir,
            os.path.join(BASE_DIR, 'whl', 'layout'), layout_model_config['url'])
        # download model
        maybe_download(params.det_model_dir, det_url)
        maybe_download(params.rec_model_dir, rec_url)
        maybe_download(params.table_model_dir, table_url)
        maybe_download(params.layout_model_dir, layout_url)

        # Dictionary paths left as None fall back to the dictionary that
        # ships with the selected model.
        package_dir = Path(__file__).parent
        if params.rec_char_dict_path is None:
            params.rec_char_dict_path = str(
                package_dir / rec_model_config['dict_path'])
        if params.table_char_dict_path is None:
            params.table_char_dict_path = str(
                package_dir / table_model_config['dict_path'])
        if params.layout_dict_path is None:
            params.layout_dict_path = str(
                package_dir / layout_model_config['dict_path'])
        logger.debug(params)
        super().__init__(params)

    def __call__(self, img, return_ocr_result_in_table=False, img_idx=0):
        """
        Run structure analysis on one image.
        args:
            img: anything accepted by check_img (ndarray, path/link, bytes)
            return_ocr_result_in_table: forwarded to StructureSystem.__call__
            img_idx: forwarded to StructureSystem.__call__
        returns:
            the first element of the tuple returned by StructureSystem.__call__
        """
        img = check_img(img)
        res, _ = super().__call__(
            img, return_ocr_result_in_table, img_idx=img_idx)
        return res


650
def main():
    """Command-line entry point: run OCR or structure analysis on images.

    Parses the command-line arguments, resolves the list of input images
    (downloading a single image to ``tmp.jpg`` when ``image_dir`` is a link),
    builds either a ``PaddleOCR`` or a ``PPStructure`` engine depending on
    ``args.type`` and processes every image in turn, logging the results.

    For ``type == 'structure'`` the per-page results are saved under
    ``args.output`` via ``save_structure_res``; when ``args.recovery`` is set
    a docx file is additionally produced, either through ``pdf2docx`` (PDF
    input with ``args.use_pdf2docx_api``) or through ``convert_info_docx``.

    Raises:
        NotImplementedError: if ``args.type`` is neither ``'ocr'`` nor
            ``'structure'``.
    """
    # for cmd
    args = parse_args(mMain=True)
    image_dir = args.image_dir
    if is_link(image_dir):
        download_with_progressbar(image_dir, 'tmp.jpg')
        image_file_list = ['tmp.jpg']
    else:
        image_file_list = get_image_file_list(args.image_dir)
    if not image_file_list:
        logger.error('no images find in {}'.format(args.image_dir))
        return
    if args.type == 'ocr':
        engine = PaddleOCR(**(args.__dict__))
    elif args.type == 'structure':
        engine = PPStructure(**(args.__dict__))
    else:
        raise NotImplementedError

    for img_path in image_file_list:
        img_name = os.path.basename(img_path).split('.')[0]
        logger.info('{}{}{}'.format('*' * 10, img_path, '*' * 10))
        if args.type == 'ocr':
            result = engine.ocr(img_path,
                                det=args.det,
                                rec=args.rec,
                                cls=args.use_angle_cls)
            if result is not None:
                for res in result:
                    # NOTE(review): an entry may presumably be None when
                    # nothing was recognised for that image/page; skip it
                    # rather than crash while logging.
                    if res is None:
                        continue
                    for line in res:
                        logger.info(line)
        elif args.type == 'structure':
            img, flag_gif, flag_pdf = check_and_read(img_path)
            if not flag_gif and not flag_pdf:
                img = cv2.imread(img_path)

            if args.recovery and args.use_pdf2docx_api and flag_pdf:
                from pdf2docx.converter import Converter
                # The docx is written directly into the output directory, so
                # make sure it exists before the converter tries to save.
                os.makedirs(args.output, exist_ok=True)
                docx_file = os.path.join(args.output,
                                         '{}.docx'.format(img_name))
                converter = Converter(img_path)
                try:
                    converter.convert(docx_file)
                finally:
                    # Always release the converter, even if conversion fails.
                    converter.close()
                logger.info('docx save to {}'.format(docx_file))
                continue

            if not flag_pdf:
                if img is None:
                    logger.error("error in loading image:{}".format(img_path))
                    continue
                img_paths = [[img_path, img]]
            else:
                # For a PDF, ``img`` is iterated page by page; every page is
                # dumped as a jpg under <output>/<img_name>/.
                img_paths = []
                page_dir = os.path.join(args.output, img_name)
                os.makedirs(page_dir, exist_ok=True)
                for index, pdf_img in enumerate(img):
                    pdf_img_path = os.path.join(
                        page_dir, img_name + '_' + str(index) + '.jpg')
                    cv2.imwrite(pdf_img_path, pdf_img)
                    img_paths.append([pdf_img_path, pdf_img])

            all_res = []
            page_count = len(img_paths)
            for index, (new_img_path, img) in enumerate(img_paths):
                logger.info('processing {}/{} page:'.format(index + 1,
                                                            page_count))
                result = engine(new_img_path, img_idx=index)
                save_structure_res(result, args.output, img_name, index)

                if args.recovery and result != []:
                    from copy import deepcopy
                    from ppstructure.recovery.recovery_to_doc import sorted_layout_boxes
                    # Only the page width is needed for sorting the layout
                    # boxes; indexing also works for 2-D (grayscale) arrays.
                    w = img.shape[1]
                    # Sort a copy so the engine's own result is not mutated.
                    result_cp = deepcopy(result)
                    result_sorted = sorted_layout_boxes(result_cp, w)
                    all_res += result_sorted

            if args.recovery and all_res != []:
                try:
                    from ppstructure.recovery.recovery_to_doc import convert_info_docx
                    # ``img`` is the last page processed by the loop above.
                    convert_info_docx(img, all_res, args.output, img_name)
                except Exception as ex:
                    logger.error(
                        "error in layout recovery image:{}, err msg: {}".format(
                            img_name, ex))
                    continue

            for item in all_res:
                # Drop the 'img' and 'res' entries before logging; tolerate
                # items that lack either key.
                item.pop('img', None)
                item.pop('res', None)
                logger.info(item)
            logger.info('result save to {}'.format(args.output))