paddleocr.py 15.3 KB
Newer Older
W
WenmuZhou 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import sys

# Append this file's own directory to sys.path. Presumably this lets the
# absolute imports below (tools, ppocr, ppstructure) resolve no matter where
# the interpreter was started from -- TODO confirm against the package layout.
__dir__ = os.path.dirname(__file__)
sys.path.append(os.path.join(__dir__, ''))

import cv2
W
WenmuZhou 已提交
22
import logging
W
WenmuZhou 已提交
23 24 25 26
import numpy as np
from pathlib import Path

from tools.infer import predict_system
W
WenmuZhou 已提交
27
from ppocr.utils.logging import get_logger
W
WenmuZhou 已提交
28

W
WenmuZhou 已提交
29
# Module-wide logger used by the classes and the CLI entry point below.
logger = get_logger()
30
from ppocr.utils.utility import check_and_read_gif, get_image_file_list
31
from ppocr.utils.network import maybe_download, download_with_progressbar, is_link, confirm_model_dir_url
32 33 34
from tools.infer.utility import draw_ocr, str2bool
from ppstructure.utility import init_args, draw_structure_result
from ppstructure.predict_system import OCRSystem, save_structure_res
W
WenmuZhou 已提交
35

36
# Names exported by a star-import of this module.
__all__ = ['PaddleOCR', 'PPStructure', 'draw_ocr', 'draw_structure_result', 'save_structure_res','download_with_progressbar']
W
WenmuZhou 已提交
37

W
WenmuZhou 已提交
38
# Default download locations for the inference models.
#   'det'   -> detection model URL per detection language key
#   'rec'   -> recognition model URL plus character dictionary per language
#   'cls'   -> angle classifier model URL
#   'table' -> table structure model URL plus its dictionary
# Every 'dict_path' is joined onto this file's directory by the constructors
# below when the caller does not supply a dictionary path.
model_urls = {
    'det': {
        'ch': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar',
        'en': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_ppocr_mobile_v2.0_det_infer.tar',
        'structure': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar',
    },
    'rec': {
        'ch': {
            'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/ppocr_keys_v1.txt',
        },
        'en': {
            'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/en_dict.txt',
        },
        'french': {
            'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/french_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/french_dict.txt',
        },
        'german': {
            'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/german_dict.txt',
        },
        'korean': {
            'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/korean_dict.txt',
        },
        'japan': {
            'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/japan_dict.txt',
        },
        'chinese_cht': {
            'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/chinese_cht_dict.txt',
        },
        'ta': {
            'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/ta_dict.txt',
        },
        'te': {
            'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/te_dict.txt',
        },
        'ka': {
            'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/ka_dict.txt',
        },
        # Script-family models; parse_lang() maps individual language codes
        # onto these keys.
        'latin': {
            'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/latin_ppocr_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/latin_dict.txt',
        },
        'arabic': {
            'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/arabic_ppocr_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/arabic_dict.txt',
        },
        'cyrillic': {
            'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/cyrillic_dict.txt',
        },
        'devanagari': {
            'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/devanagari_dict.txt',
        },
        'structure': {
            'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar',
            'dict_path': 'ppocr/utils/dict/table_dict.txt',
        },
    },
    'cls': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar',
    'table': {
        'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar',
        'dict_path': 'ppocr/utils/dict/table_structure_dict.txt',
    },
}

# Version string; also used as a path component of the local model cache.
VERSION = '2.2.0.1'
# Root directory under which model directories are placed by default.
BASE_DIR = os.path.expanduser("~/.paddleocr/")

# Algorithm names PaddleOCR.__init__ accepts for det_algorithm / rec_algorithm.
SUPPORT_DET_MODEL = ['DB']
SUPPORT_REC_MODEL = ['CRNN']
W
WenmuZhou 已提交
133 134


W
WenmuZhou 已提交
135
def parse_args(mMain=True):
    """
    build the argument namespace for the command line or for library use
    args:
        mMain: if True, parse the process command line and return the result;
               if False, parse nothing and return a namespace that maps every
               registered argument to its default value
    """
    import argparse

    parser = init_args()
    parser.add_help = mMain

    # package-level switches layered on top of the options from init_args()
    extra_options = (
        ("--lang", str, 'ch'),
        ("--det", str2bool, True),
        ("--rec", str2bool, True),
        ("--type", str, 'ocr'),
    )
    for flag, converter, fallback in extra_options:
        parser.add_argument(flag, type=converter, default=fallback)

    # Blank out the dictionary-path defaults: the constructors below treat
    # None as "not supplied" and substitute the dictionary that matches the
    # selected language.
    cleared_dests = ['rec_char_dict_path', 'table_char_dict_path']
    for action in parser._actions:
        if action.dest in cleared_dests:
            action.default = None

    if not mMain:
        defaults = {action.dest: action.default for action in parser._actions}
        return argparse.Namespace(**defaults)
    return parser.parse_args()
W
WenmuZhou 已提交
154 155


W
WenmuZhou 已提交
156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191
def parse_lang(lang):
    """
    resolve a language code to the model keys used in model_urls
    args:
        lang: a key of model_urls['rec'], or one of the language codes listed
              below, which is replaced by its script family
    return:
        (lang, det_lang): the recognition key and the detection key
    raises AssertionError when the resolved lang is not in model_urls['rec']
    """
    # (family key, member codes); scanned in order, first match wins
    script_families = (
        ("latin", [
            'af', 'az', 'bs', 'cs', 'cy', 'da', 'de', 'es', 'et', 'fr', 'ga',
            'hr', 'hu', 'id', 'is', 'it', 'ku', 'la', 'lt', 'lv', 'mi', 'ms',
            'mt', 'nl', 'no', 'oc', 'pi', 'pl', 'pt', 'ro', 'rs_latin', 'sk',
            'sl', 'sq', 'sv', 'sw', 'tl', 'tr', 'uz', 'vi'
        ]),
        ("arabic", ['ar', 'fa', 'ug', 'ur']),
        ("cyrillic", [
            'ru', 'rs_cyrillic', 'be', 'bg', 'uk', 'mn', 'abq', 'ady', 'kbd',
            'ava', 'dar', 'inh', 'che', 'lbe', 'lez', 'tab'
        ]),
        ("devanagari", [
            'hi', 'mr', 'ne', 'bh', 'mai', 'ang', 'bho', 'mah', 'sck', 'new',
            'gom', 'sa', 'bgc'
        ]),
    )
    for family, codes in script_families:
        if lang in codes:
            lang = family
            break

    assert lang in model_urls[
        'rec'], 'param lang must in {}, but got {}'.format(
        model_urls['rec'].keys(), lang)

    # only 'ch' and 'structure' have their own detector; the rest use 'en'
    det_lang = lang if lang in ("ch", 'structure') else "en"
    return lang, det_lang


W
WenmuZhou 已提交
192
class PaddleOCR(predict_system.TextSystem):
    """
    text detection / angle classification / recognition pipeline

    The constructor resolves the model directories and download URLs with
    confirm_model_dir_url, fetches the models with maybe_download and then
    initialises the underlying predict_system.TextSystem.
    """

    def __init__(self, **kwargs):
        """
        paddleocr package
        args:
            **kwargs: other params show in paddleocr --help
        """
        # start from the argument defaults, then overlay the caller's values
        params = parse_args(mMain=False)
        params.__dict__.update(**kwargs)
        if not params.show_log:
            logger.setLevel(logging.INFO)
        self.use_angle_cls = params.use_angle_cls
        lang, det_lang = parse_lang(params.lang)

        # init model dir
        params.det_model_dir, det_url = confirm_model_dir_url(
            params.det_model_dir,
            os.path.join(BASE_DIR, VERSION, 'ocr', 'det', det_lang),
            model_urls['det'][det_lang])
        params.rec_model_dir, rec_url = confirm_model_dir_url(
            params.rec_model_dir,
            os.path.join(BASE_DIR, VERSION, 'ocr', 'rec', lang),
            model_urls['rec'][lang]['url'])
        params.cls_model_dir, cls_url = confirm_model_dir_url(
            params.cls_model_dir,
            os.path.join(BASE_DIR, VERSION, 'ocr', 'cls'),
            model_urls['cls'])
        # download model
        maybe_download(params.det_model_dir, det_url)
        maybe_download(params.rec_model_dir, rec_url)
        maybe_download(params.cls_model_dir, cls_url)

        # NOTE(review): both checks below terminate the process with status 0
        # although they are error paths; kept as-is for backward
        # compatibility.
        if params.det_algorithm not in SUPPORT_DET_MODEL:
            logger.error('det_algorithm must in {}'.format(SUPPORT_DET_MODEL))
            sys.exit(0)
        if params.rec_algorithm not in SUPPORT_REC_MODEL:
            logger.error('rec_algorithm must in {}'.format(SUPPORT_REC_MODEL))
            sys.exit(0)

        # parse_args() resets this default to None, so None means the caller
        # supplied no dictionary: use the one that ships for the language
        if params.rec_char_dict_path is None:
            params.rec_char_dict_path = str(
                Path(__file__).parent / model_urls['rec'][lang]['dict_path'])

        print(params)
        # init det_model and rec_model
        super().__init__(params)

    def ocr(self, img, det=True, rec=True, cls=True):
        """
        ocr with paddleocr
        args:
            img: img for ocr, support ndarray, img_path and list or ndarray
            det: use text detection or not, if false, only rec will be exec. default is True
            rec: use text recognition or not, if false, only det will be exec. default is True
            cls: use the angle classifier or not. default is True. A warning
                 is logged when it is True but use_angle_cls was not enabled
                 at construction
        return:
            det and rec:  list of [box, rec_result] pairs, box as nested list
            det only:     list of boxes as nested lists
            det disabled: the classifier result when rec is false and the
                          classifier ran, otherwise the recognizer result
            None when the image cannot be loaded or detection reports None
        """
        assert isinstance(img, (np.ndarray, list, str))
        if isinstance(img, list) and det == True:
            logger.error('When input a list of images, det must be false')
            # sys.exit rather than the bare `exit` builtin: `exit` is injected
            # by the `site` module and is absent under `python -S` and in
            # frozen/embedded interpreters. Both raise SystemExit(0).
            sys.exit(0)
        if cls == True and self.use_angle_cls == False:
            logger.warning(
                'Since the angle classifier is not initialized, the angle classifier will not be uesd during the forward process'
            )

        if isinstance(img, str):
            # download net image
            if img.startswith('http'):
                download_with_progressbar(img, 'tmp.jpg')
                img = 'tmp.jpg'
            image_file = img
            img, flag = check_and_read_gif(image_file)
            if not flag:
                # decode from the raw bytes instead of handing the path to
                # OpenCV
                with open(image_file, 'rb') as f:
                    np_arr = np.frombuffer(f.read(), dtype=np.uint8)
                    img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
            if img is None:
                logger.error("error in loading image:{}".format(image_file))
                return None
        # promote single-channel input to three channels
        if isinstance(img, np.ndarray) and len(img.shape) == 2:
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

        if det and rec:
            dt_boxes, rec_res = self.__call__(img, cls)
            # Mirror the det-only branch below. TextSystem.__call__ is not
            # visible from this file; if it reports "nothing found" as None,
            # zip() would raise TypeError instead of returning None.
            if dt_boxes is None or rec_res is None:
                return None
            return [[box.tolist(), res] for box, res in zip(dt_boxes, rec_res)]
        elif det and not rec:
            dt_boxes, elapse = self.text_detector(img)
            if dt_boxes is None:
                return None
            return [box.tolist() for box in dt_boxes]
        else:
            # det is false: img is treated as one or more already-cropped
            # images
            if not isinstance(img, list):
                img = [img]
            if self.use_angle_cls and cls:
                img, cls_res, elapse = self.text_classifier(img)
                if not rec:
                    return cls_res
            # NOTE(review): recognition also runs when rec is false and the
            # classifier was skipped; behaviour kept unchanged.
            rec_res, elapse = self.text_recognizer(img)
            return rec_res
285 286


287 288 289 290 291 292
class PPStructure(OCRSystem):
    """
    structure analysis pipeline built on OCRSystem

    The constructor resolves and downloads the detection, recognition and
    table models, fills in missing dictionary paths and then initialises
    OCRSystem. Calling the instance loads the image if needed and delegates
    to OCRSystem.__call__.
    """

    def __init__(self, **kwargs):
        """
        args:
            **kwargs: overrides for the defaults produced by parse_args
        """
        params = parse_args(mMain=False)
        params.__dict__.update(**kwargs)
        if not params.show_log:
            logger.setLevel(logging.INFO)
        lang, det_lang = parse_lang(params.lang)

        # init model dir
        cache_root = os.path.join(BASE_DIR, VERSION, 'ocr')
        params.det_model_dir, det_url = confirm_model_dir_url(
            params.det_model_dir,
            os.path.join(cache_root, 'det', det_lang),
            model_urls['det'][det_lang])
        params.rec_model_dir, rec_url = confirm_model_dir_url(
            params.rec_model_dir,
            os.path.join(cache_root, 'rec', lang),
            model_urls['rec'][lang]['url'])
        params.table_model_dir, table_url = confirm_model_dir_url(
            params.table_model_dir,
            os.path.join(cache_root, 'table'),
            model_urls['table']['url'])

        # download model
        pending = (
            (params.det_model_dir, det_url),
            (params.rec_model_dir, rec_url),
            (params.table_model_dir, table_url),
        )
        for model_dir, url in pending:
            maybe_download(model_dir, url)

        # a None path means the caller gave no dictionary: use the one that
        # ships next to this file
        package_dir = Path(__file__).parent
        if params.rec_char_dict_path is None:
            params.rec_char_dict_path = str(
                package_dir / model_urls['rec'][lang]['dict_path'])
        if params.table_char_dict_path is None:
            params.table_char_dict_path = str(
                package_dir / model_urls['table']['dict_path'])

        print(params)
        super().__init__(params)

    def __call__(self, img):
        """
        run structure analysis on one image
        args:
            img: ndarray, or a str holding a local path or an http(s) link
        return:
            the result of OCRSystem.__call__, or None when the image cannot
            be loaded
        """
        if isinstance(img, str):
            image_file = img
            # download net image
            if image_file.startswith('http'):
                download_with_progressbar(image_file, 'tmp.jpg')
                image_file = 'tmp.jpg'
            img, flag = check_and_read_gif(image_file)
            if not flag:
                with open(image_file, 'rb') as f:
                    raw_bytes = np.frombuffer(f.read(), dtype=np.uint8)
                img = cv2.imdecode(raw_bytes, cv2.IMREAD_COLOR)
            if img is None:
                logger.error("error in loading image:{}".format(image_file))
                return None

        # promote single-channel input to three channels
        if isinstance(img, np.ndarray) and img.ndim == 2:
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

        return super().__call__(img)


340
def main():
    """
    command line entry point

    Parses the command line, collects the input images (a link is first
    downloaded to 'tmp.jpg'), builds the engine selected by --type and logs
    the result for every image. Structure results are also written with
    save_structure_res.

    raises NotImplementedError when --type is neither 'ocr' nor 'structure'
    """
    # for cmd
    args = parse_args(mMain=True)
    image_dir = args.image_dir
    if is_link(image_dir):
        download_with_progressbar(image_dir, 'tmp.jpg')
        image_file_list = ['tmp.jpg']
    else:
        image_file_list = get_image_file_list(args.image_dir)
    if not image_file_list:
        logger.error('no images find in {}'.format(args.image_dir))
        return

    if args.type == 'ocr':
        engine = PaddleOCR(**(args.__dict__))
    elif args.type == 'structure':
        engine = PPStructure(**(args.__dict__))
    else:
        raise NotImplementedError

    for img_path in image_file_list:
        # base name up to the first dot; used to name the structure output
        img_name = os.path.basename(img_path).split('.')[0]
        logger.info('{}{}{}'.format('*' * 10, img_path, '*' * 10))
        if args.type == 'ocr':
            result = engine.ocr(img_path,
                                det=args.det,
                                rec=args.rec,
                                cls=args.use_angle_cls)
            if result is not None:
                for line in result:
                    logger.info(line)
        elif args.type == 'structure':
            result = engine(img_path)
            # PPStructure.__call__ logs an error and returns None when the
            # image cannot be loaded; skip it like the 'ocr' branch does
            # instead of crashing on the None.
            if result is None:
                continue
            save_structure_res(result, args.output, img_name)

            for item in result:
                # drop the 'img' entry before logging the item
                item.pop('img')
                logger.info(item)