paddleocr.py 16.0 KB
Newer Older
W
WenmuZhou 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import sys

__dir__ = os.path.dirname(__file__)
sys.path.append(os.path.join(__dir__, ''))

import cv2
W
WenmuZhou 已提交
22
import logging
W
WenmuZhou 已提交
23 24 25 26
import numpy as np
from pathlib import Path

from tools.infer import predict_system
W
WenmuZhou 已提交
27
from ppocr.utils.logging import get_logger
W
WenmuZhou 已提交
28

W
WenmuZhou 已提交
29
logger = get_logger()
30
from ppocr.utils.utility import check_and_read_gif, get_image_file_list
31
from ppocr.utils.network import maybe_download, download_with_progressbar, is_link, confirm_model_dir_url
32 33 34
from tools.infer.utility import draw_ocr, str2bool
from ppstructure.utility import init_args, draw_structure_result
from ppstructure.predict_system import OCRSystem, save_structure_res
W
WenmuZhou 已提交
35

36
__all__ = ['PaddleOCR','PPStructure','draw_ocr','draw_structure_result','save_structure_res']
W
WenmuZhou 已提交
37

W
WenmuZhou 已提交
38
# Download locations of the pretrained inference models.
#   'det'   : role -> archive URL, keyed by language ('ch', 'en') or 'structure'.
#   'rec'   : language -> {'url': archive URL, 'dict_path': character dictionary}.
#   'cls'   : archive URL of the text-angle classifier.
#   'table' : {'url': archive URL, 'dict_path': table structure dictionary}.
# The 'dict_path' values are relative paths; the classes below resolve them
# against the directory that contains this file.
model_urls = {
    'det': {
        'ch':
            'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar',
        'en':
            'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_ppocr_mobile_v2.0_det_infer.tar',
        'structure':
            'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar',
    },
    'rec': {
        'ch': {
            'url':
                'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/ppocr_keys_v1.txt',
        },
        'en': {
            'url':
                'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/en_dict.txt',
        },
        'french': {
            'url':
                'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/french_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/french_dict.txt',
        },
        'german': {
            'url':
                'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/german_dict.txt',
        },
        'korean': {
            'url':
                'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/korean_dict.txt',
        },
        'japan': {
            'url':
                'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/japan_dict.txt',
        },
        'chinese_cht': {
            'url':
                'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/chinese_cht_dict.txt',
        },
        'ta': {
            'url':
                'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/ta_dict.txt',
        },
        'te': {
            'url':
                'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/te_dict.txt',
        },
        'ka': {
            'url':
                'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/ka_dict.txt',
        },
        'latin': {
            'url':
                'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/latin_ppocr_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/latin_dict.txt',
        },
        'arabic': {
            'url':
                'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/arabic_ppocr_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/arabic_dict.txt',
        },
        'cyrillic': {
            'url':
                'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/cyrillic_dict.txt',
        },
        'devanagari': {
            'url':
                'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/devanagari_dict.txt',
        },
        'structure': {
            'url':
                'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar',
            'dict_path': 'ppocr/utils/dict/table_dict.txt',
        },
    },
    'cls':
        'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar',
    'table': {
        'url':
            'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar',
        'dict_path': 'ppocr/utils/dict/table_structure_dict.txt',
    },
}

# Detection algorithms accepted by PaddleOCR.__init__ (checked against
# params.det_algorithm).
SUPPORT_DET_MODEL = ['DB']
# Used as a path component of the per-user model cache directory.
VERSION = '2.1'
# Recognition algorithms accepted by PaddleOCR.__init__ (checked against
# params.rec_algorithm).
SUPPORT_REC_MODEL = ['CRNN']
# Root of the per-user directory under which default model dirs are built.
BASE_DIR = os.path.expanduser("~/.paddleocr/")
W
WenmuZhou 已提交
133 134


W
WenmuZhou 已提交
135
def parse_args(mMain=True):
    """Build the option set shared by the command line and the Python API.

    Starts from the parser returned by ``init_args()`` and registers the
    extra ``--lang``, ``--det``, ``--rec`` and ``--type`` options. The
    defaults of ``rec_char_dict_path`` and ``table_char_dict_path`` are
    reset to ``None`` so callers can tell whether a dictionary path was
    supplied explicitly.

    args:
        mMain: when true, parse the process command line and return the
            result. When false, skip command-line parsing and return a
            namespace holding every registered option's default value.
    """
    import argparse

    parser = init_args()
    parser.add_help = mMain
    parser.add_argument("--lang", type=str, default='ch')
    parser.add_argument("--det", type=str2bool, default=True)
    parser.add_argument("--rec", type=str2bool, default=True)
    parser.add_argument("--type", type=str, default='ocr')

    # Clear the stock dictionary defaults; None means "not given by the user".
    dict_path_dests = ['rec_char_dict_path', 'table_char_dict_path']
    for opt in parser._actions:
        if opt.dest in dict_path_dests:
            opt.default = None

    if mMain:
        return parser.parse_args()

    # Library use: expose the defaults without touching sys.argv.
    defaults = {opt.dest: opt.default for opt in parser._actions}
    return argparse.Namespace(**defaults)
W
WenmuZhou 已提交
154 155 156


class PaddleOCR(predict_system.TextSystem):
    """OCR engine that resolves (and if needed downloads) the detection,
    recognition and angle-classification models before initialising the
    underlying ``TextSystem``."""

    def __init__(self, **kwargs):
        """
        paddleocr package
        args:
            **kwargs: other params show in paddleocr --help
        raises:
            AssertionError: if ``lang`` (after script-family mapping) has no
                recognition model in ``model_urls['rec']``.
            SystemExit: if ``det_algorithm`` / ``rec_algorithm`` is not supported.
        """
        params = parse_args(mMain=False)
        params.__dict__.update(**kwargs)
        if not params.show_log:
            logger.setLevel(logging.INFO)
        self.use_angle_cls = params.use_angle_cls
        lang = params.lang

        # Languages that share one recognition model, grouped by script family.
        script_families = {
            'latin': [
                'af', 'az', 'bs', 'cs', 'cy', 'da', 'de', 'es', 'et', 'fr',
                'ga', 'hr', 'hu', 'id', 'is', 'it', 'ku', 'la', 'lt', 'lv',
                'mi', 'ms', 'mt', 'nl', 'no', 'oc', 'pi', 'pl', 'pt', 'ro',
                'rs_latin', 'sk', 'sl', 'sq', 'sv', 'sw', 'tl', 'tr', 'uz',
                'vi'
            ],
            'arabic': ['ar', 'fa', 'ug', 'ur'],
            'cyrillic': [
                'ru', 'rs_cyrillic', 'be', 'bg', 'uk', 'mn', 'abq', 'ady',
                'kbd', 'ava', 'dar', 'inh', 'che', 'lbe', 'lez', 'tab'
            ],
            'devanagari': [
                'hi', 'mr', 'ne', 'bh', 'mai', 'ang', 'bho', 'mah', 'sck',
                'new', 'gom', 'sa', 'bgc'
            ],
        }
        for family, codes in script_families.items():
            if lang in codes:
                lang = family
                break

        # Explicit raise instead of `assert` so the check survives `python -O`;
        # the exception type is kept for callers that already catch it.
        if lang not in model_urls['rec']:
            raise AssertionError('param lang must in {}, but got {}'.format(
                model_urls['rec'].keys(), lang))

        # Only Chinese has its own detection model; everything else uses 'en'.
        det_lang = "ch" if lang == "ch" else "en"

        # No dictionary given: use the one bundled for this language, resolved
        # relative to the directory containing this file.
        if params.rec_char_dict_path is None:
            params.rec_char_dict_path = str(
                Path(__file__).parent / model_urls['rec'][lang]['dict_path'])

        # init model dir
        params.det_model_dir, det_url = confirm_model_dir_url(
            params.det_model_dir,
            os.path.join(BASE_DIR, VERSION, 'ocr', 'det', det_lang),
            model_urls['det'][det_lang])
        params.rec_model_dir, rec_url = confirm_model_dir_url(
            params.rec_model_dir,
            os.path.join(BASE_DIR, VERSION, 'ocr', 'rec', lang),
            model_urls['rec'][lang]['url'])
        params.cls_model_dir, cls_url = confirm_model_dir_url(
            params.cls_model_dir,
            os.path.join(BASE_DIR, VERSION, 'ocr', 'cls'),
            model_urls['cls'])
        # download model
        maybe_download(params.det_model_dir, det_url)
        maybe_download(params.rec_model_dir, rec_url)
        maybe_download(params.cls_model_dir, cls_url)

        # Unsupported algorithms are fatal; exit with a non-zero status so the
        # failure is visible to the calling shell / process.
        if params.det_algorithm not in SUPPORT_DET_MODEL:
            logger.error('det_algorithm must in {}'.format(SUPPORT_DET_MODEL))
            sys.exit(1)
        if params.rec_algorithm not in SUPPORT_REC_MODEL:
            logger.error('rec_algorithm must in {}'.format(SUPPORT_REC_MODEL))
            sys.exit(1)

        # Report the effective configuration through the logger rather than
        # writing to stdout from library code.
        logger.debug(params)
        # init det_model and rec_model
        super().__init__(params)

    def ocr(self, img, det=True, rec=True, cls=True):
        """
        ocr with paddleocr
        args:
            img: img for ocr, support ndarray, img_path (a string starting
                with 'http' is downloaded to 'tmp.jpg' first) and list of ndarray
            det: use text detection or not, if false, only rec will be exec. default is True
            rec: use text recognition or not, if false, only det will be exec. default is True
            cls: use the angle classifier or not. default is True. It is only
                applied if the classifier was enabled when the engine was built
        returns:
            det and rec: list of [box, recognition result] pairs
            det only: list of boxes
            otherwise: the classifier result (when the classifier runs and
                rec is false) or the recognition result
            None if the image cannot be loaded or detection yields no boxes
        raises:
            AssertionError: if img is not an ndarray, list or str.
            SystemExit: if a list of images is passed with det enabled.
        """
        # Explicit raise instead of `assert` so the check survives `python -O`.
        if not isinstance(img, (np.ndarray, list, str)):
            raise AssertionError(
                'img must be an ndarray, list or str, but got {}'.format(
                    type(img)))
        if isinstance(img, list) and det:
            logger.error('When input a list of images, det must be false')
            # sys.exit rather than the site-provided `exit`, which is not
            # guaranteed to exist (e.g. `python -S`); non-zero status on error.
            sys.exit(1)
        if cls and not self.use_angle_cls:
            logger.warning(
                'Since the angle classifier is not initialized, the angle classifier will not be uesd during the forward process'
            )

        if isinstance(img, str):
            # download net image
            if img.startswith('http'):
                download_with_progressbar(img, 'tmp.jpg')
                img = 'tmp.jpg'
            image_file = img
            img, flag = check_and_read_gif(image_file)
            if not flag:
                # Decode from raw bytes instead of handing the path to OpenCV.
                with open(image_file, 'rb') as f:
                    np_arr = np.frombuffer(f.read(), dtype=np.uint8)
                    img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
            if img is None:
                logger.error("error in loading image:{}".format(image_file))
                return None
        # Promote single-channel input to 3-channel BGR.
        if isinstance(img, np.ndarray) and len(img.shape) == 2:
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

        if det and rec:
            dt_boxes, rec_res = self.__call__(img, cls)
            # Same guard as the det-only branch below, so a missing detection
            # result yields None instead of a TypeError from zip().
            if dt_boxes is None:
                return None
            return [[box.tolist(), res] for box, res in zip(dt_boxes, rec_res)]
        if det:
            dt_boxes, elapse = self.text_detector(img)
            if dt_boxes is None:
                return None
            return [box.tolist() for box in dt_boxes]

        # det disabled: classify and/or recognise the given crops directly.
        if not isinstance(img, list):
            img = [img]
        if self.use_angle_cls and cls:
            img, cls_res, elapse = self.text_classifier(img)
            if not rec:
                return cls_res
        rec_res, elapse = self.text_recognizer(img)
        return rec_res
284 285


286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344
class PPStructure(OCRSystem):
    """Structure-analysis engine that resolves (and if needed downloads) the
    detection, recognition and table models before initialising the
    underlying ``OCRSystem``."""

    def __init__(self, **kwargs):
        """
        args:
            **kwargs: overrides for the default options built by parse_args
        """
        params = parse_args(mMain=False)
        params.__dict__.update(**kwargs)
        if not params.show_log:
            logger.setLevel(logging.INFO)
        params.use_angle_cls = False
        # init model dir
        params.det_model_dir, det_url = confirm_model_dir_url(
            params.det_model_dir,
            os.path.join(BASE_DIR, VERSION, 'structure', 'det'),
            model_urls['det']['structure'])
        params.rec_model_dir, rec_url = confirm_model_dir_url(
            params.rec_model_dir,
            os.path.join(BASE_DIR, VERSION, 'structure', 'rec'),
            model_urls['rec']['structure']['url'])
        params.table_model_dir, table_url = confirm_model_dir_url(
            params.table_model_dir,
            os.path.join(BASE_DIR, VERSION, 'structure', 'table'),
            model_urls['table']['url'])
        # download model
        maybe_download(params.det_model_dir, det_url)
        maybe_download(params.rec_model_dir, rec_url)
        maybe_download(params.table_model_dir, table_url)

        # Fall back to the bundled dictionaries when none were supplied.
        if params.rec_char_dict_path is None:
            params.rec_char_type = 'EN'
            params.rec_char_dict_path = self._locate_bundled_file(
                model_urls['rec']['structure']['dict_path'])
        if params.table_char_dict_path is None:
            params.table_char_dict_path = self._locate_bundled_file(
                model_urls['table']['dict_path'])

        # Report the effective configuration through the logger rather than
        # writing to stdout from library code.
        logger.debug(params)
        super().__init__(params)

    @staticmethod
    def _locate_bundled_file(relative_path):
        """Return relative_path resolved beside this file if it exists there,
        otherwise resolved against the parent of this file's directory."""
        here = Path(__file__).parent
        beside = here / relative_path
        if os.path.exists(str(beside)):
            return str(beside)
        return str(here.parent / relative_path)

    def __call__(self, img):
        """
        Run structure analysis on one image.
        args:
            img: an ndarray, or an image path (a string starting with 'http'
                is downloaded to 'tmp.jpg' first)
        returns:
            whatever the parent class's __call__ returns for the loaded
            image, or None if the image cannot be loaded
        """
        if isinstance(img, str):
            # download net image
            if img.startswith('http'):
                download_with_progressbar(img, 'tmp.jpg')
                img = 'tmp.jpg'
            image_file = img
            img, flag = check_and_read_gif(image_file)
            if not flag:
                # Decode from raw bytes instead of handing the path to OpenCV.
                with open(image_file, 'rb') as f:
                    np_arr = np.frombuffer(f.read(), dtype=np.uint8)
                    img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
            if img is None:
                logger.error("error in loading image:{}".format(image_file))
                return None
        # Promote single-channel input to 3-channel BGR.
        if isinstance(img, np.ndarray) and len(img.shape) == 2:
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

        res = super().__call__(img)
        return res


345
def main():
    """Command-line entry point.

    Parses the command line, collects the input images (a link is first
    downloaded to 'tmp.jpg'), builds the engine selected by ``--type`` and
    logs the result for every image. In 'structure' mode the result is also
    saved via ``save_structure_res`` under ``args.output``.

    raises:
        NotImplementedError: if ``--type`` is neither 'ocr' nor 'structure'.
    """
    # for cmd
    args = parse_args(mMain=True)
    image_dir = args.image_dir
    if is_link(image_dir):
        download_with_progressbar(image_dir, 'tmp.jpg')
        image_file_list = ['tmp.jpg']
    else:
        image_file_list = get_image_file_list(args.image_dir)
    if len(image_file_list) == 0:
        logger.error('no images find in {}'.format(args.image_dir))
        return

    if args.type == 'ocr':
        engine = PaddleOCR(**(args.__dict__))
    elif args.type == 'structure':
        engine = PPStructure(**(args.__dict__))
    else:
        raise NotImplementedError(
            "--type must be 'ocr' or 'structure', but got {!r}".format(
                args.type))

    for img_path in image_file_list:
        img_name = os.path.basename(img_path).split('.')[0]
        logger.info('{}{}{}'.format('*' * 10, img_path, '*' * 10))
        if args.type == 'ocr':
            result = engine.ocr(img_path,
                                det=args.det,
                                rec=args.rec,
                                cls=args.use_angle_cls)
            if result is not None:
                for line in result:
                    logger.info(line)
        elif args.type == 'structure':
            result = engine(img_path)
            # The engine returns None when the image cannot be loaded (it has
            # already logged the error); skip it instead of crashing on the
            # iteration below.
            if result is None:
                continue
            for item in result:
                logger.info(item['res'])
            save_structure_res(result, args.output, img_name)