From 3c912550b88e49f1a9bf6aaaeaea7cd449596a9d Mon Sep 17 00:00:00 2001 From: WenmuZhou <572459439@qq.com> Date: Mon, 6 Sep 2021 18:33:21 +0800 Subject: [PATCH] add 2.1 models to paddleocr whl --- paddleocr.py | 301 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 191 insertions(+), 110 deletions(-) diff --git a/paddleocr.py b/paddleocr.py index 45c1a40d..de712442 100644 --- a/paddleocr.py +++ b/paddleocr.py @@ -33,104 +33,141 @@ from tools.infer.utility import draw_ocr, str2bool from ppstructure.utility import init_args, draw_structure_result from ppstructure.predict_system import OCRSystem, save_structure_res -__all__ = ['PaddleOCR', 'PPStructure', 'draw_ocr', 'draw_structure_result', 'save_structure_res','download_with_progressbar'] - -model_urls = { - 'det': { - 'ch': - 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar', - 'en': - 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_ppocr_mobile_v2.0_det_infer.tar', - 'structure': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar' +__all__ = [ + 'PaddleOCR', 'PPStructure', 'draw_ocr', 'draw_structure_result', + 'save_structure_res', 'download_with_progressbar' +] + +SUPPORT_DET_MODEL = ['DB'] +VERSION = '2.2.1' +SUPPORT_REC_MODEL = ['CRNN'] +BASE_DIR = os.path.expanduser("~/.paddleocr/") + +DEFAULT_MODEL_VERSION = '2.0' +MODEL_URLS = { + '2.1': { + 'det': { + 'ch': { + 'url': + 'https://paddleocr.bj.bcebos.com/dygraph_v2.1/chinese/ch_ppocr_mobile_v2.1_det_infer.tar', + }, + }, + 'rec': { + 'ch': { + 'url': + 'https://paddleocr.bj.bcebos.com/dygraph_v2.1/chinese/ch_ppocr_mobile_v2.1_rec_infer.tar', + 'dict_path': './ppocr/utils/ppocr_keys_v1.txt' + } + } }, - 'rec': { - 'ch': { - 'url': - 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar', - 'dict_path': './ppocr/utils/ppocr_keys_v1.txt' + '2.0': { + 'det': { + 'ch': { + 'url': + 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar', + }, + 'en': { + 'url': + 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_ppocr_mobile_v2.0_det_infer.tar', + }, + 'structure': { + 'url': + 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar' + } }, - 'en': { - 'url': + 'rec': { + 'ch': { + 'url': + 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar', + 'dict_path': './ppocr/utils/ppocr_keys_v1.txt' + }, + 'en': { + 'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_infer.tar', - 'dict_path': './ppocr/utils/en_dict.txt' - }, - 'french': { - 'url': + 'dict_path': './ppocr/utils/en_dict.txt' + }, + 'french': { + 'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/french_mobile_v2.0_rec_infer.tar', - 'dict_path': './ppocr/utils/dict/french_dict.txt' - }, - 'german': { - 'url': + 'dict_path': './ppocr/utils/dict/french_dict.txt' + }, + 'german': { + 'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tar', - 'dict_path': './ppocr/utils/dict/german_dict.txt' - }, - 'korean': { - 'url': + 'dict_path': './ppocr/utils/dict/german_dict.txt' + }, + 'korean': { + 'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tar', - 'dict_path': './ppocr/utils/dict/korean_dict.txt' - }, - 'japan': { - 'url': + 'dict_path': './ppocr/utils/dict/korean_dict.txt' + }, + 'japan': { + 'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tar', - 'dict_path': './ppocr/utils/dict/japan_dict.txt' - }, - 'chinese_cht': { - 'url': + 'dict_path': './ppocr/utils/dict/japan_dict.txt' + }, + 'chinese_cht': { + 'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_infer.tar', - 'dict_path': './ppocr/utils/dict/chinese_cht_dict.txt' - }, - 'ta': { - 'url': + 'dict_path': './ppocr/utils/dict/chinese_cht_dict.txt' + }, + 'ta': { + 'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_infer.tar', - 'dict_path': './ppocr/utils/dict/ta_dict.txt' - }, - 'te': { - 'url': + 'dict_path': './ppocr/utils/dict/ta_dict.txt' + }, + 'te': { + 'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_infer.tar', - 'dict_path': './ppocr/utils/dict/te_dict.txt' - }, - 'ka': { - 'url': + 'dict_path': './ppocr/utils/dict/te_dict.txt' + }, + 'ka': { + 'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_infer.tar', - 'dict_path': './ppocr/utils/dict/ka_dict.txt' - }, - 'latin': { - 'url': + 'dict_path': './ppocr/utils/dict/ka_dict.txt' + }, + 'latin': { + 'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/latin_ppocr_mobile_v2.0_rec_infer.tar', - 'dict_path': './ppocr/utils/dict/latin_dict.txt' - }, - 'arabic': { - 'url': + 'dict_path': './ppocr/utils/dict/latin_dict.txt' + }, + 'arabic': { + 'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/arabic_ppocr_mobile_v2.0_rec_infer.tar', - 'dict_path': './ppocr/utils/dict/arabic_dict.txt' - }, - 'cyrillic': { - 'url': + 'dict_path': './ppocr/utils/dict/arabic_dict.txt' + }, + 'cyrillic': { + 'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_infer.tar', - 'dict_path': './ppocr/utils/dict/cyrillic_dict.txt' - }, - 'devanagari': { - 'url': + 'dict_path': './ppocr/utils/dict/cyrillic_dict.txt' + }, + 'devanagari': { + 'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_infer.tar', - 'dict_path': './ppocr/utils/dict/devanagari_dict.txt' + 'dict_path': './ppocr/utils/dict/devanagari_dict.txt' + }, + 'structure': { + 'url': + 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar', + 'dict_path': 'ppocr/utils/dict/table_dict.txt' + } + }, + 'cls': { + 'ch': { + 'url': + 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar', + } }, - 'structure': { - 'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar', - 'dict_path': 'ppocr/utils/dict/table_dict.txt' + 'table': { + 'en': { + 'url': + 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar', + 'dict_path': 'ppocr/utils/dict/table_structure_dict.txt' + } } - }, - 'cls': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar', - 'table': { - 'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar', - 'dict_path': 'ppocr/utils/dict/table_structure_dict.txt' } } -SUPPORT_DET_MODEL = ['DB'] -VERSION = '2.2.0.1' -SUPPORT_REC_MODEL = ['CRNN'] -BASE_DIR = os.path.expanduser("~/.paddleocr/") - def parse_args(mMain=True): import argparse @@ -140,6 +177,7 @@ def parse_args(mMain=True): parser.add_argument("--det", type=str2bool, default=True) parser.add_argument("--rec", type=str2bool, default=True) parser.add_argument("--type", type=str, default='ocr') + parser.add_argument("--version", type=str, default='2.1') for action in parser._actions: if action.dest in ['rec_char_dict_path', 'table_char_dict_path']: @@ -155,19 +193,19 @@ def parse_args(mMain=True): def parse_lang(lang): latin_lang = [ - 'af', 'az', 'bs', 'cs', 'cy', 'da', 'de', 'es', 'et', 'fr', 'ga', - 'hr', 'hu', 'id', 'is', 'it', 'ku', 'la', 'lt', 'lv', 'mi', 'ms', - 'mt', 'nl', 'no', 'oc', 'pi', 'pl', 'pt', 'ro', 'rs_latin', 'sk', - 'sl', 'sq', 'sv', 'sw', 'tl', 'tr', 'uz', 'vi' + 'af', 'az', 'bs', 'cs', 'cy', 'da', 'de', 'es', 'et', 'fr', 'ga', 'hr', + 'hu', 'id', 'is', 'it', 'ku', 'la', 'lt', 'lv', 'mi', 'ms', 'mt', 'nl', + 'no', 'oc', 'pi', 'pl', 'pt', 'ro', 'rs_latin', 'sk', 'sl', 'sq', 'sv', + 'sw', 'tl', 'tr', 'uz', 'vi' ] arabic_lang = ['ar', 'fa', 'ug', 'ur'] cyrillic_lang = [ - 'ru', 'rs_cyrillic', 'be', 'bg', 'uk', 'mn', 'abq', 'ady', 'kbd', - 'ava', 'dar', 'inh', 'che', 'lbe', 'lez', 'tab' + 'ru', 'rs_cyrillic', 'be', 'bg', 'uk', 'mn', 'abq', 'ady', 'kbd', 'ava', + 'dar', 'inh', 'che', 'lbe', 'lez', 'tab' ] devanagari_lang = [ - 'hi', 'mr', 'ne', 'bh', 'mai', 'ang', 'bho', 'mah', 'sck', 'new', - 'gom', 'sa', 'bgc' + 'hi', 'mr', 'ne', 'bh', 'mai', 'ang', 'bho', 'mah', 'sck', 'new', 'gom', + 'sa', 'bgc' ] if lang in latin_lang: lang = "latin" @@ -177,9 +215,9 @@ def parse_lang(lang): lang = "cyrillic" elif lang in devanagari_lang: lang = "devanagari" - assert lang in model_urls[ + assert lang in MODEL_URLS[DEFAULT_MODEL_VERSION][ 'rec'], 'param lang must in {}, but got {}'.format( - model_urls['rec'].keys(), lang) + MODEL_URLS[DEFAULT_MODEL_VERSION]['rec'].keys(), lang) if lang == "ch": det_lang = "ch" elif lang == 'structure': @@ -189,6 +227,35 @@ def parse_lang(lang): return lang, det_lang +def get_model_config(version, model_type, lang): + if version not in MODEL_URLS: + logger.warning('version {} not in {}, use version {} instead'.format( + version, MODEL_URLS.keys(), DEFAULT_MODEL_VERSION)) + version = DEFAULT_MODEL_VERSION + if model_type not in MODEL_URLS[version]: + if model_type in MODEL_URLS[DEFAULT_MODEL_VERSION]: + logger.warning( + 'version {} not support {} models, use version {} instead'. + format(version, model_type, DEFAULT_MODEL_VERSION)) + version = DEFAULT_MODEL_VERSION + else: + logger.error('{} models is not support, we only support {}'.format( + model_type, MODEL_URLS[DEFAULT_MODEL_VERSION].keys())) + sys.exit(-1) + if lang not in MODEL_URLS[version][model_type]: + if lang in MODEL_URLS[DEFAULT_MODEL_VERSION][model_type]: + logger.warning('lang {} is not support in {}, use {} instead'. + format(lang, version, DEFAULT_MODEL_VERSION)) + version = DEFAULT_MODEL_VERSION + else: + logger.error( + 'lang {} is not support, we only support {} for {} models'. + format(lang, MODEL_URLS[DEFAULT_MODEL_VERSION][model_type].keys( + ), model_type)) + sys.exit(-1) + return MODEL_URLS[version][model_type][lang] + + class PaddleOCR(predict_system.TextSystem): def __init__(self, **kwargs): """ @@ -204,15 +271,21 @@ class PaddleOCR(predict_system.TextSystem): lang, det_lang = parse_lang(params.lang) # init model dir - params.det_model_dir, det_url = confirm_model_dir_url(params.det_model_dir, - os.path.join(BASE_DIR, VERSION, 'ocr', 'det', det_lang), - model_urls['det'][det_lang]) - params.rec_model_dir, rec_url = confirm_model_dir_url(params.rec_model_dir, - os.path.join(BASE_DIR, VERSION, 'ocr', 'rec', lang), - model_urls['rec'][lang]['url']) - params.cls_model_dir, cls_url = confirm_model_dir_url(params.cls_model_dir, - os.path.join(BASE_DIR, VERSION, 'ocr', 'cls'), - model_urls['cls']) + det_model_config = get_model_config(params.version, 'det', det_lang) + params.det_model_dir, det_url = confirm_model_dir_url( + params.det_model_dir, + os.path.join(BASE_DIR, VERSION, 'ocr', 'det', det_lang), + det_model_config['url']) + rec_model_config = get_model_config(params.version, 'rec', lang) + params.rec_model_dir, rec_url = confirm_model_dir_url( + params.rec_model_dir, + os.path.join(BASE_DIR, VERSION, 'ocr', 'rec', lang), + rec_model_config['url']) + cls_model_config = get_model_config(params.version, 'cls', 'ch') + params.cls_model_dir, cls_url = confirm_model_dir_url( + params.cls_model_dir, + os.path.join(BASE_DIR, VERSION, 'ocr', 'cls'), + cls_model_config['url']) # download model maybe_download(params.det_model_dir, det_url) maybe_download(params.rec_model_dir, rec_url) @@ -226,7 +299,8 @@ class PaddleOCR(predict_system.TextSystem): sys.exit(0) if params.rec_char_dict_path is None: - params.rec_char_dict_path = str(Path(__file__).parent / model_urls['rec'][lang]['dict_path']) + params.rec_char_dict_path = str( + Path(__file__).parent / rec_model_config['dict_path']) print(params) # init det_model and rec_model @@ -293,24 +367,32 @@ class PPStructure(OCRSystem): lang, det_lang = parse_lang(params.lang) # init model dir - params.det_model_dir, det_url = confirm_model_dir_url(params.det_model_dir, - os.path.join(BASE_DIR, VERSION, 'ocr', 'det', det_lang), - model_urls['det'][det_lang]) - params.rec_model_dir, rec_url = confirm_model_dir_url(params.rec_model_dir, - os.path.join(BASE_DIR, VERSION, 'ocr', 'rec', lang), - model_urls['rec'][lang]['url']) - params.table_model_dir, table_url = confirm_model_dir_url(params.table_model_dir, - os.path.join(BASE_DIR, VERSION, 'ocr', 'table'), - model_urls['table']['url']) + det_model_config = get_model_config(params.version, 'det', det_lang) + params.det_model_dir, det_url = confirm_model_dir_url( + params.det_model_dir, + os.path.join(BASE_DIR, VERSION, 'ocr', 'det', det_lang), + det_model_config['url']) + rec_model_config = get_model_config(params.version, 'rec', lang) + params.rec_model_dir, rec_url = confirm_model_dir_url( + params.rec_model_dir, + os.path.join(BASE_DIR, VERSION, 'ocr', 'rec', lang), + rec_model_config['url']) + table_model_config = get_model_config(params.version, 'table', 'en') + params.table_model_dir, table_url = confirm_model_dir_url( + params.table_model_dir, + os.path.join(BASE_DIR, VERSION, 'ocr', 'table'), + table_model_config['url']) # download model maybe_download(params.det_model_dir, det_url) maybe_download(params.rec_model_dir, rec_url) maybe_download(params.table_model_dir, table_url) if params.rec_char_dict_path is None: - params.rec_char_dict_path = str(Path(__file__).parent / model_urls['rec'][lang]['dict_path']) + params.rec_char_dict_path = str( + Path(__file__).parent / rec_model_config['dict_path']) if params.table_char_dict_path is None: - params.table_char_dict_path = str(Path(__file__).parent / model_urls['table']['dict_path']) + params.table_char_dict_path = str( + Path(__file__).parent / table_model_config['dict_path']) print(params) super().__init__(params) @@ -374,4 +456,3 @@ def main(): for item in result: item.pop('img') logger.info(item) - -- GitLab