diff --git a/doc/doc_ch/whl.md b/doc/doc_ch/whl.md index d57f2ac3255a78b630e2ea4189ab182d5c7f71ba..17278926bbf072bd9708d2422d0a792065ff1fdb 100644 --- a/doc/doc_ch/whl.md +++ b/doc/doc_ch/whl.md @@ -416,4 +416,4 @@ im_show.save('result.jpg') | cls | 前向时是否启动分类 (命令行模式下使用use_angle_cls控制前向是否启动分类) | FALSE | | show_log | 是否打印logger信息 | FALSE | | type | 执行ocr或者表格结构化, 值可选['ocr','structure'] | ocr | -| ocr_version | OCR模型版本,可选PP-OCRv3, PP-OCRv2, PP-OCR。PP-OCRv3 目前仅支持中、英文的检测和识别模型,方向分类器模型;PP-OCRv2 目前仅支持中文的检测和识别模型;PP-OCR支持中文的检测,识别,多语种识别,方向分类器等模型 | PP-OCRv3 | +| ocr_version | OCR模型版本,可选PP-OCRv3, PP-OCRv2, PP-OCR。PP-OCRv3 支持中、英文的检测、识别、多语种识别,方向分类器等模型;PP-OCRv2 目前仅支持中文的检测和识别模型;PP-OCR支持中文的检测,识别,多语种识别,方向分类器等模型 | PP-OCRv3 | diff --git a/doc/doc_en/whl_en.md b/doc/doc_en/whl_en.md index 40a2e122d19679a59e7e65df29dd59781b4a2143..9f7e10d4ece14ec191b08950eef0304dfd0ae966 100644 --- a/doc/doc_en/whl_en.md +++ b/doc/doc_en/whl_en.md @@ -368,4 +368,4 @@ im_show.save('result.jpg') | cls | Enable classification when `ppocr.ocr` func exec((Use use_angle_cls in command line mode to control whether to start classification in the forward direction) | FALSE | | show_log | Whether to print log| FALSE | | type | Perform ocr or table structuring, the value is selected in ['ocr','structure'] | ocr | -| ocr_version | OCR Model version number, the current model support list is as follows: PP-OCRv3 support Chinese and English detection and recognition model and direction classifier model, PP-OCRv2 support Chinese detection and recognition model, PP-OCR support Chinese detection, recognition and direction classifier, multilingual recognition model | PP-OCRv3 | +| ocr_version | OCR Model version number, the current model support list is as follows: PP-OCRv3 supports Chinese and English detection, recognition, multilingual recognition, direction classifier models, PP-OCRv2 support Chinese detection and recognition model, PP-OCR support Chinese detection, recognition and direction classifier, multilingual recognition model | PP-OCRv3 | diff --git a/paddleocr.py b/paddleocr.py index 417350839ac4d1e512c7396831f89ab4b2d6c724..3ed4b47bf129889f388d07ca9c2ae0bc8ecda018 100644 --- a/paddleocr.py +++ b/paddleocr.py @@ -67,6 +67,10 @@ MODEL_URLS = { 'url': 'https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar', }, + 'ml': { + 'url': + 'https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/Multilingual_PP-OCRv3_det_infer.tar' + } }, 'rec': { 'ch': { @@ -79,6 +83,56 @@ MODEL_URLS = { 'https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tar', 'dict_path': './ppocr/utils/en_dict.txt' }, + 'korean': { + 'url': + 'https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/korean_PP-OCRv3_rec_infer.tar', + 'dict_path': './ppocr/utils/dict/korean_dict.txt' + }, + 'japan': { + 'url': + 'https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/japan_PP-OCRv3_rec_infer.tar', + 'dict_path': './ppocr/utils/dict/japan_dict.txt' + }, + 'chinese_cht': { + 'url': + 'https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/chinese_cht_PP-OCRv3_rec_infer.tar', + 'dict_path': './ppocr/utils/dict/chinese_cht_dict.txt' + }, + 'ta': { + 'url': + 'https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/ta_PP-OCRv3_rec_infer.tar', + 'dict_path': './ppocr/utils/dict/ta_dict.txt' + }, + 'te': { + 'url': + 'https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/te_PP-OCRv3_rec_infer.tar', + 'dict_path': './ppocr/utils/dict/te_dict.txt' + }, + 'ka': { + 'url': + 'https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/ka_PP-OCRv3_rec_infer.tar', + 'dict_path': './ppocr/utils/dict/ka_dict.txt' + }, + 'latin': { + 'url': + 'https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/latin_PP-OCRv3_rec_infer.tar', + 'dict_path': './ppocr/utils/dict/latin_dict.txt' + }, + 'arabic': { + 'url': + 'https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/arabic_PP-OCRv3_rec_infer.tar', + 'dict_path': './ppocr/utils/dict/arabic_dict.txt' + }, + 'cyrillic': { + 'url': + 'https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/cyrillic_PP-OCRv3_rec_infer.tar', + 'dict_path': './ppocr/utils/dict/cyrillic_dict.txt' + }, + 'devanagari': { + 'url': + 'https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/devanagari_PP-OCRv3_rec_infer.tar', + 'dict_path': './ppocr/utils/dict/devanagari_dict.txt' + }, }, 'cls': { 'ch': { @@ -259,7 +313,7 @@ def parse_lang(lang): 'af', 'az', 'bs', 'cs', 'cy', 'da', 'de', 'es', 'et', 'fr', 'ga', 'hr', 'hu', 'id', 'is', 'it', 'ku', 'la', 'lt', 'lv', 'mi', 'ms', 'mt', 'nl', 'no', 'oc', 'pi', 'pl', 'pt', 'ro', 'rs_latin', 'sk', 'sl', 'sq', 'sv', - 'sw', 'tl', 'tr', 'uz', 'vi' + 'sw', 'tl', 'tr', 'uz', 'vi', 'french', 'german' ] arabic_lang = ['ar', 'fa', 'ug', 'ur'] cyrillic_lang = [ @@ -285,8 +339,10 @@ def parse_lang(lang): det_lang = "ch" elif lang == 'structure': det_lang = 'structure' - else: + elif lang in ["en", "latin"]: det_lang = "en" + else: + det_lang = "ml" return lang, det_lang