add desc of version params (#3929)

* add desc of version params

add desc of version params (#3929)
* add desc of version params
96c8c004 · zhoujun · GitHub · b6a21419 · 96c8c004 · 96c8c004
7 changed files
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -5,5 +5,6 @@ recursive-include ppocr/utils *.txt utility.py logging.py network.py
 recursive-include ppocr/data *.py
 recursive-include ppocr/postprocess *.py
 recursive-include tools/infer *.py
+recursive-include tools __init__.py
 recursive-include ppocr/utils/e2e_utils *.py
 recursive-include ppstructure *.py
\ No newline at end of file
--- a/doc/doc_ch/whl.md
+++ b/doc/doc_ch/whl.md
@@ -420,3 +420,5 @@ im_show.save('result.jpg')
 | cls                     | 前向时是否启动分类 (命令行模式下使用use_angle_cls控制前向是否启动分类)                                                                                                                                                                                                | FALSE                    |
 | show_log                     | 是否打印det和rec等信息                                                                                                                                                                                                | FALSE                    |
 | type                     | 执行ocr或者表格结构化, 值可选['ocr','structure']                                                                                                                                                                                             | ocr                    |
+| ocr_version                     | OCR模型版本，可选PP-OCRv2, PP-OCR。PP-OCRv2 目前仅支持中文的检测和识别模型，PP-OCR支持中文的检测，识别，多语种识别，方向分类器等模型                                                                                                                                        | PP-OCRv2                   |
+| structure_version                     | 表格结构化模型版本，可选 STRUCTURE。STRUCTURE支持表格结构化模型                                                                                                                                                                                        | STRUCTURE                    |
--- a/doc/doc_en/whl_en.md
+++ b/doc/doc_en/whl_en.md
@@ -367,3 +367,5 @@ im_show.save('result.jpg')
 | cls                     | Enable classification when `ppocr.ocr` func exec((Use use_angle_cls in command line mode to control whether to start classification in the forward direction)                                                                                                                                                                                                   | FALSE                    |
 | show_log                     | Whether to print log in det and rec | FALSE                    |
 | type                     | Perform ocr or table structuring, the value is selected in ['ocr','structure']                                                                                                                                                                                             | ocr                    |
+| ocr_version                     | OCR model version; one of PP-OCRv2 or PP-OCR. PP-OCRv2 currently supports only the Chinese detection and recognition models; PP-OCR supports Chinese detection, recognition, multilingual recognition, and the direction classifier | PP-OCRv2                 |
+| structure_version                     | Table structure model version; the only option is STRUCTURE, which supports the English table structure model | STRUCTURE                 |
--- a/paddleocr.py
+++ b/paddleocr.py
@@ -16,6 +16,9 @@ import os
 import sys

 __dir__ = os.path.dirname(__file__)
+
+import paddle
+
 sys.path.append(os.path.join(__dir__, ''))

 import cv2
@@ -29,7 +32,7 @@ from ppocr.utils.logging import get_logger
 logger = get_logger()
 from ppocr.utils.utility import check_and_read_gif, get_image_file_list
 from ppocr.utils.network import maybe_download, download_with_progressbar, is_link, confirm_model_dir_url
-from tools.infer.utility import draw_ocr, str2bool
+from tools.infer.utility import draw_ocr, str2bool, check_gpu
 from ppstructure.utility import init_args, draw_structure_result
 from ppstructure.predict_system import OCRSystem, save_structure_res

@@ -39,13 +42,15 @@ __all__ = [
 ]

 SUPPORT_DET_MODEL = ['DB']
-VERSION = '2.2.1'
+VERSION = '2.3.0.1'
 SUPPORT_REC_MODEL = ['CRNN']
 BASE_DIR = os.path.expanduser("~/.paddleocr/")

-DEFAULT_MODEL_VERSION = '2.0'
+DEFAULT_OCR_MODEL_VERSION = 'PP-OCR'
+DEFAULT_STRUCTURE_MODEL_VERSION = 'STRUCTURE'
 MODEL_URLS = {
-    '2.1': {
+    'OCR': {
+        'PP-OCRv2': {
            'det': {
                'ch': {
                    'url':
@@ -60,7 +65,7 @@ MODEL_URLS = {
                }
            }
        },
-    '2.0': {
+        DEFAULT_OCR_MODEL_VERSION: {
            'det': {
                'ch': {
                    'url':
@@ -158,6 +163,10 @@ MODEL_URLS = {
                    'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar',
                }
            },
+        }
+    },
+    'STRUCTURE': {
+        DEFAULT_STRUCTURE_MODEL_VERSION: {
            'table': {
                'en': {
                    'url':
@@ -166,6 +175,7 @@ MODEL_URLS = {
                }
            }
        }
+    }
 }


@@ -177,7 +187,20 @@ def parse_args(mMain=True):
    parser.add_argument("--det", type=str2bool, default=True)
    parser.add_argument("--rec", type=str2bool, default=True)
    parser.add_argument("--type", type=str, default='ocr')
-    parser.add_argument("--version", type=str, default='2.1')
+    parser.add_argument(
+        "--ocr_version",
+        type=str,
+        default='PP-OCRv2',
+        help='OCR Model version, the current model support list is as follows: '
+        '1. PP-OCRv2 Support Chinese detection and recognition model. '
+        '2. PP-OCR support Chinese detection, recognition and direction classifier and multilingual recognition model.'
+    )
+    parser.add_argument(
+        "--structure_version",
+        type=str,
+        default='STRUCTURE',
+        help='Model version, the current model support list is as follows:'
+        ' 1. STRUCTURE Support en table structure model.')

    for action in parser._actions:
        if action.dest in ['rec_char_dict_path', 'table_char_dict_path']:
@@ -215,9 +238,9 @@ def parse_lang(lang):
        lang = "cyrillic"
    elif lang in devanagari_lang:
        lang = "devanagari"
-    assert lang in MODEL_URLS[DEFAULT_MODEL_VERSION][
+    assert lang in MODEL_URLS['OCR'][DEFAULT_OCR_MODEL_VERSION][
        'rec'], 'param lang must in {}, but got {}'.format(
-            MODEL_URLS[DEFAULT_MODEL_VERSION]['rec'].keys(), lang)
+            MODEL_URLS['OCR'][DEFAULT_OCR_MODEL_VERSION]['rec'].keys(), lang)
    if lang == "ch":
        det_lang = "ch"
    elif lang == 'structure':
@@ -227,33 +250,41 @@ def parse_lang(lang):
    return lang, det_lang


-def get_model_config(version, model_type, lang):
-    if version not in MODEL_URLS:
-        logger.warning('version {} not in {}, use version {} instead'.format(
-            version, MODEL_URLS.keys(), DEFAULT_MODEL_VERSION))
+def get_model_config(type, version, model_type, lang):
+    if type == 'OCR':
+        DEFAULT_MODEL_VERSION = DEFAULT_OCR_MODEL_VERSION
+    elif type == 'STRUCTURE':
+        DEFAULT_MODEL_VERSION = DEFAULT_STRUCTURE_MODEL_VERSION
+    else:
+        raise NotImplementedError
+    model_urls = MODEL_URLS[type]
+    if version not in model_urls:
+        logger.warning('version {} not in {}, auto switch to version {}'.format(
+            version, model_urls.keys(), DEFAULT_MODEL_VERSION))
        version = DEFAULT_MODEL_VERSION
-    if model_type not in MODEL_URLS[version]:
-        if model_type in MODEL_URLS[DEFAULT_MODEL_VERSION]:
+    if model_type not in model_urls[version]:
+        if model_type in model_urls[DEFAULT_MODEL_VERSION]:
            logger.warning(
-                'version {} not support {} models, use version {} instead'.
+                'version {} not support {} models, auto switch to version {}'.
                format(version, model_type, DEFAULT_MODEL_VERSION))
            version = DEFAULT_MODEL_VERSION
        else:
            logger.error('{} models is not support, we only support {}'.format(
-                model_type, MODEL_URLS[DEFAULT_MODEL_VERSION].keys()))
+                model_type, model_urls[DEFAULT_MODEL_VERSION].keys()))
            sys.exit(-1)
-    if lang not in MODEL_URLS[version][model_type]:
-        if lang in MODEL_URLS[DEFAULT_MODEL_VERSION][model_type]:
-            logger.warning('lang {} is not support in {}, use {} instead'.
+    if lang not in model_urls[version][model_type]:
+        if lang in model_urls[DEFAULT_MODEL_VERSION][model_type]:
+            logger.warning(
+                'lang {} is not support in {}, auto switch to version {}'.
                format(lang, version, DEFAULT_MODEL_VERSION))
            version = DEFAULT_MODEL_VERSION
        else:
            logger.error(
                'lang {} is not support, we only support {} for {} models'.
-                format(lang, MODEL_URLS[DEFAULT_MODEL_VERSION][model_type].keys(
+                format(lang, model_urls[DEFAULT_MODEL_VERSION][model_type].keys(
                ), model_type))
            sys.exit(-1)
-    return MODEL_URLS[version][model_type][lang]
+    return model_urls[version][model_type][lang]


 class PaddleOCR(predict_system.TextSystem):
@@ -265,23 +296,28 @@ class PaddleOCR(predict_system.TextSystem):
        """
        params = parse_args(mMain=False)
        params.__dict__.update(**kwargs)
+        params.use_gpu = check_gpu(params.use_gpu)
+
        if not params.show_log:
            logger.setLevel(logging.INFO)
        self.use_angle_cls = params.use_angle_cls
        lang, det_lang = parse_lang(params.lang)

        # init model dir
-        det_model_config = get_model_config(params.version, 'det', det_lang)
+        det_model_config = get_model_config('OCR', params.ocr_version, 'det',
+                                            det_lang)
        params.det_model_dir, det_url = confirm_model_dir_url(
            params.det_model_dir,
            os.path.join(BASE_DIR, VERSION, 'ocr', 'det', det_lang),
            det_model_config['url'])
-        rec_model_config = get_model_config(params.version, 'rec', lang)
+        rec_model_config = get_model_config('OCR', params.ocr_version, 'rec',
+                                            lang)
        params.rec_model_dir, rec_url = confirm_model_dir_url(
            params.rec_model_dir,
            os.path.join(BASE_DIR, VERSION, 'ocr', 'rec', lang),
            rec_model_config['url'])
-        cls_model_config = get_model_config(params.version, 'cls', 'ch')
+        cls_model_config = get_model_config('OCR', params.ocr_version, 'cls',
+                                            'ch')
        params.cls_model_dir, cls_url = confirm_model_dir_url(
            params.cls_model_dir,
            os.path.join(BASE_DIR, VERSION, 'ocr', 'cls'),
@@ -362,22 +398,27 @@ class PPStructure(OCRSystem):
    def __init__(self, **kwargs):
        params = parse_args(mMain=False)
        params.__dict__.update(**kwargs)
+        params.use_gpu = check_gpu(params.use_gpu)
+
        if not params.show_log:
            logger.setLevel(logging.INFO)
        lang, det_lang = parse_lang(params.lang)

        # init model dir
-        det_model_config = get_model_config(params.version, 'det', det_lang)
+        det_model_config = get_model_config('OCR', params.ocr_version, 'det',
+                                            det_lang)
        params.det_model_dir, det_url = confirm_model_dir_url(
            params.det_model_dir,
            os.path.join(BASE_DIR, VERSION, 'ocr', 'det', det_lang),
            det_model_config['url'])
-        rec_model_config = get_model_config(params.version, 'rec', lang)
+        rec_model_config = get_model_config('OCR', params.ocr_version, 'rec',
+                                            lang)
        params.rec_model_dir, rec_url = confirm_model_dir_url(
            params.rec_model_dir,
            os.path.join(BASE_DIR, VERSION, 'ocr', 'rec', lang),
            rec_model_config['url'])
-        table_model_config = get_model_config(params.version, 'table', 'en')
+        table_model_config = get_model_config(
+            'STRUCTURE', params.structure_version, 'table', 'en')
        params.table_model_dir, table_url = confirm_model_dir_url(
            params.table_model_dir,
            os.path.join(BASE_DIR, VERSION, 'ocr', 'table'),

--- a/ppocr/utils/network.py
+++ b/ppocr/utils/network.py
@@ -24,15 +24,17 @@ from ppocr.utils.logging import get_logger
 def download_with_progressbar(url, save_path):
    logger = get_logger()
    response = requests.get(url, stream=True)
-    total_size_in_bytes = int(response.headers.get('content-length', 0))
+    if response.status_code == 200:
+        total_size_in_bytes = int(response.headers.get('content-length', 1))
        block_size = 1024  # 1 Kibibyte
-    progress_bar = tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True)
+        progress_bar = tqdm(
+            total=total_size_in_bytes, unit='iB', unit_scale=True)
        with open(save_path, 'wb') as file:
            for data in response.iter_content(block_size):
                progress_bar.update(len(data))
                file.write(data)
        progress_bar.close()
-    if total_size_in_bytes == 0 or progress_bar.n != total_size_in_bytes:
+    else:
        logger.error("Something went wrong while downloading models")
        sys.exit(0)


--- a/tools/__init__.py
+++ b/tools/__init__.py
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -17,7 +17,7 @@ import os
 import sys
 import cv2
 import numpy as np
-import json
+import paddle
 from PIL import Image, ImageDraw, ImageFont
 import math
 from paddle import inference
@@ -601,5 +601,12 @@ def get_rotate_crop_image(img, points):
    return dst_img


+def check_gpu(use_gpu):
+    if use_gpu and not paddle.is_compiled_with_cuda():
+
+        use_gpu = False
+    return use_gpu
+
+
 if __name__ == '__main__':
    pass