merge upstream

78d5d87b · 文幕地方 · c6319565 · 9555713f · 78d5d87b · c6319565
5 changed files
--- a/README_ch.md
+++ b/README_ch.md
@@ -140,11 +140,7 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库，助力
    - [文本识别算法](./doc/doc_ch/algorithm_overview.md#12-%E6%96%87%E6%9C%AC%E8%AF%86%E5%88%AB%E7%AE%97%E6%B3%95)
    - [端到端算法](./doc/doc_ch/algorithm_overview.md#2-%E6%96%87%E6%9C%AC%E8%AF%86%E5%88%AB%E7%AE%97%E6%B3%95)
    - [使用PaddleOCR架构添加新算法](./doc/doc_ch/add_new_algorithm.md)
-- [场景应用](./doc/doc_ch/application.md)
-    - [金融场景（表单/票据等）]()
-    - [工业场景（电表度数/车牌等）]()
-    - [教育场景（手写体/公式等）]()
-    - [医疗场景（化验单等）]()
+- [场景应用](./applications)
 - 数据标注与合成
    - [半自动标注工具PPOCRLabel](./PPOCRLabel/README_ch.md)
    - [数据合成工具Style-Text](./StyleText/README_ch.md)

--- a/doc/doc_ch/application.md
+++ b/doc/doc_ch/application.md
-# 场景应用
\ No newline at end of file
--- a/doc/doc_en/ocr_book_en.md
+++ b/doc/doc_en/ocr_book_en.md
 # E-book: *Dive Into OCR*

-"Dive Into OCR" is a textbook that combines OCR theory and practice, written by the PaddleOCR team, Chen Zhineng, a Junior Research Fellow at Fudan University, Huang Wenhui, a senior expert in the field of vision at China Mobile Research Institute, and other industry-university-research colleagues, as well as OCR developers. The main features are as follows:
+"Dive Into OCR" is a textbook that combines OCR theory and practice, written by the PaddleOCR team, Chen Zhineng, a Pre-tenure Professor at Fudan University, Huang Wenhui, a senior expert in the field of vision at China Mobile Research Institute, and other industry-university-research colleagues, as well as OCR developers. The main features are as follows:

 - OCR full-stack technology covering text detection, recognition and document analysis
 - Closely integrate theory and practice, cross the code implementation gap, and supporting instructional videos

--- a/paddleocr.py
+++ b/paddleocr.py
@@ -47,8 +47,8 @@ __all__ = [
 ]

 SUPPORT_DET_MODEL = ['DB']
-VERSION = '2.5.0.1'
-SUPPORT_REC_MODEL = ['CRNN']
+VERSION = '2.5.0.3'
+SUPPORT_REC_MODEL = ['CRNN', 'SVTR_LCNet']
 BASE_DIR = os.path.expanduser("~/.paddleocr/")

 DEFAULT_OCR_MODEL_VERSION = 'PP-OCRv3'

--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -79,7 +79,7 @@ def init_args():
    parser.add_argument("--det_fce_box_type", type=str, default='poly')

    # params for text recognizer
-    parser.add_argument("--rec_algorithm", type=str, default='CRNN')
+    parser.add_argument("--rec_algorithm", type=str, default='SVTR_LCNet')
    parser.add_argument("--rec_model_dir", type=str)
    parser.add_argument("--rec_image_shape", type=str, default="3, 48, 320")
    parser.add_argument("--rec_batch_num", type=int, default=6)
@@ -269,11 +269,11 @@ def create_predictor(args, mode, logger):
                max_input_shape.update(max_pact_shape)
                opt_input_shape.update(opt_pact_shape)
            elif mode == "rec":
-                if args.rec_algorithm != "CRNN":
+                if args.rec_algorithm not in ["CRNN", "SVTR_LCNet"]:
                    use_dynamic_shape = False
                imgH = int(args.rec_image_shape.split(',')[-2])
                min_input_shape = {"x": [1, 3, imgH, 10]}
-                max_input_shape = {"x": [args.rec_batch_num, 3, imgH, 1536]}
+                max_input_shape = {"x": [args.rec_batch_num, 3, imgH, 2304]}
                opt_input_shape = {"x": [args.rec_batch_num, 3, imgH, 320]}
            elif mode == "cls":
                min_input_shape = {"x": [1, 3, 48, 10]}
@@ -320,7 +320,7 @@ def create_predictor(args, mode, logger):
 def get_output_tensors(args, mode, predictor):
    output_names = predictor.get_output_names()
    output_tensors = []
-    if mode == "rec" and args.rec_algorithm == "CRNN":
+    if mode == "rec" and args.rec_algorithm in ["CRNN", "SVTR_LCNet"]:
        output_name = 'softmax_0.tmp_0'
        if output_name in output_names:
            return [predictor.get_output_handle(output_name)]