Merge remote-tracking branch 'origin/dygraph' into dy1

22ad94aa · qq_25193841 · 1a0a75e3 · e6c62504 · 22ad94aa · 1a0a75e3
14 changed files
--- a/configs/rec/PP-OCRv3/multi_language/arabic_PP-OCRv3_rec.yml
+++ b/configs/rec/PP-OCRv3/multi_language/arabic_PP-OCRv3_rec.yml
@@ -12,7 +12,7 @@ Global:
  checkpoints:
  save_inference_dir:
  use_visualdl: false
-  infer_img: doc/imgs_words/ch/word_1.jpg
+  infer_img: ./doc/imgs_words/arabic/ar_2.jpg
  character_dict_path: ppocr/utils/dict/arabic_dict.txt
  max_text_length: &max_text_length 25
  infer_mode: false

--- a/doc/overview_en.png
+++ b/doc/overview_en.png
--- a/doc/ppocr_v3/svtr_tiny.jpg
+++ b/doc/ppocr_v3/svtr_tiny.jpg
--- a/ppocr/modeling/backbones/vqa_layoutlm.py
+++ b/ppocr/modeling/backbones/vqa_layoutlm.py
@@ -29,14 +29,14 @@ __all__ = ["LayoutXLMForSer", "LayoutLMForSer"]
 pretrained_model_dict = {
    LayoutXLMModel: {
        "base": "layoutxlm-base-uncased",
-        "vi": "layoutxlm-wo-backbone-base-uncased",
+        "vi": "vi-layoutxlm-base-uncased",
    },
    LayoutLMModel: {
        "base": "layoutlm-base-uncased",
    },
    LayoutLMv2Model: {
        "base": "layoutlmv2-base-uncased",
-        "vi": "layoutlmv2-wo-backbone-base-uncased",
+        "vi": "vi-layoutlmv2-base-uncased",
    },
 }


--- a/ppocr/postprocess/rec_postprocess.py
+++ b/ppocr/postprocess/rec_postprocess.py
@@ -45,6 +45,27 @@ class BaseRecLabelDecode(object):
            self.dict[char] = i
        self.character = dict_character

+        if 'arabic' in character_dict_path:
+            self.reverse = True
+        else:
+            self.reverse = False
+
+    def pred_reverse(self, pred):
+        pred_re = []
+        c_current = ''
+        for c in pred:
+            if not bool(re.search('[a-zA-Z0-9 :*./%+-]', c)):
+                if c_current != '':
+                    pred_re.append(c_current)
+                pred_re.append(c)
+                c_current = ''
+            else:
+                c_current += c
+        if c_current != '':
+            pred_re.append(c_current)
+
+        return ''.join(pred_re[::-1])
+
    def add_special_char(self, dict_character):
        return dict_character

@@ -73,6 +94,10 @@ class BaseRecLabelDecode(object):
                conf_list = [0]

            text = ''.join(char_list)
+
+            if self.reverse:  # for arabic rec
+                text = self.pred_reverse(text)
+
            result_list.append((text, np.mean(conf_list).tolist()))
        return result_list


--- a/ppocr/utils/dict/arabic_dict.txt
+++ b/ppocr/utils/dict/arabic_dict.txt
- 
 !
 #
 $

--- a/ppstructure/docs/models_list_en.md
+++ b/ppstructure/docs/models_list_en.md
@@ -13,7 +13,7 @@
 |model name| description                                                                                                                                             | inference model size                                                                                                                         |download|dict path|
 | --- |---------------------------------------------------------------------------------------------------------------------------------------------------------| --- | --- | --- |
 | picodet_lcnet_x1_0_fgd_layout | The layout analysis English model trained on the PubLayNet dataset based on PicoDet LCNet_x1_0 and FGD . the model can recognition 5 types of areas such as **Text, Title, Table, Picture and List** | 9.7M | [inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout.pdparams) | [PubLayNet dict](../../ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt) |
-| ppyolov2_r50vd_dcn_365e_publaynet | The layout analysis English model trained on the PubLayNet dataset based on PP-YOLOv2 | 221M | [inference_moel]](https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_publaynet.tar) / [trained model](https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_publaynet_pretrained.pdparams) | sme as above |
+| ppyolov2_r50vd_dcn_365e_publaynet | The layout analysis English model trained on the PubLayNet dataset based on PP-YOLOv2 | 221M | [inference model](https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_publaynet.tar) / [trained model](https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_publaynet_pretrained.pdparams) | same as above |
 | picodet_lcnet_x1_0_fgd_layout_cdla | The layout analysis Chinese model trained on the CDLA dataset, the model can recognition 10 types of areas such as **Table、Figure、Figure caption、Table、Table caption、Header、Footer、Reference、Equation** | 9.7M | [inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_cdla_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_cdla.pdparams) | [CDLA dict](../../ppocr/utils/dict/layout_dict/layout_cdla_dict.txt) |
 | picodet_lcnet_x1_0_fgd_layout_table | The layout analysis model trained on the table dataset, the model can detect tables in Chinese and English documents                     | 9.7M                                                  | [inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_table_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_table.pdparams) | [Table dict](../../ppocr/utils/dict/layout_dict/layout_table_dict.txt) |
 | ppyolov2_r50vd_dcn_365e_tableBank_word | The layout analysis model trained on the TableBank Word dataset based on PP-YOLOv2, the model can detect  tables  in English documents | 221M | [inference model](https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_tableBank_word.tar) | same as above |

--- a/ppstructure/docs/quickstart.md
+++ b/ppstructure/docs/quickstart.md
@@ -48,7 +48,7 @@
 pip3 install "paddleocr>=2.6"

 # 安装 图像方向分类依赖包paddleclas（如不需要图像方向分类功能，可跳过）
-pip3 install paddleclas
+pip3 install "paddleclas>=2.4.3"

 # 安装 关键信息抽取 依赖包（如不需要KIE功能，可跳过）
 pip3 install -r ppstructure/kie/requirements.txt

--- a/ppstructure/docs/quickstart_en.md
+++ b/ppstructure/docs/quickstart_en.md
@@ -50,7 +50,7 @@ For more software version requirements, please refer to the instructions in [Ins
 pip3 install "paddleocr>=2.6"

 # Install the image direction classification dependency package paddleclas (if you do not use the image direction classification, you can skip it)
-pip3 install paddleclas
+pip3 install "paddleclas>=2.4.3"

 # Install the KIE dependency packages (if you do not use the KIE, you can skip it)
 pip3 install -r kie/requirements.txt

--- a/ppstructure/layout/README.md
+++ b/ppstructure/layout/README.md
--- a/ppstructure/layout/README_ch.md
+++ b/ppstructure/layout/README_ch.md
+简体中文 | [English](README.md)
+
+# 版面分析
+
 - [1. 简介](#1-简介)
 - [2. 安装](#2-安装)
  - [2.1 安装PaddlePaddle](#21-安装paddlepaddle)
@@ -15,8 +19,6 @@
  - [6.1 模型导出](#61-模型导出)
  - [6.2 模型推理](#62-模型推理)

-# 版面分析
-
 ## 1. 简介

 版面分析指的是对图片形式的文档进行区域划分，定位其中的关键区域，如文字、标题、表格、图片等。版面分析算法基于[PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection)的轻量模型PP-PicoDet进行开发。
@@ -37,10 +39,10 @@
 python3 -m pip install --upgrade pip

 # GPU安装
-python3 -m pip install "paddlepaddle-gpu>=2.2" -i https://mirror.baidu.com/pypi/simple
+python3 -m pip install "paddlepaddle-gpu>=2.3" -i https://mirror.baidu.com/pypi/simple

 # CPU安装
-python3 -m pip install "paddlepaddle>=2.2" -i https://mirror.baidu.com/pypi/simple
+python3 -m pip install "paddlepaddle>=2.3" -i https://mirror.baidu.com/pypi/simple
 ```
 更多需求，请参照[安装文档](https://www.paddlepaddle.org.cn/install/quick)中的说明进行操作。


--- a/ppstructure/recovery/README.md
+++ b/ppstructure/recovery/README.md
@@ -66,7 +66,7 @@ git clone https://gitee.com/paddlepaddle/PaddleOCR

 - **(2) Install recovery's `requirements`**

-The layout restoration is exported as docx and PDF files, so python-docx and docx2pdf API need to be installed, and fitz and PyMuPDF apis need to be installed to process the input files in pdf format.
+The layout restoration is exported as docx and PDF files, so the python-docx and docx2pdf APIs need to be installed, and the PyMuPDF API ([requires Python >= 3.7](https://pypi.org/project/PyMuPDF/)) needs to be installed to process input files in PDF format.

 ```bash
 python3 -m pip install -r ppstructure/recovery/requirements.txt

--- a/ppstructure/recovery/README_ch.md
+++ b/ppstructure/recovery/README_ch.md
@@ -68,7 +68,7 @@ git clone https://gitee.com/paddlepaddle/PaddleOCR

 - **（2）安装recovery的`requirements`**

-版面恢复导出为docx、pdf文件，所以需要安装python-docx、docx2pdf API，同时处理pdf格式的输入文件，需要安装fitz、PyMuPDF API。
+版面恢复导出为docx、pdf文件，所以需要安装python-docx、docx2pdf API，同时处理pdf格式的输入文件，需要安装PyMuPDF API([要求Python >= 3.7](https://pypi.org/project/PyMuPDF/))。

 ```bash
 python3 -m pip install -r ppstructure/recovery/requirements.txt

--- a/ppstructure/recovery/requirements.txt
+++ b/ppstructure/recovery/requirements.txt
 python-docx
 docx2pdf
-fitz
-PyMuPDF==1.16.14
+PyMuPDF
 beautifulsoup4
\ No newline at end of file