Merge https://github.com/PaddlePaddle/PaddleOCR into dygraph

cf03889b · weishengyu · 5a5017fe · 011104e0 · cf03889b · cf03889b
31 changed files
--- a/PPOCRLabel/PPOCRLabel.py
+++ b/PPOCRLabel/PPOCRLabel.py
@@ -274,6 +274,7 @@ class MainWindow(QMainWindow, WindowMixin):
        self.preButton.setIconSize(QSize(40, 100))
        self.preButton.clicked.connect(self.openPrevImg)
        self.preButton.setStyleSheet('border: none;')
+        self.preButton.setShortcut('a')
        self.iconlist = QListWidget()
        self.iconlist.setViewMode(QListView.IconMode)
        self.iconlist.setFlow(QListView.TopToBottom)
@@ -289,12 +290,12 @@ class MainWindow(QMainWindow, WindowMixin):
        self.nextButton.setIconSize(QSize(40, 100))
        self.nextButton.setStyleSheet('border: none;')
        self.nextButton.clicked.connect(self.openNextImg)
+        self.nextButton.setShortcut('d')
        hlayout.addWidget(self.preButton)
        hlayout.addWidget(self.iconlist)
        hlayout.addWidget(self.nextButton)
-        # self.setLayout(hlayout)
        iconListContainer = QWidget()
        iconListContainer.setLayout(hlayout)
@@ -359,11 +360,6 @@ class MainWindow(QMainWindow, WindowMixin):
        opendir = action(getStr('openDir'), self.openDirDialog,
                         'Ctrl+u', 'open', getStr('openDir'))
-        openNextImg = action(getStr('nextImg'), self.openNextImg,
-                             'd', 'next', getStr('nextImgDetail'))
-        openPrevImg = action(getStr('prevImg'), self.openPrevImg,
-                             'a', 'prev', getStr('prevImgDetail'))
        save = action(getStr('save'), self.saveFile,
                      'Ctrl+V', 'verify', getStr('saveDetail'), enabled=False)
@@ -371,7 +367,7 @@ class MainWindow(QMainWindow, WindowMixin):
        alcm = action(getStr('choosemodel'), self.autolcm,
                                        'Ctrl+M', 'next', getStr('tipchoosemodel'))
-        deleteImg = action(getStr('deleteImg'), self.deleteImg, 'Ctrl+D', 'close', getStr('deleteImgDetail'),
+        deleteImg = action(getStr('deleteImg'), self.deleteImg, 'Ctrl+Shift+D', 'close', getStr('deleteImgDetail'),
                           enabled=True)
        resetAll = action(getStr('resetAll'), self.resetAll, None, 'resetall', getStr('resetAllDetail'))
@@ -388,7 +384,7 @@ class MainWindow(QMainWindow, WindowMixin):
                        'w', 'new', getStr('crtBoxDetail'), enabled=False)
        delete = action(getStr('delBox'), self.deleteSelectedShape,
-                        'Delete', 'delete', getStr('delBoxDetail'), enabled=False)
+                        'backspace', 'delete', getStr('delBoxDetail'), enabled=False)
        copy = action(getStr('dupBox'), self.copySelectedShape,
                      'Ctrl+C', 'copy', getStr('dupBoxDetail'),
                      enabled=False)
@@ -446,8 +442,11 @@ class MainWindow(QMainWindow, WindowMixin):
        reRec = action(getStr('reRecognition'), self.reRecognition, 
                      'Ctrl+Shift+R', 'reRec', getStr('reRecognition'), enabled=False)
+        singleRere = action(getStr('singleRe'), self.singleRerecognition,
+                            'Ctrl+R', 'reRec', getStr('singleRe'), enabled=False)
        createpoly = action(getStr('creatPolygon'), self.createPolygon,
-                            'p', 'new', 'Creat Polygon', enabled=True)
+                            'q', 'new', 'Creat Polygon', enabled=True)
        saveRec = action(getStr('saveRec'), self.saveRecResult,
                            '', 'save', getStr('saveRec'), enabled=False)
@@ -491,6 +490,7 @@ class MainWindow(QMainWindow, WindowMixin):
                                icon='color', tip=getStr('shapeFillColorDetail'),
                                enabled=False)
        # Label list context menu.
        labelMenu = QMenu()
        addActions(labelMenu, (edit, delete))
@@ -501,7 +501,6 @@ class MainWindow(QMainWindow, WindowMixin):
        # Draw squares/rectangles
        self.drawSquaresOption = QAction(getStr('drawSquares'), self)
-        self.drawSquaresOption.setShortcut('Ctrl+Shift+R')
        self.drawSquaresOption.setCheckable(True)
        self.drawSquaresOption.setChecked(settings.get(SETTING_DRAW_SQUARE, False))
        self.drawSquaresOption.triggered.connect(self.toogleDrawSquare)
@@ -509,7 +508,7 @@ class MainWindow(QMainWindow, WindowMixin):
        # Store actions for further handling.
        self.actions = struct(save=save,  open=open,  resetAll=resetAll, deleteImg=deleteImg,
                              lineColor=color1, create=create, delete=delete, edit=edit, copy=copy,
-                              saveRec=saveRec,
+                              saveRec=saveRec, singleRere=singleRere,AutoRec=AutoRec,reRec=reRec,
                              createMode=createMode, editMode=editMode,
                              shapeLineColor=shapeLineColor, shapeFillColor=shapeFillColor,
                              zoom=zoom, zoomIn=zoomIn, zoomOut=zoomOut, zoomOrg=zoomOrg,
@@ -518,9 +517,9 @@ class MainWindow(QMainWindow, WindowMixin):
                              fileMenuActions=(
                                  open, opendir, saveLabel,  resetAll, quit),
                              beginner=(), advanced=(),
-                              editMenu=(createpoly, edit, copy, delete,
+                              editMenu=(createpoly, edit, copy, delete,singleRere,
                                        None, color1, self.drawSquaresOption),
-                              beginnerContext=(create, edit, copy, delete),
+                              beginnerContext=(create, edit, copy, delete, singleRere),
                              advancedContext=(createMode, editMode, edit, copy,
                                               delete, shapeLineColor, shapeFillColor),
                              onLoadActive=(
@@ -562,7 +561,7 @@ class MainWindow(QMainWindow, WindowMixin):
            zoomIn, zoomOut, zoomOrg, None,
            fitWindow, fitWidth))
-        addActions(self.menus.autolabel, (alcm, None, help)) #
+        addActions(self.menus.autolabel, (AutoRec, reRec, alcm, None, help)) #
        self.menus.file.aboutToShow.connect(self.updateFileMenu)
@@ -572,6 +571,7 @@ class MainWindow(QMainWindow, WindowMixin):
            action('&Copy here', self.copyShape),
            action('&Move here', self.moveShape)))
        self.statusBar().showMessage('%s started.' % __appname__)
        self.statusBar().show()
@@ -919,6 +919,7 @@ class MainWindow(QMainWindow, WindowMixin):
        self.actions.edit.setEnabled(selected)
        self.actions.shapeLineColor.setEnabled(selected)
        self.actions.shapeFillColor.setEnabled(selected)
+        self.actions.singleRere.setEnabled(selected)
    def addLabel(self, shape):
        shape.paintLabel = self.displayLabelOption.isChecked()
@@ -988,6 +989,19 @@ class MainWindow(QMainWindow, WindowMixin):
        self.updateComboBox()
        self.canvas.loadShapes(s)
+    def singleLabel(self, shape):
+        if shape is None:
+            # print('rm empty label')
+            return
+        item = self.shapesToItems[shape]
+        item.setText(shape.label)
+        self.updateComboBox()
+        # ADD:
+        item = self.shapesToItemsbox[shape]
+        item.setText(str([(int(p.x()), int(p.y())) for p in shape.points]))
+        self.updateComboBox()
    def updateComboBox(self):
        # Get the unique labels and add them to the Combobox.
        itemsTextList = [str(self.labelList.item(i).text()) for i in range(self.labelList.count())]
@@ -1441,6 +1455,8 @@ class MainWindow(QMainWindow, WindowMixin):
        self.haveAutoReced = False
        self.AutoRecognition.setEnabled(True)
        self.reRecogButton.setEnabled(True)
+        self.actions.AutoRec.setEnabled(True)
+        self.actions.reRec.setEnabled(True)
        self.actions.saveLabel.setEnabled(True)
@@ -1755,6 +1771,7 @@ class MainWindow(QMainWindow, WindowMixin):
        self.loadFile(self.filePath) # ADD
        self.haveAutoReced = True
        self.AutoRecognition.setEnabled(False)
+        self.actions.AutoRec.setEnabled(False)
        self.setDirty()
        self.saveCacheLabel()
@@ -1794,6 +1811,27 @@ class MainWindow(QMainWindow, WindowMixin):
        else:
            QMessageBox.information(self, "Information", "Draw a box!")
+    def singleRerecognition(self):
+        img = cv2.imread(self.filePath)
+        shape = self.canvas.selectedShape
+        box = [[int(p.x()), int(p.y())] for p in shape.points]
+        assert len(box) == 4
+        img_crop = get_rotate_crop_image(img, np.array(box, np.float32))
+        if img_crop is None:
+            msg = 'Can not recognise the detection box in ' + self.filePath + '. Please change manually'
+            QMessageBox.information(self, "Information", msg)
+            return
+        result = self.ocr.ocr(img_crop, cls=True, det=False)
+        if result[0][0] is not '':
+            result.insert(0, box)
+            print('result in reRec is ', result)
+            if result[1][0] == shape.label:
+                print('label no change')
+            else:
+                shape.label = result[1][0]
+            self.singleLabel(shape)
+            self.setDirty()
+        print(box)
    def autolcm(self):
        vbox = QVBoxLayout()
@@ -1825,6 +1863,7 @@ class MainWindow(QMainWindow, WindowMixin):
        self.dialog.exec_()
        if self.filePath:
            self.AutoRecognition.setEnabled(True)
+            self.actions.AutoRec.setEnabled(True)
    def modelChoose(self):

--- a/PPOCRLabel/README.md
+++ b/PPOCRLabel/README.md
@@ -6,6 +6,10 @@ PPOCRLabel is a semi-automatic graphic annotation tool suitable for OCR field. I
 <img src="./data/gif/steps_en.gif" width="100%"/>
+### Recent Update
+- 2020.12.18: Support re-recognition of a single label box (by [ninetailskim](https://github.com/ninetailskim)), and improve the shortcut keys.
 ## Installation
 ### 1. Install PaddleOCR
@@ -92,11 +96,30 @@ Therefore, if the recognition result has been manually changed before, it may ch
 ## Explanation
+### Shortcut keys
+| Shortcut keys    | Description                                      |
+| ---------------- | ------------------------------------------------ |
+| Ctrl + Shift + A | Automatically label all unchecked images         |
+| Ctrl + Shift + R | Re-recognize all the labels of the current image |
+| W                | Create a rect box                                |
+| Q                | Create a four-points box                         |
+| Ctrl + E         | Edit label of the selected box                   |
+| Ctrl + R         | Re-recognize the selected box                    |
+| Backspace        | Delete the selected box                          |
+| Ctrl + V         | Check image                                      |
+| Ctrl + Shift + D | Delete image                                     |
+| D                | Next image                                       |
+| A                | Previous image                                   |
+| Ctrl++           | Zoom in                                          |
+| Ctrl--           | Zoom out                                         |
+| ↑→↓←             | Move selected box                                |
 ### Built-in Model
 - Default model: PPOCRLabel uses the Chinese and English ultra-lightweight OCR model in PaddleOCR by default, supports Chinese, English and number recognition, and multiple language detection.
- Model language switching: Changing the built-in model language is supportable by clicking "PaddleOCR"-"Choose OCR Model" in the menu bar. Currently supported languagesinclude French, German, Korean, and Japanese.
+- Model language switching: Changing the built-in model language is supported by clicking "PaddleOCR"-"Choose OCR Model" in the menu bar. Currently supported languages include French, German, Korean, and Japanese.
  For specific model download links, please refer to [PaddleOCR Model List](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_en/models_list_en.md#multilingual-recognition-modelupdating)
 - Custom model: A model trained by the user can be used instead by modifying the [PaddleOCR class instantiation](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/PPOCRLabel/PPOCRLabel.py#L110) in PPOCRLabel.py, referring to [Custom Model Code](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_en/whl_en.md#use-custom-model).

--- a/PPOCRLabel/README_ch.md
+++ b/PPOCRLabel/README_ch.md
@@ -6,6 +6,10 @@ PPOCRLabel是一款适用于OCR领域的半自动化图形标注工具，使用p
 <img src="./data/gif/steps.gif" width="100%"/>
+#### 近期更新
+- 2020.12.18: 支持对单个标记框进行重新识别（by [ninetailskim](https://github.com/ninetailskim) ），完善快捷键。
 ## 安装
 ### 1. 安装PaddleOCR
@@ -72,6 +76,26 @@ python3 PPOCRLabel.py --lang ch
 |   crop_img    |   识别数据。按照检测框切割后的图片。与rec_gt.txt同时产生。   |
 ## 说明
+### 快捷键
+| 快捷键           | 说明                         |
+| ---------------- | ---------------------------- |
+| Ctrl + Shift + A | 自动标注所有未确认过的图片   |
+| Ctrl + Shift + R | 对当前图片的所有标记重新识别 |
+| W                | 新建矩形框                   |
+| Q                | 新建四点框                   |
+| Ctrl + E         | 编辑所选框标签               |
+| Ctrl + R         | 重新识别所选标记             |
+| Backspace        | 删除所选框                   |
+| Ctrl + V         | 确认本张图片标记             |
+| Ctrl + Shift + D | 删除本张图片                 |
+| D                | 下一张图片                   |
+| A                | 上一张图片                   |
+| Ctrl++           | 放大                         |
+| Ctrl--           | 缩小                         |
+| ↑→↓←             | 移动标记框                   |
 ### 内置模型
 - 默认模型：PPOCRLabel默认使用PaddleOCR中的中英文超轻量OCR模型，支持中英文与数字识别，多种语言检测。

--- a/PPOCRLabel/libs/autoDialog.py
+++ b/PPOCRLabel/libs/autoDialog.py
@@ -46,8 +46,9 @@ class Worker(QThread):
                            chars = res[1][0]
                            cond = res[1][1]
                            posi = res[0]
-                            strs += "Transcription: " + chars + " Probability: " + str(
+                            strs += "Transcription: " + chars + " Probability: " + str(cond) + \
-                                cond) + " Location: " + json.dumps(posi) + '\n'
+                                    " Location: " + json.dumps(posi) +'\n'
+                        # Sending large amounts of data repeatedly through pyqtSignal may affect the program efficiency
                        self.listValue.emit(strs)
                        self.mainThread.result_dic = self.result_dic
                        self.mainThread.filePath = Imgpath

--- a/PPOCRLabel/libs/resources.py
+++ b/PPOCRLabel/libs/resources.py
--- a/PPOCRLabel/resources/strings/strings-zh-CN.properties
+++ b/PPOCRLabel/resources/strings/strings-zh-CN.properties
@@ -94,4 +94,5 @@ ok=确认
 autolabeling=自动标注中
 hideBox=隐藏所有标注
 showBox=显示所有标注
 saveLabel=保存标记结果
\ No newline at end of file
+singleRe=重识别此区块
\ No newline at end of file
--- a/PPOCRLabel/resources/strings/strings-zh-TW.properties
+++ b/PPOCRLabel/resources/strings/strings-zh-TW.properties
-saveAsDetail=將標籤保存到其他文件
-changeSaveDir=改變存放目錄
-openFile=開啟檔案
-shapeLineColorDetail=更改線條顏色
-resetAll=重置
-crtBox=創建區塊
-crtBoxDetail=畫一個區塊
-dupBoxDetail=複製區塊
-verifyImg=驗證圖像
-zoominDetail=放大
-verifyImgDetail=驗證圖像
-saveDetail=將標籤存到
-openFileDetail=打開圖像
-fitWidthDetail=調整到窗口寬度
-tutorial=YouTube教學
-editLabel=編輯標籤
-openAnnotationDetail=打開標籤文件
-quit=結束
-shapeFillColorDetail=更改填充顏色
-closeCurDetail=關閉目前檔案
-closeCur=關閉
-deleteImg=刪除圖像
-deleteImgDetail=刪除目前圖像
-fitWin=調整到跟窗口一樣大小
-delBox=刪除選取區塊
-boxLineColorDetail=選擇框線顏色
-originalsize=原始大小
-resetAllDetail=重設所有設定
-zoomoutDetail=畫面放大
-save=儲存
-saveAs=另存為
-fitWinDetail=縮放到窗口一樣
-openDir=開啟目錄
-copyPrevBounding=複製當前圖像中的上一個邊界框
-showHide=顯示/隱藏標籤
-changeSaveFormat=更改儲存格式
-shapeFillColor=填充顏色
-quitApp=離開本程式
-dupBox=複製區塊
-delBoxDetail=刪除區塊
-zoomin=放大畫面
-info=資訊
-openAnnotation=開啟標籤
-prevImgDetail=上一個圖像
-fitWidth=縮放到跟畫面一樣寬
-zoomout=縮小畫面
-changeSavedAnnotationDir=更改預設標籤存的目錄
-nextImgDetail=下一個圖像
-originalsizeDetail=放大到原始大小
-prevImg=上一個圖像
-tutorialDetail=顯示示範內容
-shapeLineColor=形狀線條顏色
-boxLineColor=日期分隔線顏色
-editLabelDetail=修改所選區塊的標籤
-nextImg=下一張圖片
-useDefaultLabel=使用預設標籤
-useDifficult=有難度的
-boxLabelText=區塊的標籤
-labels=標籤
-autoSaveMode=自動儲存模式
-singleClsMode=單一類別模式
-displayLabel=顯示類別
-fileList=檔案清單
-files=檔案
-iconList=XX
-icon=XX
-advancedMode=進階模式
-advancedModeDetail=切到進階模式
-showAllBoxDetail=顯示所有區塊
-hideAllBoxDetail=隱藏所有區塊
--- a/PPOCRLabel/resources/strings/strings.properties
+++ b/PPOCRLabel/resources/strings/strings.properties
@@ -94,4 +94,5 @@ ok=OK
 autolabeling=Automatic Labeling
 hideBox=Hide All Box
 showBox=Show All Box
 saveLabel=Save Label
\ No newline at end of file
+singleRe=Re-recognition RectBox
\ No newline at end of file
--- a/StyleText/README.md
+++ b/StyleText/README.md
@@ -69,12 +69,14 @@ fusion_generator:
 1. You can run `tools/synth_image` and generate the demo image, which is saved in the current folder.
 ```python
-python3 -m tools.synth_image -c configs/config.yml --style_image examples/style_images/2.jpg --text_corpus PaddleOCR --language en
+python3 tools/synth_image.py -c configs/config.yml --style_image examples/style_images/2.jpg --text_corpus PaddleOCR --language en
 ```
 * Note 1: The language option must correspond to the corpus. Currently, the tool only supports English, Simplified Chinese and Korean.
-* Note 2: Synth-Text is mainly used to generate images for OCR recognition models. 
+* Note 2: Synth-Text is mainly used to generate images for OCR recognition models.
  So the height of style images should be around 32 pixels. Images in other sizes may behave poorly.
+* Note 3: You can modify `use_gpu` in `configs/config.yml` to determine whether to use GPU for prediction.
 For example, enter the following image and corpus `PaddleOCR`.
@@ -139,9 +141,10 @@ We provide a general dataset containing Chinese, English and Korean (50,000 imag
 2. You can run the following command to start synthesis task:
   ``` bash
-   python -m tools.synth_dataset.py -c configs/dataset_config.yml
+   python3 tools/synth_dataset.py -c configs/dataset_config.yml
   ```
-We also provide example corpus and images in `examples` folder. 
+We also provide example corpus and images in `examples` folder.
    <div align="center">
        <img src="examples/style_images/1.jpg" width="300">
        <img src="examples/style_images/2.jpg" width="300">

--- a/StyleText/README_ch.md
+++ b/StyleText/README_ch.md
@@ -61,11 +61,12 @@ fusion_generator:
 输入一张风格图和一段文字语料，运行tools/synth_image，合成单张图片，结果图像保存在当前目录下：
 ```python
-python3 -m tools.synth_image -c configs/config.yml --style_image examples/style_images/2.jpg --text_corpus PaddleOCR --language en
+python3 tools/synth_image.py -c configs/config.yml --style_image examples/style_images/2.jpg --text_corpus PaddleOCR --language en
 ```
 * 注1：语言选项和语料相对应，目前该工具只支持英文、简体中文和韩语。
 * 注2：Style-Text生成的数据主要应用于OCR识别场景。基于当前PaddleOCR识别模型的设计，我们主要支持高度在32左右的风格图像。
  如果输入图像尺寸相差过多，效果可能不佳。
+* 注3：可以通过修改配置文件中的`use_gpu`(true或者false)参数来决定是否使用GPU进行预测。
 例如，输入如下图片和语料"PaddleOCR":
@@ -127,7 +128,7 @@ python3 -m tools.synth_image -c configs/config.yml --style_image examples/style_
 2. 运行`tools/synth_dataset`合成数据：
   ``` bash
-   python -m tools.synth_dataset -c configs/dataset_config.yml
+   python tools/synth_dataset.py -c configs/dataset_config.yml
   ```
   我们在examples目录下提供了样例图片和语料。
    <div align="center">

--- a/StyleText/engine/predictors.py
+++ b/StyleText/engine/predictors.py
@@ -28,6 +28,7 @@ class StyleTextRecPredictor(object):
                             ], "Generator {} not supported.".format(algorithm)
        use_gpu = config["Global"]['use_gpu']
        check_gpu(use_gpu)
+        paddle.set_device('gpu' if use_gpu else 'cpu')
        self.logger = get_logger()
        self.generator = getattr(style_text_rec, algorithm)(config)
        self.height = config["Global"]["image_height"]

--- a/StyleText/tools/synth_dataset.py
+++ b/StyleText/tools/synth_dataset.py
@@ -11,6 +11,14 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
+import sys
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(__dir__)
+sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
 from engine.synthesisers import DatasetSynthesiser

--- a/StyleText/tools/synth_image.py
+++ b/StyleText/tools/synth_image.py
@@ -16,13 +16,13 @@ import cv2
 import sys
 import glob
-from utils.config import ArgsParser
-from engine.synthesisers import ImageSynthesiser
 __dir__ = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(__dir__)
 sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
+from utils.config import ArgsParser
+from engine.synthesisers import ImageSynthesiser
 def synth_image():
    args = ArgsParser().parse_args()

--- a/deploy/cpp_infer/readme_en.md
+++ b/deploy/cpp_infer/readme_en.md
@@ -107,10 +107,10 @@ make inference_lib_dist
 For more compilation parameter options, please refer to the official website of the Paddle C++ inference library:[https://www.paddlepaddle.org.cn/documentation/docs/en/advanced_guide/inference_deployment/inference/build_and_install_lib_en.html](https://www.paddlepaddle.org.cn/documentation/docs/en/advanced_guide/inference_deployment/inference/build_and_install_lib_en.html).
-* After the compilation process, you can see the following files in the folder of `build/fluid_inference_install_dir/`.
+* After the compilation process, you can see the following files in the folder of `build/paddle_inference_install_dir/`.
 ```
-build/fluid_inference_install_dir/
+build/paddle_inference_install_dir/
 |-- CMakeCache.txt
 |-- paddle
 |-- third_party

--- a/deploy/cpp_infer/src/preprocess_op.cpp
+++ b/deploy/cpp_infer/src/preprocess_op.cpp
@@ -81,14 +81,14 @@ void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img,
  else if (resize_h / 32 < 1 + 1e-5)
    resize_h = 32;
  else
-    resize_h = (resize_h / 32 - 1) * 32;
+    resize_h = (resize_h / 32) * 32;
  if (resize_w % 32 == 0)
    resize_w = resize_w;
  else if (resize_w / 32 < 1 + 1e-5)
    resize_w = 32;
  else
-    resize_w = (resize_w / 32 - 1) * 32;
+    resize_w = (resize_w / 32) * 32;
  cv::resize(img, resize_img, cv::Size(resize_w, resize_h));

--- a/deploy/cpp_infer/tools/config.txt
+++ b/deploy/cpp_infer/tools/config.txt
@@ -11,7 +11,7 @@ max_side_len  960
 det_db_thresh  0.3
 det_db_box_thresh  0.5
 det_db_unclip_ratio  2.0
-det_model_dir  ./inference/ch__ppocr_mobile_v2.0_det_infer/
+det_model_dir  ./inference/ch_ppocr_mobile_v2.0_det_infer/
 # cls config
 use_angle_cls 0

--- a/doc/doc_ch/angle_class.md
+++ b/doc/doc_ch/angle_class.md
@@ -117,7 +117,7 @@ python3 tools/eval.py -c configs/cls/cls_mv3.yml -o Global.checkpoints={path/to/
 ```
 # 预测分类结果
-python3 tools/infer_cls.py -c configs/cls/cls_mv3.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/ch/word_1.jpg
+python3 tools/infer_cls.py -c configs/cls/cls_mv3.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/ch/word_1.jpg
 ```
 预测图片：

--- a/doc/doc_ch/detection.md
+++ b/doc/doc_ch/detection.md
@@ -120,16 +120,16 @@ python3 tools/eval.py -c configs/det/det_mv3_db.yml  -o Global.checkpoints="{pat
 测试单张图像的检测效果
 ```shell
-python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.checkpoints="./output/det_db/best_accuracy"
+python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false
 ```
 测试DB模型时，调整后处理阈值，
 ```shell
-python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.checkpoints="./output/det_db/best_accuracy" PostProcess.box_thresh=0.6 PostProcess.unclip_ratio=1.5
+python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false PostProcess.box_thresh=0.6 PostProcess.unclip_ratio=1.5
 ```
 测试文件夹下所有图像的检测效果
 ```shell
-python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/" Global.checkpoints="./output/det_db/best_accuracy"
+python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false
 ```
--- a/doc/doc_ch/inference.md
+++ b/doc/doc_ch/inference.md
@@ -245,7 +245,10 @@ python3 tools/infer/predict_det.py --det_algorithm="SAST" --image_dir="./doc/img
 超轻量中文识别模型推理，可以执行如下命令：
 ```
-python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --rec_model_dir="./inference/rec_crnn/"
+# 下载超轻量中文识别模型：
+wget  https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar
+tar xf ch_ppocr_mobile_v2.0_rec_infer.tar
+python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --rec_model_dir="ch_ppocr_mobile_v2.0_rec_infer"
 ```
 ![](../imgs_words/ch/word_4.jpg)
@@ -266,7 +269,6 @@ Predicts of ./doc/imgs_words/ch/word_4.jpg:('实力活力', 0.98458153)
 ```
 python3 tools/export_model.py -c configs/rec/rec_r34_vd_none_bilstm_ctc.yml -o Global.pretrained_model=./rec_r34_vd_none_bilstm_ctc_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/rec_crnn
 ```
 CRNN 文本识别模型推理，可以执行如下命令：
@@ -327,7 +329,10 @@ Predicts of ./doc/imgs_words/korean/1.jpg:('바탕으로', 0.9948904)
 方向分类模型推理，可以执行如下命令：
 ```
-python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --cls_model_dir="./inference/cls/"
+# 下载超轻量中文方向分类器模型：
+wget  https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar
+tar xf ch_ppocr_mobile_v2.0_cls_infer.tar
+python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --cls_model_dir="ch_ppocr_mobile_v2.0_cls_infer"
 ```
 ![](../imgs_words/ch/word_1.jpg)

--- a/doc/doc_ch/recognition.md
+++ b/doc/doc_ch/recognition.md
@@ -324,7 +324,6 @@ Eval:
 评估数据集可以通过 `configs/rec/rec_icdar15_train.yml`  修改Eval中的 `label_file_path` 设置。
-*注意* 评估时必须确保配置文件中 infer_img 字段为空
 ```
 # GPU 评估， Global.checkpoints 为待测权重
 python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy
@@ -342,7 +341,7 @@ python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec
 ```
 # 预测英文结果
-python3 tools/infer_rec.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/en/word_1.png
+python3 tools/infer_rec.py -c configs/rec/rec_icdar15_train.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/en/word_1.png
 ```
 预测图片：
@@ -361,7 +360,7 @@ infer_img: doc/imgs_words/en/word_1.png
 ```
 # 预测中文结果
-python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/ch/word_1.jpg
+python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/ch/word_1.jpg
 ```
 预测图片：

--- a/doc/doc_en/angle_class_en.md
+++ b/doc/doc_en/angle_class_en.md
@@ -119,7 +119,7 @@ Use `Global.infer_img` to specify the path of the predicted picture or folder, a
 ```
 # Predict English results
-python3 tools/infer_cls.py -c configs/cls/cls_mv3.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words_en/word_10.png
+python3 tools/infer_cls.py -c configs/cls/cls_mv3.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words_en/word_10.png
 ```
 Input image:

--- a/doc/doc_en/detection_en.md
+++ b/doc/doc_en/detection_en.md
@@ -113,16 +113,16 @@ python3 tools/eval.py -c configs/det/det_mv3_db.yml  -o Global.checkpoints="{pat
 Test the detection result on a single image:
 ```shell
-python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.checkpoints="./output/det_db/best_accuracy"
+python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false
 ```
 When testing the DB model, adjust the post-processing threshold:
 ```shell
-python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.checkpoints="./output/det_db/best_accuracy" PostProcess.box_thresh=0.6 PostProcess.unclip_ratio=1.5
+python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false PostProcess.box_thresh=0.6 PostProcess.unclip_ratio=1.5
 ```
 Test the detection result on all images in the folder:
 ```shell
-python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/" Global.checkpoints="./output/det_db/best_accuracy"
+python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false
 ```
--- a/doc/doc_en/inference_en.md
+++ b/doc/doc_en/inference_en.md
@@ -255,15 +255,18 @@ The following will introduce the lightweight Chinese recognition model inference
 For lightweight Chinese recognition model inference, you can execute the following commands:
 ```
-python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --rec_model_dir="./inference/rec_crnn/"
+# download CRNN text recognition inference model
+wget  https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar
+tar xf ch_ppocr_mobile_v2.0_rec_infer.tar
+python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_10.png" --rec_model_dir="ch_ppocr_mobile_v2.0_rec_infer"
 ```
-![](../imgs_words/ch/word_4.jpg)
+![](../imgs_words_en/word_10.png)
 After executing the command, the prediction results (recognized text and score) of the above image will be printed on the screen.
 ```bash
-Predicts of ./doc/imgs_words/ch/word_4.jpg:('实力活力', 0.98458153)
+Predicts of ./doc/imgs_words_en/word_10.png:('PAIN', 0.9897658)
 ```
 <a name="CTC-BASED_RECOGNITION"></a>
@@ -339,7 +342,12 @@ For angle classification model inference, you can execute the following commands
 ```
 python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words_en/word_10.png" --cls_model_dir="./inference/cls/"
 ```
+```
+# download the text angle classification inference model:
+wget  https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar
+tar xf ch_ppocr_mobile_v2.0_cls_infer.tar
+python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words_en/word_10.png" --cls_model_dir="ch_ppocr_mobile_v2.0_cls_infer"
+```
 ![](../imgs_words_en/word_10.png)
 After executing the command, the prediction results (classification angle and score) of the above image will be printed on the screen.

--- a/doc/doc_en/recognition_en.md
+++ b/doc/doc_en/recognition_en.md
@@ -317,11 +317,11 @@ Eval:
 <a name="EVALUATION"></a>
 ### EVALUATION
-The evaluation data set can be modified via `configs/rec/rec_icdar15_reader.yml` setting of `label_file_path` in EvalReader.
+The evaluation dataset can be set by modifying the `Eval.dataset.label_file_list` field in the `configs/rec/rec_icdar15_train.yml` file.
 ```
 # GPU evaluation, Global.checkpoints is the weight to be tested
-python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_icdar15_reader.yml -o Global.checkpoints={path/to/weights}/best_accuracy
+python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy
 ```
 <a name="PREDICTION"></a>
@@ -336,7 +336,7 @@ The default prediction picture is stored in `infer_img`, and the weight is speci
 ```
 # Predict English results
-python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.checkpoints={path/to/weights}/best_accuracy TestReader.infer_img=doc/imgs_words/en/word_1.jpg
+python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/en/word_1.jpg
 ```
 Input image:
@@ -354,7 +354,7 @@ The configuration file used for prediction must be consistent with the training.
 ```
 # Predict Chinese results
-python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.checkpoints={path/to/weights}/best_accuracy TestReader.infer_img=doc/imgs_words/ch/word_1.jpg
+python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/ch/word_1.jpg
 ```
 Input image:

--- a/doc/joinus.PNG
+++ b/doc/joinus.PNG
--- a/paddleocr.py
+++ b/paddleocr.py
@@ -262,8 +262,8 @@ class PaddleOCR(predict_system.TextSystem):
            logger.error('rec_algorithm must in {}'.format(SUPPORT_REC_MODEL))
            sys.exit(0)
-        postprocess_params.rec_char_dict_path = Path(
+        postprocess_params.rec_char_dict_path = str(
-            __file__).parent / postprocess_params.rec_char_dict_path
+            Path(__file__).parent / postprocess_params.rec_char_dict_path)
        # init det_model and rec_model
        super().__init__(postprocess_params)

--- a/setup.py
+++ b/setup.py
@@ -32,7 +32,7 @@ setup(
    package_dir={'paddleocr': ''},
    include_package_data=True,
    entry_points={"console_scripts": ["paddleocr= paddleocr.paddleocr:main"]},
-    version='2.0.1',
+    version='2.0.2',
    install_requires=requirements,
    license='Apache License 2.0',
    description='Awesome OCR toolkits based on PaddlePaddle （8.6M ultra-lightweight pre-trained model, support training and deployment among server, mobile, embeded and IoT devices',

--- a/tools/infer/predict_det.py
+++ b/tools/infer/predict_det.py
@@ -35,6 +35,7 @@ logger = get_logger()
 class TextDetector(object):
    def __init__(self, args):
+        self.args = args
        self.det_algorithm = args.det_algorithm
        self.use_zero_copy_run = args.use_zero_copy_run
        pre_process_list = [{
@@ -70,6 +71,9 @@ class TextDetector(object):
            postprocess_params["cover_thresh"] = args.det_east_cover_thresh
            postprocess_params["nms_thresh"] = args.det_east_nms_thresh
        elif self.det_algorithm == "SAST":
+            pre_process_list[0] = {
+                'DetResizeForTest': {'resize_long': args.det_limit_side_len}
+            }
            postprocess_params['name'] = 'SASTPostProcess'
            postprocess_params["score_thresh"] = args.det_sast_score_thresh
            postprocess_params["nms_thresh"] = args.det_sast_nms_thresh

--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -33,6 +33,7 @@ def parse_args():
    parser.add_argument("--use_gpu", type=str2bool, default=True)
    parser.add_argument("--ir_optim", type=str2bool, default=True)
    parser.add_argument("--use_tensorrt", type=str2bool, default=False)
+    parser.add_argument("--use_fp16", type=str2bool, default=False)
    parser.add_argument("--gpu_mem", type=int, default=8000)
    # params for text detector
@@ -46,7 +47,7 @@ def parse_args():
    parser.add_argument("--det_db_thresh", type=float, default=0.3)
    parser.add_argument("--det_db_box_thresh", type=float, default=0.5)
    parser.add_argument("--det_db_unclip_ratio", type=float, default=1.6)
+    parser.add_argument("--max_batch_size", type=int, default=10)
    # EAST parmas
    parser.add_argument("--det_east_score_thresh", type=float, default=0.8)
    parser.add_argument("--det_east_cover_thresh", type=float, default=0.1)
@@ -62,7 +63,7 @@ def parse_args():
    parser.add_argument("--rec_model_dir", type=str)
    parser.add_argument("--rec_image_shape", type=str, default="3, 32, 320")
    parser.add_argument("--rec_char_type", type=str, default='ch')
-    parser.add_argument("--rec_batch_num", type=int, default=6)
+    parser.add_argument("--rec_batch_num", type=int, default=1)
    parser.add_argument("--max_text_length", type=int, default=25)
    parser.add_argument(
        "--rec_char_dict_path",
@@ -78,7 +79,7 @@ def parse_args():
    parser.add_argument("--cls_model_dir", type=str)
    parser.add_argument("--cls_image_shape", type=str, default="3, 48, 192")
    parser.add_argument("--label_list", type=list, default=['0', '180'])
-    parser.add_argument("--cls_batch_num", type=int, default=30)
+    parser.add_argument("--cls_batch_num", type=int, default=6)
    parser.add_argument("--cls_thresh", type=float, default=0.9)
    parser.add_argument("--enable_mkldnn", type=str2bool, default=False)
@@ -113,6 +114,11 @@ def create_predictor(args, mode, logger):
    if args.use_gpu:
        config.enable_use_gpu(args.gpu_mem, 0)
+        if args.use_tensorrt:
+            config.enable_tensorrt_engine(
+                precision_mode=AnalysisConfig.Precision.Half
+                if args.use_fp16 else AnalysisConfig.Precision.Float32,
+                max_batch_size=args.max_batch_size)
    else:
        config.disable_gpu()
        config.set_cpu_math_library_num_threads(6)

--- a/tools/program.py
+++ b/tools/program.py
@@ -332,7 +332,7 @@ def eval(model, valid_dataloader, post_process_class, eval_class):
    return metirc
-def preprocess():
+def preprocess(is_train=False):
    FLAGS = ArgsParser().parse_args()
    config = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
@@ -350,15 +350,17 @@ def preprocess():
    device = paddle.set_device(device)
    config['Global']['distributed'] = dist.get_world_size() != 1
+    if is_train:
-    # save_config
+        # save_config
-    save_model_dir = config['Global']['save_model_dir']
+        save_model_dir = config['Global']['save_model_dir']
-    os.makedirs(save_model_dir, exist_ok=True)
+        os.makedirs(save_model_dir, exist_ok=True)
-    with open(os.path.join(save_model_dir, 'config.yml'), 'w') as f:
+        with open(os.path.join(save_model_dir, 'config.yml'), 'w') as f:
-        yaml.dump(dict(config), f, default_flow_style=False, sort_keys=False)
+            yaml.dump(
+                dict(config), f, default_flow_style=False, sort_keys=False)
-    logger = get_logger(
+        log_file = '{}/train.log'.format(save_model_dir)
-        name='root', log_file='{}/train.log'.format(save_model_dir))
+    else:
+        log_file = None
+    logger = get_logger(name='root', log_file=log_file)
    if config['Global']['use_visualdl']:
        from visualdl import LogWriter
        vdl_writer_path = '{}/vdl/'.format(save_model_dir)

--- a/tools/train.py
+++ b/tools/train.py
@@ -110,6 +110,6 @@ def test_reader(config, device, logger):
 if __name__ == '__main__':
-    config, device, logger, vdl_writer = program.preprocess()
+    config, device, logger, vdl_writer = program.preprocess(is_train=True)
    main(config, device, logger, vdl_writer)
    # test_reader(config, device, logger)