Merge branch 'dygraph' of https://github.com/PaddlePaddle/PaddleOCR into lock_seed

88f25272 · LDOUBLEV · 1b486757 · 63ed5fca · 88f25272 · 88f25272
72 changed file
--- a/PPOCRLabel/PPOCRLabel.py
+++ b/PPOCRLabel/PPOCRLabel.py
@@ -27,7 +27,12 @@ import json
 import cv2


+
 __dir__ = os.path.dirname(os.path.abspath(__file__))
+
+import numpy as np
+
+
 sys.path.append(__dir__)
 sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))
 sys.path.append("..")
@@ -78,7 +83,7 @@ class WindowMixin(object):
            addActions(menu, actions)
        return menu

-    def toolbar(self, title, actions=None):  
+    def toolbar(self, title, actions=None):
        toolbar = ToolBar(title)
        toolbar.setObjectName(u'%sToolBar' % title)
        # toolbar.setOrientation(Qt.Vertical)
@@ -98,7 +103,7 @@ class MainWindow(QMainWindow, WindowMixin):

        # Load setting in the main thread
        self.settings = Settings()
-        self.settings.load()  
+        self.settings.load()
        settings = self.settings
        self.lang = lang
        # Load string bundle for i18n
@@ -159,7 +164,7 @@ class MainWindow(QMainWindow, WindowMixin):
        filelistLayout = QVBoxLayout()
        filelistLayout.setContentsMargins(0, 0, 0, 0)
        filelistLayout.addWidget(self.fileListWidget)
-        
+
        self.AutoRecognition = QToolButton()
        self.AutoRecognition.setToolButtonStyle(Qt.ToolButtonTextBesideIcon)
        self.AutoRecognition.setIcon(newIcon('Auto'))
@@ -176,7 +181,7 @@ class MainWindow(QMainWindow, WindowMixin):
        self.filedock.setObjectName(getStr('files'))
        self.filedock.setWidget(fileListContainer)
        self.addDockWidget(Qt.LeftDockWidgetArea, self.filedock)
-        
+
        ######## Right area ##########
        listLayout = QVBoxLayout()
        listLayout.setContentsMargins(0, 0, 0, 0)
@@ -250,7 +255,7 @@ class MainWindow(QMainWindow, WindowMixin):
        self.imgsplider.setMaximum(150)
        self.imgsplider.setSingleStep(1)
        self.imgsplider.setTickPosition(QSlider.TicksBelow)
-        self.imgsplider.setTickInterval(1) 
+        self.imgsplider.setTickInterval(1)
        op = QGraphicsOpacityEffect()
        op.setOpacity(0.2)
        self.imgsplider.setGraphicsEffect(op)
@@ -266,7 +271,9 @@ class MainWindow(QMainWindow, WindowMixin):
        self.zoomWidget = ZoomWidget()
        self.colorDialog = ColorDialog(parent=self)
        self.zoomWidgetValue = self.zoomWidget.value()
-        
+
+        self.msgBox = QMessageBox()
+
        ########## thumbnail #########
        hlayout = QHBoxLayout()
        m = (0, 0, 0, 0)
@@ -294,7 +301,7 @@ class MainWindow(QMainWindow, WindowMixin):
        self.nextButton.setStyleSheet('border: none;')
        self.nextButton.clicked.connect(self.openNextImg)
        self.nextButton.setShortcut('d')
-        
+
        hlayout.addWidget(self.preButton)
        hlayout.addWidget(self.iconlist)
        hlayout.addWidget(self.nextButton)
@@ -303,7 +310,7 @@ class MainWindow(QMainWindow, WindowMixin):
        iconListContainer = QWidget()
        iconListContainer.setLayout(hlayout)
        iconListContainer.setFixedHeight(100)
-        
+
        ########### Canvas ###########
        self.canvas = Canvas(parent=self)
        self.canvas.zoomRequest.connect(self.zoomRequest)
@@ -360,6 +367,9 @@ class MainWindow(QMainWindow, WindowMixin):
        opendir = action(getStr('openDir'), self.openDirDialog,
                         'Ctrl+u', 'open', getStr('openDir'))

+        open_dataset_dir = action(getStr('openDatasetDir'), self.openDatasetDirDialog,
+                         'Ctrl+p', 'open', getStr('openDatasetDir'), enabled=False)
+
        save = action(getStr('save'), self.saveFile,
                      'Ctrl+V', 'verify', getStr('saveDetail'), enabled=False)

@@ -439,7 +449,7 @@ class MainWindow(QMainWindow, WindowMixin):
        AutoRec = action(getStr('autoRecognition'), self.autoRecognition,
                      '', 'Auto', getStr('autoRecognition'), enabled=False)

-        reRec = action(getStr('reRecognition'), self.reRecognition, 
+        reRec = action(getStr('reRecognition'), self.reRecognition,
                      'Ctrl+Shift+R', 'reRec', getStr('reRecognition'), enabled=False)

        singleRere = action(getStr('singleRe'), self.singleRerecognition,
@@ -457,6 +467,12 @@ class MainWindow(QMainWindow, WindowMixin):
        undoLastPoint = action(getStr("undoLastPoint"), self.canvas.undoLastPoint,
                               'Ctrl+Z', "undo", getStr("undoLastPoint"), enabled=False)

+        rotateLeft = action(getStr("rotateLeft"), partial(self.rotateImgAction,1),
+                               'Ctrl+Alt+L', "rotateLeft", getStr("rotateLeft"), enabled=False)
+
+        rotateRight = action(getStr("rotateRight"), partial(self.rotateImgAction,-1),
+                               'Ctrl+Alt+R', "rotateRight", getStr("rotateRight"), enabled=False)
+
        undo = action(getStr("undo"), self.undoShapeEdit,
                      'Ctrl+Z', "undo", getStr("undo"), enabled=False)

@@ -520,13 +536,14 @@ class MainWindow(QMainWindow, WindowMixin):
                              zoom=zoom, zoomIn=zoomIn, zoomOut=zoomOut, zoomOrg=zoomOrg,
                              fitWindow=fitWindow, fitWidth=fitWidth,
                              zoomActions=zoomActions, saveLabel=saveLabel,
-                              undo=undo, undoLastPoint=undoLastPoint,
+                              undo=undo, undoLastPoint=undoLastPoint,open_dataset_dir=open_dataset_dir,
+                              rotateLeft=rotateLeft,rotateRight=rotateRight,
                              fileMenuActions=(
-                                  opendir, saveLabel,  resetAll, quit),
+                                  opendir,  open_dataset_dir, saveLabel,  resetAll, quit),
                              beginner=(), advanced=(),
                              editMenu=(createpoly, edit, copy, delete,singleRere,None, undo, undoLastPoint,
-                                        None, color1, self.drawSquaresOption),
-                              beginnerContext=(create, edit, copy, delete, singleRere),
+                                        None, rotateLeft, rotateRight, None, color1, self.drawSquaresOption),
+                              beginnerContext=(create, edit, copy, delete, singleRere, rotateLeft, rotateRight,),
                              advancedContext=(createMode, editMode, edit, copy,
                                               delete, shapeLineColor, shapeFillColor),
                              onLoadActive=(
@@ -564,7 +581,7 @@ class MainWindow(QMainWindow, WindowMixin):
        self.autoSaveOption.triggered.connect(self.autoSaveFunc)

        addActions(self.menus.file,
-                   (opendir, None, saveLabel, saveRec, self.autoSaveOption, None, resetAll, deleteImg, quit))
+                   (opendir, open_dataset_dir, None, saveLabel, saveRec, self.autoSaveOption, None, resetAll, deleteImg, quit))

        addActions(self.menus.help, (showKeys,showSteps, showInfo))
        addActions(self.menus.view, (
@@ -778,6 +795,38 @@ class MainWindow(QMainWindow, WindowMixin):
        self.actions.create.setEnabled(False)
        self.actions.undoLastPoint.setEnabled(True)

+    def rotateImg(self, filename, k, _value):
+        """Rotate the image file on disk by ``k`` quarter turns and reload it.
+
+        Args:
+            filename: Path of the image; the file is overwritten in place.
+            k: Number of 90-degree rotations handed to ``np.rot90``
+                (positive values rotate counter-clockwise).
+            _value: Enabled state applied to the ``rotateRight`` action.
+        """
+        self.actions.rotateRight.setEnabled(_value)
+        pix = cv2.imread(filename)
+        if pix is None:
+            # cv2.imread reports an unreadable or undecodable file by
+            # returning None instead of raising; bail out so np.rot90 does
+            # not crash and the file on disk is left untouched.
+            return
+        cv2.imwrite(filename, np.rot90(pix, k))
+        self.canvas.update()
+        self.loadFile(filename)
+
+    def rotateImgWarn(self):
+        """Show a warning that rotating an image with label boxes disrupts the labels."""
+        if self.lang == 'ch':
+            self.msgBox.warning(self, "提示", "\n 该图片已经有标注框,旋转操作会打乱标注,建议清除标注框后旋转。")
+        else:
+            # Adjacent literals are joined at compile time. A backslash
+            # continuation inside the literal would embed the next source
+            # line's indentation in the displayed message.
+            self.msgBox.warning(self, "Warn", "\n The picture already has a label box, and rotation will disrupt the label. "
+                                "It is recommended to clear the label box and rotate it.")
+
+    def rotateImgAction(self, k=1, _value=False):
+        """Rotate the currently indexed image by ``k`` quarter turns.
+
+        Rotation is refused (with a warning) while ``self.itemsToShapesbox``
+        is non-empty. If the image file is missing, both rotate actions are
+        disabled.
+
+        Args:
+            k: Number of 90-degree rotations forwarded to ``rotateImg``.
+            _value: Not used in the body; presumably absorbs the ``checked``
+                argument of the Qt ``triggered`` signal -- confirm.
+        """
+        filename = self.mImgList[self.currIndex]
+
+        if not os.path.exists(filename):
+            # Report the real problem here; the label-box warning would be
+            # misleading for a file that has disappeared from disk.
+            if self.lang == 'ch':
+                self.msgBox.warning(self, "提示", "\n 图片文件已不存在,无法旋转。")
+            else:
+                self.msgBox.warning(self, "Warn", "\n The image file no longer exists, so it cannot be rotated.")
+            self.actions.rotateRight.setEnabled(False)
+            self.actions.rotateLeft.setEnabled(False)
+            return
+
+        if self.itemsToShapesbox:
+            self.rotateImgWarn()
+            return
+
+        # Save first and clear the dirty flag so reloading the rotated file
+        # does not trigger the unsaved-changes prompt.
+        self.saveFile()
+        self.dirty = False
+        self.rotateImg(filename=filename, k=k, _value=True)
+
+
    def toggleDrawingSensitive(self, drawing=True):
        """In the middle of drawing, toggling between modes should be disabled."""
        self.actions.editMode.setEnabled(not drawing)
@@ -885,7 +934,12 @@ class MainWindow(QMainWindow, WindowMixin):
            self.updateComboBox()

    def updateBoxlist(self):
-        for shape in self.canvas.selectedShapes+[self.canvas.hShape]:
+        self.canvas.selectedShapes_hShape = []
+        if self.canvas.hShape != None:
+            self.canvas.selectedShapes_hShape = self.canvas.selectedShapes + [self.canvas.hShape]
+        else:
+            self.canvas.selectedShapes_hShape = self.canvas.selectedShapes
+        for shape in self.canvas.selectedShapes_hShape:
            item = self.shapesToItemsbox[shape]  # listitem
            text = [(int(p.x()), int(p.y())) for p in shape.points]
            item.setText(str(text))
@@ -1274,7 +1328,7 @@ class MainWindow(QMainWindow, WindowMixin):
                        titem = self.iconlist.item(i)
                        titem.setSelected(True)
                        self.iconlist.scrollToItem(titem)
-                        break 
+                        break
            else:
                self.fileListWidget.clear()
                self.mImgList.clear()
@@ -1282,7 +1336,7 @@ class MainWindow(QMainWindow, WindowMixin):

        # if unicodeFilePath and self.iconList.count() > 0:
        #     if unicodeFilePath in self.mImgList:
-                
+
        if unicodeFilePath and os.path.exists(unicodeFilePath):
            self.canvas.verified = False

@@ -1313,7 +1367,7 @@ class MainWindow(QMainWindow, WindowMixin):
            self.addRecentFile(self.filePath)
            self.toggleActions(True)
            self.showBoundingBoxFromPPlabel(filePath)
-            
+
            self.setWindowTitle(__appname__ + ' ' + filePath)

            # Default : select last item if there is at least one item
@@ -1325,7 +1379,7 @@ class MainWindow(QMainWindow, WindowMixin):
            return True
        return False

-    
+
    def showBoundingBoxFromPPlabel(self, filePath):
        imgidx = self.getImglabelidx(filePath)
        if imgidx not in self.PPlabel.keys():
@@ -1418,6 +1472,7 @@ class MainWindow(QMainWindow, WindowMixin):

    def loadRecent(self, filename):
+        """Load ``filename`` if ``mayContinue()`` allows leaving the current image."""
        if self.mayContinue():
            self.loadFile(filename)

    def scanAllImages(self, folderPath):
@@ -1453,6 +1508,23 @@ class MainWindow(QMainWindow, WindowMixin):
        self.lastOpenDir = targetDirPath
        self.importDirImages(targetDirPath)

+    def openDatasetDirDialog(self):
+        """Open the last used dataset folder in the system file manager.
+
+        If ``self.lastOpenDir`` is unset or no longer exists, a warning is
+        shown and the ``open_dataset_dir`` action is disabled.
+        """
+        if self.lastOpenDir and os.path.exists(self.lastOpenDir):
+            system = platform.system()
+            if system == 'Windows':
+                os.startfile(self.lastOpenDir)
+                return
+
+            import subprocess  # local import: only needed off Windows
+
+            # 'open' exists only on macOS; other desktops provide 'xdg-open'.
+            opener = 'open' if system == 'Darwin' else 'xdg-open'
+            try:
+                # Argument list, no shell: a path containing spaces or shell
+                # metacharacters is passed through verbatim.
+                subprocess.call([opener, os.path.normpath(self.lastOpenDir)])
+            except OSError as err:
+                # Raised when the opener executable is not installed.
+                if self.lang == 'ch':
+                    self.msgBox.warning(self, "提示", "\n 无法打开文件夹: %s" % err)
+                else:
+                    self.msgBox.warning(self, "Warn", "\n Could not open the folder: %s" % err)
+        else:
+            if self.lang == 'ch':
+                self.msgBox.warning(self, "提示", "\n 原文件夹已不存在,请从新选择数据集路径!")
+            else:
+                self.msgBox.warning(self, "Warn", "\n The original folder no longer exists, please choose the data set path again!")
+
+            self.actions.open_dataset_dir.setEnabled(False)
+
    def importDirImages(self, dirpath, isDelete = False):
        if not self.mayContinue() or not dirpath:
            return
@@ -1500,6 +1572,10 @@ class MainWindow(QMainWindow, WindowMixin):
        self.reRecogButton.setEnabled(True)
        self.actions.AutoRec.setEnabled(True)
        self.actions.reRec.setEnabled(True)
+        self.actions.open_dataset_dir.setEnabled(True)
+        self.actions.rotateLeft.setEnabled(True)
+        self.actions.rotateRight.setEnabled(True)
+


    def openPrevImg(self, _value=False):
@@ -1508,7 +1584,7 @@ class MainWindow(QMainWindow, WindowMixin):

        if self.filePath is None:
            return
-        
+
        currIndex = self.mImgList.index(self.filePath)
        self.mImgList5 = self.mImgList[:5]
        if currIndex - 1 >= 0:
@@ -1538,7 +1614,7 @@ class MainWindow(QMainWindow, WindowMixin):
        if filename:
            print('file name in openNext is ',filename)
            self.loadFile(filename)
-        
+
    def updateFileListIcon(self, filename):
        pass

@@ -1650,7 +1726,7 @@ class MainWindow(QMainWindow, WindowMixin):
        proc.startDetached(os.path.abspath(__file__))

    def mayContinue(self):  #
-        if not self.dirty:                                    
+        if not self.dirty:
            return True
        else:
            discardChanges = self.discardChangesDialog()
@@ -2077,7 +2153,7 @@ def main():


 if __name__ == '__main__':
-        
+
    resource_file = './libs/resources.py'
    if not os.path.exists(resource_file):
        output = os.system('pyrcc5 -o libs/resources.py resources.qrc')

--- a/PPOCRLabel/README.md
+++ b/PPOCRLabel/README.md
@@ -8,9 +8,12 @@ PPOCRLabel is a semi-automatic graphic annotation tool suitable for OCR field, w

 ### Recent Update

+- 2021.8.11：
+  - New functions: Open the dataset folder, image rotation (Note: Please delete the label box before rotating the image) (by [Wei-JL](https://github.com/Wei-JL))
+  - Added shortcut key description (Help-Shortcut Key), repaired the direction shortcut key movement function under batch processing (by [d2623587501](https://github.com/d2623587501))
 - 2021.2.5: New batch processing and undo functions (by [Evezerest](https://github.com/Evezerest)):
-  - Batch processing function: Press and hold the Ctrl key to select the box, you can move, copy, and delete in batches.
-  - Undo function: In the process of drawing a four-point label box or after editing the box, press Ctrl+Z to undo the previous operation.
+  - **Batch processing function**: Press and hold the Ctrl key to select the box, you can move, copy, and delete in batches.
+  - **Undo function**: In the process of drawing a four-point label box or after editing the box, press Ctrl+Z to undo the previous operation.
  - Fix image rotation and size problems, optimize the process of editing the mark frame (by [ninetailskim](https://github.com/ninetailskim)、 [edencfc](https://github.com/edencfc)).
 - 2021.1.11: Optimize the labeling experience (by [edencfc](https://github.com/edencfc)),
  - Users can choose whether to pop up the label input dialog after drawing the detection box in "View - Pop-up Label Input Dialog".
@@ -23,15 +26,51 @@ PPOCRLabel is a semi-automatic graphic annotation tool suitable for OCR field, w

 ## Installation

-### 1. Install PaddleOCR
+### 1. Environment Preparation

-PaddleOCR models has been built in PPOCRLabel, please refer to [PaddleOCR installation document](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/installation.md) to prepare PaddleOCR and make sure it works.
+#### **Install PaddlePaddle 2.0**
+
+```bash
+pip3 install --upgrade pip
+
+# If you have cuda9 or cuda10 installed on your machine, please run the following command to install
+python3 -m pip install paddlepaddle-gpu==2.0.0 -i https://mirror.baidu.com/pypi/simple
+
+# If you only have cpu on your machine, please run the following command to install
+python3 -m pip install paddlepaddle==2.0.0 -i https://mirror.baidu.com/pypi/simple
+```
+
+For more software version requirements, please refer to the instructions in [Installation Document](https://www.paddlepaddle.org.cn/install/quick) for operation.
+
+#### **Install PaddleOCR**
+
+```bash
+# Recommend
+git clone https://github.com/PaddlePaddle/PaddleOCR
+
+# If you cannot pull successfully due to network problems, you can also choose to use the code hosting on the cloud:
+
+git clone https://gitee.com/paddlepaddle/PaddleOCR
+
+# Note: The cloud-hosting code may not be able to synchronize the update with this GitHub project in real time. There might be a delay of 3-5 days. Please give priority to the recommended method.
+```
+
+#### **Install Third-party Libraries**
+
+```bash
+cd PaddleOCR
+pip3 install -r requirements.txt
+```
+
+If you get the error `OSError: [WinError 126] The specified module could not be found` when installing Shapely on Windows, try downloading the Shapely whl file from http://www.lfd.uci.edu/~gohlke/pythonlibs/#shapely.
+
+Reference: [Solve shapely installation on windows](https://stackoverflow.com/questions/44398265/install-shapely-oserror-winerror-126-the-specified-module-could-not-be-found)

 ### 2. Install PPOCRLabel

 #### Windows

-```
+```bash
 pip install pyqt5
 cd ./PPOCRLabel # Change the directory to the PPOCRLabel folder
 python PPOCRLabel.py
@@ -39,15 +78,15 @@ python PPOCRLabel.py

 #### Ubuntu Linux

-```
+```bash
 pip3 install pyqt5
 pip3 install trash-cli
 cd ./PPOCRLabel # Change the directory to the PPOCRLabel folder
 python3 PPOCRLabel.py
 ```

-#### macOS
-```
+#### MacOS
+```bash
 pip3 install pyqt5
 pip3 uninstall opencv-python # Uninstall opencv manually as it conflicts with pyqt
 pip3 install opencv-contrib-python-headless==4.2.0.32 # Install the headless version of opencv
@@ -77,11 +116,11 @@ python3 PPOCRLabel.py

 7. Double click the result in 'recognition result' list to manually change inaccurate recognition results.

-8. Click "Check", the image status will switch to "√",then the program automatically jump to the next.
+8. **Click "Check", the image status will switch to "√", then the program automatically jumps to the next image.**

 9. Click "Delete Image" and the image will be deleted to the recycle bin.

-10. Labeling result: the user can save manually through the menu "File - Save Label", while the program will also save automatically if "File - Auto Save Label Mode" is selected. The manually checked label will be stored in *Label.txt* under the opened picture folder. Click "PaddleOCR"-"Save Recognition Results" in the menu bar, the recognition training data of such pictures will be saved in the *crop_img* folder, and the recognition label will be saved in *rec_gt.txt*<sup>[4]</sup>.
+10. Labeling result: the user can export the label result manually through the menu "File - Export Label", while the program will also export automatically if "File - Auto Export Label Mode" is selected. The manually checked label will be stored in *Label.txt* under the opened picture folder. Click "File" - "Export Recognition Results" in the menu bar to save the recognition training data of such pictures in the *crop_img* folder and the recognition label in *rec_gt.txt*<sup>[4]</sup>.

 ### Note

@@ -95,10 +134,10 @@ python3 PPOCRLabel.py

 |   File name   |                         Description                          |
 | :-----------: | :----------------------------------------------------------: |
-|   Label.txt   | The detection label file can be directly used for PPOCR detection model training. After the user saves 5 label results, the file will be automatically saved. It will also be written when the user closes the application or changes the file folder. |
+|   Label.txt   | The detection label file can be directly used for PPOCR detection model training. After the user saves 5 label results, the file will be automatically exported. It will also be written when the user closes the application or changes the file folder. |
 | fileState.txt | The picture status file save the image in the current folder that has been manually confirmed by the user. |
 |  Cache.cach   |    Cache files to save the results of model recognition.     |
-|  rec_gt.txt   | The recognition label file, which can be directly used for PPOCR identification model training, is generated after the user clicks on the menu bar "File"-"Save recognition result". |
+|  rec_gt.txt   | The recognition label file, which can be directly used for PPOCR identification model training, is generated after the user clicks on the menu bar "File"-"Export recognition result". |
 |   crop_img    | The recognition data, generated at the same time with *rec_gt.txt* |

 ## Explanation
@@ -132,16 +171,16 @@ python3 PPOCRLabel.py

 - Custom model: The model trained by users can be replaced by modifying PPOCRLabel.py in [PaddleOCR class instantiation](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/PPOCRLabel/PPOCRLabel.py#L110) referring [Custom Model Code](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_en/whl_en.md#use-custom-model)

-### Save
+### Export Label Result

-PPOCRLabel supports three ways to save Label.txt
+PPOCRLabel supports three ways to export Label.txt

- Automatically save: After selecting "File - Auto Save Label Mode", the program will automatically write the annotations into Label.txt every time the user confirms an image. If this option is not turned on, it will be automatically saved after detecting that the user has manually checked 5 images.
- Manual save: Click "File-Save Marking Results" to manually save the label.
- Close application save
+- Automatically export: After selecting "File - Auto Export Label Mode", the program will automatically write the annotations into Label.txt every time the user confirms an image. If this option is not turned on, it will be automatically exported after detecting that the user has manually checked 5 images.
+- Manual export: Click "File-Export Marking Results" to manually export the label.
+- Export on exit: the labels are also written when the application is closed.


-### Export partial recognition results
+### Export Partial Recognition Results

 For some data that are difficult to recognize, the recognition results will not be exported by **unchecking** the corresponding tags in the recognition results checkbox.


--- a/PPOCRLabel/README_ch.md
+++ b/PPOCRLabel/README_ch.md
@@ -8,9 +8,12 @@ PPOCRLabel是一款适用于OCR领域的半自动化图形标注工具，内置P

 #### 近期更新

+- 2021.8.11：
+  - 新增功能：打开数据所在文件夹、图像旋转（注意：旋转前的图片上不能存在标记框）（by [Wei-JL](https://github.com/Wei-JL)）
+  - 新增快捷键说明（帮助-快捷键）、修复批处理下的方向快捷键移动功能（by [d2623587501](https://github.com/d2623587501)）
 - 2021.2.5：新增批处理与撤销功能（by [Evezerest](https://github.com/Evezerest))
-  - 批处理功能：按住Ctrl键选择标记框后可批量移动、复制、删除。
-  - 撤销功能：在绘制四点标注框过程中或对框进行编辑操作后，按下Ctrl+Z可撤销上一部操作。
+  - **批处理功能**：按住Ctrl键选择标记框后可批量移动、复制、删除、重新识别。
+  - **撤销功能**：在绘制四点标注框过程中或对框进行编辑操作后，按下Ctrl+Z可撤销上一步操作。
  - 修复图像旋转和尺寸问题、优化编辑标记框过程（by [ninetailskim](https://github.com/ninetailskim)、 [edencfc](https://github.com/edencfc)）
 - 2021.1.11：优化标注体验（by [edencfc](https://github.com/edencfc)）：
  - 用户可在“视图 - 弹出标记输入框”选择在画完检测框后标记输入框是否弹出。
@@ -27,13 +30,48 @@ PPOCRLabel是一款适用于OCR领域的半自动化图形标注工具，内置P

 ## 安装

-### 1. 安装PaddleOCR
-PPOCRLabel内置PaddleOCR模型，故请参考[PaddleOCR安装文档](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/installation.md)准备好PaddleOCR，并确保PaddleOCR安装成功。
+### 1. 环境搭建
+#### 安装PaddlePaddle
+
+```bash
+pip3 install --upgrade pip
+
+# 如果您的机器安装的是CUDA9或CUDA10，请运行以下命令安装
+python3 -m pip install paddlepaddle-gpu==2.0.0 -i https://mirror.baidu.com/pypi/simple
+
+# 如果您的机器是CPU，请运行以下命令安装
+
+python3 -m pip install paddlepaddle==2.0.0 -i https://mirror.baidu.com/pypi/simple
+```
+
+更多的版本需求，请参照[安装文档](https://www.paddlepaddle.org.cn/install/quick)中的说明进行操作。
+
+#### **安装PaddleOCR**
+
+```bash
+# 【推荐】
+git clone https://github.com/PaddlePaddle/PaddleOCR
+
+# 如果因为网络问题无法pull成功，也可选择使用码云上的托管：
+
+git clone https://gitee.com/paddlepaddle/PaddleOCR
+
+# 注：码云托管代码可能无法实时同步本github项目更新，存在3~5天延时，请优先使用推荐方式。
+```
+
+#### 安装第三方库
+
+```bash
+cd PaddleOCR
+pip3 install -r requirements.txt
+```
+
+注意，windows环境下，建议从[这里](https://www.lfd.uci.edu/~gohlke/pythonlibs/#shapely)下载shapely安装包完成安装， 直接通过pip安装的shapely库可能出现`[WinError 126] 找不到指定模块`的问题。

 ### 2. 安装PPOCRLabel
+
 #### Windows

-```
+```bash
 pip install pyqt5
 cd ./PPOCRLabel # 将目录切换到PPOCRLabel文件夹下
 python PPOCRLabel.py --lang ch
@@ -41,15 +79,15 @@ python PPOCRLabel.py --lang ch

 #### Ubuntu Linux

-```
+```bash
 pip3 install pyqt5
 pip3 install trash-cli
 cd ./PPOCRLabel # 将目录切换到PPOCRLabel文件夹下
 python3 PPOCRLabel.py --lang ch
 ```

-#### macOS
-```
+#### MacOS
+```bash
 pip3 install pyqt5
 pip3 uninstall opencv-python # 由于mac版本的opencv与pyqt有冲突，需先手动卸载opencv
 pip3 install opencv-contrib-python-headless==4.2.0.32 # 安装headless版本的open-cv
@@ -57,6 +95,8 @@ cd ./PPOCRLabel # 将目录切换到PPOCRLabel文件夹下
 python3 PPOCRLabel.py --lang ch
 ```

+
+
 ## 使用

 ### 操作步骤
@@ -68,9 +108,9 @@ python3 PPOCRLabel.py --lang ch
 5. 标记框绘制完成后，用户点击 “确认”，检测框会先被预分配一个 “待识别” 标签。
 6. 重新识别：将图片中的所有检测画绘制/调整完成后，点击 “重新识别”，PPOCR模型会对当前图片中的**所有检测框**重新识别<sup>[3]</sup>。
 7. 内容更改：双击识别结果，对不准确的识别结果进行手动更改。
-8. **确认标记**：点击 “确认”，图片状态切换为 “√”，跳转至下一张。
+8. **确认标记：点击 “确认”，图片状态切换为 “√”，跳转至下一张。**
 9. 删除：点击 “删除图像”，图片将会被删除至回收站。
-10. 保存结果：用户可以通过菜单中“文件-保存标记结果”手动保存，同时也可以点击“文件 - 自动保存标记结果”开启自动保存。手动确认过的标记将会被存放在所打开图片文件夹下的*Label.txt*中。在菜单栏点击 “文件” - "保存识别结果"后，会将此类图片的识别训练数据保存在*crop_img*文件夹下，识别标签保存在*rec_gt.txt*中<sup>[4]</sup>。
+10. 导出结果：用户可以通过菜单中“文件-导出标记结果”手动导出，同时也可以点击“文件 - 自动导出标记结果”开启自动导出。手动确认过的标记将会被存放在所打开图片文件夹下的*Label.txt*中。在菜单栏点击 “文件” - "导出识别结果"后，会将此类图片的识别训练数据保存在*crop_img*文件夹下，识别标签保存在*rec_gt.txt*中<sup>[4]</sup>。

 ### 注意

@@ -84,10 +124,10 @@ python3 PPOCRLabel.py --lang ch

 |    文件名     |                             说明                             |
 | :-----------: | :----------------------------------------------------------: |
-|   Label.txt   | 检测标签，可直接用于PPOCR检测模型训练。用户每保存5张检测结果后，程序会进行自动写入。当用户关闭应用程序或切换文件路径后同样会进行写入。 |
+|   Label.txt   | 检测标签，可直接用于PPOCR检测模型训练。用户每确认5张检测结果后，程序会进行自动写入。当用户关闭应用程序或切换文件路径后同样会进行写入。 |
 | fileState.txt | 图片状态标记文件，保存当前文件夹下已经被用户手动确认过的图片名称。 |
 |  Cache.cach   |              缓存文件，保存模型自动识别的结果。              |
-|  rec_gt.txt   | 识别标签。可直接用于PPOCR识别模型训练。需用户手动点击菜单栏“文件” - "保存识别结果"后产生。 |
+|  rec_gt.txt   | 识别标签。可直接用于PPOCR识别模型训练。需用户手动点击菜单栏“文件” - "导出识别结果"后产生。 |
 |   crop_img    |   识别数据。按照检测框切割后的图片。与rec_gt.txt同时产生。   |

 ## 说明
@@ -120,19 +160,19 @@ python3 PPOCRLabel.py --lang ch

 - 自定义模型：用户可根据[自定义模型代码使用](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/whl.md#%E8%87%AA%E5%AE%9A%E4%B9%89%E6%A8%A1%E5%9E%8B)，通过修改PPOCRLabel.py中针对[PaddleOCR类的实例化](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/PPOCRLabel/PPOCRLabel.py#L110)替换成自己训练的模型。

-### 保存方式
+### 导出标记结果

-PPOCRLabel支持三种保存方式：
+PPOCRLabel支持三种导出方式：

- 自动保存：点击“文件 - 自动保存标记结果”后，用户每确认过一张图片，程序自动将标记结果写入Label.txt中。若未开启此选项，则检测到用户手动确认过5张图片后进行自动保存。
- 手动保存：点击“文件 - 保存标记结果”手动保存标记。
- 关闭应用程序保存
+- 自动导出：点击“文件 - 自动导出标记结果”后，用户每确认过一张图片，程序自动将标记结果写入Label.txt中。若未开启此选项，则检测到用户手动确认过5张图片后进行自动导出。
+- 手动导出：点击“文件 - 导出标记结果”手动导出标记。
+- 关闭应用程序导出

 ### 导出部分识别结果

 针对部分难以识别的数据，通过在识别结果的复选框中**取消勾选**相应的标记，其识别结果不会被导出。

-*注意：识别结果中的复选框状态仍需用户手动点击保存后才能保留*
+*注意：识别结果中的复选框状态仍需用户手动点击确认后才能保留*

 ### 错误提示
 - 如果同时使用whl包安装了paddleocr，其优先级大于通过paddleocr.py调用PaddleOCR类，whl包未更新时会导致程序异常。

--- a/PPOCRLabel/libs/canvas.py
+++ b/PPOCRLabel/libs/canvas.py
@@ -23,6 +23,7 @@ except ImportError:

 from libs.shape import Shape
 from libs.utils import distance
+import copy

 CURSOR_DEFAULT = Qt.ArrowCursor
 CURSOR_POINT = Qt.PointingHandCursor
@@ -81,6 +82,7 @@ class Canvas(QWidget):
        self.fourpoint = True # ADD
        self.pointnum = 0
        self.movingShape = False
+        self.selectCountShape = False

        #initialisation for panning
        self.pan_initial_pos = QPoint()
@@ -702,6 +704,10 @@ class Canvas(QWidget):

    def keyPressEvent(self, ev):
        key = ev.key()
+        shapesBackup = []
+        shapesBackup = copy.deepcopy(self.shapes)
+        self.shapesBackups.pop()
+        self.shapesBackups.append(shapesBackup)
        if key == Qt.Key_Escape and self.current:
            print('ESC press')
            self.current = None
@@ -709,41 +715,48 @@ class Canvas(QWidget):
            self.update()
        elif key == Qt.Key_Return and self.canCloseShape():
            self.finalise()
-        elif key == Qt.Key_Left and self.selectedShape:
+        elif key == Qt.Key_Left and self.selectedShapes:
             self.moveOnePixel('Left')
-        elif key == Qt.Key_Right and self.selectedShape:
+        elif key == Qt.Key_Right and self.selectedShapes:
             self.moveOnePixel('Right')
-        elif key == Qt.Key_Up and self.selectedShape:
+        elif key == Qt.Key_Up and self.selectedShapes:
             self.moveOnePixel('Up')
-        elif key == Qt.Key_Down and self.selectedShape:
+        elif key == Qt.Key_Down and self.selectedShapes:
             self.moveOnePixel('Down')

    def moveOnePixel(self, direction):
        # print(self.selectedShape.points)
-        if direction == 'Left' and not self.moveOutOfBound(QPointF(-1.0, 0)):
-            # print("move Left one pixel")
-            self.selectedShape.points[0] += QPointF(-1.0, 0)
-            self.selectedShape.points[1] += QPointF(-1.0, 0)
-            self.selectedShape.points[2] += QPointF(-1.0, 0)
-            self.selectedShape.points[3] += QPointF(-1.0, 0)
-        elif direction == 'Right' and not self.moveOutOfBound(QPointF(1.0, 0)):
-            # print("move Right one pixel")
-            self.selectedShape.points[0] += QPointF(1.0, 0)
-            self.selectedShape.points[1] += QPointF(1.0, 0)
-            self.selectedShape.points[2] += QPointF(1.0, 0)
-            self.selectedShape.points[3] += QPointF(1.0, 0)
-        elif direction == 'Up' and not self.moveOutOfBound(QPointF(0, -1.0)):
-            # print("move Up one pixel")
-            self.selectedShape.points[0] += QPointF(0, -1.0)
-            self.selectedShape.points[1] += QPointF(0, -1.0)
-            self.selectedShape.points[2] += QPointF(0, -1.0)
-            self.selectedShape.points[3] += QPointF(0, -1.0)
-        elif direction == 'Down' and not self.moveOutOfBound(QPointF(0, 1.0)):
-            # print("move Down one pixel")
-            self.selectedShape.points[0] += QPointF(0, 1.0)
-            self.selectedShape.points[1] += QPointF(0, 1.0)
-            self.selectedShape.points[2] += QPointF(0, 1.0)
-            self.selectedShape.points[3] += QPointF(0, 1.0)
+        """Nudge every selected shape by one pixel in ``direction``.
+
+        ``direction`` is one of 'Left', 'Right', 'Up' or 'Down'; any other
+        value moves nothing. A shape is skipped when ``moveOutOfBound``
+        rejects the step. Afterwards a deep copy of ``self.shapes`` is
+        appended to ``self.shapesBackups`` and ``shapeMoved`` is emitted.
+        """
+        offsets = {
+            'Left': (-1.0, 0),
+            'Right': (1.0, 0),
+            'Up': (0, -1.0),
+            'Down': (0, 1.0),
+        }
+        self.selectCount = len(self.selectedShapes)
+        self.selectCountShape = True
+        delta = offsets.get(direction)
+        for shape in self.selectedShapes:
+            # moveOutOfBound takes no shape argument, so it is assumed to
+            # inspect self.selectedShape -- keep it pointed at the shape
+            # currently being moved.
+            self.selectedShape = shape
+            if delta is None or self.moveOutOfBound(QPointF(*delta)):
+                continue
+            # Only the first four vertices are shifted; each gets its own
+            # QPointF so the in-place += never shares a step object.
+            for idx in range(4):
+                shape.points[idx] += QPointF(*delta)
+        self.shapesBackups.append(copy.deepcopy(self.shapes))
        self.shapeMoved.emit()
        self.repaint()

@@ -840,6 +853,7 @@ class Canvas(QWidget):
    def restoreShape(self):
        if not self.isShapeRestorable:
            return
+
        self.shapesBackups.pop()  # latest
        shapesBackup = self.shapesBackups.pop()
        self.shapes = shapesBackup

--- a/PPOCRLabel/libs/resources.py
+++ b/PPOCRLabel/libs/resources.py
--- a/PPOCRLabel/resources.qrc
+++ b/PPOCRLabel/resources.qrc
@@ -18,6 +18,8 @@
 <file alias="quit">resources/icons/quit.png</file>
 <file alias="copy">resources/icons/copy.png</file>
 <file alias="edit">resources/icons/edit.png</file>
+<file alias="rotateLeft">resources/icons/rotateLeft.png</file>
+<file alias="rotateRight">resources/icons/rotateRight.png</file>
 <file alias="open">resources/icons/open.png</file>
 <file alias="save">resources/icons/save.png</file>
 <file alias="format_voc">resources/icons/format_voc.png</file>

--- a/PPOCRLabel/resources/icons/rotateLeft.png
+++ b/PPOCRLabel/resources/icons/rotateLeft.png
--- a/PPOCRLabel/resources/icons/rotateRight.png
+++ b/PPOCRLabel/resources/icons/rotateRight.png
--- a/PPOCRLabel/resources/strings/strings-zh-CN.properties
+++ b/PPOCRLabel/resources/strings/strings-zh-CN.properties
@@ -31,6 +31,7 @@ save=确认
 saveAs=另存为
 fitWinDetail=缩放到当前窗口大小
 openDir=打开目录
+openDatasetDir=打开数据集路径
 copyPrevBounding=复制当前图像中的上一个边界框
 showHide=显示/隐藏标签
 changeSaveFormat=更改存储格式
@@ -85,7 +86,9 @@ detectionBoxposition=检测框位置
 recognitionResult=识别结果
 creatPolygon=四点标注
 drawSquares=正方形标注
-saveRec=保存识别结果
+rotateLeft=图片左旋转90度
+rotateRight=图片右旋转90度
+saveRec=导出识别结果
 tempLabel=待识别
 nullLabel=无法识别
 steps=操作步骤
@@ -96,9 +99,9 @@ ok=确认
 autolabeling=自动标注中
 hideBox=隐藏所有标注
 showBox=显示所有标注
-saveLabel=保存标记结果
+saveLabel=导出标记结果
 singleRe=重识别此区块
 labelDialogOption=弹出标记输入框
 undo=撤销
 undoLastPoint=撤销上个点
-autoSaveMode=自动保存标记结果
\ No newline at end of file
+autoSaveMode=自动导出标记结果
\ No newline at end of file
--- a/PPOCRLabel/resources/strings/strings.properties
+++ b/PPOCRLabel/resources/strings/strings.properties
@@ -3,6 +3,7 @@ openFileDetail=Open image or label file
 quit=Quit
 quitApp=Quit application
 openDir=Open Dir
+openDatasetDir=Open DatasetDir
 copyPrevBounding=Copy previous Bounding Boxes in the current image 
 changeSavedAnnotationDir=Change default saved Annotation dir
 openAnnotation=Open Annotation
@@ -84,8 +85,10 @@ iconList=Icon List
 detectionBoxposition=Detection box position
 recognitionResult=Recognition result
 creatPolygon=Create Quadrilateral
+rotateLeft=Left turn 90 degrees
+rotateRight=Right turn 90 degrees
 drawSquares=Draw Squares
-saveRec=Save Recognition Result
+saveRec=Export Recognition Result
 tempLabel=TEMPORARY
 nullLabel=NULL
 steps=Steps
@@ -96,9 +99,9 @@ ok=OK
 autolabeling=Automatic Labeling
 hideBox=Hide All Box
 showBox=Show All Box
-saveLabel=Save Label
+saveLabel=Export Label
 singleRe=Re-recognition RectBox
 labelDialogOption=Pop-up Label Input Dialog
 undo=Undo
 undoLastPoint=Undo Last Point
-autoSaveMode=Auto Save Label Mode
\ No newline at end of file
+autoSaveMode=Auto Export Label Mode
\ No newline at end of file
--- a/configs/rec/rec_mtb_nrtr.yml
+++ b/configs/rec/rec_mtb_nrtr.yml
+Global:
+  use_gpu: True
+  epoch_num: 21
+  log_smooth_window: 20
+  print_batch_step: 10
+  save_model_dir: ./output/rec/nrtr/
+  save_epoch_step: 1
+  # evaluation is run every 2000 iterations
+  eval_batch_step: [0, 2000]
+  cal_metric_during_train: True
+  pretrained_model:
+  checkpoints: 
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: doc/imgs_words_en/word_10.png
+  # for data or label process
+  character_dict_path: 
+  character_type: EN_symbol
+  max_text_length: 25
+  infer_mode: False
+  use_space_char: True
+  save_res_path: ./output/rec/predicts_nrtr.txt
+
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.99
+  clip_norm: 5.0
+  lr:
+    name: Cosine
+    learning_rate: 0.0005
+    warmup_epoch: 2
+  regularizer:
+    name: 'L2'
+    factor: 0.
+
+Architecture:
+  model_type: rec
+  algorithm: NRTR
+  in_channels: 1
+  Transform:
+  Backbone:
+    name: MTB
+    cnn_num: 2
+  Head:
+    name: Transformer
+    d_model: 512
+    num_encoder_layers: 6
+    beam_size: 10 # When beam_size is greater than 0, beam search is used during evaluation.
+    
+
+Loss:
+  name: NRTRLoss
+  smoothing: True
+
+PostProcess:
+  name: NRTRLabelDecode
+
+Metric:
+  name: RecMetric
+  main_indicator: acc
+
+Train:
+  dataset:
+    name: LMDBDataSet
+    data_dir: ./train_data/data_lmdb_release/training/
+    transforms:
+      - NRTRDecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - NRTRLabelEncode: # Class handling label
+      - NRTRRecResizeImg:
+          image_shape: [100, 32]
+          resize_type: PIL # PIL or OpenCV
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+  loader:
+    shuffle: True
+    batch_size_per_card: 512
+    drop_last: True
+    num_workers: 8
+
+Eval:
+  dataset:
+    name: LMDBDataSet
+    data_dir: ./train_data/data_lmdb_release/evaluation/
+    transforms:
+      - NRTRDecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - NRTRLabelEncode: # Class handling label
+      - NRTRRecResizeImg:
+          image_shape: [100, 32]
+          resize_type: PIL # PIL or OpenCV
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 256
+    num_workers: 1
+    use_shared_memory: False
--- a/deploy/cpp_infer/CMakeLists.txt
+++ b/deploy/cpp_infer/CMakeLists.txt
-project(ocr_system CXX C)
+project(ppocr CXX C)

 option(WITH_MKL        "Compile demo with MKL/OpenBlas support, default use MKL."       ON)
 option(WITH_GPU        "Compile demo with GPU/CPU, default use CPU."                    OFF)
@@ -11,7 +11,8 @@ SET(CUDA_LIB "" CACHE PATH "Location of libraries")
 SET(CUDNN_LIB "" CACHE PATH "Location of libraries")
 SET(TENSORRT_DIR "" CACHE PATH "Compile demo with TensorRT")

-set(DEMO_NAME "ocr_system")
+set(DEMO_NAME "ppocr")
+

 macro(safe_set_static_flag)
    foreach(flag_var
@@ -205,6 +206,10 @@ endif()

 set(DEPS ${DEPS} ${OpenCV_LIBS})

+include(ExternalProject)
+include(external-cmake/auto-log.cmake)
+include_directories(${CMAKE_CURRENT_BINARY_DIR}/autolog/src/extern_Autolog/auto_log)
+
 AUX_SOURCE_DIRECTORY(./src SRCS)
 add_executable(${DEMO_NAME} ${SRCS})


--- a/deploy/cpp_infer/external-cmake/auto-log.cmake
+++ b/deploy/cpp_infer/external-cmake/auto-log.cmake
+# Fetches the AutoLog sources from GitHub at build time via ExternalProject.
+# ExternalProject_Add is used below, so the including CMakeLists.txt is
+# expected to have called include(ExternalProject) before including this file.
+find_package(Git REQUIRED)
+# Label the value and emit it as a STATUS line so an unset build type does not
+# show up as an unexplained empty line in the configure output.
+message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
+
+set(AUTOLOG_REPOSITORY     https://github.com/LDOUBLEV/AutoLog.git)
+# Not referenced in this file; kept because other CMake files may read it.
+SET(AUTOLOG_INSTALL_DIR   ${CMAKE_CURRENT_BINARY_DIR}/install/Autolog)
+
+# NOTE(review): GIT_TAG tracks the moving `main` branch, so the fetched
+# sources can change between builds; consider pinning a commit hash or tag.
+# NOTE(review): DOWNLOAD_NO_EXTRACT is documented as a URL-download option and
+# presumably has no effect for a GIT_REPOSITORY download - confirm and drop.
+ExternalProject_Add(
+    extern_Autolog
+    PREFIX autolog
+    GIT_REPOSITORY ${AUTOLOG_REPOSITORY}
+    GIT_TAG main
+    DOWNLOAD_NO_EXTRACT True
+    # Use the CMake executable that is running this build instead of whatever
+    # `cmake` happens to be first on PATH (or absent from it).
+    INSTALL_COMMAND ${CMAKE_COMMAND} -E echo "Skipping install step."
+)
--- a/deploy/cpp_infer/src/clipper.cpp
+++ b/deploy/cpp_infer/src/clipper.cpp
--- a/deploy/cpp_infer/include/clipper.h
+++ b/deploy/cpp_infer/include/clipper.h
@@ -31,6 +31,8 @@
 *                                                                              *
 *******************************************************************************/

+#pragma once
+
 #ifndef clipper_hpp
 #define clipper_hpp


--- a/deploy/cpp_infer/include/config.h
+++ b/deploy/cpp_infer/include/config.h
-// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include <iomanip>
-#include <iostream>
-#include <map>
-#include <ostream>
-#include <string>
-#include <vector>
-
-#include "include/utility.h"
-
-namespace PaddleOCR {
-
-class OCRConfig {
-public:
-  explicit OCRConfig(const std::string &config_file) {
-    config_map_ = LoadConfig(config_file);
-
-    this->use_gpu = bool(stoi(config_map_["use_gpu"]));
-
-    this->gpu_id = stoi(config_map_["gpu_id"]);
-
-    this->gpu_mem = stoi(config_map_["gpu_mem"]);
-
-    this->cpu_math_library_num_threads =
-        stoi(config_map_["cpu_math_library_num_threads"]);
-
-    this->use_mkldnn = bool(stoi(config_map_["use_mkldnn"]));
-
-    this->max_side_len = stoi(config_map_["max_side_len"]);
-
-    this->det_db_thresh = stod(config_map_["det_db_thresh"]);
-
-    this->det_db_box_thresh = stod(config_map_["det_db_box_thresh"]);
-
-    this->det_db_unclip_ratio = stod(config_map_["det_db_unclip_ratio"]);
-
-    this->use_polygon_score = bool(stoi(config_map_["use_polygon_score"]));
-
-    this->det_model_dir.assign(config_map_["det_model_dir"]);
-
-    this->rec_model_dir.assign(config_map_["rec_model_dir"]);
-
-    this->char_list_file.assign(config_map_["char_list_file"]);
-
-    this->use_angle_cls = bool(stoi(config_map_["use_angle_cls"]));
-
-    this->cls_model_dir.assign(config_map_["cls_model_dir"]);
-
-    this->cls_thresh = stod(config_map_["cls_thresh"]);
-
-    this->visualize = bool(stoi(config_map_["visualize"]));
-
-    this->use_tensorrt = bool(stoi(config_map_["use_tensorrt"]));
-
-    this->use_fp16 = bool(stod(config_map_["use_fp16"]));
-  }
-
-  bool use_gpu = false;
-
-  int gpu_id = 0;
-
-  int gpu_mem = 4000;
-
-  int cpu_math_library_num_threads = 1;
-
-  bool use_mkldnn = false;
-
-  int max_side_len = 960;
-
-  double det_db_thresh = 0.3;
-
-  double det_db_box_thresh = 0.5;
-
-  double det_db_unclip_ratio = 2.0;
-
-  bool use_polygon_score = false;
-
-  std::string det_model_dir;
-
-  std::string rec_model_dir;
-
-  bool use_angle_cls;
-
-  std::string char_list_file;
-
-  std::string cls_model_dir;
-
-  double cls_thresh;
-
-  bool visualize = true;
-
-  bool use_tensorrt = false;
-
-  bool use_fp16 = false;
-
-  void PrintConfigInfo();
-
-private:
-  // Load configuration
-  std::map<std::string, std::string> LoadConfig(const std::string &config_file);
-
-  std::vector<std::string> split(const std::string &str,
-                                 const std::string &delim);
-
-  std::map<std::string, std::string> config_map_;
-};
-
-} // namespace PaddleOCR
--- a/deploy/cpp_infer/include/ocr_cls.h
+++ b/deploy/cpp_infer/include/ocr_cls.h
@@ -12,6 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+#pragma once
+
 #include "opencv2/core.hpp"
 #include "opencv2/imgcodecs.hpp"
 #include "opencv2/imgproc.hpp"
@@ -40,7 +42,7 @@ public:
                      const int &gpu_id, const int &gpu_mem,
                      const int &cpu_math_library_num_threads,
                      const bool &use_mkldnn, const double &cls_thresh,
-                      const bool &use_tensorrt, const bool &use_fp16) {
+                      const bool &use_tensorrt, const std::string &precision) {
    this->use_gpu_ = use_gpu;
    this->gpu_id_ = gpu_id;
    this->gpu_mem_ = gpu_mem;
@@ -49,7 +51,7 @@ public:

    this->cls_thresh = cls_thresh;
    this->use_tensorrt_ = use_tensorrt;
-    this->use_fp16_ = use_fp16;
+    this->precision_ = precision;

    LoadModel(model_dir);
  }
@@ -73,7 +75,7 @@ private:
  std::vector<float> scale_ = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f};
  bool is_scale_ = true;
  bool use_tensorrt_ = false;
-  bool use_fp16_ = false;
+  std::string precision_ = "fp32";
  // pre-process
  ClsResizeImg resize_op_;
  Normalize normalize_op_;

--- a/deploy/cpp_infer/include/ocr_det.h
+++ b/deploy/cpp_infer/include/ocr_det.h
@@ -46,7 +46,7 @@ public:
                      const double &det_db_box_thresh,
                      const double &det_db_unclip_ratio,
                      const bool &use_polygon_score, const bool &visualize,
-                      const bool &use_tensorrt, const bool &use_fp16) {
+                      const bool &use_tensorrt, const std::string &precision) {
    this->use_gpu_ = use_gpu;
    this->gpu_id_ = gpu_id;
    this->gpu_mem_ = gpu_mem;
@@ -62,7 +62,7 @@ public:

    this->visualize_ = visualize;
    this->use_tensorrt_ = use_tensorrt;
-    this->use_fp16_ = use_fp16;
+    this->precision_ = precision;

    LoadModel(model_dir);
  }
@@ -71,7 +71,7 @@ public:
  void LoadModel(const std::string &model_dir);

  // Run predictor
-  void Run(cv::Mat &img, std::vector<std::vector<std::vector<int>>> &boxes);
+  void Run(cv::Mat &img, std::vector<std::vector<std::vector<int>>> &boxes, std::vector<double> *times);

 private:
  std::shared_ptr<Predictor> predictor_;
@@ -91,7 +91,7 @@ private:

  bool visualize_ = true;
  bool use_tensorrt_ = false;
-  bool use_fp16_ = false;
+  std::string precision_ = "fp32";

  std::vector<float> mean_ = {0.485f, 0.456f, 0.406f};
  std::vector<float> scale_ = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f};

--- a/deploy/cpp_infer/include/ocr_rec.h
+++ b/deploy/cpp_infer/include/ocr_rec.h
@@ -12,6 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+#pragma once
+
 #include "opencv2/core.hpp"
 #include "opencv2/imgcodecs.hpp"
 #include "opencv2/imgproc.hpp"
@@ -42,14 +44,14 @@ public:
                          const int &gpu_id, const int &gpu_mem,
                          const int &cpu_math_library_num_threads,
                          const bool &use_mkldnn, const string &label_path,
-                          const bool &use_tensorrt, const bool &use_fp16) {
+                          const bool &use_tensorrt, const std::string &precision) {
    this->use_gpu_ = use_gpu;
    this->gpu_id_ = gpu_id;
    this->gpu_mem_ = gpu_mem;
    this->cpu_math_library_num_threads_ = cpu_math_library_num_threads;
    this->use_mkldnn_ = use_mkldnn;
    this->use_tensorrt_ = use_tensorrt;
-    this->use_fp16_ = use_fp16;
+    this->precision_ = precision;

    this->label_list_ = Utility::ReadDict(label_path);
    this->label_list_.insert(this->label_list_.begin(),
@@ -62,8 +64,7 @@ public:
  // Load Paddle inference model
  void LoadModel(const std::string &model_dir);

-  void Run(std::vector<std::vector<std::vector<int>>> boxes, cv::Mat &img,
-           Classifier *cls);
+  void Run(cv::Mat &img, std::vector<double> *times);

 private:
  std::shared_ptr<Predictor> predictor_;
@@ -80,7 +81,7 @@ private:
  std::vector<float> scale_ = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f};
  bool is_scale_ = true;
  bool use_tensorrt_ = false;
-  bool use_fp16_ = false;
+  std::string precision_ = "fp32";
  // pre-process
  CrnnResizeImg resize_op_;
  Normalize normalize_op_;
@@ -89,9 +90,6 @@ private:
  // post-process
  PostProcessor post_processor_;

-  cv::Mat GetRotateCropImage(const cv::Mat &srcimage,
-                             std::vector<std::vector<int>> box);
-
 }; // class CrnnRecognizer

 } // namespace PaddleOCR
--- a/deploy/cpp_infer/include/utility.h
+++ b/deploy/cpp_infer/include/utility.h
@@ -47,6 +47,9 @@ public:

  static void GetAllFiles(const char *dir_name,
                          std::vector<std::string> &all_inputs);
+    
+  static cv::Mat GetRotateCropImage(const cv::Mat &srcimage,
+                          std::vector<std::vector<int>> box);
 };

 } // namespace PaddleOCR
\ No newline at end of file
--- a/deploy/cpp_infer/readme.md
+++ b/deploy/cpp_infer/readme.md
@@ -154,82 +154,102 @@ inference/

 * 编译命令如下，其中Paddle C++预测库、opencv等其他依赖库的地址需要换成自己机器上的实际地址。

-
 ```shell
 sh tools/build.sh
 ```

-具体地，`tools/build.sh`中内容如下。
+* 具体的，需要修改`tools/build.sh`中环境路径，相关内容如下：

 ```shell
 OPENCV_DIR=your_opencv_dir
 LIB_DIR=your_paddle_inference_dir
 CUDA_LIB_DIR=your_cuda_lib_dir
 CUDNN_LIB_DIR=/your_cudnn_lib_dir
-
-BUILD_DIR=build
-rm -rf ${BUILD_DIR}
-mkdir ${BUILD_DIR}
-cd ${BUILD_DIR}
-cmake .. \
-    -DPADDLE_LIB=${LIB_DIR} \
-    -DWITH_MKL=ON \
-    -DDEMO_NAME=ocr_system \
-    -DWITH_GPU=OFF \
-    -DWITH_STATIC_LIB=OFF \
-    -DUSE_TENSORRT=OFF \
-    -DOPENCV_DIR=${OPENCV_DIR} \
-    -DCUDNN_LIB=${CUDNN_LIB_DIR} \
-    -DCUDA_LIB=${CUDA_LIB_DIR} \
-
-make -j
 ```

-`OPENCV_DIR`为opencv编译安装的地址；`LIB_DIR`为下载(`paddle_inference`文件夹)或者编译生成的Paddle预测库地址(`build/paddle_inference_install_dir`文件夹)；`CUDA_LIB_DIR`为cuda库文件地址，在docker中为`/usr/local/cuda/lib64`；`CUDNN_LIB_DIR`为cudnn库文件地址，在docker中为`/usr/lib/x86_64-linux-gnu/`。**注意**：以上路径都写绝对路径，不要写相对路径。
+其中，`OPENCV_DIR`为opencv编译安装的地址；`LIB_DIR`为下载(`paddle_inference`文件夹)或者编译生成的Paddle预测库地址(`build/paddle_inference_install_dir`文件夹)；`CUDA_LIB_DIR`为cuda库文件地址，在docker中为`/usr/local/cuda/lib64`；`CUDNN_LIB_DIR`为cudnn库文件地址，在docker中为`/usr/lib/x86_64-linux-gnu/`。**注意：以上路径都写绝对路径，不要写相对路径。**


-* 编译完成之后，会在`build`文件夹下生成一个名为`ocr_system`的可执行文件。
+* 编译完成之后，会在`build`文件夹下生成一个名为`ppocr`的可执行文件。


 ### 运行demo
-* 执行以下命令，完成对一幅图像的OCR识别与检测。

+运行方式：  
+```shell
+./build/ppocr <mode> [--param1] [--param2] [...]
+```  
+其中，`mode`为必选参数，表示选择的功能，取值范围['det', 'rec', 'system']，分别表示调用检测、识别、检测识别串联（包括方向分类器）。具体命令如下：
+
+##### 1. 只调用检测：
+```shell
+./build/ppocr det \
+    --det_model_dir=inference/ch_ppocr_mobile_v2.0_det_infer \
+    --image_dir=../../doc/imgs/12.jpg
+```
+##### 2. 只调用识别：
+```shell
+./build/ppocr rec \
+    --rec_model_dir=inference/ch_ppocr_mobile_v2.0_rec_infer \
+    --image_dir=../../doc/imgs_words/ch/
+```
+##### 3. 调用串联：
 ```shell
-sh tools/run.sh
+# 不使用方向分类器
+./build/ppocr system \
+    --det_model_dir=inference/ch_ppocr_mobile_v2.0_det_infer \
+    --rec_model_dir=inference/ch_ppocr_mobile_v2.0_rec_infer \
+    --image_dir=../../doc/imgs/12.jpg
+# 使用方向分类器
+./build/ppocr system \
+    --det_model_dir=inference/ch_ppocr_mobile_v2.0_det_infer \
+    --use_angle_cls=true \
+    --cls_model_dir=inference/ch_ppocr_mobile_v2.0_cls_infer \
+    --rec_model_dir=inference/ch_ppocr_mobile_v2.0_rec_infer \
+    --image_dir=../../doc/imgs/12.jpg
 ```

-* 若需要使用方向分类器，则需要将`tools/config.txt`中的`use_angle_cls`参数修改为1，表示开启方向分类器的预测。
-* 更多地，tools/config.txt中的参数及解释如下。
+更多参数如下：

-```
-use_gpu  0 # 是否使用GPU，1表示使用，0表示不使用
-gpu_id  0 # GPU id，使用GPU时有效
-gpu_mem  4000  # 申请的GPU内存
-cpu_math_library_num_threads  10 # CPU预测时的线程数，在机器核数充足的情况下，该值越大，预测速度越快
-use_mkldnn 1 # 是否使用mkldnn库
+- 通用参数

-# det config
-max_side_len  960 # 输入图像长宽大于960时，等比例缩放图像，使得图像最长边为960
-det_db_thresh  0.3 # 用于过滤DB预测的二值化图像，设置为0.-0.3对结果影响不明显
-det_db_box_thresh  0.5 # DB后处理过滤box的阈值，如果检测存在漏框情况，可酌情减小
-det_db_unclip_ratio  1.6 # 表示文本框的紧致程度，越小则文本框更靠近文本
-use_polygon_score 1 # 是否使用多边形框计算bbox score，0表示使用矩形框计算。矩形框计算速度更快，多边形框对弯曲文本区域计算更准确。
-det_model_dir  ./inference/det_db # 检测模型inference model地址
+|参数名称|类型|默认参数|意义|
+| --- | --- | --- | --- |
+|use_gpu|bool|false|是否使用GPU|
+|gpu_id|int|0|GPU id，使用GPU时有效|
+|gpu_mem|int|4000|申请的GPU内存|
+|cpu_threads|int|10|CPU预测时的线程数，在机器核数充足的情况下，该值越大，预测速度越快|
+|enable_mkldnn|bool|false|是否使用mkldnn库|

-# cls config
-use_angle_cls 0 # 是否使用方向分类器，0表示不使用，1表示使用
-cls_model_dir ./inference/cls # 方向分类器inference model地址
-cls_thresh  0.9 # 方向分类器的得分阈值
+- 检测模型相关

-# rec config
-rec_model_dir  ./inference/rec_crnn # 识别模型inference model地址
-char_list_file ../../ppocr/utils/ppocr_keys_v1.txt # 字典文件
+|参数名称|类型|默认参数|意义|
+| --- | --- | --- | --- |
+|det_model_dir|string|-|检测模型inference model地址|
+|max_side_len|int|960|输入图像长宽大于960时，等比例缩放图像，使得图像最长边为960|
+|det_db_thresh|float|0.3|用于过滤DB预测的二值化图像，设置为0.-0.3对结果影响不明显|
+|det_db_box_thresh|float|0.5|DB后处理过滤box的阈值，如果检测存在漏框情况，可酌情减小|
+|det_db_unclip_ratio|float|1.6|表示文本框的紧致程度，越小则文本框更靠近文本|
+|use_polygon_score|bool|false|是否使用多边形框计算bbox score，false表示使用矩形框计算。矩形框计算速度更快，多边形框对弯曲文本区域计算更准确。|
+|visualize|bool|true|是否对结果进行可视化，为true时，会在当前文件夹下保存文件名为`ocr_vis.png`的预测结果。|
+
+- 方向分类器相关
+
+|参数名称|类型|默认参数|意义|
+| --- | --- | --- | --- |
+|use_angle_cls|bool|false|是否使用方向分类器|
+|cls_model_dir|string|-|方向分类器inference model地址|
+|cls_thresh|float|0.9|方向分类器的得分阈值|
+
+- 识别模型相关
+
+|参数名称|类型|默认参数|意义|
+| --- | --- | --- | --- |
+|rec_model_dir|string|-|识别模型inference model地址|
+|char_list_file|string|../../ppocr/utils/ppocr_keys_v1.txt|字典文件|

-# show the detection results
-visualize 1 # 是否对结果进行可视化，为1时，会在当前文件夹下保存文件名为`ocr_vis.png`的预测结果。
-```

-* PaddleOCR也支持多语言的预测，更多支持的语言和模型可以参考[识别文档](../../doc/doc_ch/recognition.md)中的多语言字典与模型部分，如果希望进行多语言预测，只需将修改`tools/config.txt`中的`char_list_file`（字典文件路径）以及`rec_model_dir`（inference模型路径）字段即可。
+* PaddleOCR也支持多语言的预测，更多支持的语言和模型可以参考[识别文档](../../doc/doc_ch/recognition.md)中的多语言字典与模型部分，如果希望进行多语言预测，只需修改`char_list_file`（字典文件路径）以及`rec_model_dir`（inference模型路径）参数即可。

 最终屏幕上会输出检测结果如下。


--- a/deploy/cpp_infer/readme_en.md
+++ b/deploy/cpp_infer/readme_en.md
@@ -162,30 +162,13 @@ inference/
 sh tools/build.sh
 ```

-Specifically, the content in `tools/build.sh` is as follows.
+Specifically, you should modify the paths in `tools/build.sh`. The related content is as follows.

 ```shell
 OPENCV_DIR=your_opencv_dir
 LIB_DIR=your_paddle_inference_dir
 CUDA_LIB_DIR=your_cuda_lib_dir
 CUDNN_LIB_DIR=your_cudnn_lib_dir
-
-BUILD_DIR=build
-rm -rf ${BUILD_DIR}
-mkdir ${BUILD_DIR}
-cd ${BUILD_DIR}
-cmake .. \
-    -DPADDLE_LIB=${LIB_DIR} \
-    -DWITH_MKL=ON \
-    -DDEMO_NAME=ocr_system \
-    -DWITH_GPU=OFF \
-    -DWITH_STATIC_LIB=OFF \
-    -DUSE_TENSORRT=OFF \
-    -DOPENCV_DIR=${OPENCV_DIR} \
-    -DCUDNN_LIB=${CUDNN_LIB_DIR} \
-    -DCUDA_LIB=${CUDA_LIB_DIR} \
-
-make -j
 ```

 `OPENCV_DIR` is the opencv installation path; `LIB_DIR` is the download (`paddle_inference` folder)
@@ -193,48 +176,84 @@ or the generated Paddle inference library path (`build/paddle_inference_install_
 `CUDA_LIB_DIR` is the cuda library file path, in docker; it is `/usr/local/cuda/lib64`; `CUDNN_LIB_DIR` is the cudnn library file path, in docker it is `/usr/lib/x86_64-linux-gnu/`.


-* After the compilation is completed, an executable file named `ocr_system` will be generated in the `build` folder.
+* After the compilation is completed, an executable file named `ppocr` will be generated in the `build` folder.


 ### Run the demo
-* Execute the following command to complete the OCR recognition and detection of an image.
+Execute the built executable file:  
+```shell
+./build/ppocr <mode> [--param1] [--param2] [...]
+```  
+Here, `mode` is a required parameter whose value must be one of ['det', 'rec', 'system'], which run detection only, recognition only, and the end-to-end system (detection followed by recognition, including the direction classifier), respectively. The specific commands are as follows:

+##### 1. run det demo:
+```shell
+./build/ppocr det \
+    --det_model_dir=inference/ch_ppocr_mobile_v2.0_det_infer \
+    --image_dir=../../doc/imgs/12.jpg
+```
+##### 2. run rec demo:
+```shell
+./build/ppocr rec \
+    --rec_model_dir=inference/ch_ppocr_mobile_v2.0_rec_infer \
+    --image_dir=../../doc/imgs_words/ch/
+```
+##### 3. run system demo:
 ```shell
-sh tools/run.sh
+# without text direction classifier
+./build/ppocr system \
+    --det_model_dir=inference/ch_ppocr_mobile_v2.0_det_infer \
+    --rec_model_dir=inference/ch_ppocr_mobile_v2.0_rec_infer \
+    --image_dir=../../doc/imgs/12.jpg
+# with text direction classifier
+./build/ppocr system \
+    --det_model_dir=inference/ch_ppocr_mobile_v2.0_det_infer \
+    --use_angle_cls=true \
+    --cls_model_dir=inference/ch_ppocr_mobile_v2.0_cls_infer \
+    --rec_model_dir=inference/ch_ppocr_mobile_v2.0_rec_infer \
+    --image_dir=../../doc/imgs/12.jpg
 ```

-* If you want to orientation classifier to correct the detected boxes, you can set `use_angle_cls` in the file `tools/config.txt` as 1 to enable the function.
-* What's more, Parameters and their meanings in `tools/config.txt` are as follows.
+More parameters are as follows,  

+- common parameters

-```
-use_gpu  0 # Whether to use GPU, 0 means not to use, 1 means to use
-gpu_id  0 # GPU id when use_gpu is 1
-gpu_mem  4000  # GPU memory requested
-cpu_math_library_num_threads  10 # Number of threads when using CPU inference. When machine cores is enough, the large the value, the faster the inference speed
-use_mkldnn 1 # Whether to use mkdlnn library
+|parameter|data type|default|meaning|
+| --- | --- | --- | --- |
+|use_gpu|bool|false|Whether to use GPU|
+|gpu_id|int|0|GPU id when use_gpu is true|
+|gpu_mem|int|4000|GPU memory requested|
+|cpu_threads|int|10|Number of threads used for CPU inference. When the machine has enough cores, the larger the value, the faster the inference speed|
+|enable_mkldnn|bool|false|Whether to use the mkldnn library|

-max_side_len  960 #  Limit the maximum image height and width to 960
-det_db_thresh  0.3 # Used to filter the binarized image of DB prediction, setting 0.-0.3 has no obvious effect on the result
-det_db_box_thresh  0.5 # DDB post-processing filter box threshold, if there is a missing box detected, it can be reduced as appropriate
-det_db_unclip_ratio  1.6 # Indicates the compactness of the text box, the smaller the value, the closer the text box to the text
-use_polygon_score 1 # Whether to use polygon box to calculate bbox score, 0 means to use rectangle box to calculate. Use rectangular box to calculate faster, and polygonal box more accurate for curved text area.
-det_model_dir  ./inference/det_db # Address of detection inference model
+- detection related parameters

-# cls config
-use_angle_cls 0 # Whether to use the direction classifier, 0 means not to use, 1 means to use
-cls_model_dir ./inference/cls # Address of direction classifier inference model
-cls_thresh  0.9 # Score threshold of the  direction classifier
+|parameter|data type|default|meaning|
+| --- | --- | --- | --- |
+|det_model_dir|string|-|Address of detection inference model|
+|max_side_len|int|960|Limit the maximum image height and width to 960|
+|det_db_thresh|float|0.3|Used to filter the binarized image of DB prediction, setting 0.-0.3 has no obvious effect on the result|
+|det_db_box_thresh|float|0.5|DB post-processing filter box threshold, if there is a missing box detected, it can be reduced as appropriate|
+|det_db_unclip_ratio|float|1.6|Indicates the compactness of the text box, the smaller the value, the closer the text box to the text|
+|use_polygon_score|bool|false|Whether to use the polygon box to calculate the bbox score; false means the rectangle box is used. The rectangle box is faster to compute, while the polygon box is more accurate for curved text areas.|
+|visualize|bool|true|Whether to visualize the results. When it is set to true, the prediction result will be saved in the image file `./ocr_vis.png`.|

-# rec config
-rec_model_dir  ./inference/rec_crnn # Address of recognition inference model
-char_list_file ../../ppocr/utils/ppocr_keys_v1.txt # dictionary file
+- classifier related parameters

-# show the detection results
-visualize 1 # Whether to visualize the results，when it is set as 1, The prediction result will be save in the image file `./ocr_vis.png`.
-```
+|parameter|data type|default|meaning|
+| --- | --- | --- | --- |
+|use_angle_cls|bool|false|Whether to use the direction classifier|
+|cls_model_dir|string|-|Address of direction classifier inference model|
+|cls_thresh|float|0.9|Score threshold of the direction classifier|
+
+- recognition related parameters
+
+|parameter|data type|default|meaning|
+| --- | --- | --- | --- |
+|rec_model_dir|string|-|Address of recognition inference model|
+|char_list_file|string|../../ppocr/utils/ppocr_keys_v1.txt|dictionary file|

-* Multi-language inference is also supported in PaddleOCR, you can refer to [recognition tutorial](../../doc/doc_en/recognition_en.md) for more supported languages and models in PaddleOCR. Specifically, if you want to infer using multi-language models, you just need to modify values of `char_list_file` and `rec_model_dir` in file `tools/config.txt`.
+* Multi-language inference is also supported in PaddleOCR, you can refer to [recognition tutorial](../../doc/doc_en/recognition_en.md) for more supported languages and models in PaddleOCR. Specifically, if you want to infer using multi-language models, you just need to modify values of `char_list_file` and `rec_model_dir`.


 The detection results will be shown on the screen, which is as follows.

--- a/deploy/cpp_infer/src/config.cpp
+++ b/deploy/cpp_infer/src/config.cpp
-// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include <include/config.h>
-
-namespace PaddleOCR {
-
-std::vector<std::string> OCRConfig::split(const std::string &str,
-                                          const std::string &delim) {
-  std::vector<std::string> res;
-  if ("" == str)
-    return res;
-
-  int strlen = str.length() + 1;
-  chars *strs = new char[strlen];
-  std::strcpy(strs, str.c_str());
-
-  int delimlen = delim.length() + 1;
-  char *d = new char[delimlen];
-  std::strcpy(d, delim.c_str());
-
-  delete[] strs;
-  delete[] d;
-
-  char *p = std::strtok(strs, d);
-  while (p) {
-    std::string s = p;
-    res.push_back(s);
-    p = std::strtok(NULL, d);
-  }
-
-  return res;
-}
-
-std::map<std::string, std::string>
-OCRConfig::LoadConfig(const std::string &config_path) {
-  auto config = Utility::ReadDict(config_path);
-
-  std::map<std::string, std::string> dict;
-  for (int i = 0; i < config.size(); i++) {
-    // pass for empty line or comment
-    if (config[i].size() <= 1 || config[i][0] == '#') {
-      continue;
-    }
-    std::vector<std::string> res = split(config[i], " ");
-    dict[res[0]] = res[1];
-  }
-  return dict;
-}
-
-void OCRConfig::PrintConfigInfo() {
-  std::cout << "=======Paddle OCR inference config======" << std::endl;
-  for (auto iter = config_map_.begin(); iter != config_map_.end(); iter++) {
-    std::cout << iter->first << " : " << iter->second << std::endl;
-  }
-  std::cout << "=======End of Paddle OCR inference config======" << std::endl;
-}
-
-} // namespace PaddleOCR
--- a/deploy/cpp_infer/src/main.cpp
+++ b/deploy/cpp_infer/src/main.cpp
@@ -28,76 +28,255 @@
 #include <numeric>

 #include <glog/logging.h>
-#include <include/config.h>
 #include <include/ocr_det.h>
+#include <include/ocr_cls.h>
 #include <include/ocr_rec.h>
 #include <include/utility.h>
 #include <sys/stat.h>

+#include <gflags/gflags.h>
+
+// Command-line flags (gflags). They replace the removed config.txt file.
+// common / runtime related
+DEFINE_bool(use_gpu, false, "Infering with GPU or CPU.");
+DEFINE_int32(gpu_id, 0, "Device id of GPU to execute.");
+// gpu_mem is handed to the predictors next to gpu_id; its help text used to
+// be a copy of the "GPU id" description, which made --help misleading.
+DEFINE_int32(gpu_mem, 4000, "GPU memory (MB) to use when inferring with GPU.");
+DEFINE_int32(cpu_threads, 10, "Num of threads with CPU.");
+DEFINE_bool(enable_mkldnn, false, "Whether use mkldnn with CPU.");
+DEFINE_bool(use_tensorrt, false, "Whether use tensorrt.");
+DEFINE_string(precision, "fp32", "Precision be one of fp32/fp16/int8");
+DEFINE_bool(benchmark, true, "Whether use benchmark.");
+DEFINE_string(save_log_path, "./log_output/", "Save benchmark log path.");
+// detection related
+DEFINE_string(image_dir, "", "Dir of input image.");
+DEFINE_string(det_model_dir, "", "Path of det inference model.");
+DEFINE_int32(max_side_len, 960, "max_side_len of input image.");
+DEFINE_double(det_db_thresh, 0.3, "Threshold of det_db_thresh.");
+DEFINE_double(det_db_box_thresh, 0.5, "Threshold of det_db_box_thresh.");
+DEFINE_double(det_db_unclip_ratio, 1.6, "Threshold of det_db_unclip_ratio.");
+DEFINE_bool(use_polygon_score, false, "Whether use polygon score.");
+DEFINE_bool(visualize, true, "Whether show the detection results.");
+// classification related
+DEFINE_bool(use_angle_cls, false, "Whether use use_angle_cls.");
+DEFINE_string(cls_model_dir, "", "Path of cls inference model.");
+DEFINE_double(cls_thresh, 0.9, "Threshold of cls_thresh.");
+// recognition related
+DEFINE_string(rec_model_dir, "", "Path of rec inference model.");
+DEFINE_int32(rec_batch_num, 1, "rec_batch_num.");
+DEFINE_string(char_list_file, "../../ppocr/utils/ppocr_keys_v1.txt", "Path of dictionary.");
+
+
 using namespace std;
 using namespace cv;
 using namespace PaddleOCR;

+
+// Returns true when `path` can be stat'ed, false otherwise.
+static bool PathExists(const std::string& path){
+  const char *raw_path = path.c_str();
+#ifdef _WIN32
+  struct _stat info;
+  const int status = _stat(raw_path, &info);
+#else
+  struct stat info;
+  const int status = stat(raw_path, &info);
+#endif  // !_WIN32
+  // The stat call reports success with 0.
+  return status == 0;
+}
+
+
+// Detection-only pipeline: runs the DB detector on every image in
+// cv_all_img_names and sums the three timing values each run reports.
+// When --benchmark is set the totals are handed to AutoLogger.
+// Exits with status 1 if an image cannot be read; returns 0 otherwise.
+int main_det(std::vector<cv::String> cv_all_img_names) {
+    DBDetector det(FLAGS_det_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
+                   FLAGS_gpu_mem, FLAGS_cpu_threads,
+                   FLAGS_enable_mkldnn, FLAGS_max_side_len, FLAGS_det_db_thresh,
+                   FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio,
+                   FLAGS_use_polygon_score, FLAGS_visualize,
+                   FLAGS_use_tensorrt, FLAGS_precision);
+
+    // Running totals of the three per-image timings filled in by det.Run().
+    std::vector<double> time_info = {0, 0, 0};
+
+    for (const cv::String &img_path : cv_all_img_names) {
+      LOG(INFO) << "The predict img: " << img_path;
+
+      cv::Mat srcimg = cv::imread(img_path, cv::IMREAD_COLOR);
+      if (srcimg.data == nullptr) {
+        std::cerr << "[ERROR] image read failed! image path: " << img_path << endl;
+        exit(1);
+      }
+
+      std::vector<double> det_times;
+      std::vector<std::vector<std::vector<int>>> boxes;
+      det.Run(srcimg, boxes, &det_times);
+
+      for (int stage = 0; stage < 3; ++stage) {
+        time_info[stage] += det_times[stage];
+      }
+    }
+
+    if (!FLAGS_benchmark) {
+        return 0;
+    }
+
+    AutoLogger autolog("ocr_det",
+                       FLAGS_use_gpu,
+                       FLAGS_use_tensorrt,
+                       FLAGS_enable_mkldnn,
+                       FLAGS_cpu_threads,
+                       1,
+                       "dynamic",
+                       FLAGS_precision,
+                       time_info,
+                       cv_all_img_names.size());
+    autolog.report();
+    return 0;
+}
+
+
+// Recognition-only pipeline: feeds every image in cv_all_img_names to the
+// recognizer as a whole (no detection or cropping step) and sums the three
+// timing values each run reports.
+// When --benchmark is set the totals are handed to AutoLogger.
+// Exits with status 1 if an image cannot be read; returns 0 otherwise.
+int main_rec(std::vector<cv::String> cv_all_img_names) {
+    std::vector<double> time_info = {0, 0, 0};
+    CRNNRecognizer rec(FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
+                       FLAGS_gpu_mem, FLAGS_cpu_threads,
+                       FLAGS_enable_mkldnn, FLAGS_char_list_file,
+                       FLAGS_use_tensorrt, FLAGS_precision);
+
+    for (int i = 0; i < cv_all_img_names.size(); ++i) {
+      LOG(INFO) << "The predict img: " << cv_all_img_names[i];
+
+      cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
+      if (!srcimg.data) {
+        std::cerr << "[ERROR] image read failed! image path: " << cv_all_img_names[i] << endl;
+        exit(1);
+      }
+
+      std::vector<double> rec_times;
+      rec.Run(srcimg, &rec_times);
+
+      time_info[0] += rec_times[0];
+      time_info[1] += rec_times[1];
+      time_info[2] += rec_times[2];
+    }
+
+    // Report the accumulated timings the same way main_det does. Before this,
+    // time_info was filled in but never used, so --benchmark had no effect
+    // in "rec" mode.
+    if (FLAGS_benchmark) {
+        AutoLogger autolog("ocr_rec",
+                           FLAGS_use_gpu,
+                           FLAGS_use_tensorrt,
+                           FLAGS_enable_mkldnn,
+                           FLAGS_cpu_threads,
+                           1,
+                           "dynamic",
+                           FLAGS_precision,
+                           time_info,
+                           cv_all_img_names.size());
+        autolog.report();
+    }
+    return 0;
+}
+
+
+// Full pipeline: detection, optional angle classification, recognition.
+// For every image the detector produces boxes; each box is cropped and
+// rectified, optionally passed through the classifier (when --use_angle_cls
+// is set), then recognized.
+// Exits with status 1 if an image cannot be read; returns 0 otherwise.
+int main_system(std::vector<cv::String> cv_all_img_names) {
+    DBDetector det(FLAGS_det_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
+                   FLAGS_gpu_mem, FLAGS_cpu_threads,
+                   FLAGS_enable_mkldnn, FLAGS_max_side_len, FLAGS_det_db_thresh,
+                   FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio,
+                   FLAGS_use_polygon_score, FLAGS_visualize,
+                   FLAGS_use_tensorrt, FLAGS_precision);
+
+    // The classifier is optional; nullptr means "skip angle classification".
+    Classifier *cls = nullptr;
+    if (FLAGS_use_angle_cls) {
+      cls = new Classifier(FLAGS_cls_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
+                           FLAGS_gpu_mem, FLAGS_cpu_threads,
+                           FLAGS_enable_mkldnn, FLAGS_cls_thresh,
+                           FLAGS_use_tensorrt, FLAGS_precision);
+    }
+
+    CRNNRecognizer rec(FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
+                       FLAGS_gpu_mem, FLAGS_cpu_threads,
+                       FLAGS_enable_mkldnn, FLAGS_char_list_file,
+                       FLAGS_use_tensorrt, FLAGS_precision);
+
+    // Taken once before the loop, so the "Cost" printed after each image is
+    // the elapsed time since the first image started (cumulative).
+    auto start = std::chrono::system_clock::now();
+
+    for (int i = 0; i < cv_all_img_names.size(); ++i) {
+      LOG(INFO) << "The predict img: " << cv_all_img_names[i];
+
+      // Read the i-th file. The code used to pass FLAGS_image_dir here, i.e.
+      // the directory/pattern given on the command line, so every iteration
+      // read the same path and failed outright when image_dir was a directory.
+      cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
+      if (!srcimg.data) {
+        std::cerr << "[ERROR] image read failed! image path: " << cv_all_img_names[i] << endl;
+        exit(1);
+      }
+      std::vector<std::vector<std::vector<int>>> boxes;
+      std::vector<double> det_times;
+      std::vector<double> rec_times;
+
+      det.Run(srcimg, boxes, &det_times);
+
+      cv::Mat crop_img;
+      for (int j = 0; j < boxes.size(); j++) {
+        crop_img = Utility::GetRotateCropImage(srcimg, boxes[j]);
+
+        if (cls != nullptr) {
+          crop_img = cls->Run(crop_img);
+        }
+        rec.Run(crop_img, &rec_times);
+      }
+
+      auto end = std::chrono::system_clock::now();
+      auto duration =
+          std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+      std::cout << "Cost  "
+                << double(duration.count()) *
+                       std::chrono::microseconds::period::num /
+                       std::chrono::microseconds::period::den
+                << "s" << std::endl;
+    }
+
+    // Release the optional classifier; deleting nullptr is a no-op.
+    delete cls;
+
+    return 0;
+}
+
+
+// Validates the flags required by `mode` ("det", "rec" or "system") and the
+// --precision value. Prints a usage hint and exits with status 1 when a
+// required flag is empty or the precision is not one of fp32/fp16/int8.
+void check_params(char* mode) {
+    const bool det_mode = strcmp(mode, "det") == 0;
+    const bool rec_mode = strcmp(mode, "rec") == 0;
+    const bool system_mode = strcmp(mode, "system") == 0;
+    const bool no_image_dir = FLAGS_image_dir.empty();
+
+    if (det_mode && (FLAGS_det_model_dir.empty() || no_image_dir)) {
+        std::cout << "Usage[det]: ./ppocr --det_model_dir=/PATH/TO/DET_INFERENCE_MODEL/ "
+                  << "--image_dir=/PATH/TO/INPUT/IMAGE/" << std::endl;
+        exit(1);
+    }
+
+    if (rec_mode && (FLAGS_rec_model_dir.empty() || no_image_dir)) {
+        std::cout << "Usage[rec]: ./ppocr --rec_model_dir=/PATH/TO/REC_INFERENCE_MODEL/ "
+                  << "--image_dir=/PATH/TO/INPUT/IMAGE/" << std::endl;
+        exit(1);
+    }
+
+    if (system_mode) {
+        const bool missing_required = FLAGS_det_model_dir.empty() ||
+                                      FLAGS_rec_model_dir.empty() ||
+                                      no_image_dir;
+        // The classifier model is only required when angle classification is on.
+        const bool missing_cls = FLAGS_use_angle_cls && FLAGS_cls_model_dir.empty();
+        if (missing_required || missing_cls) {
+            std::cout << "Usage[system without angle cls]: ./ppocr --det_model_dir=/PATH/TO/DET_INFERENCE_MODEL/ "
+                        << "--rec_model_dir=/PATH/TO/REC_INFERENCE_MODEL/ "
+                        << "--image_dir=/PATH/TO/INPUT/IMAGE/" << std::endl;
+            std::cout << "Usage[system with angle cls]: ./ppocr --det_model_dir=/PATH/TO/DET_INFERENCE_MODEL/ "
+                        << "--use_angle_cls=true "
+                        << "--cls_model_dir=/PATH/TO/CLS_INFERENCE_MODEL/ "
+                        << "--rec_model_dir=/PATH/TO/REC_INFERENCE_MODEL/ "
+                        << "--image_dir=/PATH/TO/INPUT/IMAGE/" << std::endl;
+            exit(1);
+        }
+    }
+
+    const bool precision_supported = FLAGS_precision == "fp32" ||
+                                     FLAGS_precision == "fp16" ||
+                                     FLAGS_precision == "int8";
+    if (!precision_supported) {
+        cout << "precison should be 'fp32'(default), 'fp16' or 'int8'. " << endl;
+        exit(1);
+    }
+}
+
+
+// Entry point. The first argument selects the mode (det, rec or system); the
+// remaining arguments are gflags options. Dispatches to main_det, main_rec or
+// main_system with the list of files matched by --image_dir.
 int main(int argc, char **argv) {
-  if (argc < 3) {
-    std::cerr << "[ERROR] usage: " << argv[0]
-              << " configure_filepath image_path\n";
-    exit(1);
-  }
-
-  OCRConfig config(argv[1]);
-
-  config.PrintConfigInfo();
-
-  std::string img_path(argv[2]);
-  std::vector<std::string> all_img_names;
-  Utility::GetAllFiles((char *)img_path.c_str(), all_img_names);
-
-  DBDetector det(config.det_model_dir, config.use_gpu, config.gpu_id,
-                 config.gpu_mem, config.cpu_math_library_num_threads,
-                 config.use_mkldnn, config.max_side_len, config.det_db_thresh,
-                 config.det_db_box_thresh, config.det_db_unclip_ratio,
-                 config.use_polygon_score, config.visualize,
-                 config.use_tensorrt, config.use_fp16);
-
-  Classifier *cls = nullptr;
-  if (config.use_angle_cls == true) {
-    cls = new Classifier(config.cls_model_dir, config.use_gpu, config.gpu_id,
-                         config.gpu_mem, config.cpu_math_library_num_threads,
-                         config.use_mkldnn, config.cls_thresh,
-                         config.use_tensorrt, config.use_fp16);
-  }
-
-  CRNNRecognizer rec(config.rec_model_dir, config.use_gpu, config.gpu_id,
-                     config.gpu_mem, config.cpu_math_library_num_threads,
-                     config.use_mkldnn, config.char_list_file,
-                     config.use_tensorrt, config.use_fp16);
-
-  auto start = std::chrono::system_clock::now();
-
-  for (auto img_dir : all_img_names) {
-    LOG(INFO) << "The predict img: " << img_dir;
-
-    cv::Mat srcimg = cv::imread(img_dir, cv::IMREAD_COLOR);
-    if (!srcimg.data) {
-      std::cerr << "[ERROR] image read failed! image path: " << img_path
-                << "\n";
-      exit(1);
+    // The mode is checked on argv[1] before any flag parsing, so it has to be
+    // the first command-line argument.
+    if (argc<=1 || (strcmp(argv[1], "det")!=0 && strcmp(argv[1], "rec")!=0 && strcmp(argv[1], "system")!=0)) {
+        std::cout << "Please choose one mode of [det, rec, system] !" << std::endl;
+        return -1;
+    }
+    std::cout << "mode: " << argv[1] << endl;
+
+    // Parsing command-line
+    google::ParseCommandLineFlags(&argc, &argv, true);
+    // check_params exits the process itself when a required flag is missing.
+    check_params(argv[1]);
+        
+    if (!PathExists(FLAGS_image_dir)) {
+        std::cerr << "[ERROR] image path not exist! image_dir: " << FLAGS_image_dir << endl;
+        exit(1);      
     }
-    std::vector<std::vector<std::vector<int>>> boxes;
-
-    det.Run(srcimg, boxes);
-
-    rec.Run(boxes, srcimg, cls);
-    auto end = std::chrono::system_clock::now();
-    auto duration =
-        std::chrono::duration_cast<std::chrono::microseconds>(end - start);
-    std::cout << "Cost  "
-              << double(duration.count()) *
-                     std::chrono::microseconds::period::num /
-                     std::chrono::microseconds::period::den
-              << "s" << std::endl;
-  }
-
-  return 0;
+    
+    // Collect the files matched by image_dir into cv_all_img_names.
+    std::vector<cv::String> cv_all_img_names;
+    cv::glob(FLAGS_image_dir, cv_all_img_names);
+    std::cout << "total images num: " << cv_all_img_names.size() << endl;
+    
+    // Dispatch on the mode; each pipeline returns the process exit code.
+    if (strcmp(argv[1], "det")==0) {
+        return main_det(cv_all_img_names);
+    }
+    if (strcmp(argv[1], "rec")==0) {
+        return main_rec(cv_all_img_names);
+    }    
+    if (strcmp(argv[1], "system")==0) {
+        return main_system(cv_all_img_names);
+    } 
+
 }
--- a/deploy/cpp_infer/src/ocr_cls.cpp
+++ b/deploy/cpp_infer/src/ocr_cls.cpp
@@ -77,10 +77,16 @@ void Classifier::LoadModel(const std::string &model_dir) {
  if (this->use_gpu_) {
    config.EnableUseGpu(this->gpu_mem_, this->gpu_id_);
    if (this->use_tensorrt_) {
+      auto precision = paddle_infer::Config::Precision::kFloat32;
+      if (this->precision_ == "fp16") {
+        precision = paddle_infer::Config::Precision::kHalf;
+      }
+     if (this->precision_ == "int8") {
+        precision = paddle_infer::Config::Precision::kInt8;
+      } 
      config.EnableTensorRtEngine(
          1 << 20, 10, 3,
-          this->use_fp16_ ? paddle_infer::Config::Precision::kHalf
-                          : paddle_infer::Config::Precision::kFloat32,
+          precision,
          false, false);
    }
  } else {

--- a/deploy/cpp_infer/src/ocr_det.cpp
+++ b/deploy/cpp_infer/src/ocr_det.cpp
@@ -14,6 +14,7 @@

 #include <include/ocr_det.h>

+
 namespace PaddleOCR {

 void DBDetector::LoadModel(const std::string &model_dir) {
@@ -25,10 +26,16 @@ void DBDetector::LoadModel(const std::string &model_dir) {
  if (this->use_gpu_) {
    config.EnableUseGpu(this->gpu_mem_, this->gpu_id_);
    if (this->use_tensorrt_) {
+      auto precision = paddle_infer::Config::Precision::kFloat32;
+      if (this->precision_ == "fp16") {
+        precision = paddle_infer::Config::Precision::kHalf;
+      }
+     if (this->precision_ == "int8") {
+        precision = paddle_infer::Config::Precision::kInt8;
+      } 
      config.EnableTensorRtEngine(
          1 << 20, 10, 3,
-          this->use_fp16_ ? paddle_infer::Config::Precision::kHalf
-                          : paddle_infer::Config::Precision::kFloat32,
+          precision,
          false, false);
      std::map<std::string, std::vector<int>> min_input_shape = {
          {"x", {1, 3, 50, 50}},
@@ -90,13 +97,16 @@ void DBDetector::LoadModel(const std::string &model_dir) {
 }

 void DBDetector::Run(cv::Mat &img,
-                     std::vector<std::vector<std::vector<int>>> &boxes) {
+                     std::vector<std::vector<std::vector<int>>> &boxes,
+                     std::vector<double> *times) {
  float ratio_h{};
  float ratio_w{};

  cv::Mat srcimg;
  cv::Mat resize_img;
  img.copyTo(srcimg);
+  
+  auto preprocess_start = std::chrono::steady_clock::now();
  this->resize_op_.Run(img, resize_img, this->max_side_len_, ratio_h, ratio_w,
                       this->use_tensorrt_);

@@ -105,14 +115,17 @@ void DBDetector::Run(cv::Mat &img,

  std::vector<float> input(1 * 3 * resize_img.rows * resize_img.cols, 0.0f);
  this->permute_op_.Run(&resize_img, input.data());
-
+  auto preprocess_end = std::chrono::steady_clock::now();
+    
  // Inference.
  auto input_names = this->predictor_->GetInputNames();
  auto input_t = this->predictor_->GetInputHandle(input_names[0]);
  input_t->Reshape({1, 3, resize_img.rows, resize_img.cols});
+  auto inference_start = std::chrono::steady_clock::now();
  input_t->CopyFromCpu(input.data());
+  
  this->predictor_->Run();
-
+    
  std::vector<float> out_data;
  auto output_names = this->predictor_->GetOutputNames();
  auto output_t = this->predictor_->GetOutputHandle(output_names[0]);
@@ -122,7 +135,9 @@ void DBDetector::Run(cv::Mat &img,

  out_data.resize(out_num);
  output_t->CopyToCpu(out_data.data());
-
+  auto inference_end = std::chrono::steady_clock::now();
+  
+  auto postprocess_start = std::chrono::steady_clock::now();
  int n2 = output_shape[2];
  int n3 = output_shape[3];
  int n = n2 * n3;
@@ -150,7 +165,16 @@ void DBDetector::Run(cv::Mat &img,
      this->det_db_unclip_ratio_, this->use_polygon_score_);

  boxes = post_processor_.FilterTagDetRes(boxes, ratio_h, ratio_w, srcimg);
-
+  auto postprocess_end = std::chrono::steady_clock::now();
+  std::cout << "Detected boxes num: " << boxes.size() << endl;
+
+  std::chrono::duration<float> preprocess_diff = preprocess_end - preprocess_start;
+  times->push_back(double(preprocess_diff.count() * 1000));
+  std::chrono::duration<float> inference_diff = inference_end - inference_start;
+  times->push_back(double(inference_diff.count() * 1000));
+  std::chrono::duration<float> postprocess_diff = postprocess_end - postprocess_start;
+  times->push_back(double(postprocess_diff.count() * 1000));
+    
  //// visualization
  if (this->visualize_) {
    Utility::VisualizeBboxes(srcimg, boxes);

--- a/deploy/cpp_infer/src/ocr_rec.cpp
+++ b/deploy/cpp_infer/src/ocr_rec.cpp
@@ -16,80 +16,80 @@

 namespace PaddleOCR {

-void CRNNRecognizer::Run(std::vector<std::vector<std::vector<int>>> boxes,
-                         cv::Mat &img, Classifier *cls) {
+// Recognizes the text in a single image and prints the decoded string and its
+// average confidence to stdout.
+//
+// img:   input image; it is copied first and the copy is what gets processed.
+// times: output; three values in milliseconds are appended in this order:
+//        preprocess, inference, postprocess. Must not be null.
+void CRNNRecognizer::Run(cv::Mat &img, std::vector<double> *times) {
  cv::Mat srcimg;
  img.copyTo(srcimg);
-  cv::Mat crop_img;
  cv::Mat resize_img;

-  std::cout << "The predicted text is :" << std::endl;
-  int index = 0;
-  for (int i = 0; i < boxes.size(); i++) {
-    crop_img = GetRotateCropImage(srcimg, boxes[i]);
-
-    if (cls != nullptr) {
-      crop_img = cls->Run(crop_img);
-    }
-
-    float wh_ratio = float(crop_img.cols) / float(crop_img.rows);
-
-    this->resize_op_.Run(crop_img, resize_img, wh_ratio, this->use_tensorrt_);
-
-    this->normalize_op_.Run(&resize_img, this->mean_, this->scale_,
-                            this->is_scale_);
-
-    std::vector<float> input(1 * 3 * resize_img.rows * resize_img.cols, 0.0f);
-
-    this->permute_op_.Run(&resize_img, input.data());
-
-    // Inference.
-    auto input_names = this->predictor_->GetInputNames();
-    auto input_t = this->predictor_->GetInputHandle(input_names[0]);
-    input_t->Reshape({1, 3, resize_img.rows, resize_img.cols});
-    input_t->CopyFromCpu(input.data());
-    this->predictor_->Run();
-
-    std::vector<float> predict_batch;
-    auto output_names = this->predictor_->GetOutputNames();
-    auto output_t = this->predictor_->GetOutputHandle(output_names[0]);
-    auto predict_shape = output_t->shape();
-
-    int out_num = std::accumulate(predict_shape.begin(), predict_shape.end(), 1,
-                                  std::multiplies<int>());
-    predict_batch.resize(out_num);
-
-    output_t->CopyToCpu(predict_batch.data());
-
-    // ctc decode
-    std::vector<std::string> str_res;
-    int argmax_idx;
-    int last_index = 0;
-    float score = 0.f;
-    int count = 0;
-    float max_value = 0.0f;
-
-    for (int n = 0; n < predict_shape[1]; n++) {
-      argmax_idx =
-          int(Utility::argmax(&predict_batch[n * predict_shape[2]],
-                              &predict_batch[(n + 1) * predict_shape[2]]));
-      max_value =
-          float(*std::max_element(&predict_batch[n * predict_shape[2]],
-                                  &predict_batch[(n + 1) * predict_shape[2]]));
-
-      if (argmax_idx > 0 && (!(n > 0 && argmax_idx == last_index))) {
-        score += max_value;
-        count += 1;
-        str_res.push_back(label_list_[argmax_idx]);
-      }
-      last_index = argmax_idx;
+  float wh_ratio = float(srcimg.cols) / float(srcimg.rows);
+  auto preprocess_start = std::chrono::steady_clock::now();
+  this->resize_op_.Run(srcimg, resize_img, wh_ratio, this->use_tensorrt_);
+
+  this->normalize_op_.Run(&resize_img, this->mean_, this->scale_,
+                          this->is_scale_);
+
+  std::vector<float> input(1 * 3 * resize_img.rows * resize_img.cols, 0.0f);
+
+  this->permute_op_.Run(&resize_img, input.data());
+  auto preprocess_end = std::chrono::steady_clock::now();
+
+  // Inference.
+  auto input_names = this->predictor_->GetInputNames();
+  auto input_t = this->predictor_->GetInputHandle(input_names[0]);
+  input_t->Reshape({1, 3, resize_img.rows, resize_img.cols});
+  auto inference_start = std::chrono::steady_clock::now();
+  input_t->CopyFromCpu(input.data());
+  this->predictor_->Run();
+
+  std::vector<float> predict_batch;
+  auto output_names = this->predictor_->GetOutputNames();
+  auto output_t = this->predictor_->GetOutputHandle(output_names[0]);
+  auto predict_shape = output_t->shape();
+
+  int out_num = std::accumulate(predict_shape.begin(), predict_shape.end(), 1,
+                                std::multiplies<int>());
+  predict_batch.resize(out_num);
+
+  output_t->CopyToCpu(predict_batch.data());
+  auto inference_end = std::chrono::steady_clock::now();
+
+  // ctc decode
+  auto postprocess_start = std::chrono::steady_clock::now();
+  std::vector<std::string> str_res;
+  int argmax_idx;
+  int last_index = 0;
+  float score = 0.f;
+  int count = 0;
+  float max_value = 0.0f;
+
+  for (int n = 0; n < predict_shape[1]; n++) {
+    argmax_idx =
+        int(Utility::argmax(&predict_batch[n * predict_shape[2]],
+                            &predict_batch[(n + 1) * predict_shape[2]]));
+    max_value =
+        float(*std::max_element(&predict_batch[n * predict_shape[2]],
+                                &predict_batch[(n + 1) * predict_shape[2]]));
+
+    // Keep a step only if its best index is non-zero and it does not repeat
+    // the previous step's index.
+    if (argmax_idx > 0 && (!(n > 0 && argmax_idx == last_index))) {
+      score += max_value;
+      count += 1;
+      str_res.push_back(label_list_[argmax_idx]);
    }
-    score /= count;
-    for (int i = 0; i < str_res.size(); i++) {
-      std::cout << str_res[i];
-    }
-    std::cout << "\tscore: " << score << std::endl;
+    last_index = argmax_idx;
+  }
+  auto postprocess_end = std::chrono::steady_clock::now();
+  // Average the confidence over the kept characters. When nothing was decoded
+  // (count == 0) leave the score at 0 instead of computing 0/0, which would
+  // print NaN.
+  if (count > 0) {
+    score /= count;
+  }
+  for (int i = 0; i < str_res.size(); i++) {
+    std::cout << str_res[i];
  }
+  std::cout << "\tscore: " << score << std::endl;
+
+  // duration<float> counts seconds; multiply by 1000 to report milliseconds.
+  std::chrono::duration<float> preprocess_diff = preprocess_end - preprocess_start;
+  times->push_back(double(preprocess_diff.count() * 1000));
+  std::chrono::duration<float> inference_diff = inference_end - inference_start;
+  times->push_back(double(inference_diff.count() * 1000));
+  std::chrono::duration<float> postprocess_diff = postprocess_end - postprocess_start;
+  times->push_back(double(postprocess_diff.count() * 1000));
 }

 void CRNNRecognizer::LoadModel(const std::string &model_dir) {
@@ -101,10 +101,16 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) {
  if (this->use_gpu_) {
    config.EnableUseGpu(this->gpu_mem_, this->gpu_id_);
    if (this->use_tensorrt_) {
+      auto precision = paddle_infer::Config::Precision::kFloat32;
+      if (this->precision_ == "fp16") {
+        precision = paddle_infer::Config::Precision::kHalf;
+      }
+     if (this->precision_ == "int8") {
+        precision = paddle_infer::Config::Precision::kInt8;
+      } 
      config.EnableTensorRtEngine(
          1 << 20, 10, 3,
-          this->use_fp16_ ? paddle_infer::Config::Precision::kHalf
-                          : paddle_infer::Config::Precision::kFloat32,
+          precision,
          false, false);
      std::map<std::string, std::vector<int>> min_input_shape = {
          {"x", {1, 3, 32, 10}}};
@@ -138,59 +144,4 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) {
  this->predictor_ = CreatePredictor(config);
 }

-cv::Mat CRNNRecognizer::GetRotateCropImage(const cv::Mat &srcimage,
-                                           std::vector<std::vector<int>> box) {
-  cv::Mat image;
-  srcimage.copyTo(image);
-  std::vector<std::vector<int>> points = box;
-
-  int x_collect[4] = {box[0][0], box[1][0], box[2][0], box[3][0]};
-  int y_collect[4] = {box[0][1], box[1][1], box[2][1], box[3][1]};
-  int left = int(*std::min_element(x_collect, x_collect + 4));
-  int right = int(*std::max_element(x_collect, x_collect + 4));
-  int top = int(*std::min_element(y_collect, y_collect + 4));
-  int bottom = int(*std::max_element(y_collect, y_collect + 4));
-
-  cv::Mat img_crop;
-  image(cv::Rect(left, top, right - left, bottom - top)).copyTo(img_crop);
-
-  for (int i = 0; i < points.size(); i++) {
-    points[i][0] -= left;
-    points[i][1] -= top;
-  }
-
-  int img_crop_width = int(sqrt(pow(points[0][0] - points[1][0], 2) +
-                                pow(points[0][1] - points[1][1], 2)));
-  int img_crop_height = int(sqrt(pow(points[0][0] - points[3][0], 2) +
-                                 pow(points[0][1] - points[3][1], 2)));
-
-  cv::Point2f pts_std[4];
-  pts_std[0] = cv::Point2f(0., 0.);
-  pts_std[1] = cv::Point2f(img_crop_width, 0.);
-  pts_std[2] = cv::Point2f(img_crop_width, img_crop_height);
-  pts_std[3] = cv::Point2f(0.f, img_crop_height);
-
-  cv::Point2f pointsf[4];
-  pointsf[0] = cv::Point2f(points[0][0], points[0][1]);
-  pointsf[1] = cv::Point2f(points[1][0], points[1][1]);
-  pointsf[2] = cv::Point2f(points[2][0], points[2][1]);
-  pointsf[3] = cv::Point2f(points[3][0], points[3][1]);
-
-  cv::Mat M = cv::getPerspectiveTransform(pointsf, pts_std);
-
-  cv::Mat dst_img;
-  cv::warpPerspective(img_crop, dst_img, M,
-                      cv::Size(img_crop_width, img_crop_height),
-                      cv::BORDER_REPLICATE);
-
-  if (float(dst_img.rows) >= float(dst_img.cols) * 1.5) {
-    cv::Mat srcCopy = cv::Mat(dst_img.rows, dst_img.cols, dst_img.depth());
-    cv::transpose(dst_img, srcCopy);
-    cv::flip(srcCopy, srcCopy, 0);
-    return srcCopy;
-  } else {
-    return dst_img;
-  }
-}
-
 } // namespace PaddleOCR
--- a/deploy/cpp_infer/src/postprocess_op.cpp
+++ b/deploy/cpp_infer/src/postprocess_op.cpp
@@ -13,6 +13,7 @@
 // limitations under the License.

 #include <include/postprocess_op.h>
+#include <include/clipper.cpp>

 namespace PaddleOCR {


--- a/deploy/cpp_infer/src/utility.cpp
+++ b/deploy/cpp_infer/src/utility.cpp
@@ -92,4 +92,59 @@ void Utility::GetAllFiles(const char *dir_name,
  }
 }

+// Cuts the quadrilateral `box` out of `srcimage` and warps it onto an
+// axis-aligned rectangle.
+//
+// srcimage: source image (not modified).
+// box:      four [x, y] corner points. The edge box[0]-box[1] gives the
+//           output width and the edge box[0]-box[3] gives the output height.
+// Returns the rectified crop. If the result is at least 1.5 times taller than
+// it is wide, it is transposed and flipped (flip code 0) before being returned.
+cv::Mat Utility::GetRotateCropImage(const cv::Mat &srcimage,
+                            std::vector<std::vector<int>> box) {
+  std::vector<std::vector<int>> points = box;
+
+  int x_collect[4] = {box[0][0], box[1][0], box[2][0], box[3][0]};
+  int y_collect[4] = {box[0][1], box[1][1], box[2][1], box[3][1]};
+  int left = int(*std::min_element(x_collect, x_collect + 4));
+  int right = int(*std::max_element(x_collect, x_collect + 4));
+  int top = int(*std::min_element(y_collect, y_collect + 4));
+  int bottom = int(*std::max_element(y_collect, y_collect + 4));
+
+  // Copy only the bounding rectangle of the box. Copying the whole source
+  // image first (as before) was unnecessary: the ROI copy already yields an
+  // independent buffer.
+  cv::Mat img_crop;
+  srcimage(cv::Rect(left, top, right - left, bottom - top)).copyTo(img_crop);
+
+  // Express the corner points relative to the cropped region.
+  for (int i = 0; i < points.size(); i++) {
+    points[i][0] -= left;
+    points[i][1] -= top;
+  }
+
+  int img_crop_width = int(sqrt(pow(points[0][0] - points[1][0], 2) +
+                                pow(points[0][1] - points[1][1], 2)));
+  int img_crop_height = int(sqrt(pow(points[0][0] - points[3][0], 2) +
+                                 pow(points[0][1] - points[3][1], 2)));
+
+  // Destination corners, in the same order as the source corners.
+  cv::Point2f pts_std[4];
+  pts_std[0] = cv::Point2f(0., 0.);
+  pts_std[1] = cv::Point2f(img_crop_width, 0.);
+  pts_std[2] = cv::Point2f(img_crop_width, img_crop_height);
+  pts_std[3] = cv::Point2f(0.f, img_crop_height);
+
+  cv::Point2f pointsf[4];
+  pointsf[0] = cv::Point2f(points[0][0], points[0][1]);
+  pointsf[1] = cv::Point2f(points[1][0], points[1][1]);
+  pointsf[2] = cv::Point2f(points[2][0], points[2][1]);
+  pointsf[3] = cv::Point2f(points[3][0], points[3][1]);
+
+  cv::Mat M = cv::getPerspectiveTransform(pointsf, pts_std);
+
+  // warpPerspective takes the interpolation flag as its 5th argument and the
+  // border mode as its 6th. BORDER_REPLICATE used to be passed as the 5th
+  // argument, where it was read as an interpolation flag (same numeric value
+  // as INTER_LINEAR) and the border mode stayed at its default. Keep linear
+  // interpolation explicitly and pass the border mode in its own slot.
+  cv::Mat dst_img;
+  cv::warpPerspective(img_crop, dst_img, M,
+                      cv::Size(img_crop_width, img_crop_height),
+                      cv::INTER_LINEAR, cv::BORDER_REPLICATE);
+
+  if (float(dst_img.rows) >= float(dst_img.cols) * 1.5) {
+    cv::Mat srcCopy = cv::Mat(dst_img.rows, dst_img.cols, dst_img.depth());
+    cv::transpose(dst_img, srcCopy);
+    cv::flip(srcCopy, srcCopy, 0);
+    return srcCopy;
+  } else {
+    return dst_img;
+  }
+}
+
 } // namespace PaddleOCR
\ No newline at end of file
--- a/deploy/cpp_infer/tools/config.txt
+++ b/deploy/cpp_infer/tools/config.txt
-# model load config
-use_gpu 0
-gpu_id  0
-gpu_mem  4000
-cpu_math_library_num_threads  10
-use_mkldnn 0
-
-# det config
-max_side_len  960
-det_db_thresh  0.3
-det_db_box_thresh  0.5
-det_db_unclip_ratio  1.6
-use_polygon_score 1
-det_model_dir  ./inference/ch_ppocr_mobile_v2.0_det_infer/
-
-# cls config
-use_angle_cls 0
-cls_model_dir  ./inference/ch_ppocr_mobile_v2.0_cls_infer/
-cls_thresh  0.9
-
-# rec config
-rec_model_dir  ./inference/ch_ppocr_mobile_v2.0_rec_infer/
-char_list_file  ../../ppocr/utils/ppocr_keys_v1.txt
-
-# show the detection results
-visualize 0
-
-# use_tensorrt
-use_tensorrt 0
-use_fp16   0
-
--- a/deploy/cpp_infer/tools/run.sh
+++ b/deploy/cpp_infer/tools/run.sh
-
-./build/ocr_system ./tools/config.txt ../../doc/imgs/12.jpg
--- a/deploy/slim/prune/sensitivity_anal.py
+++ b/deploy/slim/prune/sensitivity_anal.py
@@ -75,7 +75,7 @@ def main(config, device, logger, vdl_writer):
    model = build_model(config['Architecture'])

    flops = paddle.flops(model, [1, 3, 640, 640])
-    logger.info(f"FLOPs before pruning: {flops}")
+    logger.info("FLOPs before pruning: {}".format(flops))

    from paddleslim.dygraph import FPGMFilterPruner
    model.train()
@@ -106,8 +106,8 @@ def main(config, device, logger, vdl_writer):

    def eval_fn():
        metric = program.eval(model, valid_dataloader, post_process_class,
-                              eval_class)
-        logger.info(f"metric['hmean']: {metric['hmean']}")
+                              eval_class, False)
+        logger.info("metric['hmean']: {}".format(metric['hmean']))
        return metric['hmean']

    params_sensitive = pruner.sensitive(
@@ -123,16 +123,17 @@ def main(config, device, logger, vdl_writer):
    # calculate pruned params's ratio
    params_sensitive = pruner._get_ratios_by_loss(params_sensitive, loss=0.02)
    for key in params_sensitive.keys():
-        logger.info(f"{key}, {params_sensitive[key]}")
+        logger.info("{}, {}".format(key, params_sensitive[key]))
+
+    #params_sensitive = {}
+    #for param in model.parameters():
+    #    if 'transpose' not in param.name and 'linear' not in param.name:
+    #        params_sensitive[param.name] = 0.1  

    plan = pruner.prune_vars(params_sensitive, [0])
-    for param in model.parameters():
-        if ("weights" in param.name and "conv" in param.name) or (
-                "w_0" in param.name and "conv2d" in param.name):
-            logger.info(f"{param.name}: {param.shape}")

    flops = paddle.flops(model, [1, 3, 640, 640])
-    logger.info(f"FLOPs after pruning: {flops}")
+    logger.info("FLOPs after pruning: {}".format(flops))

    # start train


--- a/doc/doc_ch/algorithm_overview.md
+++ b/doc/doc_ch/algorithm_overview.md
@@ -44,6 +44,7 @@ PaddleOCR基于动态图开源的文本识别算法列表：
 - [x]  STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html))[11]
 - [x]  RARE([paper](https://arxiv.org/abs/1603.03915v1))[12]
 - [x]  SRN([paper](https://arxiv.org/abs/2003.12294))[5]
+- [x]  NRTR([paper](https://arxiv.org/abs/1806.00926v2))

 参考[DTRB][3](https://arxiv.org/abs/1904.01906)文字识别训练和评估流程，使用MJSynth和SynthText两个文字识别数据集训练，在IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE数据集上进行评估，算法效果如下：

@@ -58,6 +59,7 @@ PaddleOCR基于动态图开源的文本识别算法列表：
 |RARE|MobileNetV3|82.5%|rec_mv3_tps_bilstm_att |[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_tps_bilstm_att_v2.0_train.tar)|
 |RARE|Resnet34_vd|83.6%|rec_r34_vd_tps_bilstm_att |[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_tps_bilstm_att_v2.0_train.tar)|
 |SRN|Resnet50_vd_fpn| 88.52% | rec_r50fpn_vd_none_srn | [下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r50_vd_srn_train.tar) |
+|NRTR|NRTR_MTB| 84.3% | rec_mtb_nrtr | [下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mtb_nrtr_train.tar) |


 PaddleOCR文本识别算法的训练和使用请参考文档教程中[模型训练/评估中的文本识别部分](./recognition.md)。
--- a/doc/doc_ch/recognition.md
+++ b/doc/doc_ch/recognition.md
@@ -185,11 +185,11 @@ python3 -m paddle.distributed.launch --gpus '0,1,2,3'  tools/train.py -c configs
 <a name="数据增强"></a>
 #### 2.1 数据增强

-PaddleOCR提供了多种数据增强方式，如果您希望在训练时加入扰动，请在配置文件中设置 `distort: true`。
+PaddleOCR提供了多种数据增强方式，默认配置文件中已经添加了数据增广。

-默认的扰动方式有：颜色空间转换(cvtColor)、模糊(blur)、抖动(jitter)、噪声(Gasuss noise)、随机切割(random crop)、透视(perspective)、颜色反转(reverse)。
+默认的扰动方式有：颜色空间转换(cvtColor)、模糊(blur)、抖动(jitter)、噪声(Gauss noise)、随机切割(random crop)、透视(perspective)、颜色反转(reverse)、TIA数据增广。

-训练过程中每种扰动方式以50%的概率被选择，具体代码实现请参考：[img_tools.py](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/ppocr/data/rec/img_tools.py)
+训练过程中每种扰动方式以40%的概率被选择，具体代码实现请参考：[rec_img_aug.py](../../ppocr/data/imaug/rec_img_aug.py)

 *由于OpenCV的兼容性问题，扰动操作暂时只支持Linux*

@@ -215,6 +215,7 @@ PaddleOCR支持训练和评估交替进行, 可以在 `configs/rec/rec_icdar15_t
 | rec_mv3_tps_bilstm_att.yml |  CRNN |   Mobilenet_v3 |  TPS   |  BiLSTM |  att  |
 | rec_r34_vd_tps_bilstm_att.yml |  CRNN |   Resnet34_vd |  TPS   |  BiLSTM |  att  |
 | rec_r50fpn_vd_none_srn.yml    | SRN | Resnet50_fpn_vd    | None    | rnn | srn |
+| rec_mtb_nrtr.yml    | NRTR | nrtr_mtb    | None    | transformer encoder | transformer decoder |

 训练中文数据，推荐使用[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)，如您希望尝试其他算法在中文数据集上的效果，请参考下列说明修改配置文件：


--- a/doc/doc_en/algorithm_overview_en.md
+++ b/doc/doc_en/algorithm_overview_en.md
@@ -46,6 +46,7 @@ PaddleOCR open-source text recognition algorithms list:
 - [x]  STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html))[11]
 - [x]  RARE([paper](https://arxiv.org/abs/1603.03915v1))[12]
 - [x]  SRN([paper](https://arxiv.org/abs/2003.12294))[5]
+- [x]  NRTR([paper](https://arxiv.org/abs/1806.00926v2))

 Refer to [DTRB](https://arxiv.org/abs/1904.01906), the training and evaluation result of these above text recognition (using MJSynth and SynthText for training, evaluate on IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE) is as follow:

@@ -60,5 +61,6 @@ Refer to [DTRB](https://arxiv.org/abs/1904.01906), the training and evaluation r
 |RARE|MobileNetV3|82.5%|rec_mv3_tps_bilstm_att |[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_tps_bilstm_att_v2.0_train.tar)|
 |RARE|Resnet34_vd|83.6%|rec_r34_vd_tps_bilstm_att |[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_tps_bilstm_att_v2.0_train.tar)|
 |SRN|Resnet50_vd_fpn| 88.52% | rec_r50fpn_vd_none_srn |[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r50_vd_srn_train.tar)|
+|NRTR|NRTR_MTB| 84.3% | rec_mtb_nrtr | [Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mtb_nrtr_train.tar) |

 Please refer to the document for training guide and use of PaddleOCR text recognition algorithms [Text recognition model training/evaluation/prediction](./recognition_en.md)
--- a/doc/doc_en/recognition_en.md
+++ b/doc/doc_en/recognition_en.md
@@ -177,11 +177,11 @@ python3 -m paddle.distributed.launch --gpus '0,1,2,3'  tools/train.py -c configs
 <a name="Data_Augmentation"></a>
 #### 2.1 Data Augmentation

-PaddleOCR provides a variety of data augmentation methods. If you want to add disturbance during training, please set `distort: true` in the configuration file.
+PaddleOCR provides a variety of data augmentation methods. All the augmentation methods are enabled by default.

-The default perturbation methods are: cvtColor, blur, jitter, Gasuss noise, random crop, perspective, color reverse.
+The default perturbation methods are: cvtColor, blur, jitter, Gaussian noise, random crop, perspective, color reverse, TIA augmentation.

-Each disturbance method is selected with a 50% probability during the training process. For specific code implementation, please refer to: [img_tools.py](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/ppocr/data/rec/img_tools.py)
+Each disturbance method is selected with a 40% probability during the training process. For specific code implementation, please refer to: [rec_img_aug.py](../../ppocr/data/imaug/rec_img_aug.py)

 <a name="Training"></a>
 #### 2.2 Training
@@ -207,7 +207,7 @@ If the evaluation set is large, the test will be time-consuming. It is recommend
 | rec_mv3_tps_bilstm_att.yml |  CRNN |   Mobilenet_v3 |  TPS   |  BiLSTM |  att  |
 | rec_r34_vd_tps_bilstm_att.yml |  CRNN |   Resnet34_vd |  TPS   |  BiLSTM |  att  |
 | rec_r50fpn_vd_none_srn.yml    | SRN | Resnet50_fpn_vd    | None    | rnn | srn |
-
+| rec_mtb_nrtr.yml    | NRTR | nrtr_mtb    | None    | transformer encoder | transformer decoder |

 For training Chinese data, it is recommended to use
 [rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml). If you want to try the result of other algorithms on the Chinese data set, please refer to the following instructions to modify the configuration file:

--- a/doc/joinus.PNG
+++ b/doc/joinus.PNG
--- a/doc/table/1.png
+++ b/doc/table/1.png
--- a/doc/table/table.jpg
+++ b/doc/table/table.jpg
--- a/paddleocr.py
+++ b/paddleocr.py
@@ -127,7 +127,7 @@ model_urls = {
 }

 SUPPORT_DET_MODEL = ['DB']
-VERSION = '2.2'
+VERSION = '2.2.0.1'
 SUPPORT_REC_MODEL = ['CRNN']
 BASE_DIR = os.path.expanduser("~/.paddleocr/")


--- a/ppocr/data/__init__.py
+++ b/ppocr/data/__init__.py
@@ -49,14 +49,12 @@ def term_mp(sig_num, frame):
    os.killpg(pgid, signal.SIGKILL)


-signal.signal(signal.SIGINT, term_mp)
-signal.signal(signal.SIGTERM, term_mp)
-
-
 def build_dataloader(config, mode, device, logger, seed=None):
    config = copy.deepcopy(config)

-    support_dict = ['SimpleDataSet', 'LMDBDataSet', 'PGDataSet', 'PubTabDataSet']
+    support_dict = [
+        'SimpleDataSet', 'LMDBDataSet', 'PGDataSet', 'PubTabDataSet'
+    ]
    module_name = config[mode]['dataset']['name']
    assert module_name in support_dict, Exception(
        'DataSet only support {}'.format(support_dict))
@@ -96,4 +94,8 @@ def build_dataloader(config, mode, device, logger, seed=None):
        return_list=True,
        use_shared_memory=use_shared_memory)

+    # support exit using ctrl+c
+    signal.signal(signal.SIGINT, term_mp)
+    signal.signal(signal.SIGTERM, term_mp)
+
    return data_loader
--- a/ppocr/data/imaug/__init__.py
+++ b/ppocr/data/imaug/__init__.py
@@ -21,7 +21,7 @@ from .make_border_map import MakeBorderMap
 from .make_shrink_map import MakeShrinkMap
 from .random_crop_data import EastRandomCropData, PSERandomCrop

-from .rec_img_aug import RecAug, RecResizeImg, ClsResizeImg, SRNRecResizeImg
+from .rec_img_aug import RecAug, RecResizeImg, ClsResizeImg, SRNRecResizeImg, NRTRRecResizeImg
 from .randaugment import RandAugment
 from .copy_paste import CopyPaste
 from .operators import *

--- a/ppocr/data/imaug/label_ops.py
+++ b/ppocr/data/imaug/label_ops.py
@@ -161,6 +161,34 @@ class BaseRecLabelEncode(object):
        return text_list


+class NRTRLabelEncode(BaseRecLabelEncode):
+    """Encode text labels as padded index sequences for NRTR.
+
+    ``add_special_char`` prepends four entries to the character list:
+    'blank', '<unk>', '<s>' and '</s>'. Presumably the base class builds its
+    index table from that list, which makes 0 the padding index, 2 the start
+    token and 3 the end token used in ``__call__`` -- confirm against
+    ``BaseRecLabelEncode``.
+    """
+
+    def __init__(self,
+                 max_text_length,
+                 character_dict_path=None,
+                 character_type='EN_symbol',
+                 use_space_char=False,
+                 **kwargs):
+
+        super(NRTRLabelEncode,
+              self).__init__(max_text_length, character_dict_path,
+                             character_type, use_space_char)
+
+    def __call__(self, data):
+        """Encode ``data['label']`` and record its length.
+
+        Args:
+            data: dict holding the raw text under ``'label'``.
+
+        Returns:
+            The same dict with ``'length'`` set to the number of encoded
+            characters (start/end tokens not counted) and ``'label'`` set to
+            the indices wrapped in 2 ... 3 and zero-padded to
+            ``max_text_len``; or None when the text cannot be encoded or does
+            not fit together with the two extra tokens.
+        """
+        text = data['label']
+        text = self.encode(text)
+        if text is None:
+            return None
+        # The start and end tokens need two extra slots. Without this check
+        # the pad count below goes negative and the label ends up longer than
+        # max_text_len, so labels no longer share one fixed length.
+        if len(text) > self.max_text_len - 2:
+            return None
+        data['length'] = np.array(len(text))
+        text.insert(0, 2)
+        text.append(3)
+        text = text + [0] * (self.max_text_len - len(text))
+        data['label'] = np.array(text)
+        return data
+
+    def add_special_char(self, dict_character):
+        """Return the character list prefixed with the four special tokens."""
+        dict_character = ['blank','<unk>','<s>','</s>'] + dict_character
+        return dict_character
+
 class CTCLabelEncode(BaseRecLabelEncode):
    """ Convert between text-label and text-index """


--- a/ppocr/data/imaug/operators.py
+++ b/ppocr/data/imaug/operators.py
@@ -57,6 +57,38 @@ class DecodeImage(object):
        return data


+class NRTRDecodeImage(object):
+    """Decode encoded image bytes into a single-channel grayscale array."""
+
+    def __init__(self, img_mode='RGB', channel_first=False, **kwargs):
+        self.img_mode = img_mode
+        self.channel_first = channel_first
+
+    def __call__(self, data):
+        """Decode ``data['image']`` and store the grayscale result back.
+
+        Args:
+            data: dict whose ``'image'`` entry holds the encoded image as
+                ``bytes`` (``str`` on Python 2).
+
+        Returns:
+            The same dict with ``'image'`` replaced by a 2-D grayscale array
+            (with a leading channel axis added when ``channel_first`` is
+            set), or None when OpenCV cannot decode the buffer.
+        """
+        img = data['image']
+        if six.PY2:
+            assert type(img) is str and len(
+                img) > 0, "invalid input 'img' in NRTRDecodeImage"
+        else:
+            assert type(img) is bytes and len(
+                img) > 0, "invalid input 'img' in NRTRDecodeImage"
+        img = np.frombuffer(img, dtype='uint8')
+
+        # Flag 1 (IMREAD_COLOR) always produces a 3-channel BGR image.
+        img = cv2.imdecode(img, 1)
+
+        if img is None:
+            return None
+        # 'GRAY' needs no extra step here: the decoded image already has
+        # three channels, and a GRAY2BGR conversion of it would raise.
+        if self.img_mode == 'RGB':
+            # One-element tuple so the shape is formatted as a whole.
+            assert img.shape[2] == 3, 'invalid shape of image[%s]' % (
+                img.shape, )
+            img = img[:, :, ::-1]
+        # NOTE(review): in 'RGB' mode the channel-reversed array is still
+        # converted with COLOR_BGR2GRAY; kept unchanged so existing outputs
+        # stay the same -- confirm this is intended.
+        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        if self.channel_first:
+            # The grayscale image is 2-D, so add a channel axis in front
+            # instead of transposing three axes that do not exist.
+            img = img[np.newaxis, :, :]
+        data['image'] = img
+        return data
+
 class NormalizeImage(object):
    """ normalize image such as substract mean, divide std
    """

--- a/ppocr/data/imaug/rec_img_aug.py
+++ b/ppocr/data/imaug/rec_img_aug.py
@@ -16,7 +16,7 @@ import math
 import cv2
 import numpy as np
 import random
-
+from PIL import Image
 from .text_image_aug import tia_perspective, tia_stretch, tia_distort


@@ -43,6 +43,25 @@ class ClsResizeImg(object):
        return data


+class NRTRRecResizeImg(object):
+    """Resize an image for NRTR and rescale its values with x / 128 - 1.
+
+    Args:
+        image_shape: target size handed to the resize backend; both PIL and
+            OpenCV read it as (width, height).
+        resize_type: 'PIL' or 'OpenCV'; any other value skips the resize.
+    """
+
+    def __init__(self, image_shape, resize_type, **kwargs):
+        self.image_shape = image_shape
+        self.resize_type = resize_type
+
+    def __call__(self, data):
+        """Resize ``data['image']`` and store it as a float32 array.
+
+        A trailing axis is appended and moved to the front, so a 2-D input
+        of shape (H, W) is stored as (1, H, W).
+        """
+        img = data['image']
+        # Config files usually deliver a list; both backends want a tuple.
+        target_size = tuple(self.image_shape)
+        if self.resize_type == 'PIL':
+            image_pil = Image.fromarray(np.uint8(img))
+            # LANCZOS is the filter the old ANTIALIAS alias pointed to; the
+            # alias itself no longer exists in recent Pillow releases.
+            img = image_pil.resize(target_size, Image.LANCZOS)
+            img = np.array(img)
+        elif self.resize_type == 'OpenCV':
+            img = cv2.resize(img, target_size)
+        norm_img = np.expand_dims(img, -1)
+        norm_img = norm_img.transpose((2, 0, 1))
+        # Maps the 0..255 range onto [-1, 1).
+        data['image'] = norm_img.astype(np.float32) / 128. - 1.
+        return data
+
+
 class RecResizeImg(object):
    def __init__(self,
                 image_shape,

--- a/ppocr/losses/__init__.py
+++ b/ppocr/losses/__init__.py
@@ -25,7 +25,7 @@ from .det_sast_loss import SASTLoss
 from .rec_ctc_loss import CTCLoss
 from .rec_att_loss import AttentionLoss
 from .rec_srn_loss import SRNLoss
-
+from .rec_nrtr_loss import NRTRLoss
 # cls loss
 from .cls_loss import ClsLoss

@@ -44,8 +44,9 @@ from .table_att_loss import TableAttentionLoss
 def build_loss(config):
    support_dict = [
        'DBLoss', 'EASTLoss', 'SASTLoss', 'CTCLoss', 'ClsLoss', 'AttentionLoss',
-        'SRNLoss', 'PGLoss', 'CombinedLoss', 'TableAttentionLoss'
+        'SRNLoss', 'PGLoss', 'CombinedLoss', 'NRTRLoss', 'TableAttentionLoss'
    ]
+
    config = copy.deepcopy(config)
    module_name = config.pop('name')
    assert module_name in support_dict, Exception('loss only support {}'.format(

--- a/ppocr/losses/cls_loss.py
+++ b/ppocr/losses/cls_loss.py
@@ -25,6 +25,6 @@ class ClsLoss(nn.Layer):
        self.loss_func = nn.CrossEntropyLoss(reduction='mean')

    def forward(self, predicts, batch):
-        label = batch[1]
+        label = batch[1].astype("int64")
        loss = self.loss_func(input=predicts, label=label)
        return {'loss': loss}
--- a/ppocr/losses/rec_nrtr_loss.py
+++ b/ppocr/losses/rec_nrtr_loss.py
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+
+
+class NRTRLoss(nn.Layer):
+    """Cross-entropy loss for NRTR with optional label smoothing.
+
+    Target index 0 is excluded from the loss in both modes: through
+    ``ignore_index=0`` without smoothing and through an explicit mask with
+    smoothing.
+    """
+
+    def __init__(self, smoothing=True, **kwargs):
+        super(NRTRLoss, self).__init__()
+        self.loss_func = nn.CrossEntropyLoss(reduction='mean', ignore_index=0)
+        self.smoothing = smoothing
+
+    def forward(self, pred, batch):
+        """Return ``{'loss': ...}`` for one batch.
+
+        Args:
+            pred: 3-D tensor of scores whose last axis is the class axis.
+            batch: sequence where ``batch[1]`` is sliced as the label matrix
+                and ``batch[2]`` supplies the maximum length (presumably the
+                per-sample text lengths -- confirm against the data loader).
+        """
+        num_classes = pred.shape[2]
+        logits = pred.reshape([-1, num_classes])
+        longest = batch[2].max()
+        # Skip column 0 and keep longest + 1 columns, then flatten.
+        target = batch[1][:, 1:2 + longest].reshape([-1])
+        if not self.smoothing:
+            return {'loss': self.loss_func(logits, target)}
+
+        eps = 0.1
+        smoothed = F.one_hot(target, num_classes)
+        # The true class gets 1 - eps; eps is spread over the other classes.
+        smoothed = smoothed * (1 - eps) + (1 - smoothed) * eps / (
+            num_classes - 1)
+        log_prob = F.log_softmax(logits, axis=1)
+        keep = paddle.not_equal(
+            target, paddle.zeros(
+                target.shape, dtype='int64'))
+        per_token = -(smoothed * log_prob).sum(axis=1)
+        return {'loss': per_token.masked_select(keep).mean()}
--- a/ppocr/metrics/rec_metric.py
+++ b/ppocr/metrics/rec_metric.py
@@ -57,3 +57,4 @@ class RecMetric(object):
        self.correct_num = 0
        self.all_num = 0
        self.norm_edit_dis = 0
+        
--- a/ppocr/modeling/architectures/base_model.py
+++ b/ppocr/modeling/architectures/base_model.py
@@ -14,7 +14,6 @@
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
-
 from paddle import nn
 from ppocr.modeling.transforms import build_transform
 from ppocr.modeling.backbones import build_backbone

--- a/ppocr/modeling/backbones/__init__.py
+++ b/ppocr/modeling/backbones/__init__.py
@@ -26,8 +26,9 @@ def build_backbone(config, model_type):
        from .rec_resnet_vd import ResNet
        from .rec_resnet_fpn import ResNetFPN
        from .rec_mv1_enhance import MobileNetV1Enhance
+        from .rec_nrtr_mtb import MTB
        support_dict = [
-            "MobileNetV1Enhance", "MobileNetV3", "ResNet", "ResNetFPN"
+            'MobileNetV1Enhance', 'MobileNetV3', 'ResNet', 'ResNetFPN', 'MTB'
        ]
    elif model_type == "e2e":
        from .e2e_resnet_vd_pg import ResNet

--- a/ppocr/modeling/backbones/rec_nrtr_mtb.py
+++ b/ppocr/modeling/backbones/rec_nrtr_mtb.py
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle import nn
+
+
+class MTB(nn.Layer):
+    """Convolutional stem that turns an image batch into a feature sequence.
+
+    Args:
+        cnn_num: number of conv/relu/bn groups; layers are only built, and
+            the output only reshaped, when this equals 2.
+        in_channels: channel count of the input images.
+    """
+
+    def __init__(self, cnn_num, in_channels):
+        super(MTB, self).__init__()
+        self.block = nn.Sequential()
+        # NOTE(review): out_channels mirrors the input channel count, not
+        # the width of the last conv -- confirm what consumers expect.
+        self.out_channels = in_channels
+        self.cnn_num = cnn_num
+        if self.cnn_num != 2:
+            return
+        prev_channels = in_channels
+        for idx in range(self.cnn_num):
+            # Widths double per stage: 32, 64, ...
+            width = 32 * (2**idx)
+            conv = nn.Conv2D(
+                in_channels=prev_channels,
+                out_channels=width,
+                kernel_size=3,
+                stride=2,
+                padding=1)
+            self.block.add_sublayer('conv_{}'.format(idx), conv)
+            self.block.add_sublayer('relu_{}'.format(idx), nn.ReLU())
+            self.block.add_sublayer('bn_{}'.format(idx),
+                                    nn.BatchNorm2D(width))
+            prev_channels = width
+
+    def forward(self, images):
+        """Run the stem; for ``cnn_num == 2`` merge the last two axes."""
+        feats = self.block(images)
+        if self.cnn_num != 2:
+            return feats
+        # Swap axes 1 and 3: (b, c, h, w) -> (b, w, h, c), assuming the
+        # usual channel-first conv output.
+        feats = feats.transpose([0, 3, 2, 1])
+        dims = feats.shape
+        return feats.reshape([dims[0], dims[1], dims[2] * dims[3]])
--- a/ppocr/modeling/heads/__init__.py
+++ b/ppocr/modeling/heads/__init__.py
@@ -26,12 +26,14 @@ def build_head(config):
    from .rec_ctc_head import CTCHead
    from .rec_att_head import AttentionHead
    from .rec_srn_head import SRNHead
+    from .rec_nrtr_head import Transformer

    # cls head
    from .cls_head import ClsHead
    support_dict = [
        'DBHead', 'EASTHead', 'SASTHead', 'CTCHead', 'ClsHead', 'AttentionHead',
-        'SRNHead', 'PGHead', 'TableAttentionHead']
+        'SRNHead', 'PGHead', 'Transformer', 'TableAttentionHead'
+    ]

    #table head
    from .table_att_head import TableAttentionHead

--- a/ppocr/modeling/heads/multiheadAttention.py
+++ b/ppocr/modeling/heads/multiheadAttention.py
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+from paddle.nn import Linear
+from paddle.nn.initializer import XavierUniform as xavier_uniform_
+from paddle.nn.initializer import Constant as constant_
+from paddle.nn.initializer import XavierNormal as xavier_normal_
+
+zeros_ = constant_(value=0.)
+ones_ = constant_(value=1.)
+
+
+class MultiheadAttention(nn.Layer):
+    r"""Allows the model to jointly attend to information
+    from different representation subspaces.
+    See reference: Attention Is All You Need
+
+    .. math::
+        \text{MultiHead}(Q, K, V) = \text{Concat}(head_1,\dots,head_h)W^O
+        \text{where} head_i = \text{Attention}(QW_i^Q, KW_i^K, VW_i^V)
+
+    The query/key/value input projections are 1x1 convolutions
+    (``conv1``/``conv2``/``conv3``); the output projection is a ``Linear``
+    layer.
+
+    Args:
+        embed_dim: total dimension of the model
+        num_heads: parallel attention layers, or heads
+        dropout: dropout probability applied to the attention weights
+        bias: passed as ``bias_attr`` of the output projection
+        add_bias_kv: accepted but not used by this implementation
+        add_zero_attn: accepted but not used by this implementation
+
+    """
+
+    def __init__(self,
+                 embed_dim,
+                 num_heads,
+                 dropout=0.,
+                 bias=True,
+                 add_bias_kv=False,
+                 add_zero_attn=False):
+        super(MultiheadAttention, self).__init__()
+        self.embed_dim = embed_dim
+        self.num_heads = num_heads
+        self.dropout = dropout
+        self.head_dim = embed_dim // num_heads
+        assert self.head_dim * num_heads == self.embed_dim, "embed_dim must be divisible by num_heads"
+        self.scaling = self.head_dim**-0.5
+        self.out_proj = Linear(embed_dim, embed_dim, bias_attr=bias)
+        self._reset_parameters()
+        self.conv1 = paddle.nn.Conv2D(
+            in_channels=embed_dim, out_channels=embed_dim, kernel_size=(1, 1))
+        self.conv2 = paddle.nn.Conv2D(
+            in_channels=embed_dim, out_channels=embed_dim, kernel_size=(1, 1))
+        self.conv3 = paddle.nn.Conv2D(
+            in_channels=embed_dim, out_channels=embed_dim, kernel_size=(1, 1))
+
+    def _reset_parameters(self):
+        # NOTE(review): xavier_uniform_ is the XavierUniform initializer
+        # class, so this call only builds an initializer object and drops
+        # it; it looks like the weight is never touched -- confirm. Left
+        # unchanged so parameter initialization stays exactly as before.
+        xavier_uniform_(self.out_proj.weight)
+
+    def forward(self,
+                query,
+                key,
+                value,
+                key_padding_mask=None,
+                incremental_state=None,
+                need_weights=True,
+                static_kv=False,
+                attn_mask=None):
+        """
+        Inputs of forward function
+            query: [target length, batch size, embed dim]
+            key: [sequence length, batch size, embed dim]
+            value: [sequence length, batch size, embed dim]
+            key_padding_mask: [batch size, sequence length]; non-zero
+                entries mark key positions that receive -inf before softmax
+            incremental_state: accepted but not used by this implementation
+            need_weights: output attn_output_weights
+            static_kv: accepted but not used by this implementation
+            attn_mask: added to the attention scores before softmax
+
+        Outputs of forward function
+            attn_output: [target length, batch size, embed dim]
+            attn_output_weights: [batch size, target length, sequence length],
+                averaged over heads, or None when need_weights is False
+        """
+        tgt_len, bsz, embed_dim = query.shape
+        assert embed_dim == self.embed_dim
+        assert list(query.shape) == [tgt_len, bsz, embed_dim]
+        assert key.shape == value.shape
+
+        q = self._in_proj_q(query)
+        k = self._in_proj_k(key)
+        v = self._in_proj_v(value)
+        q *= self.scaling
+
+        # Fold the heads into the batch axis: [bsz * heads, length, head_dim].
+        q = q.reshape([tgt_len, bsz * self.num_heads, self.head_dim]).transpose(
+            [1, 0, 2])
+        k = k.reshape([-1, bsz * self.num_heads, self.head_dim]).transpose(
+            [1, 0, 2])
+        v = v.reshape([-1, bsz * self.num_heads, self.head_dim]).transpose(
+            [1, 0, 2])
+
+        src_len = k.shape[1]
+
+        if key_padding_mask is not None:
+            assert key_padding_mask.shape[0] == bsz
+            assert key_padding_mask.shape[1] == src_len
+
+        attn_output_weights = paddle.bmm(q, k.transpose([0, 2, 1]))
+        assert list(attn_output_weights.
+                    shape) == [bsz * self.num_heads, tgt_len, src_len]
+
+        if attn_mask is not None:
+            attn_mask = attn_mask.unsqueeze(0)
+            attn_output_weights += attn_mask
+        if key_padding_mask is not None:
+            attn_output_weights = attn_output_weights.reshape(
+                [bsz, self.num_heads, tgt_len, src_len])
+            # Separate local name so the `key` argument is not overwritten.
+            pad_mask = key_padding_mask.unsqueeze(1).unsqueeze(2).astype(
+                'float32')
+            neg_inf = paddle.full(
+                shape=pad_mask.shape,
+                dtype='float32',
+                fill_value=float('-inf'))
+            # 0 where the mask is 0, -inf everywhere else.
+            pad_bias = paddle.where(pad_mask == 0., pad_mask, neg_inf)
+            attn_output_weights += pad_bias
+            attn_output_weights = attn_output_weights.reshape(
+                [bsz * self.num_heads, tgt_len, src_len])
+
+        attn_output_weights = F.softmax(
+            attn_output_weights.astype('float32'),
+            axis=-1,
+            dtype=paddle.float32 if attn_output_weights.dtype == paddle.float16
+            else attn_output_weights.dtype)
+        attn_output_weights = F.dropout(
+            attn_output_weights, p=self.dropout, training=self.training)
+
+        attn_output = paddle.bmm(attn_output_weights, v)
+        assert list(attn_output.
+                    shape) == [bsz * self.num_heads, tgt_len, self.head_dim]
+        attn_output = attn_output.transpose([1, 0, 2]).reshape(
+            [tgt_len, bsz, embed_dim])
+        attn_output = self.out_proj(attn_output)
+
+        if need_weights:
+            # average attention weights over heads
+            attn_output_weights = attn_output_weights.reshape(
+                [bsz, self.num_heads, tgt_len, src_len])
+            attn_output_weights = attn_output_weights.sum(
+                axis=1) / self.num_heads
+        else:
+            attn_output_weights = None
+        return attn_output, attn_output_weights
+
+    @staticmethod
+    def _project(x, conv):
+        """Apply a 1x1 conv to a [length, batch, embed dim] tensor.
+
+        The tensor is rearranged to [batch, embed dim, 1, length] for the
+        convolution and returned in its original axis order.
+        """
+        x = x.transpose([1, 2, 0])
+        x = paddle.unsqueeze(x, axis=2)
+        res = conv(x)
+        res = paddle.squeeze(res, axis=2)
+        return res.transpose([2, 0, 1])
+
+    def _in_proj_q(self, query):
+        """Project the query with ``conv1``."""
+        return self._project(query, self.conv1)
+
+    def _in_proj_k(self, key):
+        """Project the key with ``conv2``."""
+        return self._project(key, self.conv2)
+
+    def _in_proj_v(self, value):
+        """Project the value with ``conv3``."""
+        return self._project(value, self.conv3)
--- a/ppocr/modeling/heads/rec_nrtr_head.py
+++ b/ppocr/modeling/heads/rec_nrtr_head.py
--- a/ppocr/postprocess/__init__.py
+++ b/ppocr/postprocess/__init__.py
@@ -24,18 +24,16 @@ __all__ = ['build_post_process']
 from .db_postprocess import DBPostProcess, DistillationDBPostProcess
 from .east_postprocess import EASTPostProcess
 from .sast_postprocess import SASTPostProcess
-from .rec_postprocess import CTCLabelDecode, AttnLabelDecode, SRNLabelDecode, DistillationCTCLabelDecode, \
+from .rec_postprocess import CTCLabelDecode, AttnLabelDecode, SRNLabelDecode, DistillationCTCLabelDecode, NRTRLabelDecode, \
    TableLabelDecode
 from .cls_postprocess import ClsPostProcess
 from .pg_postprocess import PGPostProcess

-
 def build_post_process(config, global_config=None):
    support_dict = [
        'DBPostProcess', 'EASTPostProcess', 'SASTPostProcess', 'CTCLabelDecode',
        'AttnLabelDecode', 'ClsPostProcess', 'SRNLabelDecode', 'PGPostProcess',
-        'DistillationCTCLabelDecode', 'TableLabelDecode',
-        'DistillationDBPostProcess'
+        'DistillationCTCLabelDecode', 'NRTRLabelDecode', 'TableLabelDecode', 'DistillationDBPostProcess'
    ]

    config = copy.deepcopy(config)

--- a/ppocr/postprocess/rec_postprocess.py
+++ b/ppocr/postprocess/rec_postprocess.py
@@ -156,6 +156,69 @@ class DistillationCTCLabelDecode(CTCLabelDecode):
        return output


+class NRTRLabelDecode(BaseRecLabelDecode):
+    """ Convert between text-label and text-index """
+
+    def __init__(self,
+                 character_dict_path=None,
+                 character_type='EN_symbol',
+                 use_space_char=True,
+                 **kwargs):
+        super(NRTRLabelDecode, self).__init__(character_dict_path,
+                                             character_type, use_space_char)
+
+    def __call__(self, preds, label=None, *args, **kwargs):
+        """Decode predictions, and labels when given, into text.
+
+        Args:
+            preds: int64 input is used directly as index sequences; any
+                other dtype is reduced with argmax/max over axis 2 to get
+                indices and their scores.
+            label: optional label indices; column 0 is dropped before
+                decoding.
+
+        Returns:
+            The decoded predictions when ``label`` is None, otherwise a
+            ``(predictions, labels)`` pair. Each decoded item is a
+            ``(text, confidence)`` tuple.
+        """
+        if preds.dtype == paddle.int64:
+            if isinstance(preds, paddle.Tensor):
+                preds = preds.numpy()
+            # Drop the leading column when the first sample starts with
+            # index 2 (the '<s>' entry added by add_special_char).
+            preds_idx = preds[:, 1:] if preds[0][0] == 2 else preds
+            text = self.decode(preds_idx)
+        else:
+            if isinstance(preds, paddle.Tensor):
+                preds = preds.numpy()
+            preds_idx = preds.argmax(axis=2)
+            preds_prob = preds.max(axis=2)
+            text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False)
+        if label is None:
+            return text
+        label = self.decode(label[:, 1:])
+        return text, label
+
+    def add_special_char(self, dict_character):
+        """Return the character list prefixed with the four special tokens."""
+        dict_character = ['blank','<unk>','<s>','</s>'] + dict_character
+        return dict_character
+
+    def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
+        """ convert text-index into text-label.
+
+        Decoding of a sequence stops at the first index 3 (end token).
+        Indices that cannot be looked up are skipped. The text is
+        lower-cased and paired with the mean confidence, which is 1 per
+        character when ``text_prob`` is None and 0 for an empty result.
+        ``is_remove_duplicate`` is accepted but not used here.
+        """
+        result_list = []
+        batch_size = len(text_index)
+        for batch_idx in range(batch_size):
+            char_list = []
+            conf_list = []
+            row = text_index[batch_idx]
+            for idx in range(len(row)):
+                if row[idx] == 3:  # end
+                    break
+                try:
+                    char_list.append(self.character[int(row[idx])])
+                except (IndexError, KeyError, TypeError, ValueError):
+                    # Skip indices that have no entry in the table.
+                    continue
+                if text_prob is not None:
+                    conf_list.append(text_prob[batch_idx][idx])
+                else:
+                    conf_list.append(1)
+            text = ''.join(char_list)
+            # np.mean of an empty list is nan and emits a RuntimeWarning.
+            result_list.append((text.lower(), np.mean(conf_list or [0])))
+        return result_list
+
+
+
 class AttnLabelDecode(BaseRecLabelDecode):
    """ Convert between text-label and text-index """

@@ -193,8 +256,7 @@ class AttnLabelDecode(BaseRecLabelDecode):
                    if idx > 0 and text_index[batch_idx][idx - 1] == text_index[
                            batch_idx][idx]:
                        continue
-                char_list.append(self.character[int(text_index[batch_idx][
-                    idx])])
+                char_list.append(self.character[int(text_index[batch_idx][idx])])
                if text_prob is not None:
                    conf_list.append(text_prob[batch_idx][idx])
                else:

--- a/ppstructure/README.md
+++ b/ppstructure/README.md
@@ -30,13 +30,13 @@ python3 -m pip install paddlepaddle-gpu==2.1.1 -i https://mirror.baidu.com/pypi/
 # CPU
 python3 -m pip install paddlepaddle==2.1.1 -i https://mirror.baidu.com/pypi/simple

-# For more，refer[Installation](https://www.paddlepaddle.org.cn/install/quick)。
 ```
+For more, refer to [Installation](https://www.paddlepaddle.org.cn/install/quick).

 - **(2) Install Layout-Parser**

 ```bash
-pip3 install -U premailer paddleocr https://paddleocr.bj.bcebos.com/whl/layoutparser-0.0.0-py3-none-any.whl
+pip3 install -U https://paddleocr.bj.bcebos.com/whl/layoutparser-0.0.0-py3-none-any.whl
 ```

 ### 2.2 Install PaddleOCR（including PP-OCR and PP-Structure）
@@ -180,10 +180,10 @@ OCR and table recognition model

 |model name|description|model size|download|
 | --- | --- | --- | --- |
-|ch_ppocr_mobile_slim_v2.0_det|Slim pruned lightweight model, supporting Chinese, English, multilingual text detection|2.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_det_prune_infer.tar) |
-|ch_ppocr_mobile_slim_v2.0_rec|Slim pruned and quantized lightweight model, supporting Chinese, English and number recognition|6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_slim_infer.tar) |
-|en_ppocr_mobile_v2.0_table_det|Text detection of English table scenes trained on PubLayNet dataset|4.7M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar) |
-|en_ppocr_mobile_v2.0_table_rec|Text recognition of English table scene trained on PubLayNet dataset|6.9M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar) |
-|en_ppocr_mobile_v2.0_table_structure|Table structure prediction of English table scene trained on PubLayNet dataset|18.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar) |
+|ch_ppocr_mobile_slim_v2.0_det|Slim pruned lightweight model, supporting Chinese, English, multilingual text detection|2.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_det_prune_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_det_prune_infer.tar) |
+|ch_ppocr_mobile_slim_v2.0_rec|Slim pruned and quantized lightweight model, supporting Chinese, English and number recognition|6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_slim_train.tar) |
+|en_ppocr_mobile_v2.0_table_det|Text detection of English table scenes trained on PubLayNet dataset|4.7M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_det_train.tar) |
+|en_ppocr_mobile_v2.0_table_rec|Text recognition of English table scene trained on PubLayNet dataset|6.9M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_rec_train.tar) |
+|en_ppocr_mobile_v2.0_table_structure|Table structure prediction of English table scene trained on PubLayNet dataset|18.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_structure_train.tar) |

 If you need to use other models, you can download the model in [model_list](../doc/doc_en/models_list_en.md) or use your own trained model to configure it to the three fields of `det_model_dir`, `rec_model_dir`, `table_model_dir` .
--- a/ppstructure/README_ch.md
+++ b/ppstructure/README_ch.md
@@ -30,13 +30,13 @@ python3 -m pip install paddlepaddle-gpu==2.1.1 -i https://mirror.baidu.com/pypi/
 # CPU安装
 python3 -m pip install paddlepaddle==2.1.1 -i https://mirror.baidu.com/pypi/simple

-# 更多需求，请参照[安装文档](https://www.paddlepaddle.org.cn/install/quick)中的说明进行操作。
 ```
+更多需求，请参照[安装文档](https://www.paddlepaddle.org.cn/install/quick)中的说明进行操作。

 - **(2) 安装 Layout-Parser**

 ```bash
-pip3 install -U premailer paddleocr https://paddleocr.bj.bcebos.com/whl/layoutparser-0.0.0-py3-none-any.whl
+pip3 install -U https://paddleocr.bj.bcebos.com/whl/layoutparser-0.0.0-py3-none-any.whl
 ```

 ### 2.2 安装PaddleOCR（包含PP-OCR和PP-Structure）
@@ -179,10 +179,10 @@ OCR和表格识别模型

 |模型名称|模型简介|推理模型大小|下载地址|
 | --- | --- | --- | --- |
-|ch_ppocr_mobile_slim_v2.0_det|slim裁剪版超轻量模型，支持中英文、多语种文本检测|2.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_det_prune_infer.tar) |
-|ch_ppocr_mobile_slim_v2.0_rec|slim裁剪量化版超轻量模型，支持中英文、数字识别|6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_slim_infer.tar) |
-|en_ppocr_mobile_v2.0_table_det|PubLayNet数据集训练的英文表格场景的文字检测|4.7M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar) |
-|en_ppocr_mobile_v2.0_table_rec|PubLayNet数据集训练的英文表格场景的文字识别|6.9M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar) |
-|en_ppocr_mobile_v2.0_table_structure|PubLayNet数据集训练的英文表格场景的表格结构预测|18.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar) |
+|ch_ppocr_mobile_slim_v2.0_det|slim裁剪版超轻量模型，支持中英文、多语种文本检测|2.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_det_prune_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_det_prune_infer.tar) |
+|ch_ppocr_mobile_slim_v2.0_rec|slim裁剪量化版超轻量模型，支持中英文、数字识别|6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_slim_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_slim_train.tar) |
+|en_ppocr_mobile_v2.0_table_det|PubLayNet数据集训练的英文表格场景的文字检测|4.7M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_det_train.tar) |
+|en_ppocr_mobile_v2.0_table_rec|PubLayNet数据集训练的英文表格场景的文字识别|6.9M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_rec_train.tar) |
+|en_ppocr_mobile_v2.0_table_structure|PubLayNet数据集训练的英文表格场景的表格结构预测|18.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_structure_train.tar) |

 如需要使用其他模型，可以在 [model_list](../doc/doc_ch/models_list.md) 下载模型或者使用自己训练好的模型配置到`det_model_dir`,`rec_model_dir`,`table_model_dir`三个字段即可。
--- a/ppstructure/table/README.md
+++ b/ppstructure/table/README.md
@@ -41,7 +41,7 @@ wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_tab
 wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar && tar xf en_ppocr_mobile_v2.0_table_structure_infer.tar
 cd ..
 # run
-python3 table/predict_table.py --det_model_dir=inference/en_ppocr_mobile_v2.0_table_det_infer --rec_model_dir=inference/en_ppocr_mobile_v2.0_table_rec_infer --table_model_dir=inference/en_ppocr_mobile_v2.0_table_structure_infer --image_dir=../doc/table/table.jpg --rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt --table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt --rec_char_type=ch --det_limit_side_len=736 --det_limit_type=min --output ../output/table
+python3 table/predict_table.py --det_model_dir=inference/en_ppocr_mobile_v2.0_table_det_infer --rec_model_dir=inference/en_ppocr_mobile_v2.0_table_rec_infer --table_model_dir=inference/en_ppocr_mobile_v2.0_table_structure_infer --image_dir=../doc/table/table.jpg --rec_char_dict_path=../ppocr/utils/dict/table_dict.txt --table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt --rec_char_type=EN --det_limit_side_len=736 --det_limit_type=min --output ../output/table
 ```
 Note: The above model is trained on the PubLayNet dataset and only supports English scanning scenarios. If you need to identify other scenarios, you need to train the model yourself and replace the three fields `det_model_dir`, `rec_model_dir`, `table_model_dir`.


--- a/ppstructure/table/README_ch.md
+++ b/ppstructure/table/README_ch.md
@@ -43,7 +43,7 @@ wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_tab
 wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar && tar xf en_ppocr_mobile_v2.0_table_structure_infer.tar
 cd ..
 # 执行预测
-python3 table/predict_table.py --det_model_dir=inference/en_ppocr_mobile_v2.0_table_det_infer --rec_model_dir=inference/en_ppocr_mobile_v2.0_table_rec_infer --table_model_dir=inference/en_ppocr_mobile_v2.0_table_structure_infer --image_dir=../doc/table/table.jpg --rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt --table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt --rec_char_type=ch --det_limit_side_len=736 --det_limit_type=min --output ../output/table
+python3 table/predict_table.py --det_model_dir=inference/en_ppocr_mobile_v2.0_table_det_infer --rec_model_dir=inference/en_ppocr_mobile_v2.0_table_rec_infer --table_model_dir=inference/en_ppocr_mobile_v2.0_table_structure_infer --image_dir=../doc/table/table.jpg --rec_char_dict_path=../ppocr/utils/dict/table_dict.txt --table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt --rec_char_type=EN --det_limit_side_len=736 --det_limit_type=min --output ../output/table
 ```
 运行完成后，每张图片的excel表格会保存到output字段指定的目录下


--- a/requirements.txt
+++ b/requirements.txt
@@ -7,4 +7,7 @@ tqdm
 numpy
 visualdl
 python-Levenshtein
-opencv-contrib-python==4.4.0.46
\ No newline at end of file
+opencv-contrib-python==4.4.0.46
+lxml
+premailer
+openpyxl
\ No newline at end of file
--- a/tests/ocr_det_params.txt
+++ b/tests/ocr_det_params.txt
@@ -4,7 +4,7 @@ python:python3.7
 gpu_list:0|0,1
 Global.use_gpu:True|True
 Global.auto_cast:null
-Global.epoch_num:lite_train_infer=2|whole_train_infer=300
+Global.epoch_num:lite_train_infer=1|whole_train_infer=300
 Global.save_model_dir:./output/
 Train.loader.batch_size_per_card:lite_train_infer=2|whole_train_infer=4
 Global.pretrained_model:null
@@ -15,7 +15,7 @@ null:null
 trainer:norm_train|pact_train
 norm_train:tools/train.py -c tests/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained
 pact_train:deploy/slim/quantization/quant.py -c configs/det/det_mv3_db.yml -o
-fpgm_train:null
+fpgm_train:deploy/slim/prune/sensitivity_anal.py -c configs/det/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/det_mv3_db_v2.0_train/best_accuracy
 distill_train:null
 null:null
 null:null
@@ -29,7 +29,7 @@ Global.save_inference_dir:./output/
 Global.pretrained_model:
 norm_export:tools/export_model.py -c configs/det/det_mv3_db.yml -o 
 quant_export:deploy/slim/quantization/export_model.py -c configs/det/det_mv3_db.yml -o 
-fpgm_export:deploy/slim/prune/export_prune_model.py
+fpgm_export:deploy/slim/prune/export_prune_model.py -c configs/det/det_mv3_db.yml -o 
 distill_export:null
 export1:null
 export2:null
@@ -49,4 +49,19 @@ inference:tools/infer/predict_det.py
 --save_log_path:null
 --benchmark:True
 null:null
+===========================cpp_infer_params===========================
+use_opencv:True
+infer_model:./inference/ch_ppocr_mobile_v2.0_det_infer/
+infer_quant:False
+inference:./deploy/cpp_infer/build/ppocr det
+--use_gpu:True|False
+--enable_mkldnn:True|False
+--cpu_threads:1|6
+--rec_batch_num:1
+--use_tensorrt:False|True
+--precision:fp32|fp16
+--det_model_dir:
+--image_dir:./inference/ch_det_data_50/all-sum-510/
+--save_log_path:null
+--benchmark:True

--- a/tests/ocr_det_server_params.txt
+++ b/tests/ocr_det_server_params.txt
+===========================train_params===========================
+model_name:ocr_server_det
+python:python3.7
+gpu_list:0|0,1
+Global.use_gpu:True|True
+Global.auto_cast:null
+Global.epoch_num:lite_train_infer=2|whole_train_infer=300
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:lite_train_infer=2|whole_train_infer=4
+Global.pretrained_model:null
+train_model_name:latest
+train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
+null:null
+##
+trainer:norm_train|pact_train
+norm_train:tools/train.py -c configs/det/det_r50_vd_db.yml -o Global.pretrained_model=""
+pact_train:null
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params=========================== 
+eval:tools/eval.py -c configs/det/det_mv3_db.yml -o
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Global.pretrained_model:
+norm_export:tools/export_model.py -c configs/det/det_r50_vd_db.yml -o 
+quant_export:null 
+fpgm_export:null
+distill_export:null
+export1:null
+export2:null
+##
+infer_model:./inference/ch_ppocr_server_v2.0_det_infer/
+infer_export:null
+infer_quant:False
+inference:tools/infer/predict_det.py
+--use_gpu:True|False
+--enable_mkldnn:True|False
+--cpu_threads:1|6
+--rec_batch_num:1
+--use_tensorrt:False|True
+--precision:fp32|fp16|int8
+--det_model_dir:
+--image_dir:./inference/ch_det_data_50/all-sum-510/
+--save_log_path:null
+--benchmark:True
+null:null
+
--- a/tests/prepare.sh
+++ b/tests/prepare.sh
 #!/bin/bash
 FILENAME=$1
-# MODE be one of ['lite_train_infer' 'whole_infer' 'whole_train_infer', 'infer']
+# MODE be one of ['lite_train_infer' 'whole_infer' 'whole_train_infer', 'infer', 'cpp_infer']
 MODE=$2

 dataline=$(cat ${FILENAME})
@@ -34,11 +34,14 @@ MODE=$2
 if [ ${MODE} = "lite_train_infer" ];then
    # pretrain lite train data
    wget -nc -P  ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams
+    wget -nc -P ./pretrain_models/  https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar
+    cd ./pretrain_models/ && tar xf det_mv3_db_v2.0_train.tar && cd ../
    rm -rf ./train_data/icdar2015
    rm -rf ./train_data/ic15_data
    wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015_lite.tar
    wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ic15_data.tar # todo change to bcebos
-
+    wget -nc -P ./deploy/slim/prune https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/sen.pickle
+    
    cd ./train_data/ && tar xf icdar2015_lite.tar && tar xf ic15_data.tar
    ln -s ./icdar2015_lite ./icdar2015
    cd ../
@@ -58,13 +61,17 @@ elif [ ${MODE} = "whole_infer" ];then
    cd ./train_data/ && tar xf icdar2015_infer.tar && tar xf ic15_data.tar
    ln -s ./icdar2015_infer ./icdar2015
    cd ../
-else
+elif [ ${MODE} = "infer" ] || [ ${MODE} = "cpp_infer" ];then
    if [ ${model_name} = "ocr_det" ]; then
        eval_model_name="ch_ppocr_mobile_v2.0_det_infer"
        rm -rf ./train_data/icdar2015
-        wget -nc -P ./train_data https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar
+        wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar
        wget -nc  -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar
        cd ./inference && tar xf ${eval_model_name}.tar && tar xf ch_det_data_50.tar && cd ../
+    elif [ ${model_name} = "ocr_server_det" ]; then
+        wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar
+        wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar
+        cd ./inference && tar xf ch_ppocr_server_v2.0_det_infer.tar && tar xf ch_det_data_50.tar && cd ../
    else 
        rm -rf ./train_data/ic15_data
        eval_model_name="ch_ppocr_mobile_v2.0_rec_infer"
@@ -74,3 +81,72 @@ else
    fi 
 fi

+if [ ${MODE} = "cpp_infer" ];then
+    cd deploy/cpp_infer  # everything below runs relative to deploy/cpp_infer
+    use_opencv=$(func_parser_value "${lines[52]}")  # value of the params-file line at index 52
+    if [ ${use_opencv} = "True" ]; then
+        echo "################### build opencv ###################"
+        rm -rf 3.4.7.tar.gz opencv-3.4.7/
+        wget https://github.com/opencv/opencv/archive/3.4.7.tar.gz
+        tar -xf 3.4.7.tar.gz
+
+        cd opencv-3.4.7/
+        install_path=$(pwd)/opencv3  # pwd is already opencv-3.4.7/, so do not repeat that component
+
+        rm -rf build
+        mkdir build
+        cd build
+
+        cmake .. \
+            -DCMAKE_INSTALL_PREFIX=${install_path} \
+            -DCMAKE_BUILD_TYPE=Release \
+            -DBUILD_SHARED_LIBS=OFF \
+            -DWITH_IPP=OFF \
+            -DBUILD_IPP_IW=OFF \
+            -DWITH_LAPACK=OFF \
+            -DWITH_EIGEN=OFF \
+            -DCMAKE_INSTALL_LIBDIR=lib64 \
+            -DWITH_ZLIB=ON \
+            -DBUILD_ZLIB=ON \
+            -DWITH_JPEG=ON \
+            -DBUILD_JPEG=ON \
+            -DWITH_PNG=ON \
+            -DBUILD_PNG=ON \
+            -DWITH_TIFF=ON \
+            -DBUILD_TIFF=ON
+
+        make -j
+        make install
+        cd ../../  # leave build/ AND opencv-3.4.7/ so the demo below is configured from deploy/cpp_infer
+        echo "################### build opencv finished ###################"
+    fi
+
+
+    echo "################### build PaddleOCR demo ####################"
+    if [ ${use_opencv} = "True" ]; then
+        OPENCV_DIR=$(pwd)/opencv-3.4.7/opencv3/  # same directory as install_path above
+    else
+        OPENCV_DIR=''
+    fi
+    LIB_DIR=$(pwd)/Paddle/build/paddle_inference_install_dir/
+    CUDA_LIB_DIR=$(dirname `find /usr -name libcudart.so`)
+    CUDNN_LIB_DIR=$(dirname `find /usr -name libcudnn.so`)
+    
+    BUILD_DIR=build
+    rm -rf ${BUILD_DIR}
+    mkdir ${BUILD_DIR}
+    cd ${BUILD_DIR}
+    cmake .. \
+        -DPADDLE_LIB=${LIB_DIR} \
+        -DWITH_MKL=ON \
+        -DWITH_GPU=OFF \
+        -DWITH_STATIC_LIB=OFF \
+        -DWITH_TENSORRT=OFF \
+        -DOPENCV_DIR=${OPENCV_DIR} \
+        -DCUDNN_LIB=${CUDNN_LIB_DIR} \
+        -DCUDA_LIB=${CUDA_LIB_DIR} \
+        -DTENSORRT_DIR=${TENSORRT_DIR} \
+
+    make -j
+    echo "################### build PaddleOCR demo finished ###################"
+fi
\ No newline at end of file
--- a/tests/readme.md
+++ b/tests/readme.md
+
+# 介绍
+
+test.sh和params.txt文件配合使用，完成OCR轻量检测和识别模型从训练到预测的流程测试。
+
+# 安装依赖
+- 安装PaddlePaddle >= 2.0
+- 安装PaddleOCR依赖
+    ```
+    pip3 install  -r ../requirements.txt
+    ```
+- 安装autolog
+    ```
+    git clone https://github.com/LDOUBLEV/AutoLog
+    cd AutoLog
+    pip3 install -r requirements.txt
+    python3 setup.py bdist_wheel
+    pip3 install ./dist/auto_log-1.0.0-py3-none-any.whl
+    cd ../
+    ```
+
+# 目录介绍
+
+```bash
+tests/
+├── ocr_det_params.txt   # 测试OCR检测模型的参数配置文件
+├── ocr_rec_params.txt   # 测试OCR识别模型的参数配置文件
+├── prepare.sh           # 完成test.sh运行所需要的数据和模型下载
+└── test.sh              # 根据params.txt中的配置，完成从训练到预测的流程测试
+```
+
+# 使用方法
+test.sh包含四种运行模式，每种模式的运行数据不同，分别用于测试速度和精度，分别是：
+- 模式1 lite_train_infer，使用少量数据训练，用于快速验证训练到预测的走通流程，不验证精度和速度；
+```
+bash tests/prepare.sh ./tests/ocr_det_params.txt 'lite_train_infer'
+bash tests/test.sh ./tests/ocr_det_params.txt 'lite_train_infer'
+```
+- 模式2 whole_infer，使用少量数据训练，一定量数据预测，用于验证训练后的模型执行预测，预测速度是否合理；
+```
+bash tests/prepare.sh ./tests/ocr_det_params.txt 'whole_infer'
+bash tests/test.sh ./tests/ocr_det_params.txt 'whole_infer'
+```
+
+- 模式3 infer 不训练，全量数据预测，走通开源模型评估、动转静，检查inference model预测时间和精度;
+```
+bash tests/prepare.sh ./tests/ocr_det_params.txt 'infer'
+用法1:
+bash tests/test.sh ./tests/ocr_det_params.txt 'infer'
+用法2: 指定GPU卡预测，第三个传入参数为GPU卡号
+bash tests/test.sh ./tests/ocr_det_params.txt 'infer' '1'
+```
+
+- 模式4 whole_train_infer，CE：全量数据训练，全量数据预测，验证模型训练精度，预测精度，预测速度；
+```
+bash tests/prepare.sh ./tests/ocr_det_params.txt 'whole_train_infer'
+bash tests/test.sh ./tests/ocr_det_params.txt 'whole_train_infer'
+```
--- a/tests/test.sh
+++ b/tests/test.sh
 #!/bin/bash
 FILENAME=$1
-# MODE be one of ['lite_train_infer' 'whole_infer' 'whole_train_infer', 'infer']
+# MODE be one of ['lite_train_infer' 'whole_infer' 'whole_train_infer', 'infer', 'cpp_infer']
 MODE=$2

 dataline=$(cat ${FILENAME})
@@ -145,6 +145,33 @@ benchmark_value=$(func_parser_value "${lines[49]}")
 infer_key1=$(func_parser_key "${lines[50]}")
 infer_value1=$(func_parser_value "${lines[50]}")

+if [ ${MODE} = "cpp_infer" ]; then
+    # Parse the C++ inference model directory list and the matching quantization flag(s).
+    cpp_infer_model_dir_list=$(func_parser_value "${lines[53]}")
+    cpp_infer_is_quant=$(func_parser_value "${lines[54]}")
+    # Parse the C++ inference command plus the key/value pair of each command-line option.
+    inference_cmd=$(func_parser_value "${lines[55]}")
+    cpp_use_gpu_key=$(func_parser_key "${lines[56]}")
+    cpp_use_gpu_list=$(func_parser_value "${lines[56]}")
+    cpp_use_mkldnn_key=$(func_parser_key "${lines[57]}")
+    cpp_use_mkldnn_list=$(func_parser_value "${lines[57]}")
+    cpp_cpu_threads_key=$(func_parser_key "${lines[58]}")
+    cpp_cpu_threads_list=$(func_parser_value "${lines[58]}")
+    cpp_batch_size_key=$(func_parser_key "${lines[59]}")
+    cpp_batch_size_list=$(func_parser_value "${lines[59]}")
+    cpp_use_trt_key=$(func_parser_key "${lines[60]}")
+    cpp_use_trt_list=$(func_parser_value "${lines[60]}")
+    cpp_precision_key=$(func_parser_key "${lines[61]}")
+    cpp_precision_list=$(func_parser_value "${lines[61]}")
+    cpp_infer_model_key=$(func_parser_key "${lines[62]}")
+    cpp_image_dir_key=$(func_parser_key "${lines[63]}")
+    cpp_infer_img_dir=$(func_parser_value "${lines[63]}")
+    cpp_save_log_key=$(func_parser_key "${lines[64]}")
+    cpp_benchmark_key=$(func_parser_key "${lines[65]}")
+    cpp_benchmark_value=$(func_parser_value "${lines[65]}")
+fi
+
+
 LOG_PATH="./tests/output"
 mkdir -p ${LOG_PATH}
 status_log="${LOG_PATH}/results.log"
@@ -218,6 +245,71 @@ function func_inference(){
    done
 }

+function func_cpp_inference(){
+    IFS='|'  # the *_list values parsed from the params file are '|'-separated
+    _script=$1      # inference command to run
+    _model_dir=$2   # inference model directory
+    _log_path=$3    # directory that receives one log file per configuration
+    _img_dir=$4     # directory of input images
+    _flag_quant=$5  # "True" when the model is quantized
+    # Run the command once per device/option combination, log each run and record its exit status.
+    for use_gpu in ${cpp_use_gpu_list[*]}; do
+        if [ ${use_gpu} = "False" ] || [ ${use_gpu} = "cpu" ]; then
+            for use_mkldnn in ${cpp_use_mkldnn_list[*]}; do
+                if [ ${use_mkldnn} = "False" ] && [ ${_flag_quant} = "True" ]; then
+                    continue
+                fi
+                for threads in ${cpp_cpu_threads_list[*]}; do
+                    for batch_size in ${cpp_batch_size_list[*]}; do
+                        _save_log_path="${_log_path}/cpp_infer_cpu_usemkldnn_${use_mkldnn}_threads_${threads}_batchsize_${batch_size}.log"
+                        set_infer_data=$(func_set_params "${cpp_image_dir_key}" "${_img_dir}")
+                        set_benchmark=$(func_set_params "${cpp_benchmark_key}" "${cpp_benchmark_value}")
+                        set_batchsize=$(func_set_params "${cpp_batch_size_key}" "${batch_size}")
+                        set_cpu_threads=$(func_set_params "${cpp_cpu_threads_key}" "${threads}")
+                        set_model_dir=$(func_set_params "${cpp_infer_model_key}" "${_model_dir}")
+                        command="${_script} ${cpp_use_gpu_key}=${use_gpu} ${cpp_use_mkldnn_key}=${use_mkldnn} ${set_cpu_threads} ${set_model_dir} ${set_batchsize} ${set_infer_data} ${set_benchmark} > ${_save_log_path} 2>&1 "
+                        eval $command
+                        last_status=${PIPESTATUS[0]}
+                        eval "cat ${_save_log_path}"
+                        status_check $last_status "${command}" "${status_log}"
+                    done
+                done
+            done
+        elif [ ${use_gpu} = "True" ] || [ ${use_gpu} = "gpu" ]; then
+            for use_trt in ${cpp_use_trt_list[*]}; do
+                for precision in ${cpp_precision_list[*]}; do
+                    if [[ ${_flag_quant} = "False" ]] && [[ ${precision} =~ "int8" ]]; then
+                        continue  # int8 is skipped for non-quantized models
+                    fi 
+                    if [[ ${precision} =~ "fp16" || ${precision} =~ "int8" ]] && [ ${use_trt} = "False" ]; then
+                        continue  # fp16/int8 are only run with TensorRT enabled
+                    fi
+                    if [[ ${use_trt} = "False" || ${precision} =~ "int8" ]] && [ ${_flag_quant} = "True" ]; then
+                        continue
+                    fi
+                    for batch_size in ${cpp_batch_size_list[*]}; do
+                        _save_log_path="${_log_path}/cpp_infer_gpu_usetrt_${use_trt}_precision_${precision}_batchsize_${batch_size}.log"
+                        set_infer_data=$(func_set_params "${cpp_image_dir_key}" "${_img_dir}")
+                        set_benchmark=$(func_set_params "${cpp_benchmark_key}" "${cpp_benchmark_value}")
+                        set_batchsize=$(func_set_params "${cpp_batch_size_key}" "${batch_size}")
+                        set_tensorrt=$(func_set_params "${cpp_use_trt_key}" "${use_trt}")
+                        set_precision=$(func_set_params "${cpp_precision_key}" "${precision}")
+                        set_model_dir=$(func_set_params "${cpp_infer_model_key}" "${_model_dir}")
+                        command="${_script} ${cpp_use_gpu_key}=${use_gpu} ${set_tensorrt} ${set_precision} ${set_model_dir} ${set_batchsize} ${set_infer_data} ${set_benchmark} > ${_save_log_path} 2>&1 "
+                        eval $command
+                        last_status=${PIPESTATUS[0]}
+                        eval "cat ${_save_log_path}"
+                        status_check $last_status "${command}" "${status_log}"
+                        
+                    done
+                done
+            done
+        else
+            echo "Does not support hardware other than CPU and GPU Currently!"
+        fi
+    done
+}
+
 if [ ${MODE} = "infer" ]; then
    GPUID=$3
    if [ ${#GPUID} -le 0 ];then
@@ -252,6 +344,25 @@ if [ ${MODE} = "infer" ]; then
        Count=$(($Count + 1))
    done

+elif [ ${MODE} = "cpp_infer" ]; then
+    GPUID=$3
+    if [ ${#GPUID} -le 0 ];then
+        env=" "
+    else
+        env="export CUDA_VISIBLE_DEVICES=${GPUID}"
+    fi
+    # set CUDA_VISIBLE_DEVICES
+    eval $env
+    export Count=0
+    IFS="|"
+    infer_quant_flag=(${cpp_infer_is_quant})
+    for infer_model in ${cpp_infer_model_dir_list[*]}; do
+        #run inference
+        is_quant=${infer_quant_flag[Count]}
+        func_cpp_inference "${inference_cmd}" "${infer_model}" "${LOG_PATH}" "${cpp_infer_img_dir}" ${is_quant}
+        Count=$(($Count + 1))
+    done
+
 else
    IFS="|"
    export Count=0

--- a/tools/infer/predict_det.py
+++ b/tools/infer/predict_det.py
@@ -101,6 +101,7 @@ class TextDetector(object):
        if args.benchmark:
            import auto_log
            pid = os.getpid()
+            gpu_id = utility.get_infer_gpuid()
            self.autolog = auto_log.AutoLogger(
                model_name="det",
                model_precision=args.precision,
@@ -110,7 +111,7 @@ class TextDetector(object):
                inference_config=self.config,
                pids=pid,
                process_name=None,
-                gpu_ids=0,
+                gpu_ids=gpu_id if args.use_gpu else None,
                time_keys=[
                    'preprocess_time', 'inference_time', 'postprocess_time'
                ],

--- a/tools/infer/predict_e2e.py
+++ b/tools/infer/predict_e2e.py
@@ -74,7 +74,7 @@ class TextE2E(object):

        self.preprocess_op = create_operators(pre_process_list)
        self.postprocess_op = build_post_process(postprocess_params)
-        self.predictor, self.input_tensor, self.output_tensors = utility.create_predictor(
+        self.predictor, self.input_tensor, self.output_tensors, _ = utility.create_predictor(
            args, 'e2e', logger)  # paddle.jit.load(args.det_model_dir)
        # self.predictor.eval()


--- a/tools/infer/predict_rec.py
+++ b/tools/infer/predict_rec.py
@@ -68,6 +68,7 @@ class TextRecognizer(object):
        if args.benchmark:
            import auto_log
            pid = os.getpid()
+            gpu_id = utility.get_infer_gpuid()
            self.autolog = auto_log.AutoLogger(
                model_name="rec",
                model_precision=args.precision,
@@ -77,7 +78,7 @@ class TextRecognizer(object):
                inference_config=self.config,
                pids=pid,
                process_name=None,
-                gpu_ids=0 if args.use_gpu else None,
+                gpu_ids=gpu_id if args.use_gpu else None,
                time_keys=[
                    'preprocess_time', 'inference_time', 'postprocess_time'
                ],
@@ -87,8 +88,8 @@ class TextRecognizer(object):
    def resize_norm_img(self, img, max_wh_ratio):
        imgC, imgH, imgW = self.rec_image_shape
        assert imgC == img.shape[2]
-        if self.character_type == "ch":
-            imgW = int((32 * max_wh_ratio))
+        max_wh_ratio = max(max_wh_ratio, imgW / imgH)
+        imgW = int((32 * max_wh_ratio))
        h, w = img.shape[:2]
        ratio = w / float(h)
        if math.ceil(imgH * ratio) > imgW:
@@ -277,7 +278,7 @@ def main(args):
    if args.warmup:
        img = np.random.uniform(0, 255, [32, 320, 3]).astype(np.uint8)
        for i in range(2):
-            res = text_recognizer([img])
+            res = text_recognizer([img] * int(args.rec_batch_num))

    for image_file in image_file_list:
        img, flag = check_and_read_gif(image_file)

--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -159,6 +159,11 @@ def create_predictor(args, mode, logger):
        precision = inference.PrecisionType.Float32

    if args.use_gpu:
+        gpu_id = get_infer_gpuid()
+        if gpu_id is None:
+            raise ValueError(
+                "Not found GPU in current device. Please check your device or set args.use_gpu as False"
+            )
        config.enable_use_gpu(args.gpu_mem, 0)
        if args.use_tensorrt:
            config.enable_tensorrt_engine(
@@ -280,6 +285,20 @@ def create_predictor(args, mode, logger):
    return predictor, input_tensor, output_tensors, config


+def get_infer_gpuid():
+    """Return the first GPU id listed in CUDA_VISIBLE_DEVICES (0 if unset), or None if `nvidia-smi` prints nothing."""
+    # No output from `nvidia-smi` is treated as "no GPU available on this device".
+    if not os.popen("nvidia-smi").readlines():
+        return None
+    # Read the variable directly; `env | grep` could also match unrelated variables.
+    visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES")
+    if visible_devices is None:
+        return 0
+    # Parse the whole first comma-separated entry: the previous `gpu_id[0]`
+    # kept only the first character, so "10" was reported as GPU 1.
+    return int(visible_devices.split(",")[0])
+
+
 def draw_e2e_res(dt_boxes, strs, img_path):
    src_im = cv2.imread(img_path)
    for box, str in zip(dt_boxes, strs):

--- a/tools/program.py
+++ b/tools/program.py
@@ -186,9 +186,11 @@ def train(config,
    model.train()

    use_srn = config['Architecture']['algorithm'] == "SRN"
-    try: 
+    use_nrtr = config['Architecture']['algorithm'] == "NRTR"
+
+    try:
        model_type = config['Architecture']['model_type']
-    except: 
+    except:
        model_type = None

    if 'start_epoch' in best_model_dict:
@@ -213,7 +215,7 @@ def train(config,
            images = batch[0]
            if use_srn:
                model_average = True
-            if use_srn or model_type == 'table':
+            if use_srn or model_type == 'table' or use_nrtr:
                preds = model(images, data=batch[1:])
            else:
                preds = model(images)
@@ -398,7 +400,7 @@ def preprocess(is_train=False):
    alg = config['Architecture']['algorithm']
    assert alg in [
        'EAST', 'DB', 'SAST', 'Rosetta', 'CRNN', 'STARNet', 'RARE', 'SRN',
-        'CLS', 'PGNet', 'Distillation', 'TableAttn'
+        'CLS', 'PGNet', 'Distillation', 'NRTR', 'TableAttn'
    ]

    device = 'gpu:{}'.format(dist.ParallelEnv().dev_id) if use_gpu else 'cpu'