Unverified commit 191c9dee, authored by Evezerest, committed by GitHub

Merge branch 'dygraph' into dygraph

...@@ -152,16 +152,6 @@ class MainWindow(QMainWindow): ...@@ -152,16 +152,6 @@ class MainWindow(QMainWindow):
self.fileListWidget.setIconSize(QSize(25, 25)) self.fileListWidget.setIconSize(QSize(25, 25))
filelistLayout.addWidget(self.fileListWidget) filelistLayout.addWidget(self.fileListWidget)
self.AutoRecognition = QToolButton()
self.AutoRecognition.setToolButtonStyle(Qt.ToolButtonTextBesideIcon)
self.AutoRecognition.setIcon(newIcon('Auto'))
autoRecLayout = QHBoxLayout()
autoRecLayout.setContentsMargins(0, 0, 0, 0)
autoRecLayout.addWidget(self.AutoRecognition)
autoRecContainer = QWidget()
autoRecContainer.setLayout(autoRecLayout)
filelistLayout.addWidget(autoRecContainer)
fileListContainer = QWidget() fileListContainer = QWidget()
fileListContainer.setLayout(filelistLayout) fileListContainer.setLayout(filelistLayout)
self.fileListName = getStr('fileList') self.fileListName = getStr('fileList')
...@@ -172,17 +162,30 @@ class MainWindow(QMainWindow): ...@@ -172,17 +162,30 @@ class MainWindow(QMainWindow):
# ================== Key List ================== # ================== Key List ==================
if self.kie_mode: if self.kie_mode:
# self.keyList = QListWidget()
self.keyList = UniqueLabelQListWidget() self.keyList = UniqueLabelQListWidget()
# self.keyList.itemSelectionChanged.connect(self.keyListSelectionChanged)
# self.keyList.itemDoubleClicked.connect(self.editBox) # set key list height
# self.keyList.itemChanged.connect(self.keyListItemChanged) key_list_height = int(QApplication.desktop().height() // 4)
if key_list_height < 50:
key_list_height = 50
self.keyList.setMaximumHeight(key_list_height)
self.keyListDockName = getStr('keyListTitle') self.keyListDockName = getStr('keyListTitle')
self.keyListDock = QDockWidget(self.keyListDockName, self) self.keyListDock = QDockWidget(self.keyListDockName, self)
self.keyListDock.setWidget(self.keyList) self.keyListDock.setWidget(self.keyList)
self.keyListDock.setFeatures(QDockWidget.NoDockWidgetFeatures) self.keyListDock.setFeatures(QDockWidget.NoDockWidgetFeatures)
filelistLayout.addWidget(self.keyListDock) filelistLayout.addWidget(self.keyListDock)
self.AutoRecognition = QToolButton()
self.AutoRecognition.setToolButtonStyle(Qt.ToolButtonTextBesideIcon)
self.AutoRecognition.setIcon(newIcon('Auto'))
autoRecLayout = QHBoxLayout()
autoRecLayout.setContentsMargins(0, 0, 0, 0)
autoRecLayout.addWidget(self.AutoRecognition)
autoRecContainer = QWidget()
autoRecContainer.setLayout(autoRecLayout)
filelistLayout.addWidget(autoRecContainer)
# ================== Right Area ================== # ================== Right Area ==================
listLayout = QVBoxLayout() listLayout = QVBoxLayout()
listLayout.setContentsMargins(0, 0, 0, 0) listLayout.setContentsMargins(0, 0, 0, 0)
...@@ -431,8 +434,7 @@ class MainWindow(QMainWindow): ...@@ -431,8 +434,7 @@ class MainWindow(QMainWindow):
# ================== New Actions ================== # ================== New Actions ==================
edit = action(getStr('editLabel'), self.editLabel, edit = action(getStr('editLabel'), self.editLabel,
'Ctrl+E', 'edit', getStr('editLabelDetail'), 'Ctrl+E', 'edit', getStr('editLabelDetail'), enabled=False)
enabled=False)
AutoRec = action(getStr('autoRecognition'), self.autoRecognition, AutoRec = action(getStr('autoRecognition'), self.autoRecognition,
'', 'Auto', getStr('autoRecognition'), enabled=False) '', 'Auto', getStr('autoRecognition'), enabled=False)
...@@ -465,11 +467,10 @@ class MainWindow(QMainWindow): ...@@ -465,11 +467,10 @@ class MainWindow(QMainWindow):
'Ctrl+Z', "undo", getStr("undo"), enabled=False) 'Ctrl+Z', "undo", getStr("undo"), enabled=False)
change_cls = action(getStr("keyChange"), self.change_box_key, change_cls = action(getStr("keyChange"), self.change_box_key,
'Ctrl+B', "edit", getStr("keyChange"), enabled=False) 'Ctrl+X', "edit", getStr("keyChange"), enabled=False)
lock = action(getStr("lockBox"), self.lockSelectedShape, lock = action(getStr("lockBox"), self.lockSelectedShape,
None, "lock", getStr("lockBoxDetail"), None, "lock", getStr("lockBoxDetail"), enabled=False)
enabled=False)
self.editButton.setDefaultAction(edit) self.editButton.setDefaultAction(edit)
self.newButton.setDefaultAction(create) self.newButton.setDefaultAction(create)
...@@ -534,7 +535,8 @@ class MainWindow(QMainWindow): ...@@ -534,7 +535,8 @@ class MainWindow(QMainWindow):
fileMenuActions=(opendir, open_dataset_dir, saveLabel, resetAll, quit), fileMenuActions=(opendir, open_dataset_dir, saveLabel, resetAll, quit),
beginner=(), advanced=(), beginner=(), advanced=(),
editMenu=(createpoly, edit, copy, delete, singleRere, None, undo, undoLastPoint, editMenu=(createpoly, edit, copy, delete, singleRere, None, undo, undoLastPoint,
None, rotateLeft, rotateRight, None, color1, self.drawSquaresOption, lock), None, rotateLeft, rotateRight, None, color1, self.drawSquaresOption, lock,
None, change_cls),
beginnerContext=( beginnerContext=(
create, edit, copy, delete, singleRere, rotateLeft, rotateRight, lock, change_cls), create, edit, copy, delete, singleRere, rotateLeft, rotateRight, lock, change_cls),
advancedContext=(createMode, editMode, edit, copy, advancedContext=(createMode, editMode, edit, copy,
...@@ -1105,7 +1107,9 @@ class MainWindow(QMainWindow): ...@@ -1105,7 +1107,9 @@ class MainWindow(QMainWindow):
shapes = [format_shape(shape) for shape in self.canvas.shapes if shape.line_color != DEFAULT_LOCK_COLOR] shapes = [format_shape(shape) for shape in self.canvas.shapes if shape.line_color != DEFAULT_LOCK_COLOR]
# Can add different annotation formats here
for box in self.result_dic: for box in self.result_dic:
trans_dic = {"label": box[1][0], "points": box[0], "difficult": False, "key_cls": "None"} trans_dic = {"label": box[1][0], "points": box[0], "difficult": False}
if self.kie_mode:
trans_dic.update({"key_cls": "None"})
if trans_dic["label"] == "" and mode == 'Auto': if trans_dic["label"] == "" and mode == 'Auto':
continue continue
shapes.append(trans_dic) shapes.append(trans_dic)
...@@ -1113,8 +1117,10 @@ class MainWindow(QMainWindow): ...@@ -1113,8 +1117,10 @@ class MainWindow(QMainWindow):
try: try:
trans_dic = [] trans_dic = []
for box in shapes: for box in shapes:
trans_dic.append({"transcription": box['label'], "points": box['points'], trans_dict = {"transcription": box['label'], "points": box['points'], "difficult": box['difficult']}
"difficult": box['difficult'], "key_cls": box['key_cls']}) if self.kie_mode:
trans_dict.update({"key_cls": box['key_cls']})
trans_dic.append(trans_dict)
self.PPlabel[annotationFilePath] = trans_dic self.PPlabel[annotationFilePath] = trans_dic
if mode == 'Auto': if mode == 'Auto':
self.Cachelabel[annotationFilePath] = trans_dic self.Cachelabel[annotationFilePath] = trans_dic
...@@ -1424,15 +1430,17 @@ class MainWindow(QMainWindow): ...@@ -1424,15 +1430,17 @@ class MainWindow(QMainWindow):
# box['ratio'] of the shapes saved in lockedShapes contains the ratio of the # box['ratio'] of the shapes saved in lockedShapes contains the ratio of the
# four corner coordinates of the shapes to the height and width of the image # four corner coordinates of the shapes to the height and width of the image
for box in self.canvas.lockedShapes: for box in self.canvas.lockedShapes:
key_cls = None if not self.kie_mode else box['key_cls']
if self.canvas.isInTheSameImage: if self.canvas.isInTheSameImage:
shapes.append((box['transcription'], [[s[0] * width, s[1] * height] for s in box['ratio']], shapes.append((box['transcription'], [[s[0] * width, s[1] * height] for s in box['ratio']],
DEFAULT_LOCK_COLOR, box['key_cls'], box['difficult'])) DEFAULT_LOCK_COLOR, key_cls, box['difficult']))
else: else:
shapes.append(('锁定框:待检测', [[s[0] * width, s[1] * height] for s in box['ratio']], shapes.append(('锁定框:待检测', [[s[0] * width, s[1] * height] for s in box['ratio']],
DEFAULT_LOCK_COLOR, box['key_cls'], box['difficult'])) DEFAULT_LOCK_COLOR, key_cls, box['difficult']))
if imgidx in self.PPlabel.keys(): if imgidx in self.PPlabel.keys():
for box in self.PPlabel[imgidx]: for box in self.PPlabel[imgidx]:
shapes.append((box['transcription'], box['points'], None, box['key_cls'], box['difficult'])) key_cls = None if not self.kie_mode else box['key_cls']
shapes.append((box['transcription'], box['points'], None, key_cls, box['difficult']))
self.loadLabels(shapes) self.loadLabels(shapes)
self.canvas.verified = False self.canvas.verified = False
...@@ -1460,6 +1468,7 @@ class MainWindow(QMainWindow): ...@@ -1460,6 +1468,7 @@ class MainWindow(QMainWindow):
def adjustScale(self, initial=False): def adjustScale(self, initial=False):
value = self.scalers[self.FIT_WINDOW if initial else self.zoomMode]() value = self.scalers[self.FIT_WINDOW if initial else self.zoomMode]()
self.zoomWidget.setValue(int(100 * value)) self.zoomWidget.setValue(int(100 * value))
self.imageSlider.setValue(self.zoomWidget.value()) # set zoom slider value
def scaleFitWindow(self): def scaleFitWindow(self):
"""Figure out the size of the pixmap in order to fit the main widget.""" """Figure out the size of the pixmap in order to fit the main widget."""
...@@ -1600,7 +1609,6 @@ class MainWindow(QMainWindow): ...@@ -1600,7 +1609,6 @@ class MainWindow(QMainWindow):
else: else:
self.keyDialog.labelList.addItems(self.existed_key_cls_set) self.keyDialog.labelList.addItems(self.existed_key_cls_set)
def importDirImages(self, dirpath, isDelete=False): def importDirImages(self, dirpath, isDelete=False):
if not self.mayContinue() or not dirpath: if not self.mayContinue() or not dirpath:
return return
...@@ -2238,13 +2246,22 @@ class MainWindow(QMainWindow): ...@@ -2238,13 +2246,22 @@ class MainWindow(QMainWindow):
print('The program will automatically save once after confirming 5 images (default)') print('The program will automatically save once after confirming 5 images (default)')
def change_box_key(self): def change_box_key(self):
if not self.kie_mode:
return
key_text, _ = self.keyDialog.popUp(self.key_previous_text) key_text, _ = self.keyDialog.popUp(self.key_previous_text)
if key_text is None: if key_text is None:
return return
self.key_previous_text = key_text self.key_previous_text = key_text
for shape in self.canvas.selectedShapes: for shape in self.canvas.selectedShapes:
shape.key_cls = key_text shape.key_cls = key_text
if not self.keyList.findItemsByLabel(key_text):
item = self.keyList.createItemFromLabel(key_text)
self.keyList.addItem(item)
rgb = self._get_rgb_by_label(key_text, self.kie_mode)
self.keyList.setItemLabel(item, key_text, rgb)
self._update_shape_color(shape) self._update_shape_color(shape)
self.keyDialog.addLabelHistory(key_text)
def undoShapeEdit(self): def undoShapeEdit(self):
self.canvas.restoreShape() self.canvas.restoreShape()
...@@ -2288,9 +2305,10 @@ class MainWindow(QMainWindow): ...@@ -2288,9 +2305,10 @@ class MainWindow(QMainWindow):
shapes = [format_shape(shape) for shape in self.canvas.selectedShapes] shapes = [format_shape(shape) for shape in self.canvas.selectedShapes]
trans_dic = [] trans_dic = []
for box in shapes: for box in shapes:
trans_dic.append({"transcription": box['label'], "ratio": box['ratio'], trans_dict = {"transcription": box['label'], "ratio": box['ratio'], "difficult": box['difficult']}
"difficult": box['difficult'], if self.kie_mode:
"key_cls": "None" if "key_cls" not in box else box["key_cls"]}) trans_dict.update({"key_cls": box["key_cls"]})
trans_dic.append(trans_dict)
self.canvas.lockedShapes = trans_dic self.canvas.lockedShapes = trans_dic
self.actions.save.setEnabled(True) self.actions.save.setEnabled(True)
......
...@@ -9,7 +9,7 @@ PPOCRLabel is a semi-automatic graphic annotation tool suitable for OCR field, w ...@@ -9,7 +9,7 @@ PPOCRLabel is a semi-automatic graphic annotation tool suitable for OCR field, w
### Recent Update ### Recent Update
- 2022.02:(by [PeterH0323](https://github.com/peterh0323) - 2022.02:(by [PeterH0323](https://github.com/peterh0323)
- Added KIE mode, for [detection + identification + keyword extraction] labeling. - Add KIE Mode by using `--kie`, for [detection + identification + keyword extraction] labeling.
- 2022.01:(by [PeterH0323](https://github.com/peterh0323) - 2022.01:(by [PeterH0323](https://github.com/peterh0323)
- Improve user experience: prompt for the number of files and labels, optimize interaction, and fix bugs such as using only the CPU during inference
- 2021.11.17: - 2021.11.17:
...@@ -54,7 +54,10 @@ PPOCRLabel can be started in two ways: whl package and Python script. The whl pa ...@@ -54,7 +54,10 @@ PPOCRLabel can be started in two ways: whl package and Python script. The whl pa
```bash ```bash
pip install PPOCRLabel # install pip install PPOCRLabel # install
PPOCRLabel # run
# Select label mode and run
PPOCRLabel # [Normal mode] for [detection + recognition] labeling
PPOCRLabel --kie True # [KIE mode] for [detection + recognition + keyword extraction] labeling
``` ```
> If you get the error `OSError: [WinError 126] The specified module could not be found` when installing shapely on Windows, please download the Shapely whl file from http://www.lfd.uci.edu/~gohlke/pythonlibs/#shapely and install it manually.
...@@ -67,13 +70,18 @@ PPOCRLabel # run ...@@ -67,13 +70,18 @@ PPOCRLabel # run
```bash ```bash
pip3 install PPOCRLabel pip3 install PPOCRLabel
pip3 install trash-cli pip3 install trash-cli
PPOCRLabel
# Select label mode and run
PPOCRLabel # [Normal mode] for [detection + recognition] labeling
PPOCRLabel --kie True # [KIE mode] for [detection + recognition + keyword extraction] labeling
``` ```
#### MacOS #### MacOS
```bash ```bash
pip3 install PPOCRLabel pip3 install PPOCRLabel
pip3 install opencv-contrib-python-headless==4.2.0.32 pip3 install opencv-contrib-python-headless==4.2.0.32
# Select label mode and run
PPOCRLabel # [Normal mode] for [detection + recognition] labeling PPOCRLabel # [Normal mode] for [detection + recognition] labeling
PPOCRLabel --kie True # [KIE mode] for [detection + recognition + keyword extraction] labeling PPOCRLabel --kie True # [KIE mode] for [detection + recognition + keyword extraction] labeling
``` ```
...@@ -90,6 +98,8 @@ pip3 install dist/PPOCRLabel-1.0.2-py2.py3-none-any.whl ...@@ -90,6 +98,8 @@ pip3 install dist/PPOCRLabel-1.0.2-py2.py3-none-any.whl
```bash ```bash
cd ./PPOCRLabel # Switch to the PPOCRLabel directory cd ./PPOCRLabel # Switch to the PPOCRLabel directory
# Select label mode and run
python PPOCRLabel.py # [Normal mode] for [detection + recognition] labeling python PPOCRLabel.py # [Normal mode] for [detection + recognition] labeling
python PPOCRLabel.py --kie True # [KIE mode] for [detection + recognition + keyword extraction] labeling python PPOCRLabel.py --kie True # [KIE mode] for [detection + recognition + keyword extraction] labeling
``` ```
...@@ -156,6 +166,7 @@ python PPOCRLabel.py --kie True # [KIE mode] for [detection + recognition + keyw ...@@ -156,6 +166,7 @@ python PPOCRLabel.py --kie True # [KIE mode] for [detection + recognition + keyw
| X | Rotate the box anti-clockwise | | X | Rotate the box anti-clockwise |
| C | Rotate the box clockwise | | C | Rotate the box clockwise |
| Ctrl + E | Edit label of the selected box | | Ctrl + E | Edit label of the selected box |
| Ctrl + X | Change the key class of the box when `--kie` is enabled |
| Ctrl + R | Re-recognize the selected box | | Ctrl + R | Re-recognize the selected box |
| Ctrl + C | Copy and paste the selected box | | Ctrl + C | Copy and paste the selected box |
| Ctrl + Left Mouse Button | Multi select the label box | | Ctrl + Left Mouse Button | Multi select the label box |
......
...@@ -9,7 +9,7 @@ PPOCRLabel是一款适用于OCR领域的半自动化图形标注工具,内置P ...@@ -9,7 +9,7 @@ PPOCRLabel是一款适用于OCR领域的半自动化图形标注工具,内置P
#### 近期更新 #### 近期更新
- 2022.02:(by [PeterH0323](https://github.com/peterh0323) - 2022.02:(by [PeterH0323](https://github.com/peterh0323)
- 新增:KIE 功能,用于打【检测+识别+关键字提取】的标签 - 新增:使用 `--kie` 进入 KIE 功能,用于打【检测+识别+关键字提取】的标签
- 2022.01:(by [PeterH0323](https://github.com/peterh0323) - 2022.01:(by [PeterH0323](https://github.com/peterh0323)
- 提升用户体验:新增文件与标记数目提示、优化交互、修复gpu使用等问题 - 提升用户体验:新增文件与标记数目提示、优化交互、修复gpu使用等问题
- 2021.11.17: - 2021.11.17:
...@@ -57,7 +57,10 @@ PPOCRLabel可通过whl包与Python脚本两种方式启动,whl包形式启动 ...@@ -57,7 +57,10 @@ PPOCRLabel可通过whl包与Python脚本两种方式启动,whl包形式启动
```bash ```bash
pip install PPOCRLabel # 安装 pip install PPOCRLabel # 安装
PPOCRLabel --lang ch # 运行
# 选择标签模式来启动
PPOCRLabel --lang ch # 启动【普通模式】,用于打【检测+识别】场景的标签
PPOCRLabel --lang ch --kie True # 启动 【KIE 模式】,用于打【检测+识别+关键字提取】场景的标签
``` ```
> 注意:通过whl包安装PPOCRLabel会自动下载 `paddleocr` whl包,其中shapely依赖可能会出现 `[WinError 126] 找不到指定模块` 的错误,建议从[这里](https://www.lfd.uci.edu/~gohlke/pythonlibs/#shapely)下载shapely安装包并手动安装
##### Ubuntu Linux ##### Ubuntu Linux
...@@ -65,13 +68,18 @@ PPOCRLabel --lang ch # 运行 ...@@ -65,13 +68,18 @@ PPOCRLabel --lang ch # 运行
```bash ```bash
pip3 install PPOCRLabel pip3 install PPOCRLabel
pip3 install trash-cli pip3 install trash-cli
PPOCRLabel --lang ch
# 选择标签模式来启动
PPOCRLabel --lang ch # 启动【普通模式】,用于打【检测+识别】场景的标签
PPOCRLabel --lang ch --kie True # 启动 【KIE 模式】,用于打【检测+识别+关键字提取】场景的标签
``` ```
##### MacOS ##### MacOS
```bash ```bash
pip3 install PPOCRLabel pip3 install PPOCRLabel
pip3 install opencv-contrib-python-headless==4.2.0.32 # 如果下载过慢请添加"-i https://mirror.baidu.com/pypi/simple" pip3 install opencv-contrib-python-headless==4.2.0.32 # 如果下载过慢请添加"-i https://mirror.baidu.com/pypi/simple"
# 选择标签模式来启动
PPOCRLabel --lang ch # 启动【普通模式】,用于打【检测+识别】场景的标签 PPOCRLabel --lang ch # 启动【普通模式】,用于打【检测+识别】场景的标签
PPOCRLabel --lang ch --kie True # 启动 【KIE 模式】,用于打【检测+识别+关键字提取】场景的标签 PPOCRLabel --lang ch --kie True # 启动 【KIE 模式】,用于打【检测+识别+关键字提取】场景的标签
``` ```
...@@ -92,6 +100,8 @@ pip3 install dist/PPOCRLabel-1.0.2-py2.py3-none-any.whl -i https://mirror.baidu. ...@@ -92,6 +100,8 @@ pip3 install dist/PPOCRLabel-1.0.2-py2.py3-none-any.whl -i https://mirror.baidu.
```bash ```bash
cd ./PPOCRLabel # 切换到PPOCRLabel目录 cd ./PPOCRLabel # 切换到PPOCRLabel目录
# 选择标签模式来启动
python PPOCRLabel.py --lang ch # 启动【普通模式】,用于打【检测+识别】场景的标签 python PPOCRLabel.py --lang ch # 启动【普通模式】,用于打【检测+识别】场景的标签
python PPOCRLabel.py --lang ch --kie True # 启动 【KIE 模式】,用于打【检测+识别+关键字提取】场景的标签 python PPOCRLabel.py --lang ch --kie True # 启动 【KIE 模式】,用于打【检测+识别+关键字提取】场景的标签
``` ```
...@@ -138,17 +148,18 @@ python PPOCRLabel.py --lang ch --kie True # 启动 【KIE 模式】,用于打 ...@@ -138,17 +148,18 @@ python PPOCRLabel.py --lang ch --kie True # 启动 【KIE 模式】,用于打
### 3.1 快捷键 ### 3.1 快捷键
| 快捷键 | 说明 | | 快捷键 | 说明 |
|------------------|----------------| |------------------|---------------------------------|
| Ctrl + shift + R | 对当前图片的所有标记重新识别 | | Ctrl + shift + R | 对当前图片的所有标记重新识别 |
| W | 新建矩形框 | | W | 新建矩形框 |
| Q | 新建四点框 | | Q | 新建四点框 |
| X | 框逆时针旋转 | | X | 框逆时针旋转 |
| C | 框顺时针旋转 | | C | 框顺时针旋转 |
| Ctrl + E | 编辑所选框标签 | | Ctrl + E | 编辑所选框标签 |
| Ctrl + X | `--kie` 模式下,修改 Box 的关键字种类 |
| Ctrl + R | 重新识别所选标记 | | Ctrl + R | 重新识别所选标记 |
| Ctrl + C | 复制并粘贴选中的标记框 | | Ctrl + C | 复制并粘贴选中的标记框 |
| Ctrl + 鼠标左键 | 多选标记框 | | Ctrl + 鼠标左键 | 多选标记框 |
| Backspace | 删除所选框 | | Backspace | 删除所选框 |
| Ctrl + V | 确认本张图片标记 | | Ctrl + V | 确认本张图片标记 |
| Ctrl + Shift + d | 删除本张图片 | | Ctrl + Shift + d | 删除本张图片 |
| D | 下一张图片 | | D | 下一张图片 |
...@@ -157,6 +168,7 @@ python PPOCRLabel.py --lang ch --kie True # 启动 【KIE 模式】,用于打 ...@@ -157,6 +168,7 @@ python PPOCRLabel.py --lang ch --kie True # 启动 【KIE 模式】,用于打
| Ctrl-- | 放大 | | Ctrl-- | 放大 |
| ↑→↓← | 移动标记框 | | ↑→↓← | 移动标记框 |
### 3.2 内置模型 ### 3.2 内置模型
- 默认模型:PPOCRLabel默认使用PaddleOCR中的中英文超轻量OCR模型,支持中英文与数字识别,多种语言检测。 - 默认模型:PPOCRLabel默认使用PaddleOCR中的中英文超轻量OCR模型,支持中英文与数字识别,多种语言检测。
......
...@@ -546,7 +546,7 @@ class Canvas(QWidget): ...@@ -546,7 +546,7 @@ class Canvas(QWidget):
# Give up if both fail. # Give up if both fail.
for shape in shapes: for shape in shapes:
point = shape[0] point = shape[0]
offset = QPointF(2.0, 2.0) offset = QPointF(5.0, 5.0)
self.calculateOffsets(shape, point) self.calculateOffsets(shape, point)
self.prevPoint = point self.prevPoint = point
if not self.boundedMoveShape(shape, point - offset): if not self.boundedMoveShape(shape, point - offset):
......
# -*- encoding: utf-8 -*- # -*- encoding: utf-8 -*-
from PyQt5.QtCore import Qt from PyQt5.QtCore import Qt, QSize
from PyQt5 import QtWidgets from PyQt5 import QtWidgets
...@@ -40,6 +40,7 @@ class UniqueLabelQListWidget(EscapableQListWidget): ...@@ -40,6 +40,7 @@ class UniqueLabelQListWidget(EscapableQListWidget):
qlabel.setText('<font color="#{:02x}{:02x}{:02x}">●</font> {} '.format(*color, label)) qlabel.setText('<font color="#{:02x}{:02x}{:02x}">●</font> {} '.format(*color, label))
qlabel.setAlignment(Qt.AlignBottom) qlabel.setAlignment(Qt.AlignBottom)
item.setSizeHint(qlabel.sizeHint()) # item.setSizeHint(qlabel.sizeHint())
item.setSizeHint(QSize(25, 25))
self.setItemWidget(item, qlabel) self.setItemWidget(item, qlabel)
...@@ -32,7 +32,7 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力 ...@@ -32,7 +32,7 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力
- PP-OCR系列高质量预训练模型,准确的识别效果 - PP-OCR系列高质量预训练模型,准确的识别效果
- 超轻量PP-OCRv2系列:检测(3.1M)+ 方向分类器(1.4M)+ 识别(8.5M)= 13.0M - 超轻量PP-OCRv2系列:检测(3.1M)+ 方向分类器(1.4M)+ 识别(8.5M)= 13.0M
- 超轻量PP-OCR mobile移动端系列:检测(3.0M)+方向分类器(1.4M)+ 识别(5.0M)= 9.4M - 超轻量PP-OCR mobile移动端系列:检测(3.0M)+方向分类器(1.4M)+ 识别(5.0M)= 9.4M
- 通用PPOCR server系列:检测(47.1M)+方向分类器(1.4M)+ 识别(94.9M)= 143.4M - 通用PP-OCR server系列:检测(47.1M)+方向分类器(1.4M)+ 识别(94.9M)= 143.4M
- 支持中英文数字组合识别、竖排文本识别、长文本识别 - 支持中英文数字组合识别、竖排文本识别、长文本识别
- 支持多语言识别:韩语、日语、德语、法语等约80种语言 - 支持多语言识别:韩语、日语、德语、法语等约80种语言
- PP-Structure文档结构化系统 - PP-Structure文档结构化系统
......
#!/usr/bin/env bash #!/usr/bin/env bash
set -xe
# 运行示例:CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} 500 ${model_mode} # 运行示例:CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} 500 ${model_mode}
# 参数说明 # 参数说明
function _set_params(){ function _set_params(){
...@@ -34,11 +33,13 @@ function _train(){ ...@@ -34,11 +33,13 @@ function _train(){
train_cmd="python tools/train.py "${train_cmd}"" train_cmd="python tools/train.py "${train_cmd}""
;; ;;
mp) mp)
rm -rf ./mylog
train_cmd="python -m paddle.distributed.launch --log_dir=./mylog --gpus=$CUDA_VISIBLE_DEVICES tools/train.py ${train_cmd}" train_cmd="python -m paddle.distributed.launch --log_dir=./mylog --gpus=$CUDA_VISIBLE_DEVICES tools/train.py ${train_cmd}"
;; ;;
*) echo "choose run_mode(sp or mp)"; exit 1; *) echo "choose run_mode(sp or mp)"; exit 1;
esac esac
# 以下不用修改 # 以下不用修改
echo ${train_cmd}
timeout 15m ${train_cmd} > ${log_file} 2>&1 timeout 15m ${train_cmd} > ${log_file} 2>&1
if [ $? -ne 0 ];then if [ $? -ne 0 ];then
echo -e "${model_name}, FAIL" echo -e "${model_name}, FAIL"
......
...@@ -56,7 +56,7 @@ PostProcess: ...@@ -56,7 +56,7 @@ PostProcess:
thresh: 0 thresh: 0
box_thresh: 0.85 box_thresh: 0.85
min_area: 16 min_area: 16
box_type: box # 'box' or 'poly' box_type: quad # 'quad' or 'poly'
scale: 1 scale: 1
Metric: Metric:
......
Global:
use_gpu: true
epoch_num: 1500
log_smooth_window: 20
print_batch_step: 20
save_model_dir: ./output/det_r50_dcn_fce_ctw/
save_epoch_step: 100
# evaluation is run every 835 iterations
eval_batch_step: [0, 835]
cal_metric_during_train: False
pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained
checkpoints:
save_inference_dir:
use_visualdl: False
infer_img: doc/imgs_en/img_10.jpg
save_res_path: ./output/det_fce/predicts_fce.txt
Architecture:
model_type: det
algorithm: FCE
Transform:
Backbone:
name: ResNet
layers: 50
dcn_stage: [False, True, True, True]
out_indices: [1,2,3]
Neck:
name: FCEFPN
out_channels: 256
has_extra_convs: False
extra_stage: 0
Head:
name: FCEHead
fourier_degree: 5
Loss:
name: FCELoss
fourier_degree: 5
num_sample: 50
Optimizer:
name: Adam
beta1: 0.9
beta2: 0.999
lr:
learning_rate: 0.0001
regularizer:
name: 'L2'
factor: 0
PostProcess:
name: FCEPostProcess
scales: [8, 16, 32]
alpha: 1.0
beta: 1.0
fourier_degree: 5
box_type: 'poly'
Metric:
name: DetFCEMetric
main_indicator: hmean
Train:
dataset:
name: SimpleDataSet
data_dir: ./train_data/ctw1500/imgs/
label_file_list:
- ./train_data/ctw1500/imgs/training.txt
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
ignore_orientation: True
- DetLabelEncode: # Class handling label
- ColorJitter:
brightness: 0.142
saturation: 0.5
contrast: 0.5
- RandomScaling:
- RandomCropFlip:
crop_ratio: 0.5
- RandomCropPolyInstances:
crop_ratio: 0.8
min_side_ratio: 0.3
- RandomRotatePolyInstances:
rotate_ratio: 0.5
max_angle: 30
pad_with_fixed_color: False
- SquareResizePad:
target_size: 800
pad_ratio: 0.6
- IaaAugment:
augmenter_args:
- { 'type': Fliplr, 'args': { 'p': 0.5 } }
- FCENetTargets:
fourier_degree: 5
- NormalizeImage:
scale: 1./255.
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: 'hwc'
- ToCHWImage:
- KeepKeys:
keep_keys: ['image', 'p3_maps', 'p4_maps', 'p5_maps'] # dataloader will return list in this order
loader:
shuffle: True
drop_last: False
batch_size_per_card: 6
num_workers: 8
Eval:
dataset:
name: SimpleDataSet
data_dir: ./train_data/ctw1500/imgs/
label_file_list:
- ./train_data/ctw1500/imgs/test.txt
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
ignore_orientation: True
- DetLabelEncode: # Class handling label
- DetResizeForTest:
limit_type: 'min'
limit_side_len: 736
- NormalizeImage:
scale: 1./255.
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: 'hwc'
- Pad:
- ToCHWImage:
- KeepKeys:
keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
loader:
shuffle: False
drop_last: False
batch_size_per_card: 1 # must be 1
num_workers: 2
\ No newline at end of file
...@@ -55,7 +55,7 @@ PostProcess: ...@@ -55,7 +55,7 @@ PostProcess:
thresh: 0 thresh: 0
box_thresh: 0.85 box_thresh: 0.85
min_area: 16 min_area: 16
box_type: box # 'box' or 'poly' box_type: quad # 'quad' or 'poly'
scale: 1 scale: 1
Metric: Metric:
......
Global:
use_gpu: True
epoch_num: 8
log_smooth_window: 20
print_batch_step: 5
save_model_dir: ./output/rec/pren_new
save_epoch_step: 3
# evaluation is run every 2000 iterations after the 4000th iteration
eval_batch_step: [4000, 2000]
cal_metric_during_train: True
pretrained_model:
checkpoints:
save_inference_dir:
use_visualdl: False
infer_img: doc/imgs_words/ch/word_1.jpg
# for data or label process
character_dict_path:
max_text_length: &max_text_length 25
infer_mode: False
use_space_char: False
save_res_path: ./output/rec/predicts_pren.txt
Optimizer:
name: Adadelta
lr:
name: Piecewise
decay_epochs: [2, 5, 7]
values: [0.5, 0.1, 0.01, 0.001]
Architecture:
model_type: rec
algorithm: PREN
in_channels: 3
Backbone:
name: EfficientNetb3_PREN
Neck:
name: PRENFPN
n_r: 5
d_model: 384
max_len: *max_text_length
dropout: 0.1
Head:
name: PRENHead
Loss:
name: PRENLoss
PostProcess:
name: PRENLabelDecode
Metric:
name: RecMetric
main_indicator: acc
Train:
dataset:
name: LMDBDataSet
data_dir: ./train_data/data_lmdb_release/training/
transforms:
- DecodeImage:
img_mode: BGR
channel_first: False
- PRENLabelEncode:
- RecAug:
- PRENResizeImg:
image_shape: [64, 256] # h,w
- KeepKeys:
keep_keys: ['image', 'label']
loader:
shuffle: True
batch_size_per_card: 128
drop_last: True
num_workers: 8
Eval:
dataset:
name: LMDBDataSet
data_dir: ./train_data/data_lmdb_release/validation/
transforms:
- DecodeImage:
img_mode: BGR
channel_first: False
- PRENLabelEncode:
- PRENResizeImg:
image_shape: [64, 256] # h,w
- KeepKeys:
keep_keys: ['image', 'label']
loader:
shuffle: False
drop_last: False
batch_size_per_card: 64
num_workers: 8
# 如何快速测试 - [Android Demo](#android-demo)
### 1. 安装最新版本的Android Studio - [1. 简介](#1-简介)
可以从 https://developer.android.com/studio 下载。本Demo使用是4.0版本Android Studio编写。 - [2. 近期更新](#2-近期更新)
- [3. 快速使用](#3-快速使用)
- [3.1 环境准备](#31-环境准备)
- [3.2 导入项目](#32-导入项目)
- [3.3 运行demo](#33-运行demo)
- [3.4 运行模式](#34-运行模式)
- [3.5 设置](#35-设置)
- [4 更多支持](#4-更多支持)
### 2. 按照NDK 20 以上版本 # Android Demo
Demo测试的时候使用的是NDK 20b版本,20版本以上均可以支持编译成功。
如果您是初学者,可以用以下方式安装和测试NDK编译环境。 ## 1. 简介
点击 File -> New ->New Project, 新建 "Native C++" project 此为PaddleOCR的Android Demo,目前支持文本检测,文本方向分类器和文本识别模型的使用。使用 [PaddleLite v2.10](https://github.com/PaddlePaddle/Paddle-Lite/tree/release/v2.10) 进行开发。
## 2. 近期更新
* 2022.02.27
* 预测库更新到PaddleLite v2.10
* 支持6种运行模式:
* 检测+分类+识别
* 检测+识别
* 分类+识别
* 检测
* 识别
* 分类
## 3. 快速使用
### 3.1 环境准备
1. 在本地环境安装好 Android Studio 工具,详细安装方法请见[Android Studio 官网](https://developer.android.com/studio)。
2. 准备一部 Android 手机,并开启 USB 调试模式。开启方法: `手机设置 -> 查找开发者选项 -> 打开开发者选项和 USB 调试模式`
**注意**:如果您的 Android Studio 尚未配置 NDK,请根据 Android Studio 用户指南中的[安装及配置 NDK 和 CMake](https://developer.android.com/studio/projects/install-ndk)内容,预先配置好 NDK。您可以选择最新的 NDK 版本,或者使用与 Paddle Lite 预测库相同版本的 NDK。
### 3.2 导入项目
### 3. 导入项目
点击 File->New->Import Project..., 然后跟着Android Studio的引导导入 点击 File->New->Import Project..., 然后跟着Android Studio的引导导入
导入完成后呈现如下界面
![](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/imgs/import_demo.jpg)
### 3.3 运行demo
将手机连接上电脑后,点击Android Studio工具栏中的运行按钮即可运行demo。在此过程中,手机会弹出"允许从 USB 安装软件权限"的弹窗,点击允许即可。
软件安装到手机上后,会在手机主屏最后一页看到如下app
<div align="left">
<img src="https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/imgs/install_finish.jpeg" width="400">
</div>
点击app图标即可启动app,启动后app主页如下
<div align="left">
<img src="https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/imgs/main_page.jpg" width="400">
</div>
app主页中有四个按钮,一个下拉列表和一个菜单按钮,他们的功能分别为
* 运行模型:按照已选择的模式,运行对应的模型组合
* 拍照识别:唤起手机相机拍照并获取拍照的图像,拍照完成后需要点击运行模型进行识别
* 选取图片:唤起手机相册选择图像,选择完成后需要点击运行模型进行识别
* 清空绘图:清空当前显示图像上绘制的文本框,以便进行下一次识别(每次识别使用的图像都是当前显示的图像)
* 下拉列表:进行运行模式的选择,目前包含6种运行模式,默认模式为**检测+分类+识别**,详细说明见下一节。
* 菜单按钮:点击后会进入菜单界面,进行模型和内置图像有关设置
点击运行模型后,会按照所选择的模式运行对应的模型,**检测+分类+识别**模式下运行的模型结果如下所示:
<img src="https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/imgs/run_det_cls_rec.jpg" width="400">
模型运行完成后,模型和运行状态显示区`STATUS`字段显示了当前模型的运行状态,这里显示为`run model successed`表明模型运行成功。
模型的运行结果显示在运行结果显示区,显示格式为
```text
序号:Det:(x1,y1)(x2,y2)(x3,y3)(x4,y4) Rec: 识别文本,识别置信度 Cls:分类类别,分类分数
```
### 3.4 运行模式
PaddleOCR demo共提供了6种运行模式,如下图
<div align="left">
<img src="https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/imgs/select_mode.jpg" width="400">
</div>
每种模式的运行结果如下表所示
| 检测+分类+识别 | 检测+识别 | 分类+识别 |
|------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------|
| <img src="https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/imgs/run_det_cls_rec.jpg" width="400"> | <img src="https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/imgs/run_det_rec.jpg" width="400"> | <img src="https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/imgs/run_cls_rec.jpg" width="400"> |
| 检测 | 识别 | 分类 |
|----------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------|
| <img src="https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/imgs/run_det.jpg" width="400"> | <img src="https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/imgs/run_rec.jpg" width="400"> | <img src="https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/imgs/run_cls.jpg" width="400"> |
### 3.5 设置
设置界面如下
<div align="left">
<img src="https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/imgs/settings.jpg" width="400">
</div>
# 获得更多支持 在设置界面可以进行如下几项设定:
前往[端计算模型生成平台EasyEdge](https://ai.baidu.com/easyedge/app/open_source_demo?referrerUrl=paddlelite),获得更多开发支持: 1. 普通设置
* Enable custom settings: 选中状态下才能更改设置
* Model Path: 所运行的模型地址,使用默认值就好
* Label Path: 识别模型的字典
* Image Path: 进行识别的内置图像名
2. 模型运行态设置,此项设置更改后返回主界面时,会自动重新加载模型
* CPU Thread Num: 模型运行使用的CPU核心数量
* CPU Power Mode: 模型运行模式,大小核设定
3. 输入设置
* det long size: DB模型预处理时图像的长边长度,超过此长度会将长边 resize 到该值,短边等比例缩放;小于此长度则不进行处理(缩放逻辑可参考下方的示意代码)。
4. 输出设置
* Score Threshold: DB模型后处理box的阈值,低于此阈值的box进行过滤,不显示。
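A minimal Python sketch of how the "det long size" preprocessing can be read: the long side of the input image is capped at the configured value and both sides are aligned to multiples of 32 before DB detection, similar in spirit to the `DetResizeImg` routine added in the C++ sources of this diff. The function name, the default value 960, and the simplified rounding are assumptions for illustration, not the app's actual implementation.

```python
import cv2  # assumed dependency; any image library with a resize would do

def det_resize_img(img, max_size_len=960):
    """Cap the long side at max_size_len and align both sides to multiples of 32."""
    h, w = img.shape[:2]
    ratio = 1.0
    if max(h, w) > max_size_len:
        ratio = float(max_size_len) / max(h, w)
    # rounding is simplified compared with the C++ DetResizeImg in this diff
    resize_h = max(32, int(h * ratio) // 32 * 32)
    resize_w = max(32, int(w * ratio) // 32 * 32)
    resized = cv2.resize(img, (resize_w, resize_h))
    # the ratios allow mapping predicted boxes back onto the original image
    return resized, (resize_h / float(h), resize_w / float(w))
```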
- Demo APP:可使用手机扫码安装,方便手机端快速体验文字识别 ## 4 更多支持
- SDK:模型被封装为适配不同芯片硬件和操作系统SDK,包括完善的接口,方便进行二次开发 1. 实时识别,更新预测库可参考 https://github.com/PaddlePaddle/Paddle-Lite-Demo/tree/develop/ocr/android/app/cxx/ppocr_demo
2. 更多Paddle-Lite相关问题可前往[Paddle-Lite](https://github.com/PaddlePaddle/Paddle-Lite) ,获得更多开发支持
...@@ -8,8 +8,8 @@ android { ...@@ -8,8 +8,8 @@ android {
applicationId "com.baidu.paddle.lite.demo.ocr" applicationId "com.baidu.paddle.lite.demo.ocr"
minSdkVersion 23 minSdkVersion 23
targetSdkVersion 29 targetSdkVersion 29
versionCode 1 versionCode 2
versionName "1.0" versionName "2.0"
testInstrumentationRunner "android.support.test.runner.AndroidJUnitRunner" testInstrumentationRunner "android.support.test.runner.AndroidJUnitRunner"
externalNativeBuild { externalNativeBuild {
cmake { cmake {
...@@ -17,11 +17,6 @@ android { ...@@ -17,11 +17,6 @@ android {
arguments '-DANDROID_PLATFORM=android-23', '-DANDROID_STL=c++_shared' ,"-DANDROID_ARM_NEON=TRUE" arguments '-DANDROID_PLATFORM=android-23', '-DANDROID_STL=c++_shared' ,"-DANDROID_ARM_NEON=TRUE"
} }
} }
ndk {
// abiFilters "arm64-v8a", "armeabi-v7a"
abiFilters "arm64-v8a", "armeabi-v7a"
ldLibs "jnigraphics"
}
} }
buildTypes { buildTypes {
release { release {
...@@ -48,7 +43,7 @@ dependencies { ...@@ -48,7 +43,7 @@ dependencies {
def archives = [ def archives = [
[ [
'src' : 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/paddle_lite_libs_v2_9_0.tar.gz', 'src' : 'https://paddleocr.bj.bcebos.com/libs/paddle_lite_libs_v2_10.tar.gz',
'dest': 'PaddleLite' 'dest': 'PaddleLite'
], ],
[ [
...@@ -56,7 +51,7 @@ def archives = [ ...@@ -56,7 +51,7 @@ def archives = [
'dest': 'OpenCV' 'dest': 'OpenCV'
], ],
[ [
'src' : 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ocr_v2_for_cpu.tar.gz', 'src' : 'https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2.tar.gz',
'dest' : 'src/main/assets/models' 'dest' : 'src/main/assets/models'
], ],
[ [
......
...@@ -14,7 +14,6 @@ ...@@ -14,7 +14,6 @@
android:roundIcon="@mipmap/ic_launcher_round" android:roundIcon="@mipmap/ic_launcher_round"
android:supportsRtl="true" android:supportsRtl="true"
android:theme="@style/AppTheme"> android:theme="@style/AppTheme">
<!-- to test MiniActivity, change this to com.baidu.paddle.lite.demo.ocr.MiniActivity -->
<activity android:name="com.baidu.paddle.lite.demo.ocr.MainActivity"> <activity android:name="com.baidu.paddle.lite.demo.ocr.MainActivity">
<intent-filter> <intent-filter>
<action android:name="android.intent.action.MAIN"/> <action android:name="android.intent.action.MAIN"/>
......
...@@ -13,7 +13,7 @@ static paddle::lite_api::PowerMode str_to_cpu_mode(const std::string &cpu_mode); ...@@ -13,7 +13,7 @@ static paddle::lite_api::PowerMode str_to_cpu_mode(const std::string &cpu_mode);
extern "C" JNIEXPORT jlong JNICALL extern "C" JNIEXPORT jlong JNICALL
Java_com_baidu_paddle_lite_demo_ocr_OCRPredictorNative_init( Java_com_baidu_paddle_lite_demo_ocr_OCRPredictorNative_init(
JNIEnv *env, jobject thiz, jstring j_det_model_path, JNIEnv *env, jobject thiz, jstring j_det_model_path,
jstring j_rec_model_path, jstring j_cls_model_path, jint j_thread_num, jstring j_rec_model_path, jstring j_cls_model_path, jint j_use_opencl, jint j_thread_num,
jstring j_cpu_mode) { jstring j_cpu_mode) {
std::string det_model_path = jstring_to_cpp_string(env, j_det_model_path); std::string det_model_path = jstring_to_cpp_string(env, j_det_model_path);
std::string rec_model_path = jstring_to_cpp_string(env, j_rec_model_path); std::string rec_model_path = jstring_to_cpp_string(env, j_rec_model_path);
...@@ -21,6 +21,7 @@ Java_com_baidu_paddle_lite_demo_ocr_OCRPredictorNative_init( ...@@ -21,6 +21,7 @@ Java_com_baidu_paddle_lite_demo_ocr_OCRPredictorNative_init(
int thread_num = j_thread_num; int thread_num = j_thread_num;
std::string cpu_mode = jstring_to_cpp_string(env, j_cpu_mode); std::string cpu_mode = jstring_to_cpp_string(env, j_cpu_mode);
ppredictor::OCR_Config conf; ppredictor::OCR_Config conf;
conf.use_opencl = j_use_opencl;
conf.thread_num = thread_num; conf.thread_num = thread_num;
conf.mode = str_to_cpu_mode(cpu_mode); conf.mode = str_to_cpu_mode(cpu_mode);
ppredictor::OCR_PPredictor *orc_predictor = ppredictor::OCR_PPredictor *orc_predictor =
...@@ -57,32 +58,31 @@ str_to_cpu_mode(const std::string &cpu_mode) { ...@@ -57,32 +58,31 @@ str_to_cpu_mode(const std::string &cpu_mode) {
extern "C" JNIEXPORT jfloatArray JNICALL extern "C" JNIEXPORT jfloatArray JNICALL
Java_com_baidu_paddle_lite_demo_ocr_OCRPredictorNative_forward( Java_com_baidu_paddle_lite_demo_ocr_OCRPredictorNative_forward(
JNIEnv *env, jobject thiz, jlong java_pointer, jfloatArray buf, JNIEnv *env, jobject thiz, jlong java_pointer, jobject original_image,jint j_max_size_len, jint j_run_det, jint j_run_cls, jint j_run_rec) {
jfloatArray ddims, jobject original_image) {
LOGI("begin to run native forward"); LOGI("begin to run native forward");
if (java_pointer == 0) { if (java_pointer == 0) {
LOGE("JAVA pointer is NULL"); LOGE("JAVA pointer is NULL");
return cpp_array_to_jfloatarray(env, nullptr, 0); return cpp_array_to_jfloatarray(env, nullptr, 0);
} }
cv::Mat origin = bitmap_to_cv_mat(env, original_image); cv::Mat origin = bitmap_to_cv_mat(env, original_image);
if (origin.size == 0) { if (origin.size == 0) {
LOGE("origin bitmap cannot convert to CV Mat"); LOGE("origin bitmap cannot convert to CV Mat");
return cpp_array_to_jfloatarray(env, nullptr, 0); return cpp_array_to_jfloatarray(env, nullptr, 0);
} }
int max_size_len = j_max_size_len;
int run_det = j_run_det;
int run_cls = j_run_cls;
int run_rec = j_run_rec;
ppredictor::OCR_PPredictor *ppredictor = ppredictor::OCR_PPredictor *ppredictor =
(ppredictor::OCR_PPredictor *)java_pointer; (ppredictor::OCR_PPredictor *)java_pointer;
std::vector<float> dims_float_arr = jfloatarray_to_float_vector(env, ddims);
std::vector<int64_t> dims_arr; std::vector<int64_t> dims_arr;
dims_arr.resize(dims_float_arr.size());
std::copy(dims_float_arr.cbegin(), dims_float_arr.cend(), dims_arr.begin());
// 这里值有点大,就不调用jfloatarray_to_float_vector了
int64_t buf_len = (int64_t)env->GetArrayLength(buf);
jfloat *buf_data = env->GetFloatArrayElements(buf, JNI_FALSE);
float *data = (jfloat *)buf_data;
std::vector<ppredictor::OCRPredictResult> results = std::vector<ppredictor::OCRPredictResult> results =
ppredictor->infer_ocr(dims_arr, data, buf_len, NET_OCR, origin); ppredictor->infer_ocr(origin, max_size_len, run_det, run_cls, run_rec);
LOGI("infer_ocr finished with boxes %ld", results.size()); LOGI("infer_ocr finished with boxes %ld", results.size());
// 这里将std::vector<ppredictor::OCRPredictResult> 序列化成 // 这里将std::vector<ppredictor::OCRPredictResult> 序列化成
// float数组,传输到java层再反序列化 // float数组,传输到java层再反序列化
std::vector<float> float_arr; std::vector<float> float_arr;
...@@ -90,13 +90,18 @@ Java_com_baidu_paddle_lite_demo_ocr_OCRPredictorNative_forward( ...@@ -90,13 +90,18 @@ Java_com_baidu_paddle_lite_demo_ocr_OCRPredictorNative_forward(
float_arr.push_back(r.points.size()); float_arr.push_back(r.points.size());
float_arr.push_back(r.word_index.size()); float_arr.push_back(r.word_index.size());
float_arr.push_back(r.score); float_arr.push_back(r.score);
// add det point
for (const std::vector<int> &point : r.points) { for (const std::vector<int> &point : r.points) {
float_arr.push_back(point.at(0)); float_arr.push_back(point.at(0));
float_arr.push_back(point.at(1)); float_arr.push_back(point.at(1));
} }
// add rec word idx
for (int index : r.word_index) { for (int index : r.word_index) {
float_arr.push_back(index); float_arr.push_back(index);
} }
// add cls result
float_arr.push_back(r.cls_label);
float_arr.push_back(r.cls_score);
} }
return cpp_array_to_jfloatarray(env, float_arr.data(), float_arr.size()); return cpp_array_to_jfloatarray(env, float_arr.data(), float_arr.size());
} }
......
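To make the serialization layout above easier to follow: each `OCRPredictResult` is flattened as `points.size()`, `word_index.size()`, `score`, then the (x, y) pairs, then the word indices, and finally `cls_label` and `cls_score`. The sketch below decodes such a flat array; it is an illustration of the layout only (the demo app parses it in Java), and the function name is hypothetical.

```python
def parse_ocr_float_array(arr):
    """Decode the flat float array produced by the JNI forward() serialization above."""
    results, i = [], 0
    while i < len(arr):
        point_num, word_num, score = int(arr[i]), int(arr[i + 1]), arr[i + 2]
        i += 3
        # point_num (x, y) pairs of the detected box
        points = [(arr[i + 2 * k], arr[i + 2 * k + 1]) for k in range(point_num)]
        i += 2 * point_num
        # word_num recognition indices
        word_index = [int(v) for v in arr[i:i + word_num]]
        i += word_num
        # classifier label and score appended per result in this diff
        cls_label, cls_score = int(arr[i]), arr[i + 1]
        i += 2
        results.append({"points": points, "word_index": word_index,
                        "score": score, "cls_label": cls_label, "cls_score": cls_score})
    return results
```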
...@@ -17,15 +17,15 @@ int OCR_PPredictor::init(const std::string &det_model_content, ...@@ -17,15 +17,15 @@ int OCR_PPredictor::init(const std::string &det_model_content,
const std::string &rec_model_content, const std::string &rec_model_content,
const std::string &cls_model_content) { const std::string &cls_model_content) {
_det_predictor = std::unique_ptr<PPredictor>( _det_predictor = std::unique_ptr<PPredictor>(
new PPredictor{_config.thread_num, NET_OCR, _config.mode}); new PPredictor{_config.use_opencl,_config.thread_num, NET_OCR, _config.mode});
_det_predictor->init_nb(det_model_content); _det_predictor->init_nb(det_model_content);
_rec_predictor = std::unique_ptr<PPredictor>( _rec_predictor = std::unique_ptr<PPredictor>(
new PPredictor{_config.thread_num, NET_OCR_INTERNAL, _config.mode}); new PPredictor{_config.use_opencl,_config.thread_num, NET_OCR_INTERNAL, _config.mode});
_rec_predictor->init_nb(rec_model_content); _rec_predictor->init_nb(rec_model_content);
_cls_predictor = std::unique_ptr<PPredictor>( _cls_predictor = std::unique_ptr<PPredictor>(
new PPredictor{_config.thread_num, NET_OCR_INTERNAL, _config.mode}); new PPredictor{_config.use_opencl,_config.thread_num, NET_OCR_INTERNAL, _config.mode});
_cls_predictor->init_nb(cls_model_content); _cls_predictor->init_nb(cls_model_content);
return RETURN_OK; return RETURN_OK;
} }
...@@ -34,15 +34,16 @@ int OCR_PPredictor::init_from_file(const std::string &det_model_path, ...@@ -34,15 +34,16 @@ int OCR_PPredictor::init_from_file(const std::string &det_model_path,
const std::string &rec_model_path, const std::string &rec_model_path,
const std::string &cls_model_path) { const std::string &cls_model_path) {
_det_predictor = std::unique_ptr<PPredictor>( _det_predictor = std::unique_ptr<PPredictor>(
new PPredictor{_config.thread_num, NET_OCR, _config.mode}); new PPredictor{_config.use_opencl, _config.thread_num, NET_OCR, _config.mode});
_det_predictor->init_from_file(det_model_path); _det_predictor->init_from_file(det_model_path);
_rec_predictor = std::unique_ptr<PPredictor>( _rec_predictor = std::unique_ptr<PPredictor>(
new PPredictor{_config.thread_num, NET_OCR_INTERNAL, _config.mode}); new PPredictor{_config.use_opencl,_config.thread_num, NET_OCR_INTERNAL, _config.mode});
_rec_predictor->init_from_file(rec_model_path); _rec_predictor->init_from_file(rec_model_path);
_cls_predictor = std::unique_ptr<PPredictor>( _cls_predictor = std::unique_ptr<PPredictor>(
new PPredictor{_config.thread_num, NET_OCR_INTERNAL, _config.mode}); new PPredictor{_config.use_opencl,_config.thread_num, NET_OCR_INTERNAL, _config.mode});
_cls_predictor->init_from_file(cls_model_path); _cls_predictor->init_from_file(cls_model_path);
return RETURN_OK; return RETURN_OK;
} }
...@@ -77,33 +78,126 @@ visual_img(const std::vector<std::vector<std::vector<int>>> &filter_boxes, ...@@ -77,33 +78,126 @@ visual_img(const std::vector<std::vector<std::vector<int>>> &filter_boxes,
} }
std::vector<OCRPredictResult> std::vector<OCRPredictResult>
OCR_PPredictor::infer_ocr(const std::vector<int64_t> &dims, OCR_PPredictor::infer_ocr(cv::Mat &origin,int max_size_len, int run_det, int run_cls, int run_rec) {
const float *input_data, int input_len, int net_flag, LOGI("ocr cpp start *****************");
cv::Mat &origin) { LOGI("ocr cpp det: %d, cls: %d, rec: %d", run_det, run_cls, run_rec);
std::vector<OCRPredictResult> ocr_results;
if(run_det){
infer_det(origin, max_size_len, ocr_results);
}
if(run_rec){
if(ocr_results.size()==0){
OCRPredictResult res;
ocr_results.emplace_back(std::move(res));
}
for(int i = 0; i < ocr_results.size();i++) {
infer_rec(origin, run_cls, ocr_results[i]);
}
}else if(run_cls){
ClsPredictResult cls_res = infer_cls(origin);
OCRPredictResult res;
res.cls_score = cls_res.cls_score;
res.cls_label = cls_res.cls_label;
ocr_results.push_back(res);
}
LOGI("ocr cpp end *****************");
return ocr_results;
}
cv::Mat DetResizeImg(const cv::Mat img, int max_size_len,
std::vector<float> &ratio_hw) {
int w = img.cols;
int h = img.rows;
float ratio = 1.f;
int max_wh = w >= h ? w : h;
if (max_wh > max_size_len) {
if (h > w) {
ratio = static_cast<float>(max_size_len) / static_cast<float>(h);
} else {
ratio = static_cast<float>(max_size_len) / static_cast<float>(w);
}
}
int resize_h = static_cast<int>(float(h) * ratio);
int resize_w = static_cast<int>(float(w) * ratio);
if (resize_h % 32 == 0)
resize_h = resize_h;
else if (resize_h / 32 < 1 + 1e-5)
resize_h = 32;
else
resize_h = (resize_h / 32 - 1) * 32;
if (resize_w % 32 == 0)
resize_w = resize_w;
else if (resize_w / 32 < 1 + 1e-5)
resize_w = 32;
else
resize_w = (resize_w / 32 - 1) * 32;
cv::Mat resize_img;
cv::resize(img, resize_img, cv::Size(resize_w, resize_h));
ratio_hw.push_back(static_cast<float>(resize_h) / static_cast<float>(h));
ratio_hw.push_back(static_cast<float>(resize_w) / static_cast<float>(w));
return resize_img;
}
void OCR_PPredictor::infer_det(cv::Mat &origin, int max_size_len, std::vector<OCRPredictResult> &ocr_results) {
std::vector<float> mean = {0.485f, 0.456f, 0.406f};
std::vector<float> scale = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f};
PredictorInput input = _det_predictor->get_first_input(); PredictorInput input = _det_predictor->get_first_input();
input.set_dims(dims);
input.set_data(input_data, input_len); std::vector<float> ratio_hw;
cv::Mat input_image = DetResizeImg(origin, max_size_len, ratio_hw);
input_image.convertTo(input_image, CV_32FC3, 1 / 255.0f);
const float *dimg = reinterpret_cast<const float *>(input_image.data);
int input_size = input_image.rows * input_image.cols;
input.set_dims({1, 3, input_image.rows, input_image.cols});
neon_mean_scale(dimg, input.get_mutable_float_data(), input_size, mean,
scale);
LOGI("ocr cpp det shape %d,%d", input_image.rows,input_image.cols);
std::vector<PredictorOutput> results = _det_predictor->infer(); std::vector<PredictorOutput> results = _det_predictor->infer();
PredictorOutput &res = results.at(0); PredictorOutput &res = results.at(0);
std::vector<std::vector<std::vector<int>>> filtered_box = calc_filtered_boxes( std::vector<std::vector<std::vector<int>>> filtered_box = calc_filtered_boxes(
res.get_float_data(), res.get_size(), (int)dims[2], (int)dims[3], origin); res.get_float_data(), res.get_size(), input_image.rows, input_image.cols, origin);
LOGI("Filter_box size %ld", filtered_box.size()); LOGI("ocr cpp det Filter_box size %ld", filtered_box.size());
return infer_rec(filtered_box, origin);
for(int i = 0;i<filtered_box.size();i++){
LOGI("ocr cpp box %d,%d,%d,%d,%d,%d,%d,%d", filtered_box[i][0][0],filtered_box[i][0][1], filtered_box[i][1][0],filtered_box[i][1][1], filtered_box[i][2][0],filtered_box[i][2][1], filtered_box[i][3][0],filtered_box[i][3][1]);
OCRPredictResult res;
res.points = filtered_box[i];
ocr_results.push_back(res);
}
} }
std::vector<OCRPredictResult> OCR_PPredictor::infer_rec( void OCR_PPredictor::infer_rec(const cv::Mat &origin_img, int run_cls, OCRPredictResult& ocr_result) {
const std::vector<std::vector<std::vector<int>>> &boxes,
const cv::Mat &origin_img) {
std::vector<float> mean = {0.5f, 0.5f, 0.5f}; std::vector<float> mean = {0.5f, 0.5f, 0.5f};
std::vector<float> scale = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f}; std::vector<float> scale = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f};
std::vector<int64_t> dims = {1, 3, 0, 0}; std::vector<int64_t> dims = {1, 3, 0, 0};
std::vector<OCRPredictResult> ocr_results;
PredictorInput input = _rec_predictor->get_first_input(); PredictorInput input = _rec_predictor->get_first_input();
for (auto bp = boxes.crbegin(); bp != boxes.crend(); ++bp) {
const std::vector<std::vector<int>> &box = *bp; const std::vector<std::vector<int>> &box = ocr_result.points;
cv::Mat crop_img = get_rotate_crop_image(origin_img, box); cv::Mat crop_img;
crop_img = infer_cls(crop_img); if(box.size()>0){
crop_img = get_rotate_crop_image(origin_img, box);
}
else{
crop_img = origin_img;
}
if(run_cls){
ClsPredictResult cls_res = infer_cls(crop_img);
crop_img = cls_res.img;
ocr_result.cls_score = cls_res.cls_score;
ocr_result.cls_label = cls_res.cls_label;
}
float wh_ratio = float(crop_img.cols) / float(crop_img.rows); float wh_ratio = float(crop_img.cols) / float(crop_img.rows);
cv::Mat input_image = crnn_resize_img(crop_img, wh_ratio); cv::Mat input_image = crnn_resize_img(crop_img, wh_ratio);
...@@ -122,8 +216,6 @@ std::vector<OCRPredictResult> OCR_PPredictor::infer_rec( ...@@ -122,8 +216,6 @@ std::vector<OCRPredictResult> OCR_PPredictor::infer_rec(
const float *predict_batch = results.at(0).get_float_data(); const float *predict_batch = results.at(0).get_float_data();
const std::vector<int64_t> predict_shape = results.at(0).get_shape(); const std::vector<int64_t> predict_shape = results.at(0).get_shape();
OCRPredictResult res;
// ctc decode // ctc decode
int argmax_idx; int argmax_idx;
int last_index = 0; int last_index = 0;
...@@ -140,27 +232,19 @@ std::vector<OCRPredictResult> OCR_PPredictor::infer_rec( ...@@ -140,27 +232,19 @@ std::vector<OCRPredictResult> OCR_PPredictor::infer_rec(
if (argmax_idx > 0 && (!(n > 0 && argmax_idx == last_index))) { if (argmax_idx > 0 && (!(n > 0 && argmax_idx == last_index))) {
score += max_value; score += max_value;
count += 1; count += 1;
res.word_index.push_back(argmax_idx); ocr_result.word_index.push_back(argmax_idx);
} }
last_index = argmax_idx; last_index = argmax_idx;
} }
score /= count; score /= count;
if (res.word_index.empty()) { ocr_result.score = score;
continue; LOGI("ocr cpp rec word size %ld", count);
}
res.score = score;
res.points = box;
ocr_results.emplace_back(std::move(res));
}
LOGI("ocr_results finished %lu", ocr_results.size());
return ocr_results;
} }
cv::Mat OCR_PPredictor::infer_cls(const cv::Mat &img, float thresh) { ClsPredictResult OCR_PPredictor::infer_cls(const cv::Mat &img, float thresh) {
std::vector<float> mean = {0.5f, 0.5f, 0.5f}; std::vector<float> mean = {0.5f, 0.5f, 0.5f};
std::vector<float> scale = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f}; std::vector<float> scale = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f};
std::vector<int64_t> dims = {1, 3, 0, 0}; std::vector<int64_t> dims = {1, 3, 0, 0};
std::vector<OCRPredictResult> ocr_results;
PredictorInput input = _cls_predictor->get_first_input(); PredictorInput input = _cls_predictor->get_first_input();
...@@ -182,7 +266,7 @@ cv::Mat OCR_PPredictor::infer_cls(const cv::Mat &img, float thresh) { ...@@ -182,7 +266,7 @@ cv::Mat OCR_PPredictor::infer_cls(const cv::Mat &img, float thresh) {
float score = 0; float score = 0;
int label = 0; int label = 0;
for (int64_t i = 0; i < results.at(0).get_size(); i++) { for (int64_t i = 0; i < results.at(0).get_size(); i++) {
LOGI("output scores [%f]", scores[i]); LOGI("ocr cpp cls output scores [%f]", scores[i]);
if (scores[i] > score) { if (scores[i] > score) {
score = scores[i]; score = scores[i];
label = i; label = i;
...@@ -193,7 +277,12 @@ cv::Mat OCR_PPredictor::infer_cls(const cv::Mat &img, float thresh) { ...@@ -193,7 +277,12 @@ cv::Mat OCR_PPredictor::infer_cls(const cv::Mat &img, float thresh) {
if (label % 2 == 1 && score > thresh) { if (label % 2 == 1 && score > thresh) {
cv::rotate(srcimg, srcimg, 1); cv::rotate(srcimg, srcimg, 1);
} }
return srcimg; ClsPredictResult res;
res.cls_label = label;
res.cls_score = score;
res.img = srcimg;
LOGI("ocr cpp cls word cls %ld, %f", label, score);
return res;
} }
std::vector<std::vector<std::vector<int>>> std::vector<std::vector<std::vector<int>>>
......
...@@ -15,6 +15,7 @@ namespace ppredictor { ...@@ -15,6 +15,7 @@ namespace ppredictor {
* Config * Config
*/ */
struct OCR_Config { struct OCR_Config {
int use_opencl = 0;
int thread_num = 4; // Thread num int thread_num = 4; // Thread num
paddle::lite_api::PowerMode mode = paddle::lite_api::PowerMode mode =
paddle::lite_api::LITE_POWER_HIGH; // PaddleLite Mode paddle::lite_api::LITE_POWER_HIGH; // PaddleLite Mode
...@@ -27,8 +28,15 @@ struct OCRPredictResult { ...@@ -27,8 +28,15 @@ struct OCRPredictResult {
std::vector<int> word_index; std::vector<int> word_index;
std::vector<std::vector<int>> points; std::vector<std::vector<int>> points;
float score; float score;
float cls_score;
int cls_label=-1;
}; };
struct ClsPredictResult {
float cls_score;
int cls_label=-1;
cv::Mat img;
};
/** /**
* OCR there are 2 models * OCR there are 2 models
* 1. First model(det),select polygones to show where are the texts * 1. First model(det),select polygones to show where are the texts
...@@ -62,8 +70,7 @@ public: ...@@ -62,8 +70,7 @@ public:
* @return * @return
*/ */
virtual std::vector<OCRPredictResult> virtual std::vector<OCRPredictResult>
infer_ocr(const std::vector<int64_t> &dims, const float *input_data, infer_ocr(cv::Mat &origin, int max_size_len, int run_det, int run_cls, int run_rec);
int input_len, int net_flag, cv::Mat &origin);
virtual NET_TYPE get_net_flag() const; virtual NET_TYPE get_net_flag() const;
...@@ -80,16 +87,17 @@ private: ...@@ -80,16 +87,17 @@ private:
calc_filtered_boxes(const float *pred, int pred_size, int output_height, calc_filtered_boxes(const float *pred, int pred_size, int output_height,
int output_width, const cv::Mat &origin); int output_width, const cv::Mat &origin);
void
infer_det(cv::Mat &origin, int max_side_len, std::vector<OCRPredictResult>& ocr_results);
/** /**
* infer for second model * infer for rec model
* *
* @param boxes * @param boxes
* @param origin * @param origin
* @return * @return
*/ */
std::vector<OCRPredictResult> void
infer_rec(const std::vector<std::vector<std::vector<int>>> &boxes, infer_rec(const cv::Mat &origin, int run_cls, OCRPredictResult& ocr_result);
const cv::Mat &origin);
/** /**
* infer for cls model * infer for cls model
...@@ -98,7 +106,7 @@ private: ...@@ -98,7 +106,7 @@ private:
* @param origin * @param origin
* @return * @return
*/ */
cv::Mat infer_cls(const cv::Mat &origin, float thresh = 0.9); ClsPredictResult infer_cls(const cv::Mat &origin, float thresh = 0.9);
/** /**
* Postprocess of second model to extract text
......
...@@ -2,9 +2,9 @@ ...@@ -2,9 +2,9 @@
#include "common.h" #include "common.h"
namespace ppredictor { namespace ppredictor {
PPredictor::PPredictor(int thread_num, int net_flag, PPredictor::PPredictor(int use_opencl, int thread_num, int net_flag,
paddle::lite_api::PowerMode mode) paddle::lite_api::PowerMode mode)
: _thread_num(thread_num), _net_flag(net_flag), _mode(mode) {} : _use_opencl(use_opencl), _thread_num(thread_num), _net_flag(net_flag), _mode(mode) {}
int PPredictor::init_nb(const std::string &model_content) { int PPredictor::init_nb(const std::string &model_content) {
paddle::lite_api::MobileConfig config; paddle::lite_api::MobileConfig config;
...@@ -19,10 +19,40 @@ int PPredictor::init_from_file(const std::string &model_content) { ...@@ -19,10 +19,40 @@ int PPredictor::init_from_file(const std::string &model_content) {
} }
template <typename ConfigT> int PPredictor::_init(ConfigT &config) { template <typename ConfigT> int PPredictor::_init(ConfigT &config) {
bool is_opencl_backend_valid = paddle::lite_api::IsOpenCLBackendValid(/*check_fp16_valid = false*/);
if (is_opencl_backend_valid) {
if (_use_opencl != 0) {
// Make sure you have write permission of the binary path.
// We strongly recommend each model has a unique binary name.
const std::string bin_path = "/data/local/tmp/";
const std::string bin_name = "lite_opencl_kernel.bin";
config.set_opencl_binary_path_name(bin_path, bin_name);
// opencl tune option
// CL_TUNE_NONE: 0
// CL_TUNE_RAPID: 1
// CL_TUNE_NORMAL: 2
// CL_TUNE_EXHAUSTIVE: 3
const std::string tuned_path = "/data/local/tmp/";
const std::string tuned_name = "lite_opencl_tuned.bin";
config.set_opencl_tune(paddle::lite_api::CL_TUNE_NORMAL, tuned_path, tuned_name);
// opencl precision option
// CL_PRECISION_AUTO: 0, first fp16 if valid, default
// CL_PRECISION_FP32: 1, force fp32
// CL_PRECISION_FP16: 2, force fp16
config.set_opencl_precision(paddle::lite_api::CL_PRECISION_FP32);
LOGI("ocr cpp device: running on gpu.");
}
} else {
LOGI("ocr cpp device: running on cpu.");
// you can give backup cpu nb model instead
// config.set_model_from_file(cpu_nb_model_dir);
}
config.set_threads(_thread_num); config.set_threads(_thread_num);
config.set_power_mode(_mode); config.set_power_mode(_mode);
_predictor = paddle::lite_api::CreatePaddlePredictor(config); _predictor = paddle::lite_api::CreatePaddlePredictor(config);
LOGI("paddle instance created"); LOGI("ocr cpp paddle instance created");
return RETURN_OK; return RETURN_OK;
} }
...@@ -43,18 +73,18 @@ std::vector<PredictorInput> PPredictor::get_inputs(int num) { ...@@ -43,18 +73,18 @@ std::vector<PredictorInput> PPredictor::get_inputs(int num) {
PredictorInput PPredictor::get_first_input() { return get_input(0); } PredictorInput PPredictor::get_first_input() { return get_input(0); }
std::vector<PredictorOutput> PPredictor::infer() { std::vector<PredictorOutput> PPredictor::infer() {
LOGI("infer Run start %d", _net_flag); LOGI("ocr cpp infer Run start %d", _net_flag);
std::vector<PredictorOutput> results; std::vector<PredictorOutput> results;
if (!_is_input_get) { if (!_is_input_get) {
return results; return results;
} }
_predictor->Run(); _predictor->Run();
LOGI("infer Run end"); LOGI("ocr cpp infer Run end");
for (int i = 0; i < _predictor->GetOutputNames().size(); i++) { for (int i = 0; i < _predictor->GetOutputNames().size(); i++) {
std::unique_ptr<const paddle::lite_api::Tensor> output_tensor = std::unique_ptr<const paddle::lite_api::Tensor> output_tensor =
_predictor->GetOutput(i); _predictor->GetOutput(i);
LOGI("output tensor[%d] size %ld", i, product(output_tensor->shape())); LOGI("ocr cpp output tensor[%d] size %ld", i, product(output_tensor->shape()));
PredictorOutput result{std::move(output_tensor), i, _net_flag}; PredictorOutput result{std::move(output_tensor), i, _net_flag};
results.emplace_back(std::move(result)); results.emplace_back(std::move(result));
} }
......
...@@ -22,7 +22,7 @@ public: ...@@ -22,7 +22,7 @@ public:
class PPredictor : public PPredictor_Interface { class PPredictor : public PPredictor_Interface {
public: public:
PPredictor( PPredictor(
int thread_num, int net_flag = 0, int use_opencl, int thread_num, int net_flag = 0,
paddle::lite_api::PowerMode mode = paddle::lite_api::LITE_POWER_HIGH); paddle::lite_api::PowerMode mode = paddle::lite_api::LITE_POWER_HIGH);
virtual ~PPredictor() {} virtual ~PPredictor() {}
...@@ -54,6 +54,7 @@ protected: ...@@ -54,6 +54,7 @@ protected:
template <typename ConfigT> int _init(ConfigT &config); template <typename ConfigT> int _init(ConfigT &config);
private: private:
int _use_opencl;
int _thread_num; int _thread_num;
paddle::lite_api::PowerMode _mode; paddle::lite_api::PowerMode _mode;
std::shared_ptr<paddle::lite_api::PaddlePredictor> _predictor; std::shared_ptr<paddle::lite_api::PaddlePredictor> _predictor;
......
...@@ -13,6 +13,7 @@ import android.graphics.BitmapFactory; ...@@ -13,6 +13,7 @@ import android.graphics.BitmapFactory;
import android.graphics.drawable.BitmapDrawable; import android.graphics.drawable.BitmapDrawable;
import android.media.ExifInterface; import android.media.ExifInterface;
import android.content.res.AssetManager; import android.content.res.AssetManager;
import android.media.FaceDetector;
import android.net.Uri; import android.net.Uri;
import android.os.Bundle; import android.os.Bundle;
import android.os.Environment; import android.os.Environment;
...@@ -27,7 +28,9 @@ import android.view.Menu; ...@@ -27,7 +28,9 @@ import android.view.Menu;
import android.view.MenuInflater; import android.view.MenuInflater;
import android.view.MenuItem; import android.view.MenuItem;
import android.view.View; import android.view.View;
import android.widget.CheckBox;
import android.widget.ImageView; import android.widget.ImageView;
import android.widget.Spinner;
import android.widget.TextView; import android.widget.TextView;
import android.widget.Toast; import android.widget.Toast;
...@@ -68,23 +71,24 @@ public class MainActivity extends AppCompatActivity { ...@@ -68,23 +71,24 @@ public class MainActivity extends AppCompatActivity {
protected ImageView ivInputImage; protected ImageView ivInputImage;
protected TextView tvOutputResult; protected TextView tvOutputResult;
protected TextView tvInferenceTime; protected TextView tvInferenceTime;
protected CheckBox cbOpencl;
protected Spinner spRunMode;
// Model settings of object detection // Model settings of ocr
protected String modelPath = ""; protected String modelPath = "";
protected String labelPath = ""; protected String labelPath = "";
protected String imagePath = ""; protected String imagePath = "";
protected int cpuThreadNum = 1; protected int cpuThreadNum = 1;
protected String cpuPowerMode = ""; protected String cpuPowerMode = "";
protected String inputColorFormat = ""; protected int detLongSize = 960;
protected long[] inputShape = new long[]{};
protected float[] inputMean = new float[]{};
protected float[] inputStd = new float[]{};
protected float scoreThreshold = 0.1f; protected float scoreThreshold = 0.1f;
private String currentPhotoPath; private String currentPhotoPath;
private AssetManager assetManager =null; private AssetManager assetManager = null;
protected Predictor predictor = new Predictor(); protected Predictor predictor = new Predictor();
private Bitmap cur_predict_image = null;
@Override @Override
protected void onCreate(Bundle savedInstanceState) { protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState); super.onCreate(savedInstanceState);
...@@ -98,10 +102,12 @@ public class MainActivity extends AppCompatActivity { ...@@ -98,10 +102,12 @@ public class MainActivity extends AppCompatActivity {
// Setup the UI components // Setup the UI components
tvInputSetting = findViewById(R.id.tv_input_setting); tvInputSetting = findViewById(R.id.tv_input_setting);
cbOpencl = findViewById(R.id.cb_opencl);
tvStatus = findViewById(R.id.tv_model_img_status); tvStatus = findViewById(R.id.tv_model_img_status);
ivInputImage = findViewById(R.id.iv_input_image); ivInputImage = findViewById(R.id.iv_input_image);
tvInferenceTime = findViewById(R.id.tv_inference_time); tvInferenceTime = findViewById(R.id.tv_inference_time);
tvOutputResult = findViewById(R.id.tv_output_result); tvOutputResult = findViewById(R.id.tv_output_result);
spRunMode = findViewById(R.id.sp_run_mode);
tvInputSetting.setMovementMethod(ScrollingMovementMethod.getInstance()); tvInputSetting.setMovementMethod(ScrollingMovementMethod.getInstance());
tvOutputResult.setMovementMethod(ScrollingMovementMethod.getInstance()); tvOutputResult.setMovementMethod(ScrollingMovementMethod.getInstance());
...@@ -111,26 +117,26 @@ public class MainActivity extends AppCompatActivity { ...@@ -111,26 +117,26 @@ public class MainActivity extends AppCompatActivity {
public void handleMessage(Message msg) { public void handleMessage(Message msg) {
switch (msg.what) { switch (msg.what) {
case RESPONSE_LOAD_MODEL_SUCCESSED: case RESPONSE_LOAD_MODEL_SUCCESSED:
if(pbLoadModel!=null && pbLoadModel.isShowing()){ if (pbLoadModel != null && pbLoadModel.isShowing()) {
pbLoadModel.dismiss(); pbLoadModel.dismiss();
} }
onLoadModelSuccessed(); onLoadModelSuccessed();
break; break;
case RESPONSE_LOAD_MODEL_FAILED: case RESPONSE_LOAD_MODEL_FAILED:
if(pbLoadModel!=null && pbLoadModel.isShowing()){ if (pbLoadModel != null && pbLoadModel.isShowing()) {
pbLoadModel.dismiss(); pbLoadModel.dismiss();
} }
Toast.makeText(MainActivity.this, "Load model failed!", Toast.LENGTH_SHORT).show(); Toast.makeText(MainActivity.this, "Load model failed!", Toast.LENGTH_SHORT).show();
onLoadModelFailed(); onLoadModelFailed();
break; break;
case RESPONSE_RUN_MODEL_SUCCESSED: case RESPONSE_RUN_MODEL_SUCCESSED:
if(pbRunModel!=null && pbRunModel.isShowing()){ if (pbRunModel != null && pbRunModel.isShowing()) {
pbRunModel.dismiss(); pbRunModel.dismiss();
} }
onRunModelSuccessed(); onRunModelSuccessed();
break; break;
case RESPONSE_RUN_MODEL_FAILED: case RESPONSE_RUN_MODEL_FAILED:
if(pbRunModel!=null && pbRunModel.isShowing()){ if (pbRunModel != null && pbRunModel.isShowing()) {
pbRunModel.dismiss(); pbRunModel.dismiss();
} }
Toast.makeText(MainActivity.this, "Run model failed!", Toast.LENGTH_SHORT).show(); Toast.makeText(MainActivity.this, "Run model failed!", Toast.LENGTH_SHORT).show();
...@@ -175,71 +181,47 @@ public class MainActivity extends AppCompatActivity { ...@@ -175,71 +181,47 @@ public class MainActivity extends AppCompatActivity {
super.onResume(); super.onResume();
SharedPreferences sharedPreferences = PreferenceManager.getDefaultSharedPreferences(this); SharedPreferences sharedPreferences = PreferenceManager.getDefaultSharedPreferences(this);
boolean settingsChanged = false; boolean settingsChanged = false;
boolean model_settingsChanged = false;
String model_path = sharedPreferences.getString(getString(R.string.MODEL_PATH_KEY), String model_path = sharedPreferences.getString(getString(R.string.MODEL_PATH_KEY),
getString(R.string.MODEL_PATH_DEFAULT)); getString(R.string.MODEL_PATH_DEFAULT));
String label_path = sharedPreferences.getString(getString(R.string.LABEL_PATH_KEY), String label_path = sharedPreferences.getString(getString(R.string.LABEL_PATH_KEY),
getString(R.string.LABEL_PATH_DEFAULT)); getString(R.string.LABEL_PATH_DEFAULT));
String image_path = sharedPreferences.getString(getString(R.string.IMAGE_PATH_KEY), String image_path = sharedPreferences.getString(getString(R.string.IMAGE_PATH_KEY),
getString(R.string.IMAGE_PATH_DEFAULT)); getString(R.string.IMAGE_PATH_DEFAULT));
settingsChanged |= !model_path.equalsIgnoreCase(modelPath); model_settingsChanged |= !model_path.equalsIgnoreCase(modelPath);
settingsChanged |= !label_path.equalsIgnoreCase(labelPath); settingsChanged |= !label_path.equalsIgnoreCase(labelPath);
settingsChanged |= !image_path.equalsIgnoreCase(imagePath); settingsChanged |= !image_path.equalsIgnoreCase(imagePath);
int cpu_thread_num = Integer.parseInt(sharedPreferences.getString(getString(R.string.CPU_THREAD_NUM_KEY), int cpu_thread_num = Integer.parseInt(sharedPreferences.getString(getString(R.string.CPU_THREAD_NUM_KEY),
getString(R.string.CPU_THREAD_NUM_DEFAULT))); getString(R.string.CPU_THREAD_NUM_DEFAULT)));
settingsChanged |= cpu_thread_num != cpuThreadNum; model_settingsChanged |= cpu_thread_num != cpuThreadNum;
String cpu_power_mode = String cpu_power_mode =
sharedPreferences.getString(getString(R.string.CPU_POWER_MODE_KEY), sharedPreferences.getString(getString(R.string.CPU_POWER_MODE_KEY),
getString(R.string.CPU_POWER_MODE_DEFAULT)); getString(R.string.CPU_POWER_MODE_DEFAULT));
settingsChanged |= !cpu_power_mode.equalsIgnoreCase(cpuPowerMode); model_settingsChanged |= !cpu_power_mode.equalsIgnoreCase(cpuPowerMode);
String input_color_format =
sharedPreferences.getString(getString(R.string.INPUT_COLOR_FORMAT_KEY), int det_long_size = Integer.parseInt(sharedPreferences.getString(getString(R.string.DET_LONG_SIZE_KEY),
getString(R.string.INPUT_COLOR_FORMAT_DEFAULT)); getString(R.string.DET_LONG_SIZE_DEFAULT)));
settingsChanged |= !input_color_format.equalsIgnoreCase(inputColorFormat); settingsChanged |= det_long_size != detLongSize;
long[] input_shape =
Utils.parseLongsFromString(sharedPreferences.getString(getString(R.string.INPUT_SHAPE_KEY),
getString(R.string.INPUT_SHAPE_DEFAULT)), ",");
float[] input_mean =
Utils.parseFloatsFromString(sharedPreferences.getString(getString(R.string.INPUT_MEAN_KEY),
getString(R.string.INPUT_MEAN_DEFAULT)), ",");
float[] input_std =
Utils.parseFloatsFromString(sharedPreferences.getString(getString(R.string.INPUT_STD_KEY)
, getString(R.string.INPUT_STD_DEFAULT)), ",");
settingsChanged |= input_shape.length != inputShape.length;
settingsChanged |= input_mean.length != inputMean.length;
settingsChanged |= input_std.length != inputStd.length;
if (!settingsChanged) {
for (int i = 0; i < input_shape.length; i++) {
settingsChanged |= input_shape[i] != inputShape[i];
}
for (int i = 0; i < input_mean.length; i++) {
settingsChanged |= input_mean[i] != inputMean[i];
}
for (int i = 0; i < input_std.length; i++) {
settingsChanged |= input_std[i] != inputStd[i];
}
}
float score_threshold = float score_threshold =
Float.parseFloat(sharedPreferences.getString(getString(R.string.SCORE_THRESHOLD_KEY), Float.parseFloat(sharedPreferences.getString(getString(R.string.SCORE_THRESHOLD_KEY),
getString(R.string.SCORE_THRESHOLD_DEFAULT))); getString(R.string.SCORE_THRESHOLD_DEFAULT)));
settingsChanged |= scoreThreshold != score_threshold; settingsChanged |= scoreThreshold != score_threshold;
if (settingsChanged) { if (settingsChanged) {
modelPath = model_path;
labelPath = label_path; labelPath = label_path;
imagePath = image_path; imagePath = image_path;
detLongSize = det_long_size;
scoreThreshold = score_threshold;
set_img();
}
if (model_settingsChanged) {
modelPath = model_path;
cpuThreadNum = cpu_thread_num; cpuThreadNum = cpu_thread_num;
cpuPowerMode = cpu_power_mode; cpuPowerMode = cpu_power_mode;
inputColorFormat = input_color_format;
inputShape = input_shape;
inputMean = input_mean;
inputStd = input_std;
scoreThreshold = score_threshold;
// Update UI // Update UI
tvInputSetting.setText("Model: " + modelPath.substring(modelPath.lastIndexOf("/") + 1) + "\n" + "CPU" + tvInputSetting.setText("Model: " + modelPath.substring(modelPath.lastIndexOf("/") + 1) + "\nOPENCL: " + cbOpencl.isChecked() + "\nCPU Thread Num: " + cpuThreadNum + "\nCPU Power Mode: " + cpuPowerMode);
" Thread Num: " + Integer.toString(cpuThreadNum) + "\n" + "CPU Power Mode: " + cpuPowerMode);
tvInputSetting.scrollTo(0, 0); tvInputSetting.scrollTo(0, 0);
// Reload model if the configuration has changed // Reload model if the configuration has changed
// loadModel(); loadModel();
set_img();
} }
} }
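A note on the reworked onResume() above: preference changes are now split into two buckets, so changing only the test image, detection long size, or score threshold refreshes the preview via set_img(), while changing the model path, thread count, or power mode forces the much more expensive loadModel(). A minimal sketch of the pattern, assuming the same field and method names as above:

// Sketch: two-tier change handling, mirroring onResume() above.
if (settingsChanged) {            // image path, label path, detLongSize, scoreThreshold
    detLongSize = det_long_size;
    scoreThreshold = score_threshold;
    set_img();                    // refresh the preview only
}
if (model_settingsChanged) {      // model path, cpuThreadNum, cpuPowerMode
    cpuThreadNum = cpu_thread_num;
    cpuPowerMode = cpu_power_mode;
    loadModel();                  // reload the native predictor
}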
...@@ -254,20 +236,28 @@ public class MainActivity extends AppCompatActivity { ...@@ -254,20 +236,28 @@ public class MainActivity extends AppCompatActivity {
} }
public boolean onLoadModel() { public boolean onLoadModel() {
return predictor.init(MainActivity.this, modelPath, labelPath, cpuThreadNum, if (predictor.isLoaded()) {
predictor.releaseModel();
}
return predictor.init(MainActivity.this, modelPath, labelPath, cbOpencl.isChecked() ? 1 : 0, cpuThreadNum,
cpuPowerMode, cpuPowerMode,
inputColorFormat, detLongSize, scoreThreshold);
inputShape, inputMean,
inputStd, scoreThreshold);
} }
public boolean onRunModel() { public boolean onRunModel() {
return predictor.isLoaded() && predictor.runModel(); String run_mode = spRunMode.getSelectedItem().toString();
int run_det = run_mode.contains("检测") ? 1 : 0;
int run_cls = run_mode.contains("分类") ? 1 : 0;
int run_rec = run_mode.contains("识别") ? 1 : 0;
return predictor.isLoaded() && predictor.runModel(run_det, run_cls, run_rec);
} }
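The new onRunModel() derives the three pipeline flags from the selected spinner text by substring matching on the Chinese mode names (the entries come from the run_Model array added later in this diff). A small helper showing the same mapping in isolation, as a sketch only:

// Sketch: map a run-mode label such as "检测+分类+识别" to det/cls/rec flags.
static int[] parseRunMode(String runMode) {
    int runDet = runMode.contains("检测") ? 1 : 0;   // detection
    int runCls = runMode.contains("分类") ? 1 : 0;   // direction classification
    int runRec = runMode.contains("识别") ? 1 : 0;   // recognition
    return new int[]{runDet, runCls, runRec};
}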
public void onLoadModelSuccessed() { public void onLoadModelSuccessed() {
// Load test image from path and run model // Load test image from path and run model
tvInputSetting.setText("Model: " + modelPath.substring(modelPath.lastIndexOf("/") + 1) + "\nOPENCL: " + cbOpencl.isChecked() + "\nCPU Thread Num: " + cpuThreadNum + "\nCPU Power Mode: " + cpuPowerMode);
tvInputSetting.scrollTo(0, 0);
tvStatus.setText("STATUS: load model successed"); tvStatus.setText("STATUS: load model successed");
} }
public void onLoadModelFailed() { public void onLoadModelFailed() {
...@@ -290,20 +280,13 @@ public class MainActivity extends AppCompatActivity { ...@@ -290,20 +280,13 @@ public class MainActivity extends AppCompatActivity {
tvStatus.setText("STATUS: run model failed"); tvStatus.setText("STATUS: run model failed");
} }
public void onImageChanged(Bitmap image) {
// Rerun model if users pick test image from gallery or camera
if (image != null && predictor.isLoaded()) {
predictor.setInputImage(image);
runModel();
}
}
public void set_img() { public void set_img() {
// Load test image from path and run model // Load test image from path and run model
try { try {
assetManager= getAssets(); assetManager = getAssets();
InputStream in=assetManager.open(imagePath); InputStream in = assetManager.open(imagePath);
Bitmap bmp=BitmapFactory.decodeStream(in); Bitmap bmp = BitmapFactory.decodeStream(in);
cur_predict_image = bmp;
ivInputImage.setImageBitmap(bmp); ivInputImage.setImageBitmap(bmp);
} catch (IOException e) { } catch (IOException e) {
Toast.makeText(MainActivity.this, "Load image failed!", Toast.LENGTH_SHORT).show(); Toast.makeText(MainActivity.this, "Load image failed!", Toast.LENGTH_SHORT).show();
...@@ -430,7 +413,7 @@ public class MainActivity extends AppCompatActivity { ...@@ -430,7 +413,7 @@ public class MainActivity extends AppCompatActivity {
Cursor cursor = managedQuery(uri, proj, null, null, null); Cursor cursor = managedQuery(uri, proj, null, null, null);
cursor.moveToFirst(); cursor.moveToFirst();
if (image != null) { if (image != null) {
// onImageChanged(image); cur_predict_image = image;
ivInputImage.setImageBitmap(image); ivInputImage.setImageBitmap(image);
} }
} catch (IOException e) { } catch (IOException e) {
...@@ -451,7 +434,7 @@ public class MainActivity extends AppCompatActivity { ...@@ -451,7 +434,7 @@ public class MainActivity extends AppCompatActivity {
Bitmap image = BitmapFactory.decodeFile(currentPhotoPath); Bitmap image = BitmapFactory.decodeFile(currentPhotoPath);
image = Utils.rotateBitmap(image, orientation); image = Utils.rotateBitmap(image, orientation);
if (image != null) { if (image != null) {
// onImageChanged(image); cur_predict_image = image;
ivInputImage.setImageBitmap(image); ivInputImage.setImageBitmap(image);
} }
} else { } else {
...@@ -464,28 +447,28 @@ public class MainActivity extends AppCompatActivity { ...@@ -464,28 +447,28 @@ public class MainActivity extends AppCompatActivity {
} }
} }
public void btn_load_model_click(View view) { public void btn_reset_img_click(View view) {
if (predictor.isLoaded()){ ivInputImage.setImageBitmap(cur_predict_image);
tvStatus.setText("STATUS: model has been loaded"); }
}else{
public void cb_opencl_click(View view) {
tvStatus.setText("STATUS: load model ......"); tvStatus.setText("STATUS: load model ......");
loadModel(); loadModel();
} }
}
public void btn_run_model_click(View view) { public void btn_run_model_click(View view) {
Bitmap image =((BitmapDrawable)ivInputImage.getDrawable()).getBitmap(); Bitmap image = ((BitmapDrawable) ivInputImage.getDrawable()).getBitmap();
if(image == null) { if (image == null) {
tvStatus.setText("STATUS: image is not exists"); tvStatus.setText("STATUS: image is not exists");
} } else if (!predictor.isLoaded()) {
else if (!predictor.isLoaded()){
tvStatus.setText("STATUS: model is not loaded"); tvStatus.setText("STATUS: model is not loaded");
}else{ } else {
tvStatus.setText("STATUS: run model ...... "); tvStatus.setText("STATUS: run model ...... ");
predictor.setInputImage(image); predictor.setInputImage(image);
runModel(); runModel();
} }
} }
public void btn_choice_img_click(View view) { public void btn_choice_img_click(View view) {
if (requestAllPermissions()) { if (requestAllPermissions()) {
openGallery(); openGallery();
...@@ -506,4 +489,32 @@ public class MainActivity extends AppCompatActivity { ...@@ -506,4 +489,32 @@ public class MainActivity extends AppCompatActivity {
worker.quit(); worker.quit();
super.onDestroy(); super.onDestroy();
} }
public int get_run_mode() {
String run_mode = spRunMode.getSelectedItem().toString();
int mode;
switch (run_mode) {
case "检测+分类+识别":
mode = 1;
break;
case "检测+识别":
mode = 2;
break;
case "识别+分类":
mode = 3;
break;
case "检测":
mode = 4;
break;
case "识别":
mode = 5;
break;
case "分类":
mode = 6;
break;
default:
mode = 1;
}
return mode;
}
} }
package com.baidu.paddle.lite.demo.ocr;
import android.graphics.Bitmap;
import android.graphics.BitmapFactory;
import android.os.Build;
import android.os.Bundle;
import android.os.Handler;
import android.os.HandlerThread;
import android.os.Message;
import android.util.Log;
import android.view.View;
import android.widget.Button;
import android.widget.ImageView;
import android.widget.TextView;
import android.widget.Toast;
import androidx.appcompat.app.AppCompatActivity;
import java.io.IOException;
import java.io.InputStream;
public class MiniActivity extends AppCompatActivity {
public static final int REQUEST_LOAD_MODEL = 0;
public static final int REQUEST_RUN_MODEL = 1;
public static final int REQUEST_UNLOAD_MODEL = 2;
public static final int RESPONSE_LOAD_MODEL_SUCCESSED = 0;
public static final int RESPONSE_LOAD_MODEL_FAILED = 1;
public static final int RESPONSE_RUN_MODEL_SUCCESSED = 2;
public static final int RESPONSE_RUN_MODEL_FAILED = 3;
private static final String TAG = "MiniActivity";
protected Handler receiver = null; // Receive messages from worker thread
protected Handler sender = null; // Send command to worker thread
protected HandlerThread worker = null; // Worker thread to load&run model
protected volatile Predictor predictor = null;
private String assetModelDirPath = "models/ocr_v2_for_cpu";
private String assetlabelFilePath = "labels/ppocr_keys_v1.txt";
private Button button;
private ImageView imageView; // image result
private TextView textView; // text result
@Override
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
setContentView(R.layout.activity_mini);
Log.i(TAG, "SHOW in Logcat");
// Prepare the worker thread for model loading and inference
worker = new HandlerThread("Predictor Worker");
worker.start();
sender = new Handler(worker.getLooper()) {
public void handleMessage(Message msg) {
switch (msg.what) {
case REQUEST_LOAD_MODEL:
// Load model and reload test image
if (!onLoadModel()) {
runOnUiThread(new Runnable() {
@Override
public void run() {
Toast.makeText(MiniActivity.this, "Load model failed!", Toast.LENGTH_SHORT).show();
}
});
}
break;
case REQUEST_RUN_MODEL:
// Run model if model is loaded
final boolean isSuccessed = onRunModel();
runOnUiThread(new Runnable() {
@Override
public void run() {
if (isSuccessed){
onRunModelSuccessed();
}else{
Toast.makeText(MiniActivity.this, "Run model failed!", Toast.LENGTH_SHORT).show();
}
}
});
break;
}
}
};
sender.sendEmptyMessage(REQUEST_LOAD_MODEL); // triggers onLoadModel() on the worker thread
imageView = findViewById(R.id.imageView);
textView = findViewById(R.id.sample_text);
button = findViewById(R.id.button);
button.setOnClickListener(new View.OnClickListener() {
@Override
public void onClick(View v) {
sender.sendEmptyMessage(REQUEST_RUN_MODEL);
}
});
}
@Override
protected void onDestroy() {
onUnloadModel();
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.JELLY_BEAN_MR2) {
worker.quitSafely();
} else {
worker.quit();
}
super.onDestroy();
}
/**
* Called from onCreate to initialize the model.
*
* @return true if the model was loaded successfully
*/
private boolean onLoadModel() {
if (predictor == null) {
predictor = new Predictor();
}
return predictor.init(this, assetModelDirPath, assetlabelFilePath);
}
/**
* Loads the test image from assets and runs inference.
* Called on the worker thread in response to REQUEST_RUN_MODEL.
*
* @return true if inference ran successfully
*/
private boolean onRunModel() {
try {
String assetImagePath = "images/0.jpg";
InputStream imageStream = getAssets().open(assetImagePath);
Bitmap image = BitmapFactory.decodeStream(imageStream);
// Input is Bitmap
predictor.setInputImage(image);
return predictor.isLoaded() && predictor.runModel();
} catch (IOException e) {
e.printStackTrace();
return false;
}
}
private void onRunModelSuccessed() {
Log.i(TAG, "onRunModelSuccessed");
textView.setText(predictor.outputResult);
imageView.setImageBitmap(predictor.outputImage);
}
private void onUnloadModel() {
if (predictor != null) {
predictor.releaseModel();
}
}
}
...@@ -29,22 +29,22 @@ public class OCRPredictorNative { ...@@ -29,22 +29,22 @@ public class OCRPredictorNative {
public OCRPredictorNative(Config config) { public OCRPredictorNative(Config config) {
this.config = config; this.config = config;
loadLibrary(); loadLibrary();
nativePointer = init(config.detModelFilename, config.recModelFilename,config.clsModelFilename, nativePointer = init(config.detModelFilename, config.recModelFilename, config.clsModelFilename, config.useOpencl,
config.cpuThreadNum, config.cpuPower); config.cpuThreadNum, config.cpuPower);
Log.i("OCRPredictorNative", "load success " + nativePointer); Log.i("OCRPredictorNative", "load success " + nativePointer);
} }
public ArrayList<OcrResultModel> runImage(float[] inputData, int width, int height, int channels, Bitmap originalImage) { public ArrayList<OcrResultModel> runImage(Bitmap originalImage, int max_size_len, int run_det, int run_cls, int run_rec) {
Log.i("OCRPredictorNative", "begin to run image " + inputData.length + " " + width + " " + height); Log.i("OCRPredictorNative", "begin to run image ");
float[] dims = new float[]{1, channels, height, width}; float[] rawResults = forward(nativePointer, originalImage, max_size_len, run_det, run_cls, run_rec);
float[] rawResults = forward(nativePointer, inputData, dims, originalImage);
ArrayList<OcrResultModel> results = postprocess(rawResults); ArrayList<OcrResultModel> results = postprocess(rawResults);
return results; return results;
} }
public static class Config { public static class Config {
public int useOpencl;
public int cpuThreadNum; public int cpuThreadNum;
public String cpuPower; public String cpuPower;
public String detModelFilename; public String detModelFilename;
...@@ -53,16 +53,16 @@ public class OCRPredictorNative { ...@@ -53,16 +53,16 @@ public class OCRPredictorNative {
} }
public void destory(){ public void destory() {
if (nativePointer > 0) { if (nativePointer > 0) {
release(nativePointer); release(nativePointer);
nativePointer = 0; nativePointer = 0;
} }
} }
protected native long init(String detModelPath, String recModelPath,String clsModelPath, int threadNum, String cpuMode); protected native long init(String detModelPath, String recModelPath, String clsModelPath, int useOpencl, int threadNum, String cpuMode);
protected native float[] forward(long pointer, float[] buf, float[] ddims, Bitmap originalImage); protected native float[] forward(long pointer, Bitmap originalImage,int max_size_len, int run_det, int run_cls, int run_rec);
protected native void release(long pointer); protected native void release(long pointer);
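With these signature changes the JNI surface now takes the raw Bitmap plus max_size_len and the three run flags instead of a preprocessed float buffer, and init() additionally receives the OpenCL switch. An end-to-end usage sketch (realPath and bitmap are assumed to be provided by the caller; the file names and defaults are the ones used in Predictor.loadModel() further down):

// Sketch: driving the updated native wrapper directly.
OCRPredictorNative.Config config = new OCRPredictorNative.Config();
config.useOpencl = 0;                                   // 1 requests the GPU path
config.cpuThreadNum = 4;
config.cpuPower = "LITE_POWER_HIGH";
config.detModelFilename = realPath + File.separator + "det_db.nb";
config.recModelFilename = realPath + File.separator + "rec_crnn.nb";
config.clsModelFilename = realPath + File.separator + "cls.nb";
OCRPredictorNative ocr = new OCRPredictorNative(config);
ArrayList<OcrResultModel> results = ocr.runImage(bitmap, 960, 1, 1, 1); // detLongSize=960, det+cls+rec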
...@@ -73,9 +73,9 @@ public class OCRPredictorNative { ...@@ -73,9 +73,9 @@ public class OCRPredictorNative {
while (begin < raw.length) { while (begin < raw.length) {
int point_num = Math.round(raw[begin]); int point_num = Math.round(raw[begin]);
int word_num = Math.round(raw[begin + 1]); int word_num = Math.round(raw[begin + 1]);
OcrResultModel model = parse(raw, begin + 2, point_num, word_num); OcrResultModel res = parse(raw, begin + 2, point_num, word_num);
begin += 2 + 1 + point_num * 2 + word_num; begin += 2 + 1 + point_num * 2 + word_num + 2;
results.add(model); results.add(res);
} }
return results; return results;
...@@ -83,19 +83,22 @@ public class OCRPredictorNative { ...@@ -83,19 +83,22 @@ public class OCRPredictorNative {
private OcrResultModel parse(float[] raw, int begin, int pointNum, int wordNum) { private OcrResultModel parse(float[] raw, int begin, int pointNum, int wordNum) {
int current = begin; int current = begin;
OcrResultModel model = new OcrResultModel(); OcrResultModel res = new OcrResultModel();
model.setConfidence(raw[current]); res.setConfidence(raw[current]);
current++; current++;
for (int i = 0; i < pointNum; i++) { for (int i = 0; i < pointNum; i++) {
model.addPoints(Math.round(raw[current + i * 2]), Math.round(raw[current + i * 2 + 1])); res.addPoints(Math.round(raw[current + i * 2]), Math.round(raw[current + i * 2 + 1]));
} }
current += (pointNum * 2); current += (pointNum * 2);
for (int i = 0; i < wordNum; i++) { for (int i = 0; i < wordNum; i++) {
int index = Math.round(raw[current + i]); int index = Math.round(raw[current + i]);
model.addWordIndex(index); res.addWordIndex(index);
} }
current += wordNum;
res.setClsIdx(raw[current]);
res.setClsConfidence(raw[current + 1]);
Log.i("OCRPredictorNative", "word finished " + wordNum); Log.i("OCRPredictorNative", "word finished " + wordNum);
return model; return res;
} }
......
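After this change each record in the flat float array returned by forward() is laid out as: point_num, word_num, confidence, point_num (x, y) pairs, word_num word indices, cls_idx, cls_confidence; that is why the cursor in postprocess() now advances by 2 + 1 + point_num * 2 + word_num + 2. A decoding sketch that walks the same layout:

// Sketch: walk the flat result buffer using the record layout from parse() above.
static void dumpRawResults(float[] raw) {
    int begin = 0;
    while (begin < raw.length) {
        int pointNum = Math.round(raw[begin]);
        int wordNum = Math.round(raw[begin + 1]);
        float confidence = raw[begin + 2];
        float clsIdx = raw[begin + 3 + pointNum * 2 + wordNum];
        float clsConfidence = raw[begin + 4 + pointNum * 2 + wordNum];
        Log.i("OCRPredictorNative", "points=" + pointNum + " words=" + wordNum
                + " conf=" + confidence + " clsIdx=" + clsIdx + " clsConf=" + clsConfidence);
        begin += 2 + 1 + pointNum * 2 + wordNum + 2;    // same stride as postprocess()
    }
}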
...@@ -10,6 +10,9 @@ public class OcrResultModel { ...@@ -10,6 +10,9 @@ public class OcrResultModel {
private List<Integer> wordIndex; private List<Integer> wordIndex;
private String label; private String label;
private float confidence; private float confidence;
private float cls_idx;
private String cls_label;
private float cls_confidence;
public OcrResultModel() { public OcrResultModel() {
super(); super();
...@@ -49,4 +52,28 @@ public class OcrResultModel { ...@@ -49,4 +52,28 @@ public class OcrResultModel {
public void setConfidence(float confidence) { public void setConfidence(float confidence) {
this.confidence = confidence; this.confidence = confidence;
} }
public float getClsIdx() {
return cls_idx;
}
public void setClsIdx(float idx) {
this.cls_idx = idx;
}
public String getClsLabel() {
return cls_label;
}
public void setClsLabel(String label) {
this.cls_label = label;
}
public float getClsConfidence() {
return cls_confidence;
}
public void setClsConfidence(float confidence) {
this.cls_confidence = confidence;
}
} }
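These new cls_* fields hold the last two values of each raw record; the index is later turned into a human-readable angle label in Predictor.postprocess() (see below). A one-line sketch of that mapping:

// Sketch: cls_idx 1 is treated as a 180°-rotated text line, anything else as 0°.
result.setClsLabel(result.getClsIdx() == 1 ? "180" : "0");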
...@@ -31,23 +31,19 @@ public class Predictor { ...@@ -31,23 +31,19 @@ public class Predictor {
protected float inferenceTime = 0; protected float inferenceTime = 0;
// Word label dictionary for recognition // Word label dictionary for recognition
protected Vector<String> wordLabels = new Vector<String>(); protected Vector<String> wordLabels = new Vector<String>();
protected String inputColorFormat = "BGR"; protected int detLongSize = 960;
protected long[] inputShape = new long[]{1, 3, 960};
protected float[] inputMean = new float[]{0.485f, 0.456f, 0.406f};
protected float[] inputStd = new float[]{1.0f / 0.229f, 1.0f / 0.224f, 1.0f / 0.225f};
protected float scoreThreshold = 0.1f; protected float scoreThreshold = 0.1f;
protected Bitmap inputImage = null; protected Bitmap inputImage = null;
protected Bitmap outputImage = null; protected Bitmap outputImage = null;
protected volatile String outputResult = ""; protected volatile String outputResult = "";
protected float preprocessTime = 0;
protected float postprocessTime = 0; protected float postprocessTime = 0;
public Predictor() { public Predictor() {
} }
public boolean init(Context appCtx, String modelPath, String labelPath) { public boolean init(Context appCtx, String modelPath, String labelPath, int useOpencl, int cpuThreadNum, String cpuPowerMode) {
isLoaded = loadModel(appCtx, modelPath, cpuThreadNum, cpuPowerMode); isLoaded = loadModel(appCtx, modelPath, useOpencl, cpuThreadNum, cpuPowerMode);
if (!isLoaded) { if (!isLoaded) {
return false; return false;
} }
...@@ -56,49 +52,18 @@ public class Predictor { ...@@ -56,49 +52,18 @@ public class Predictor {
} }
public boolean init(Context appCtx, String modelPath, String labelPath, int cpuThreadNum, String cpuPowerMode, public boolean init(Context appCtx, String modelPath, String labelPath, int useOpencl, int cpuThreadNum, String cpuPowerMode,
String inputColorFormat, int detLongSize, float scoreThreshold) {
long[] inputShape, float[] inputMean, boolean isLoaded = init(appCtx, modelPath, labelPath, useOpencl, cpuThreadNum, cpuPowerMode);
float[] inputStd, float scoreThreshold) {
if (inputShape.length != 3) {
Log.e(TAG, "Size of input shape should be: 3");
return false;
}
if (inputMean.length != inputShape[1]) {
Log.e(TAG, "Size of input mean should be: " + Long.toString(inputShape[1]));
return false;
}
if (inputStd.length != inputShape[1]) {
Log.e(TAG, "Size of input std should be: " + Long.toString(inputShape[1]));
return false;
}
if (inputShape[0] != 1) {
Log.e(TAG, "Only one batch is supported in the image classification demo, you can use any batch size in " +
"your Apps!");
return false;
}
if (inputShape[1] != 1 && inputShape[1] != 3) {
Log.e(TAG, "Only one/three channels are supported in the image classification demo, you can use any " +
"channel size in your Apps!");
return false;
}
if (!inputColorFormat.equalsIgnoreCase("BGR")) {
Log.e(TAG, "Only BGR color format is supported.");
return false;
}
boolean isLoaded = init(appCtx, modelPath, labelPath);
if (!isLoaded) { if (!isLoaded) {
return false; return false;
} }
this.inputColorFormat = inputColorFormat; this.detLongSize = detLongSize;
this.inputShape = inputShape;
this.inputMean = inputMean;
this.inputStd = inputStd;
this.scoreThreshold = scoreThreshold; this.scoreThreshold = scoreThreshold;
return true; return true;
} }
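With the simplified init() chain, callers now pass the OpenCL switch, CPU settings, detection long-side limit, and score threshold directly instead of shape/mean/std arrays. A usage sketch using the defaults that appear in strings.xml later in this diff (appCtx is assumed to be an Android Context):

// Sketch: initializing the predictor with the strings.xml defaults.
Predictor predictor = new Predictor();
boolean loaded = predictor.init(appCtx,
        "models/ch_PP-OCRv2",
        "labels/ppocr_keys_v1.txt",
        /* useOpencl */ 0,
        /* cpuThreadNum */ 4,
        "LITE_POWER_HIGH",
        /* detLongSize */ 960,
        /* scoreThreshold */ 0.1f);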
protected boolean loadModel(Context appCtx, String modelPath, int cpuThreadNum, String cpuPowerMode) { protected boolean loadModel(Context appCtx, String modelPath, int useOpencl, int cpuThreadNum, String cpuPowerMode) {
// Release model if exists // Release model if exists
releaseModel(); releaseModel();
...@@ -118,12 +83,13 @@ public class Predictor { ...@@ -118,12 +83,13 @@ public class Predictor {
} }
OCRPredictorNative.Config config = new OCRPredictorNative.Config(); OCRPredictorNative.Config config = new OCRPredictorNative.Config();
config.useOpencl = useOpencl;
config.cpuThreadNum = cpuThreadNum; config.cpuThreadNum = cpuThreadNum;
config.detModelFilename = realPath + File.separator + "ch_ppocr_mobile_v2.0_det_opt.nb";
config.recModelFilename = realPath + File.separator + "ch_ppocr_mobile_v2.0_rec_opt.nb";
config.clsModelFilename = realPath + File.separator + "ch_ppocr_mobile_v2.0_cls_opt.nb";
Log.e("Predictor", "model path" + config.detModelFilename + " ; " + config.recModelFilename + ";" + config.clsModelFilename);
config.cpuPower = cpuPowerMode; config.cpuPower = cpuPowerMode;
config.detModelFilename = realPath + File.separator + "det_db.nb";
config.recModelFilename = realPath + File.separator + "rec_crnn.nb";
config.clsModelFilename = realPath + File.separator + "cls.nb";
Log.i("Predictor", "model path" + config.detModelFilename + " ; " + config.recModelFilename + ";" + config.clsModelFilename);
paddlePredictor = new OCRPredictorNative(config); paddlePredictor = new OCRPredictorNative(config);
this.cpuThreadNum = cpuThreadNum; this.cpuThreadNum = cpuThreadNum;
...@@ -170,82 +136,29 @@ public class Predictor { ...@@ -170,82 +136,29 @@ public class Predictor {
} }
public boolean runModel() { public boolean runModel(int run_det, int run_cls, int run_rec) {
if (inputImage == null || !isLoaded()) { if (inputImage == null || !isLoaded()) {
return false; return false;
} }
// Pre-process image, and feed input tensor with pre-processed data
Bitmap scaleImage = Utils.resizeWithStep(inputImage, Long.valueOf(inputShape[2]).intValue(), 32);
Date start = new Date();
int channels = (int) inputShape[1];
int width = scaleImage.getWidth();
int height = scaleImage.getHeight();
float[] inputData = new float[channels * width * height];
if (channels == 3) {
int[] channelIdx = null;
if (inputColorFormat.equalsIgnoreCase("RGB")) {
channelIdx = new int[]{0, 1, 2};
} else if (inputColorFormat.equalsIgnoreCase("BGR")) {
channelIdx = new int[]{2, 1, 0};
} else {
Log.i(TAG, "Unknown color format " + inputColorFormat + ", only RGB and BGR color format is " +
"supported!");
return false;
}
int[] channelStride = new int[]{width * height, width * height * 2};
int[] pixels=new int[width*height];
scaleImage.getPixels(pixels,0,scaleImage.getWidth(),0,0,scaleImage.getWidth(),scaleImage.getHeight());
for (int i = 0; i < pixels.length; i++) {
int color = pixels[i];
float[] rgb = new float[]{(float) red(color) / 255.0f, (float) green(color) / 255.0f,
(float) blue(color) / 255.0f};
inputData[i] = (rgb[channelIdx[0]] - inputMean[0]) / inputStd[0];
inputData[i + channelStride[0]] = (rgb[channelIdx[1]] - inputMean[1]) / inputStd[1];
inputData[i+ channelStride[1]] = (rgb[channelIdx[2]] - inputMean[2]) / inputStd[2];
}
} else if (channels == 1) {
int[] pixels=new int[width*height];
scaleImage.getPixels(pixels,0,scaleImage.getWidth(),0,0,scaleImage.getWidth(),scaleImage.getHeight());
for (int i = 0; i < pixels.length; i++) {
int color = pixels[i];
float gray = (float) (red(color) + green(color) + blue(color)) / 3.0f / 255.0f;
inputData[i] = (gray - inputMean[0]) / inputStd[0];
}
} else {
Log.i(TAG, "Unsupported channel size " + Integer.toString(channels) + ", only channel 1 and 3 is " +
"supported!");
return false;
}
float[] pixels = inputData;
Log.i(TAG, "pixels " + pixels[0] + " " + pixels[1] + " " + pixels[2] + " " + pixels[3]
+ " " + pixels[pixels.length / 2] + " " + pixels[pixels.length / 2 + 1] + " " + pixels[pixels.length - 2] + " " + pixels[pixels.length - 1]);
Date end = new Date();
preprocessTime = (float) (end.getTime() - start.getTime());
// Warm up // Warm up
for (int i = 0; i < warmupIterNum; i++) { for (int i = 0; i < warmupIterNum; i++) {
paddlePredictor.runImage(inputData, width, height, channels, inputImage); paddlePredictor.runImage(inputImage, detLongSize, run_det, run_cls, run_rec);
} }
warmupIterNum = 0; // warm-up only needs to run once warmupIterNum = 0; // warm-up only needs to run once
// Run inference // Run inference
start = new Date(); Date start = new Date();
ArrayList<OcrResultModel> results = paddlePredictor.runImage(inputData, width, height, channels, inputImage); ArrayList<OcrResultModel> results = paddlePredictor.runImage(inputImage, detLongSize, run_det, run_cls, run_rec);
end = new Date(); Date end = new Date();
inferenceTime = (end.getTime() - start.getTime()) / (float) inferIterNum; inferenceTime = (end.getTime() - start.getTime()) / (float) inferIterNum;
results = postprocess(results); results = postprocess(results);
Log.i(TAG, "[stat] Preprocess Time: " + preprocessTime Log.i(TAG, "[stat] Inference Time: " + inferenceTime + " ;Box Size " + results.size());
+ " ; Inference Time: " + inferenceTime + " ;Box Size " + results.size());
drawResults(results); drawResults(results);
return true; return true;
} }
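runModel() no longer normalizes pixels in Java; it hands the original Bitmap, detLongSize, and the three flags straight to the native layer and keeps only warm-up, timing, and drawing. A short sketch of a complete call from a caller such as MainActivity (bitmap and TAG are assumed; reading outputResult/outputImage as fields mirrors MiniActivity.onRunModelSuccessed()):

// Sketch: full det + cls + rec run once the model has been loaded.
predictor.setInputImage(bitmap);
if (predictor.isLoaded() && predictor.runModel(1, 1, 1)) {
    Log.i(TAG, predictor.outputResult);              // recognized text per box
    ivInputImage.setImageBitmap(predictor.outputImage);
}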
public boolean isLoaded() { public boolean isLoaded() {
return paddlePredictor != null && isLoaded; return paddlePredictor != null && isLoaded;
} }
...@@ -282,10 +195,6 @@ public class Predictor { ...@@ -282,10 +195,6 @@ public class Predictor {
return outputResult; return outputResult;
} }
public float preprocessTime() {
return preprocessTime;
}
public float postprocessTime() { public float postprocessTime() {
return postprocessTime; return postprocessTime;
} }
...@@ -310,6 +219,7 @@ public class Predictor { ...@@ -310,6 +219,7 @@ public class Predictor {
} }
} }
r.setLabel(word.toString()); r.setLabel(word.toString());
r.setClsLabel(r.getClsIdx() == 1 ? "180" : "0");
} }
return results; return results;
} }
...@@ -319,14 +229,22 @@ public class Predictor { ...@@ -319,14 +229,22 @@ public class Predictor {
for (int i = 0; i < results.size(); i++) { for (int i = 0; i < results.size(); i++) {
OcrResultModel result = results.get(i); OcrResultModel result = results.get(i);
StringBuilder sb = new StringBuilder(""); StringBuilder sb = new StringBuilder("");
sb.append(result.getLabel()); if(result.getPoints().size()>0){
sb.append(" ").append(result.getConfidence()); sb.append("Det: ");
sb.append("; Points: ");
for (Point p : result.getPoints()) { for (Point p : result.getPoints()) {
sb.append("(").append(p.x).append(",").append(p.y).append(") "); sb.append("(").append(p.x).append(",").append(p.y).append(") ");
} }
}
if(result.getLabel().length() > 0){
sb.append("\n Rec: ").append(result.getLabel());
sb.append(",").append(result.getConfidence());
}
if(result.getClsIdx()!=-1){
sb.append(" Cls: ").append(result.getClsLabel());
sb.append(",").append(result.getClsConfidence());
}
Log.i(TAG, sb.toString()); // show LOG in Logcat panel Log.i(TAG, sb.toString()); // show LOG in Logcat panel
outputResultSb.append(i + 1).append(": ").append(result.getLabel()).append("\n"); outputResultSb.append(i + 1).append(": ").append(sb.toString()).append("\n");
} }
outputResult = outputResultSb.toString(); outputResult = outputResultSb.toString();
outputImage = inputImage; outputImage = inputImage;
...@@ -344,6 +262,9 @@ public class Predictor { ...@@ -344,6 +262,9 @@ public class Predictor {
for (OcrResultModel result : results) { for (OcrResultModel result : results) {
Path path = new Path(); Path path = new Path();
List<Point> points = result.getPoints(); List<Point> points = result.getPoints();
if(points.size()==0){
continue;
}
path.moveTo(points.get(0).x, points.get(0).y); path.moveTo(points.get(0).x, points.get(0).y);
for (int i = points.size() - 1; i >= 0; i--) { for (int i = points.size() - 1; i >= 0; i--) {
Point p = points.get(i); Point p = points.get(i);
......
...@@ -20,16 +20,13 @@ public class SettingsActivity extends AppCompatPreferenceActivity implements Sha ...@@ -20,16 +20,13 @@ public class SettingsActivity extends AppCompatPreferenceActivity implements Sha
ListPreference etImagePath = null; ListPreference etImagePath = null;
ListPreference lpCPUThreadNum = null; ListPreference lpCPUThreadNum = null;
ListPreference lpCPUPowerMode = null; ListPreference lpCPUPowerMode = null;
ListPreference lpInputColorFormat = null; EditTextPreference etDetLongSize = null;
EditTextPreference etInputShape = null;
EditTextPreference etInputMean = null;
EditTextPreference etInputStd = null;
EditTextPreference etScoreThreshold = null; EditTextPreference etScoreThreshold = null;
List<String> preInstalledModelPaths = null; List<String> preInstalledModelPaths = null;
List<String> preInstalledLabelPaths = null; List<String> preInstalledLabelPaths = null;
List<String> preInstalledImagePaths = null; List<String> preInstalledImagePaths = null;
List<String> preInstalledInputShapes = null; List<String> preInstalledDetLongSizes = null;
List<String> preInstalledCPUThreadNums = null; List<String> preInstalledCPUThreadNums = null;
List<String> preInstalledCPUPowerModes = null; List<String> preInstalledCPUPowerModes = null;
List<String> preInstalledInputColorFormats = null; List<String> preInstalledInputColorFormats = null;
...@@ -50,7 +47,7 @@ public class SettingsActivity extends AppCompatPreferenceActivity implements Sha ...@@ -50,7 +47,7 @@ public class SettingsActivity extends AppCompatPreferenceActivity implements Sha
preInstalledModelPaths = new ArrayList<String>(); preInstalledModelPaths = new ArrayList<String>();
preInstalledLabelPaths = new ArrayList<String>(); preInstalledLabelPaths = new ArrayList<String>();
preInstalledImagePaths = new ArrayList<String>(); preInstalledImagePaths = new ArrayList<String>();
preInstalledInputShapes = new ArrayList<String>(); preInstalledDetLongSizes = new ArrayList<String>();
preInstalledCPUThreadNums = new ArrayList<String>(); preInstalledCPUThreadNums = new ArrayList<String>();
preInstalledCPUPowerModes = new ArrayList<String>(); preInstalledCPUPowerModes = new ArrayList<String>();
preInstalledInputColorFormats = new ArrayList<String>(); preInstalledInputColorFormats = new ArrayList<String>();
...@@ -63,10 +60,7 @@ public class SettingsActivity extends AppCompatPreferenceActivity implements Sha ...@@ -63,10 +60,7 @@ public class SettingsActivity extends AppCompatPreferenceActivity implements Sha
preInstalledImagePaths.add(getString(R.string.IMAGE_PATH_DEFAULT)); preInstalledImagePaths.add(getString(R.string.IMAGE_PATH_DEFAULT));
preInstalledCPUThreadNums.add(getString(R.string.CPU_THREAD_NUM_DEFAULT)); preInstalledCPUThreadNums.add(getString(R.string.CPU_THREAD_NUM_DEFAULT));
preInstalledCPUPowerModes.add(getString(R.string.CPU_POWER_MODE_DEFAULT)); preInstalledCPUPowerModes.add(getString(R.string.CPU_POWER_MODE_DEFAULT));
preInstalledInputColorFormats.add(getString(R.string.INPUT_COLOR_FORMAT_DEFAULT)); preInstalledDetLongSizes.add(getString(R.string.DET_LONG_SIZE_DEFAULT));
preInstalledInputShapes.add(getString(R.string.INPUT_SHAPE_DEFAULT));
preInstalledInputMeans.add(getString(R.string.INPUT_MEAN_DEFAULT));
preInstalledInputStds.add(getString(R.string.INPUT_STD_DEFAULT));
preInstalledScoreThresholds.add(getString(R.string.SCORE_THRESHOLD_DEFAULT)); preInstalledScoreThresholds.add(getString(R.string.SCORE_THRESHOLD_DEFAULT));
// Setup UI components // Setup UI components
...@@ -89,11 +83,7 @@ public class SettingsActivity extends AppCompatPreferenceActivity implements Sha ...@@ -89,11 +83,7 @@ public class SettingsActivity extends AppCompatPreferenceActivity implements Sha
(ListPreference) findPreference(getString(R.string.CPU_THREAD_NUM_KEY)); (ListPreference) findPreference(getString(R.string.CPU_THREAD_NUM_KEY));
lpCPUPowerMode = lpCPUPowerMode =
(ListPreference) findPreference(getString(R.string.CPU_POWER_MODE_KEY)); (ListPreference) findPreference(getString(R.string.CPU_POWER_MODE_KEY));
lpInputColorFormat = etDetLongSize = (EditTextPreference) findPreference(getString(R.string.DET_LONG_SIZE_KEY));
(ListPreference) findPreference(getString(R.string.INPUT_COLOR_FORMAT_KEY));
etInputShape = (EditTextPreference) findPreference(getString(R.string.INPUT_SHAPE_KEY));
etInputMean = (EditTextPreference) findPreference(getString(R.string.INPUT_MEAN_KEY));
etInputStd = (EditTextPreference) findPreference(getString(R.string.INPUT_STD_KEY));
etScoreThreshold = (EditTextPreference) findPreference(getString(R.string.SCORE_THRESHOLD_KEY)); etScoreThreshold = (EditTextPreference) findPreference(getString(R.string.SCORE_THRESHOLD_KEY));
} }
...@@ -112,11 +102,7 @@ public class SettingsActivity extends AppCompatPreferenceActivity implements Sha ...@@ -112,11 +102,7 @@ public class SettingsActivity extends AppCompatPreferenceActivity implements Sha
editor.putString(getString(R.string.IMAGE_PATH_KEY), preInstalledImagePaths.get(modelIdx)); editor.putString(getString(R.string.IMAGE_PATH_KEY), preInstalledImagePaths.get(modelIdx));
editor.putString(getString(R.string.CPU_THREAD_NUM_KEY), preInstalledCPUThreadNums.get(modelIdx)); editor.putString(getString(R.string.CPU_THREAD_NUM_KEY), preInstalledCPUThreadNums.get(modelIdx));
editor.putString(getString(R.string.CPU_POWER_MODE_KEY), preInstalledCPUPowerModes.get(modelIdx)); editor.putString(getString(R.string.CPU_POWER_MODE_KEY), preInstalledCPUPowerModes.get(modelIdx));
editor.putString(getString(R.string.INPUT_COLOR_FORMAT_KEY), editor.putString(getString(R.string.DET_LONG_SIZE_KEY), preInstalledDetLongSizes.get(modelIdx));
preInstalledInputColorFormats.get(modelIdx));
editor.putString(getString(R.string.INPUT_SHAPE_KEY), preInstalledInputShapes.get(modelIdx));
editor.putString(getString(R.string.INPUT_MEAN_KEY), preInstalledInputMeans.get(modelIdx));
editor.putString(getString(R.string.INPUT_STD_KEY), preInstalledInputStds.get(modelIdx));
editor.putString(getString(R.string.SCORE_THRESHOLD_KEY), editor.putString(getString(R.string.SCORE_THRESHOLD_KEY),
preInstalledScoreThresholds.get(modelIdx)); preInstalledScoreThresholds.get(modelIdx));
editor.apply(); editor.apply();
...@@ -129,10 +115,7 @@ public class SettingsActivity extends AppCompatPreferenceActivity implements Sha ...@@ -129,10 +115,7 @@ public class SettingsActivity extends AppCompatPreferenceActivity implements Sha
etImagePath.setEnabled(enableCustomSettings); etImagePath.setEnabled(enableCustomSettings);
lpCPUThreadNum.setEnabled(enableCustomSettings); lpCPUThreadNum.setEnabled(enableCustomSettings);
lpCPUPowerMode.setEnabled(enableCustomSettings); lpCPUPowerMode.setEnabled(enableCustomSettings);
lpInputColorFormat.setEnabled(enableCustomSettings); etDetLongSize.setEnabled(enableCustomSettings);
etInputShape.setEnabled(enableCustomSettings);
etInputMean.setEnabled(enableCustomSettings);
etInputStd.setEnabled(enableCustomSettings);
etScoreThreshold.setEnabled(enableCustomSettings); etScoreThreshold.setEnabled(enableCustomSettings);
modelPath = sharedPreferences.getString(getString(R.string.MODEL_PATH_KEY), modelPath = sharedPreferences.getString(getString(R.string.MODEL_PATH_KEY),
getString(R.string.MODEL_PATH_DEFAULT)); getString(R.string.MODEL_PATH_DEFAULT));
...@@ -144,14 +127,8 @@ public class SettingsActivity extends AppCompatPreferenceActivity implements Sha ...@@ -144,14 +127,8 @@ public class SettingsActivity extends AppCompatPreferenceActivity implements Sha
getString(R.string.CPU_THREAD_NUM_DEFAULT)); getString(R.string.CPU_THREAD_NUM_DEFAULT));
String cpuPowerMode = sharedPreferences.getString(getString(R.string.CPU_POWER_MODE_KEY), String cpuPowerMode = sharedPreferences.getString(getString(R.string.CPU_POWER_MODE_KEY),
getString(R.string.CPU_POWER_MODE_DEFAULT)); getString(R.string.CPU_POWER_MODE_DEFAULT));
String inputColorFormat = sharedPreferences.getString(getString(R.string.INPUT_COLOR_FORMAT_KEY), String detLongSize = sharedPreferences.getString(getString(R.string.DET_LONG_SIZE_KEY),
getString(R.string.INPUT_COLOR_FORMAT_DEFAULT)); getString(R.string.DET_LONG_SIZE_DEFAULT));
String inputShape = sharedPreferences.getString(getString(R.string.INPUT_SHAPE_KEY),
getString(R.string.INPUT_SHAPE_DEFAULT));
String inputMean = sharedPreferences.getString(getString(R.string.INPUT_MEAN_KEY),
getString(R.string.INPUT_MEAN_DEFAULT));
String inputStd = sharedPreferences.getString(getString(R.string.INPUT_STD_KEY),
getString(R.string.INPUT_STD_DEFAULT));
String scoreThreshold = sharedPreferences.getString(getString(R.string.SCORE_THRESHOLD_KEY), String scoreThreshold = sharedPreferences.getString(getString(R.string.SCORE_THRESHOLD_KEY),
getString(R.string.SCORE_THRESHOLD_DEFAULT)); getString(R.string.SCORE_THRESHOLD_DEFAULT));
etModelPath.setSummary(modelPath); etModelPath.setSummary(modelPath);
...@@ -164,14 +141,8 @@ public class SettingsActivity extends AppCompatPreferenceActivity implements Sha ...@@ -164,14 +141,8 @@ public class SettingsActivity extends AppCompatPreferenceActivity implements Sha
lpCPUThreadNum.setSummary(cpuThreadNum); lpCPUThreadNum.setSummary(cpuThreadNum);
lpCPUPowerMode.setValue(cpuPowerMode); lpCPUPowerMode.setValue(cpuPowerMode);
lpCPUPowerMode.setSummary(cpuPowerMode); lpCPUPowerMode.setSummary(cpuPowerMode);
lpInputColorFormat.setValue(inputColorFormat); etDetLongSize.setSummary(detLongSize);
lpInputColorFormat.setSummary(inputColorFormat); etDetLongSize.setText(detLongSize);
etInputShape.setSummary(inputShape);
etInputShape.setText(inputShape);
etInputMean.setSummary(inputMean);
etInputMean.setText(inputMean);
etInputStd.setSummary(inputStd);
etInputStd.setText(inputStd);
etScoreThreshold.setText(scoreThreshold); etScoreThreshold.setText(scoreThreshold);
etScoreThreshold.setSummary(scoreThreshold); etScoreThreshold.setSummary(scoreThreshold);
} }
......
...@@ -23,13 +23,7 @@ ...@@ -23,13 +23,7 @@
android:layout_height="wrap_content" android:layout_height="wrap_content"
android:orientation="horizontal"> android:orientation="horizontal">
<Button
android:id="@+id/btn_load_model"
android:layout_width="0dp"
android:layout_height="wrap_content"
android:layout_weight="1"
android:onClick="btn_load_model_click"
android:text="加载模型" />
<Button <Button
android:id="@+id/btn_run_model" android:id="@+id/btn_run_model"
android:layout_width="0dp" android:layout_width="0dp"
...@@ -52,7 +46,45 @@ ...@@ -52,7 +46,45 @@
android:onClick="btn_choice_img_click" android:onClick="btn_choice_img_click"
android:text="选取图片" /> android:text="选取图片" />
<Button
android:id="@+id/btn_reset_img"
android:layout_width="0dp"
android:layout_height="wrap_content"
android:layout_weight="1"
android:onClick="btn_reset_img_click"
android:text="清空绘图" />
</LinearLayout>
<LinearLayout
android:id="@+id/run_mode_layout"
android:layout_width="fill_parent"
android:layout_height="wrap_content"
android:orientation="horizontal">
<CheckBox
android:id="@+id/cb_opencl"
android:layout_width="0dp"
android:layout_weight="1"
android:layout_height="wrap_content"
android:text="开启OPENCL"
android:onClick="cb_opencl_click"
android:visibility="gone"/>
<TextView
android:layout_width="0dp"
android:layout_weight="0.5"
android:layout_height="wrap_content"
android:text="运行模式:"/>
<Spinner
android:id="@+id/sp_run_mode"
android:layout_width="0dp"
android:layout_weight="1.5"
android:layout_height="wrap_content"
android:entries="@array/run_Model"
/>
</LinearLayout> </LinearLayout>
<TextView <TextView
android:id="@+id/tv_input_setting" android:id="@+id/tv_input_setting"
android:layout_width="wrap_content" android:layout_width="wrap_content"
...@@ -60,7 +92,7 @@ ...@@ -60,7 +92,7 @@
android:scrollbars="vertical" android:scrollbars="vertical"
android:layout_marginLeft="12dp" android:layout_marginLeft="12dp"
android:layout_marginRight="12dp" android:layout_marginRight="12dp"
android:layout_marginTop="10dp" android:layout_marginTop="5dp"
android:layout_marginBottom="5dp" android:layout_marginBottom="5dp"
android:lineSpacingExtra="4dp" android:lineSpacingExtra="4dp"
android:singleLine="false" android:singleLine="false"
......
<?xml version="1.0" encoding="utf-8"?>
<!-- for MiniActivity Use Only -->
<androidx.constraintlayout.widget.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:app="http://schemas.android.com/apk/res-auto"
xmlns:tools="http://schemas.android.com/tools"
android:layout_width="match_parent"
android:layout_height="match_parent"
app:layout_constraintLeft_toLeftOf="parent"
app:layout_constraintLeft_toRightOf="parent"
tools:context=".MainActivity">
<TextView
android:id="@+id/sample_text"
android:layout_width="0dp"
android:layout_height="wrap_content"
android:text="Hello World!"
app:layout_constraintLeft_toLeftOf="parent"
app:layout_constraintRight_toRightOf="parent"
app:layout_constraintTop_toBottomOf="@id/imageView"
android:scrollbars="vertical"
/>
<ImageView
android:id="@+id/imageView"
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:paddingTop="20dp"
android:paddingBottom="20dp"
app:layout_constraintBottom_toTopOf="@id/imageView"
app:layout_constraintLeft_toLeftOf="parent"
app:layout_constraintRight_toRightOf="parent"
app:layout_constraintTop_toTopOf="parent"
tools:srcCompat="@tools:sample/avatars" />
<Button
android:id="@+id/button"
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:layout_marginBottom="4dp"
android:text="Button"
app:layout_constraintBottom_toBottomOf="parent"
app:layout_constraintLeft_toLeftOf="parent"
app:layout_constraintRight_toRightOf="parent"
tools:layout_editor_absoluteX="161dp" />
</androidx.constraintlayout.widget.ConstraintLayout>
\ No newline at end of file
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<resources> <resources>
<string-array name="image_name_entries"> <string-array name="image_name_entries">
<item>0.jpg</item> <item>det_0.jpg</item>
<item>90.jpg</item> <item>det_90.jpg</item>
<item>180.jpg</item> <item>det_180.jpg</item>
<item>270.jpg</item> <item>det_270.jpg</item>
<item>rec_0.jpg</item>
<item>rec_0_180.jpg</item>
<item>rec_1.jpg</item>
<item>rec_1_180.jpg</item>
</string-array> </string-array>
<string-array name="image_name_values"> <string-array name="image_name_values">
<item>images/0.jpg</item> <item>images/det_0.jpg</item>
<item>images/90.jpg</item> <item>images/det_90.jpg</item>
<item>images/180.jpg</item> <item>images/det_180.jpg</item>
<item>images/270.jpg</item> <item>images/det_270.jpg</item>
<item>images/rec_0.jpg</item>
<item>images/rec_0_180.jpg</item>
<item>images/rec_1.jpg</item>
<item>images/rec_1_180.jpg</item>
</string-array> </string-array>
<string-array name="cpu_thread_num_entries"> <string-array name="cpu_thread_num_entries">
<item>1 threads</item> <item>1 threads</item>
...@@ -48,4 +56,12 @@ ...@@ -48,4 +56,12 @@
<item>BGR</item> <item>BGR</item>
<item>RGB</item> <item>RGB</item>
</string-array> </string-array>
<string-array name="run_Model">
<item>检测+分类+识别</item>
<item>检测+识别</item>
<item>分类+识别</item>
<item>检测</item>
<item>识别</item>
<item>分类</item>
</string-array>
</resources> </resources>
\ No newline at end of file
<resources> <resources>
<string name="app_name">OCR Chinese</string> <string name="app_name">PaddleOCR</string>
<string name="CHOOSE_PRE_INSTALLED_MODEL_KEY">CHOOSE_PRE_INSTALLED_MODEL_KEY</string> <string name="CHOOSE_PRE_INSTALLED_MODEL_KEY">CHOOSE_PRE_INSTALLED_MODEL_KEY</string>
<string name="ENABLE_CUSTOM_SETTINGS_KEY">ENABLE_CUSTOM_SETTINGS_KEY</string> <string name="ENABLE_CUSTOM_SETTINGS_KEY">ENABLE_CUSTOM_SETTINGS_KEY</string>
<string name="MODEL_PATH_KEY">MODEL_PATH_KEY</string> <string name="MODEL_PATH_KEY">MODEL_PATH_KEY</string>
...@@ -7,20 +7,14 @@ ...@@ -7,20 +7,14 @@
<string name="IMAGE_PATH_KEY">IMAGE_PATH_KEY</string> <string name="IMAGE_PATH_KEY">IMAGE_PATH_KEY</string>
<string name="CPU_THREAD_NUM_KEY">CPU_THREAD_NUM_KEY</string> <string name="CPU_THREAD_NUM_KEY">CPU_THREAD_NUM_KEY</string>
<string name="CPU_POWER_MODE_KEY">CPU_POWER_MODE_KEY</string> <string name="CPU_POWER_MODE_KEY">CPU_POWER_MODE_KEY</string>
<string name="INPUT_COLOR_FORMAT_KEY">INPUT_COLOR_FORMAT_KEY</string> <string name="DET_LONG_SIZE_KEY">DET_LONG_SIZE_KEY</string>
<string name="INPUT_SHAPE_KEY">INPUT_SHAPE_KEY</string>
<string name="INPUT_MEAN_KEY">INPUT_MEAN_KEY</string>
<string name="INPUT_STD_KEY">INPUT_STD_KEY</string>
<string name="SCORE_THRESHOLD_KEY">SCORE_THRESHOLD_KEY</string> <string name="SCORE_THRESHOLD_KEY">SCORE_THRESHOLD_KEY</string>
<string name="MODEL_PATH_DEFAULT">models/ocr_v2_for_cpu</string> <string name="MODEL_PATH_DEFAULT">models/ch_PP-OCRv2</string>
<string name="LABEL_PATH_DEFAULT">labels/ppocr_keys_v1.txt</string> <string name="LABEL_PATH_DEFAULT">labels/ppocr_keys_v1.txt</string>
<string name="IMAGE_PATH_DEFAULT">images/0.jpg</string> <string name="IMAGE_PATH_DEFAULT">images/det_0.jpg</string>
<string name="CPU_THREAD_NUM_DEFAULT">4</string> <string name="CPU_THREAD_NUM_DEFAULT">4</string>
<string name="CPU_POWER_MODE_DEFAULT">LITE_POWER_HIGH</string> <string name="CPU_POWER_MODE_DEFAULT">LITE_POWER_HIGH</string>
<string name="INPUT_COLOR_FORMAT_DEFAULT">BGR</string> <string name="DET_LONG_SIZE_DEFAULT">960</string>
<string name="INPUT_SHAPE_DEFAULT">1,3,960</string>
<string name="INPUT_MEAN_DEFAULT">0.485, 0.456, 0.406</string>
<string name="INPUT_STD_DEFAULT">0.229,0.224,0.225</string>
<string name="SCORE_THRESHOLD_DEFAULT">0.1</string> <string name="SCORE_THRESHOLD_DEFAULT">0.1</string>
</resources> </resources>
...@@ -47,26 +47,10 @@ ...@@ -47,26 +47,10 @@
android:entryValues="@array/cpu_power_mode_values"/> android:entryValues="@array/cpu_power_mode_values"/>
</PreferenceCategory> </PreferenceCategory>
<PreferenceCategory android:title="Input Settings"> <PreferenceCategory android:title="Input Settings">
<ListPreference
android:defaultValue="@string/INPUT_COLOR_FORMAT_DEFAULT"
android:key="@string/INPUT_COLOR_FORMAT_KEY"
android:negativeButtonText="@null"
android:positiveButtonText="@null"
android:title="Input Color Format: BGR or RGB"
android:entries="@array/input_color_format_entries"
android:entryValues="@array/input_color_format_values"/>
<EditTextPreference
android:key="@string/INPUT_SHAPE_KEY"
android:defaultValue="@string/INPUT_SHAPE_DEFAULT"
android:title="Input Shape: (1,1,max_width_height) or (1,3,max_width_height)" />
<EditTextPreference
android:key="@string/INPUT_MEAN_KEY"
android:defaultValue="@string/INPUT_MEAN_DEFAULT"
android:title="Input Mean: (channel/255-mean)/std" />
<EditTextPreference <EditTextPreference
android:key="@string/INPUT_STD_KEY" android:key="@string/DET_LONG_SIZE_KEY"
android:defaultValue="@string/INPUT_STD_DEFAULT" android:defaultValue="@string/DET_LONG_SIZE_DEFAULT"
android:title="Input Std: (channel/255-mean)/std" /> android:title="det long size" />
</PreferenceCategory> </PreferenceCategory>
<PreferenceCategory android:title="Output Settings"> <PreferenceCategory android:title="Output Settings">
<EditTextPreference <EditTextPreference
......
...@@ -45,8 +45,9 @@ public: ...@@ -45,8 +45,9 @@ public:
const double &det_db_thresh, const double &det_db_thresh,
const double &det_db_box_thresh, const double &det_db_box_thresh,
const double &det_db_unclip_ratio, const double &det_db_unclip_ratio,
const bool &use_polygon_score, const bool &visualize, const bool &use_polygon_score, const bool &use_dilation,
const bool &use_tensorrt, const std::string &precision) { const bool &visualize, const bool &use_tensorrt,
const std::string &precision) {
this->use_gpu_ = use_gpu; this->use_gpu_ = use_gpu;
this->gpu_id_ = gpu_id; this->gpu_id_ = gpu_id;
this->gpu_mem_ = gpu_mem; this->gpu_mem_ = gpu_mem;
...@@ -59,6 +60,7 @@ public: ...@@ -59,6 +60,7 @@ public:
this->det_db_box_thresh_ = det_db_box_thresh; this->det_db_box_thresh_ = det_db_box_thresh;
this->det_db_unclip_ratio_ = det_db_unclip_ratio; this->det_db_unclip_ratio_ = det_db_unclip_ratio;
this->use_polygon_score_ = use_polygon_score; this->use_polygon_score_ = use_polygon_score;
this->use_dilation_ = use_dilation;
this->visualize_ = visualize; this->visualize_ = visualize;
this->use_tensorrt_ = use_tensorrt; this->use_tensorrt_ = use_tensorrt;
...@@ -71,7 +73,8 @@ public: ...@@ -71,7 +73,8 @@ public:
void LoadModel(const std::string &model_dir); void LoadModel(const std::string &model_dir);
// Run predictor // Run predictor
void Run(cv::Mat &img, std::vector<std::vector<std::vector<int>>> &boxes, std::vector<double> *times); void Run(cv::Mat &img, std::vector<std::vector<std::vector<int>>> &boxes,
std::vector<double> *times);
private: private:
std::shared_ptr<Predictor> predictor_; std::shared_ptr<Predictor> predictor_;
...@@ -88,6 +91,7 @@ private: ...@@ -88,6 +91,7 @@ private:
double det_db_box_thresh_ = 0.5; double det_db_box_thresh_ = 0.5;
double det_db_unclip_ratio_ = 2.0; double det_db_unclip_ratio_ = 2.0;
bool use_polygon_score_ = false; bool use_polygon_score_ = false;
bool use_dilation_ = false;
bool visualize_ = true; bool visualize_ = true;
bool use_tensorrt_ = false; bool use_tensorrt_ = false;
......
...@@ -4,16 +4,20 @@ ...@@ -4,16 +4,20 @@
C++在性能计算上优于python,因此,在大多数CPU、GPU部署场景,多采用C++的部署方式,本节将介绍如何在Linux\Windows (CPU\GPU)环境下配置C++环境并完成 C++在性能计算上优于python,因此,在大多数CPU、GPU部署场景,多采用C++的部署方式,本节将介绍如何在Linux\Windows (CPU\GPU)环境下配置C++环境并完成
PaddleOCR模型部署。 PaddleOCR模型部署。
* [1. 准备环境](#1) - [服务器端C++预测](#服务器端c预测)
+ [1.0 运行准备](#10) - [1. 准备环境](#1-准备环境)
+ [1.1 编译opencv库](#11) - [1.0 运行准备](#10-运行准备)
+ [1.2 下载或者编译Paddle预测库](#12) - [1.1 编译opencv库](#11-编译opencv库)
- [1.2.1 直接下载安装](#121) - [1.2 下载或者编译Paddle预测库](#12-下载或者编译paddle预测库)
- [1.2.2 预测库源码编译](#122) - [1.2.1 直接下载安装](#121-直接下载安装)
* [2 开始运行](#2) - [1.2.2 预测库源码编译](#122-预测库源码编译)
+ [2.1 将模型导出为inference model](#21) - [2 开始运行](#2-开始运行)
+ [2.2 编译PaddleOCR C++预测demo](#22) - [2.1 将模型导出为inference model](#21-将模型导出为inference-model)
+ [2.3运行demo](#23) - [2.2 编译PaddleOCR C++预测demo](#22-编译paddleocr-c预测demo)
- [2.3 运行demo](#23-运行demo)
- [1. 只调用检测:](#1-只调用检测)
- [2. 只调用识别:](#2-只调用识别)
- [3. 调用串联:](#3-调用串联)
<a name="1"></a> <a name="1"></a>
...@@ -103,7 +107,7 @@ opencv3/ ...@@ -103,7 +107,7 @@ opencv3/
#### 1.2.1 直接下载安装 #### 1.2.1 直接下载安装
* [Paddle预测库官网](https://paddle-inference.readthedocs.io/en/latest/user_guides/download_lib.html) 上提供了不同cuda版本的Linux预测库,可以在官网查看并选择合适的预测库版本(*建议选择paddle版本>=2.0.1版本的预测库* )。 * [Paddle预测库官网](https://paddleinference.paddlepaddle.org.cn/user_guides/download_lib.html#linux) 上提供了不同cuda版本的Linux预测库,可以在官网查看并选择合适的预测库版本(*建议选择paddle版本>=2.0.1版本的预测库* )。
* 下载之后使用下面的方法解压。 * 下载之后使用下面的方法解压。
...@@ -249,7 +253,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir ...@@ -249,7 +253,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
|gpu_id|int|0|GPU id,使用GPU时有效| |gpu_id|int|0|GPU id,使用GPU时有效|
|gpu_mem|int|4000|申请的GPU内存| |gpu_mem|int|4000|申请的GPU内存|
|cpu_math_library_num_threads|int|10|CPU预测时的线程数,在机器核数充足的情况下,该值越大,预测速度越快| |cpu_math_library_num_threads|int|10|CPU预测时的线程数,在机器核数充足的情况下,该值越大,预测速度越快|
|use_mkldnn|bool|true|是否使用mkldnn库| |enable_mkldnn|bool|true|是否使用mkldnn库|
- 检测模型相关 - 检测模型相关
......
...@@ -78,7 +78,7 @@ opencv3/ ...@@ -78,7 +78,7 @@ opencv3/
#### 1.2.1 Direct download and installation #### 1.2.1 Direct download and installation
[Paddle inference library official website](https://paddle-inference.readthedocs.io/en/latest/user_guides/download_lib.html). You can review and select the appropriate version of the inference library on the official website. [Paddle inference library official website](https://paddleinference.paddlepaddle.org.cn/user_guides/download_lib.html#linux). You can review and select the appropriate version of the inference library on the official website.
* After downloading, use the following command to extract files. * After downloading, use the following command to extract files.
...@@ -231,7 +231,7 @@ More parameters are as follows, ...@@ -231,7 +231,7 @@ More parameters are as follows,
|gpu_id|int|0|GPU id when use_gpu is true| |gpu_id|int|0|GPU id when use_gpu is true|
|gpu_mem|int|4000|GPU memory requested| |gpu_mem|int|4000|GPU memory requested|
|cpu_math_library_num_threads|int|10|Number of threads when using CPU inference. When machine cores are sufficient, the larger the value, the faster the inference speed| |cpu_math_library_num_threads|int|10|Number of threads when using CPU inference. When machine cores are sufficient, the larger the value, the faster the inference speed|
|use_mkldnn|bool|true|Whether to use mkldnn library| |enable_mkldnn|bool|true|Whether to use mkldnn library|
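To make the table above concrete, the sketch below shows one possible invocation that combines the common runtime parameters with the `use_dilation` flag introduced in this change. Paths are placeholders, and the flag names are taken from the gflags definitions in the C++ source shown later in this diff:

```
# Sketch only: CPU detection with mkldnn enabled and the newly added dilation post-processing
./ppocr det \
    --det_model_dir=/PATH/TO/DET_INFERENCE_MODEL/ \
    --image_dir=/PATH/TO/INPUT/IMAGE/ \
    --use_gpu=false \
    --enable_mkldnn=true \
    --use_dilation=true
```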
- Detection related parameters - Detection related parameters
......
...@@ -28,14 +28,14 @@ ...@@ -28,14 +28,14 @@
#include <numeric> #include <numeric>
#include <glog/logging.h> #include <glog/logging.h>
#include <include/ocr_det.h>
#include <include/ocr_cls.h> #include <include/ocr_cls.h>
#include <include/ocr_det.h>
#include <include/ocr_rec.h> #include <include/ocr_rec.h>
#include <include/utility.h> #include <include/utility.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <gflags/gflags.h>
#include "auto_log/autolog.h" #include "auto_log/autolog.h"
#include <gflags/gflags.h>
DEFINE_bool(use_gpu, false, "Infering with GPU or CPU."); DEFINE_bool(use_gpu, false, "Infering with GPU or CPU.");
DEFINE_int32(gpu_id, 0, "Device id of GPU to execute."); DEFINE_int32(gpu_id, 0, "Device id of GPU to execute.");
...@@ -51,9 +51,10 @@ DEFINE_string(image_dir, "", "Dir of input image."); ...@@ -51,9 +51,10 @@ DEFINE_string(image_dir, "", "Dir of input image.");
DEFINE_string(det_model_dir, "", "Path of det inference model."); DEFINE_string(det_model_dir, "", "Path of det inference model.");
DEFINE_int32(max_side_len, 960, "max_side_len of input image."); DEFINE_int32(max_side_len, 960, "max_side_len of input image.");
DEFINE_double(det_db_thresh, 0.3, "Threshold of det_db_thresh."); DEFINE_double(det_db_thresh, 0.3, "Threshold of det_db_thresh.");
DEFINE_double(det_db_box_thresh, 0.5, "Threshold of det_db_box_thresh."); DEFINE_double(det_db_box_thresh, 0.6, "Threshold of det_db_box_thresh.");
DEFINE_double(det_db_unclip_ratio, 1.6, "Threshold of det_db_unclip_ratio."); DEFINE_double(det_db_unclip_ratio, 1.5, "Threshold of det_db_unclip_ratio.");
DEFINE_bool(use_polygon_score, false, "Whether use polygon score."); DEFINE_bool(use_polygon_score, false, "Whether use polygon score.");
DEFINE_bool(use_dilation, false, "Whether use the dilation on output map.");
DEFINE_bool(visualize, true, "Whether show the detection results."); DEFINE_bool(visualize, true, "Whether show the detection results.");
// classification related // classification related
DEFINE_bool(use_angle_cls, false, "Whether use use_angle_cls."); DEFINE_bool(use_angle_cls, false, "Whether use use_angle_cls.");
...@@ -62,15 +63,14 @@ DEFINE_double(cls_thresh, 0.9, "Threshold of cls_thresh."); ...@@ -62,15 +63,14 @@ DEFINE_double(cls_thresh, 0.9, "Threshold of cls_thresh.");
// recognition related // recognition related
DEFINE_string(rec_model_dir, "", "Path of rec inference model."); DEFINE_string(rec_model_dir, "", "Path of rec inference model.");
DEFINE_int32(rec_batch_num, 6, "rec_batch_num."); DEFINE_int32(rec_batch_num, 6, "rec_batch_num.");
DEFINE_string(char_list_file, "../../ppocr/utils/ppocr_keys_v1.txt", "Path of dictionary."); DEFINE_string(char_list_file, "../../ppocr/utils/ppocr_keys_v1.txt",
"Path of dictionary.");
using namespace std; using namespace std;
using namespace cv; using namespace cv;
using namespace PaddleOCR; using namespace PaddleOCR;
static bool PathExists(const std::string &path) {
static bool PathExists(const std::string& path){
#ifdef _WIN32 #ifdef _WIN32
struct _stat buffer; struct _stat buffer;
return (_stat(path.c_str(), &buffer) == 0); return (_stat(path.c_str(), &buffer) == 0);
...@@ -80,22 +80,22 @@ static bool PathExists(const std::string& path){ ...@@ -80,22 +80,22 @@ static bool PathExists(const std::string& path){
#endif // !_WIN32 #endif // !_WIN32
} }
int main_det(std::vector<cv::String> cv_all_img_names) { int main_det(std::vector<cv::String> cv_all_img_names) {
std::vector<double> time_info = {0, 0, 0}; std::vector<double> time_info = {0, 0, 0};
DBDetector det(FLAGS_det_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, DBDetector det(FLAGS_det_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn,
FLAGS_enable_mkldnn, FLAGS_max_side_len, FLAGS_det_db_thresh, FLAGS_max_side_len, FLAGS_det_db_thresh,
FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio, FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio,
FLAGS_use_polygon_score, FLAGS_visualize, FLAGS_use_polygon_score, FLAGS_use_dilation, FLAGS_visualize,
FLAGS_use_tensorrt, FLAGS_precision); FLAGS_use_tensorrt, FLAGS_precision);
for (int i = 0; i < cv_all_img_names.size(); ++i) { for (int i = 0; i < cv_all_img_names.size(); ++i) {
// LOG(INFO) << "The predict img: " << cv_all_img_names[i]; // LOG(INFO) << "The predict img: " << cv_all_img_names[i];
cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR); cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
if (!srcimg.data) { if (!srcimg.data) {
std::cerr << "[ERROR] image read failed! image path: " << cv_all_img_names[i] << endl; std::cerr << "[ERROR] image read failed! image path: "
<< cv_all_img_names[i] << endl;
exit(1); exit(1);
} }
std::vector<std::vector<std::vector<int>>> boxes; std::vector<std::vector<std::vector<int>>> boxes;
...@@ -119,22 +119,14 @@ int main_det(std::vector<cv::String> cv_all_img_names) { ...@@ -119,22 +119,14 @@ int main_det(std::vector<cv::String> cv_all_img_names) {
} }
if (FLAGS_benchmark) { if (FLAGS_benchmark) {
AutoLogger autolog("ocr_det", AutoLogger autolog("ocr_det", FLAGS_use_gpu, FLAGS_use_tensorrt,
FLAGS_use_gpu, FLAGS_enable_mkldnn, FLAGS_cpu_threads, 1, "dynamic",
FLAGS_use_tensorrt, FLAGS_precision, time_info, cv_all_img_names.size());
FLAGS_enable_mkldnn,
FLAGS_cpu_threads,
1,
"dynamic",
FLAGS_precision,
time_info,
cv_all_img_names.size());
autolog.report(); autolog.report();
} }
return 0; return 0;
} }
int main_rec(std::vector<cv::String> cv_all_img_names) { int main_rec(std::vector<cv::String> cv_all_img_names) {
std::vector<double> time_info = {0, 0, 0}; std::vector<double> time_info = {0, 0, 0};
...@@ -144,9 +136,9 @@ int main_rec(std::vector<cv::String> cv_all_img_names) { ...@@ -144,9 +136,9 @@ int main_rec(std::vector<cv::String> cv_all_img_names) {
cout << "label file: " << char_list_file << endl; cout << "label file: " << char_list_file << endl;
CRNNRecognizer rec(FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, CRNNRecognizer rec(FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn,
FLAGS_enable_mkldnn, char_list_file, char_list_file, FLAGS_use_tensorrt, FLAGS_precision,
FLAGS_use_tensorrt, FLAGS_precision, FLAGS_rec_batch_num); FLAGS_rec_batch_num);
std::vector<cv::Mat> img_list; std::vector<cv::Mat> img_list;
for (int i = 0; i < cv_all_img_names.size(); ++i) { for (int i = 0; i < cv_all_img_names.size(); ++i) {
...@@ -154,7 +146,8 @@ int main_rec(std::vector<cv::String> cv_all_img_names) { ...@@ -154,7 +146,8 @@ int main_rec(std::vector<cv::String> cv_all_img_names) {
cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR); cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
if (!srcimg.data) { if (!srcimg.data) {
std::cerr << "[ERROR] image read failed! image path: " << cv_all_img_names[i] << endl; std::cerr << "[ERROR] image read failed! image path: "
<< cv_all_img_names[i] << endl;
exit(1); exit(1);
} }
img_list.push_back(srcimg); img_list.push_back(srcimg);
...@@ -166,39 +159,31 @@ int main_rec(std::vector<cv::String> cv_all_img_names) { ...@@ -166,39 +159,31 @@ int main_rec(std::vector<cv::String> cv_all_img_names) {
time_info[2] += rec_times[2]; time_info[2] += rec_times[2];
if (FLAGS_benchmark) { if (FLAGS_benchmark) {
AutoLogger autolog("ocr_rec", AutoLogger autolog("ocr_rec", FLAGS_use_gpu, FLAGS_use_tensorrt,
FLAGS_use_gpu, FLAGS_enable_mkldnn, FLAGS_cpu_threads,
FLAGS_use_tensorrt, FLAGS_rec_batch_num, "dynamic", FLAGS_precision,
FLAGS_enable_mkldnn, time_info, cv_all_img_names.size());
FLAGS_cpu_threads,
FLAGS_rec_batch_num,
"dynamic",
FLAGS_precision,
time_info,
cv_all_img_names.size());
autolog.report(); autolog.report();
} }
return 0; return 0;
} }
int main_system(std::vector<cv::String> cv_all_img_names) { int main_system(std::vector<cv::String> cv_all_img_names) {
std::vector<double> time_info_det = {0, 0, 0}; std::vector<double> time_info_det = {0, 0, 0};
std::vector<double> time_info_rec = {0, 0, 0}; std::vector<double> time_info_rec = {0, 0, 0};
DBDetector det(FLAGS_det_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, DBDetector det(FLAGS_det_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn,
FLAGS_enable_mkldnn, FLAGS_max_side_len, FLAGS_det_db_thresh, FLAGS_max_side_len, FLAGS_det_db_thresh,
FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio, FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio,
FLAGS_use_polygon_score, FLAGS_visualize, FLAGS_use_polygon_score, FLAGS_use_dilation, FLAGS_visualize,
FLAGS_use_tensorrt, FLAGS_precision); FLAGS_use_tensorrt, FLAGS_precision);
Classifier *cls = nullptr; Classifier *cls = nullptr;
if (FLAGS_use_angle_cls) { if (FLAGS_use_angle_cls) {
cls = new Classifier(FLAGS_cls_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, cls = new Classifier(FLAGS_cls_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn,
FLAGS_enable_mkldnn, FLAGS_cls_thresh, FLAGS_cls_thresh, FLAGS_use_tensorrt, FLAGS_precision);
FLAGS_use_tensorrt, FLAGS_precision);
} }
std::string char_list_file = FLAGS_char_list_file; std::string char_list_file = FLAGS_char_list_file;
...@@ -207,16 +192,17 @@ int main_system(std::vector<cv::String> cv_all_img_names) { ...@@ -207,16 +192,17 @@ int main_system(std::vector<cv::String> cv_all_img_names) {
cout << "label file: " << char_list_file << endl; cout << "label file: " << char_list_file << endl;
CRNNRecognizer rec(FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, CRNNRecognizer rec(FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn,
FLAGS_enable_mkldnn, char_list_file, char_list_file, FLAGS_use_tensorrt, FLAGS_precision,
FLAGS_use_tensorrt, FLAGS_precision, FLAGS_rec_batch_num); FLAGS_rec_batch_num);
for (int i = 0; i < cv_all_img_names.size(); ++i) { for (int i = 0; i < cv_all_img_names.size(); ++i) {
LOG(INFO) << "The predict img: " << cv_all_img_names[i]; LOG(INFO) << "The predict img: " << cv_all_img_names[i];
cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR); cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
if (!srcimg.data) { if (!srcimg.data) {
std::cerr << "[ERROR] image read failed! image path: " << cv_all_img_names[i] << endl; std::cerr << "[ERROR] image read failed! image path: "
<< cv_all_img_names[i] << endl;
exit(1); exit(1);
} }
std::vector<std::vector<std::vector<int>>> boxes; std::vector<std::vector<std::vector<int>>> boxes;
...@@ -245,26 +231,14 @@ int main_system(std::vector<cv::String> cv_all_img_names) { ...@@ -245,26 +231,14 @@ int main_system(std::vector<cv::String> cv_all_img_names) {
} }
if (FLAGS_benchmark) { if (FLAGS_benchmark) {
AutoLogger autolog_det("ocr_det", AutoLogger autolog_det("ocr_det", FLAGS_use_gpu, FLAGS_use_tensorrt,
FLAGS_use_gpu, FLAGS_enable_mkldnn, FLAGS_cpu_threads, 1, "dynamic",
FLAGS_use_tensorrt, FLAGS_precision, time_info_det,
FLAGS_enable_mkldnn,
FLAGS_cpu_threads,
1,
"dynamic",
FLAGS_precision,
time_info_det,
cv_all_img_names.size());
AutoLogger autolog_rec("ocr_rec",
FLAGS_use_gpu,
FLAGS_use_tensorrt,
FLAGS_enable_mkldnn,
FLAGS_cpu_threads,
FLAGS_rec_batch_num,
"dynamic",
FLAGS_precision,
time_info_rec,
cv_all_img_names.size()); cv_all_img_names.size());
AutoLogger autolog_rec("ocr_rec", FLAGS_use_gpu, FLAGS_use_tensorrt,
FLAGS_enable_mkldnn, FLAGS_cpu_threads,
FLAGS_rec_batch_num, "dynamic", FLAGS_precision,
time_info_rec, cv_all_img_names.size());
autolog_det.report(); autolog_det.report();
std::cout << endl; std::cout << endl;
autolog_rec.report(); autolog_rec.report();
...@@ -272,29 +246,33 @@ int main_system(std::vector<cv::String> cv_all_img_names) { ...@@ -272,29 +246,33 @@ int main_system(std::vector<cv::String> cv_all_img_names) {
return 0; return 0;
} }
void check_params(char *mode) {
void check_params(char* mode) { if (strcmp(mode, "det") == 0) {
if (strcmp(mode, "det")==0) {
if (FLAGS_det_model_dir.empty() || FLAGS_image_dir.empty()) { if (FLAGS_det_model_dir.empty() || FLAGS_image_dir.empty()) {
std::cout << "Usage[det]: ./ppocr --det_model_dir=/PATH/TO/DET_INFERENCE_MODEL/ " std::cout << "Usage[det]: ./ppocr "
"--det_model_dir=/PATH/TO/DET_INFERENCE_MODEL/ "
<< "--image_dir=/PATH/TO/INPUT/IMAGE/" << std::endl; << "--image_dir=/PATH/TO/INPUT/IMAGE/" << std::endl;
exit(1); exit(1);
} }
} }
if (strcmp(mode, "rec")==0) { if (strcmp(mode, "rec") == 0) {
if (FLAGS_rec_model_dir.empty() || FLAGS_image_dir.empty()) { if (FLAGS_rec_model_dir.empty() || FLAGS_image_dir.empty()) {
std::cout << "Usage[rec]: ./ppocr --rec_model_dir=/PATH/TO/REC_INFERENCE_MODEL/ " std::cout << "Usage[rec]: ./ppocr "
"--rec_model_dir=/PATH/TO/REC_INFERENCE_MODEL/ "
<< "--image_dir=/PATH/TO/INPUT/IMAGE/" << std::endl; << "--image_dir=/PATH/TO/INPUT/IMAGE/" << std::endl;
exit(1); exit(1);
} }
} }
if (strcmp(mode, "system")==0) { if (strcmp(mode, "system") == 0) {
if ((FLAGS_det_model_dir.empty() || FLAGS_rec_model_dir.empty() || FLAGS_image_dir.empty()) || if ((FLAGS_det_model_dir.empty() || FLAGS_rec_model_dir.empty() ||
FLAGS_image_dir.empty()) ||
(FLAGS_use_angle_cls && FLAGS_cls_model_dir.empty())) { (FLAGS_use_angle_cls && FLAGS_cls_model_dir.empty())) {
std::cout << "Usage[system without angle cls]: ./ppocr --det_model_dir=/PATH/TO/DET_INFERENCE_MODEL/ " std::cout << "Usage[system without angle cls]: ./ppocr "
"--det_model_dir=/PATH/TO/DET_INFERENCE_MODEL/ "
<< "--rec_model_dir=/PATH/TO/REC_INFERENCE_MODEL/ " << "--rec_model_dir=/PATH/TO/REC_INFERENCE_MODEL/ "
<< "--image_dir=/PATH/TO/INPUT/IMAGE/" << std::endl; << "--image_dir=/PATH/TO/INPUT/IMAGE/" << std::endl;
std::cout << "Usage[system with angle cls]: ./ppocr --det_model_dir=/PATH/TO/DET_INFERENCE_MODEL/ " std::cout << "Usage[system with angle cls]: ./ppocr "
"--det_model_dir=/PATH/TO/DET_INFERENCE_MODEL/ "
<< "--use_angle_cls=true " << "--use_angle_cls=true "
<< "--cls_model_dir=/PATH/TO/CLS_INFERENCE_MODEL/ " << "--cls_model_dir=/PATH/TO/CLS_INFERENCE_MODEL/ "
<< "--rec_model_dir=/PATH/TO/REC_INFERENCE_MODEL/ " << "--rec_model_dir=/PATH/TO/REC_INFERENCE_MODEL/ "
...@@ -302,15 +280,17 @@ void check_params(char* mode) { ...@@ -302,15 +280,17 @@ void check_params(char* mode) {
exit(1); exit(1);
} }
} }
if (FLAGS_precision != "fp32" && FLAGS_precision != "fp16" && FLAGS_precision != "int8") { if (FLAGS_precision != "fp32" && FLAGS_precision != "fp16" &&
FLAGS_precision != "int8") {
cout << "precison should be 'fp32'(default), 'fp16' or 'int8'. " << endl; cout << "precison should be 'fp32'(default), 'fp16' or 'int8'. " << endl;
exit(1); exit(1);
} }
} }
int main(int argc, char **argv) { int main(int argc, char **argv) {
if (argc<=1 || (strcmp(argv[1], "det")!=0 && strcmp(argv[1], "rec")!=0 && strcmp(argv[1], "system")!=0)) { if (argc <= 1 ||
(strcmp(argv[1], "det") != 0 && strcmp(argv[1], "rec") != 0 &&
strcmp(argv[1], "system") != 0)) {
std::cout << "Please choose one mode of [det, rec, system] !" << std::endl; std::cout << "Please choose one mode of [det, rec, system] !" << std::endl;
return -1; return -1;
} }
...@@ -321,7 +301,8 @@ int main(int argc, char **argv) { ...@@ -321,7 +301,8 @@ int main(int argc, char **argv) {
check_params(argv[1]); check_params(argv[1]);
if (!PathExists(FLAGS_image_dir)) { if (!PathExists(FLAGS_image_dir)) {
std::cerr << "[ERROR] image path not exist! image_dir: " << FLAGS_image_dir << endl; std::cerr << "[ERROR] image path not exist! image_dir: " << FLAGS_image_dir
<< endl;
exit(1); exit(1);
} }
...@@ -329,14 +310,13 @@ int main(int argc, char **argv) { ...@@ -329,14 +310,13 @@ int main(int argc, char **argv) {
cv::glob(FLAGS_image_dir, cv_all_img_names); cv::glob(FLAGS_image_dir, cv_all_img_names);
std::cout << "total images num: " << cv_all_img_names.size() << endl; std::cout << "total images num: " << cv_all_img_names.size() << endl;
if (strcmp(argv[1], "det")==0) { if (strcmp(argv[1], "det") == 0) {
return main_det(cv_all_img_names); return main_det(cv_all_img_names);
} }
if (strcmp(argv[1], "rec")==0) { if (strcmp(argv[1], "rec") == 0) {
return main_rec(cv_all_img_names); return main_rec(cv_all_img_names);
} }
if (strcmp(argv[1], "system")==0) { if (strcmp(argv[1], "system") == 0) {
return main_system(cv_all_img_names); return main_system(cv_all_img_names);
} }
} }
...@@ -14,7 +14,6 @@ ...@@ -14,7 +14,6 @@
#include <include/ocr_det.h> #include <include/ocr_det.h>
namespace PaddleOCR { namespace PaddleOCR {
void DBDetector::LoadModel(const std::string &model_dir) { void DBDetector::LoadModel(const std::string &model_dir) {
...@@ -33,10 +32,7 @@ void DBDetector::LoadModel(const std::string &model_dir) { ...@@ -33,10 +32,7 @@ void DBDetector::LoadModel(const std::string &model_dir) {
if (this->precision_ == "int8") { if (this->precision_ == "int8") {
precision = paddle_infer::Config::Precision::kInt8; precision = paddle_infer::Config::Precision::kInt8;
} }
config.EnableTensorRtEngine( config.EnableTensorRtEngine(1 << 20, 10, 3, precision, false, false);
1 << 20, 10, 3,
precision,
false, false);
std::map<std::string, std::vector<int>> min_input_shape = { std::map<std::string, std::vector<int>> min_input_shape = {
{"x", {1, 3, 50, 50}}, {"x", {1, 3, 50, 50}},
{"conv2d_92.tmp_0", {1, 96, 20, 20}}, {"conv2d_92.tmp_0", {1, 96, 20, 20}},
...@@ -157,22 +153,27 @@ void DBDetector::Run(cv::Mat &img, ...@@ -157,22 +153,27 @@ void DBDetector::Run(cv::Mat &img,
const double maxvalue = 255; const double maxvalue = 255;
cv::Mat bit_map; cv::Mat bit_map;
cv::threshold(cbuf_map, bit_map, threshold, maxvalue, cv::THRESH_BINARY); cv::threshold(cbuf_map, bit_map, threshold, maxvalue, cv::THRESH_BINARY);
cv::Mat dilation_map; if (this->use_dilation_) {
cv::Mat dila_ele = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(2, 2)); cv::Mat dila_ele =
cv::dilate(bit_map, dilation_map, dila_ele); cv::getStructuringElement(cv::MORPH_RECT, cv::Size(2, 2));
cv::dilate(bit_map, bit_map, dila_ele);
}
boxes = post_processor_.BoxesFromBitmap( boxes = post_processor_.BoxesFromBitmap(
pred_map, dilation_map, this->det_db_box_thresh_, pred_map, bit_map, this->det_db_box_thresh_, this->det_db_unclip_ratio_,
this->det_db_unclip_ratio_, this->use_polygon_score_); this->use_polygon_score_);
boxes = post_processor_.FilterTagDetRes(boxes, ratio_h, ratio_w, srcimg); boxes = post_processor_.FilterTagDetRes(boxes, ratio_h, ratio_w, srcimg);
auto postprocess_end = std::chrono::steady_clock::now(); auto postprocess_end = std::chrono::steady_clock::now();
std::cout << "Detected boxes num: " << boxes.size() << endl; std::cout << "Detected boxes num: " << boxes.size() << endl;
std::chrono::duration<float> preprocess_diff = preprocess_end - preprocess_start; std::chrono::duration<float> preprocess_diff =
preprocess_end - preprocess_start;
times->push_back(double(preprocess_diff.count() * 1000)); times->push_back(double(preprocess_diff.count() * 1000));
std::chrono::duration<float> inference_diff = inference_end - inference_start; std::chrono::duration<float> inference_diff = inference_end - inference_start;
times->push_back(double(inference_diff.count() * 1000)); times->push_back(double(inference_diff.count() * 1000));
std::chrono::duration<float> postprocess_diff = postprocess_end - postprocess_start; std::chrono::duration<float> postprocess_diff =
postprocess_end - postprocess_start;
times->push_back(double(postprocess_diff.count() * 1000)); times->push_back(double(postprocess_diff.count() * 1000));
//// visualization //// visualization
......
...@@ -16,10 +16,14 @@ ...@@ -16,10 +16,14 @@
namespace PaddleOCR { namespace PaddleOCR {
void CRNNRecognizer::Run(std::vector<cv::Mat> img_list, std::vector<double> *times) { void CRNNRecognizer::Run(std::vector<cv::Mat> img_list,
std::chrono::duration<float> preprocess_diff = std::chrono::steady_clock::now() - std::chrono::steady_clock::now(); std::vector<double> *times) {
std::chrono::duration<float> inference_diff = std::chrono::steady_clock::now() - std::chrono::steady_clock::now(); std::chrono::duration<float> preprocess_diff =
std::chrono::duration<float> postprocess_diff = std::chrono::steady_clock::now() - std::chrono::steady_clock::now(); std::chrono::steady_clock::now() - std::chrono::steady_clock::now();
std::chrono::duration<float> inference_diff =
std::chrono::steady_clock::now() - std::chrono::steady_clock::now();
std::chrono::duration<float> postprocess_diff =
std::chrono::steady_clock::now() - std::chrono::steady_clock::now();
int img_num = img_list.size(); int img_num = img_list.size();
std::vector<float> width_list; std::vector<float> width_list;
...@@ -28,27 +32,31 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list, std::vector<double> *tim ...@@ -28,27 +32,31 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list, std::vector<double> *tim
} }
std::vector<int> indices = Utility::argsort(width_list); std::vector<int> indices = Utility::argsort(width_list);
for (int beg_img_no = 0; beg_img_no < img_num; beg_img_no += this->rec_batch_num_) { for (int beg_img_no = 0; beg_img_no < img_num;
beg_img_no += this->rec_batch_num_) {
auto preprocess_start = std::chrono::steady_clock::now(); auto preprocess_start = std::chrono::steady_clock::now();
int end_img_no = min(img_num, beg_img_no + this->rec_batch_num_); int end_img_no = min(img_num, beg_img_no + this->rec_batch_num_);
float max_wh_ratio = 0; float max_wh_ratio = 0;
for (int ino = beg_img_no; ino < end_img_no; ino ++) { for (int ino = beg_img_no; ino < end_img_no; ino++) {
int h = img_list[indices[ino]].rows; int h = img_list[indices[ino]].rows;
int w = img_list[indices[ino]].cols; int w = img_list[indices[ino]].cols;
float wh_ratio = w * 1.0 / h; float wh_ratio = w * 1.0 / h;
max_wh_ratio = max(max_wh_ratio, wh_ratio); max_wh_ratio = max(max_wh_ratio, wh_ratio);
} }
int batch_width = 0;
std::vector<cv::Mat> norm_img_batch; std::vector<cv::Mat> norm_img_batch;
for (int ino = beg_img_no; ino < end_img_no; ino ++) { for (int ino = beg_img_no; ino < end_img_no; ino++) {
cv::Mat srcimg; cv::Mat srcimg;
img_list[indices[ino]].copyTo(srcimg); img_list[indices[ino]].copyTo(srcimg);
cv::Mat resize_img; cv::Mat resize_img;
this->resize_op_.Run(srcimg, resize_img, max_wh_ratio, this->use_tensorrt_); this->resize_op_.Run(srcimg, resize_img, max_wh_ratio,
this->normalize_op_.Run(&resize_img, this->mean_, this->scale_, this->is_scale_); this->use_tensorrt_);
this->normalize_op_.Run(&resize_img, this->mean_, this->scale_,
this->is_scale_);
norm_img_batch.push_back(resize_img); norm_img_batch.push_back(resize_img);
batch_width = max(resize_img.cols, batch_width);
} }
int batch_width = int(ceilf(32 * max_wh_ratio)) - 1;
std::vector<float> input(this->rec_batch_num_ * 3 * 32 * batch_width, 0.0f); std::vector<float> input(this->rec_batch_num_ * 3 * 32 * batch_width, 0.0f);
this->permute_op_.Run(norm_img_batch, input.data()); this->permute_op_.Run(norm_img_batch, input.data());
auto preprocess_end = std::chrono::steady_clock::now(); auto preprocess_end = std::chrono::steady_clock::now();
...@@ -86,11 +94,11 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list, std::vector<double> *tim ...@@ -86,11 +94,11 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list, std::vector<double> *tim
float max_value = 0.0f; float max_value = 0.0f;
for (int n = 0; n < predict_shape[1]; n++) { for (int n = 0; n < predict_shape[1]; n++) {
argmax_idx = argmax_idx = int(Utility::argmax(
int(Utility::argmax(&predict_batch[(m * predict_shape[1] + n) * predict_shape[2]], &predict_batch[(m * predict_shape[1] + n) * predict_shape[2]],
&predict_batch[(m * predict_shape[1] + n + 1) * predict_shape[2]])); &predict_batch[(m * predict_shape[1] + n + 1) * predict_shape[2]]));
max_value = max_value = float(*std::max_element(
float(*std::max_element(&predict_batch[(m * predict_shape[1] + n) * predict_shape[2]], &predict_batch[(m * predict_shape[1] + n) * predict_shape[2]],
&predict_batch[(m * predict_shape[1] + n + 1) * predict_shape[2]])); &predict_batch[(m * predict_shape[1] + n + 1) * predict_shape[2]]));
if (argmax_idx > 0 && (!(n > 0 && argmax_idx == last_index))) { if (argmax_idx > 0 && (!(n > 0 && argmax_idx == last_index))) {
...@@ -116,7 +124,6 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list, std::vector<double> *tim ...@@ -116,7 +124,6 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list, std::vector<double> *tim
times->push_back(double(postprocess_diff.count() * 1000)); times->push_back(double(postprocess_diff.count() * 1000));
} }
void CRNNRecognizer::LoadModel(const std::string &model_dir) { void CRNNRecognizer::LoadModel(const std::string &model_dir) {
// AnalysisConfig config; // AnalysisConfig config;
paddle_infer::Config config; paddle_infer::Config config;
...@@ -133,20 +140,14 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) { ...@@ -133,20 +140,14 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) {
if (this->precision_ == "int8") { if (this->precision_ == "int8") {
precision = paddle_infer::Config::Precision::kInt8; precision = paddle_infer::Config::Precision::kInt8;
} }
config.EnableTensorRtEngine( config.EnableTensorRtEngine(1 << 20, 10, 3, precision, false, false);
1 << 20, 10, 3,
precision,
false, false);
std::map<std::string, std::vector<int>> min_input_shape = { std::map<std::string, std::vector<int>> min_input_shape = {
{"x", {1, 3, 32, 10}}, {"x", {1, 3, 32, 10}}, {"lstm_0.tmp_0", {10, 1, 96}}};
{"lstm_0.tmp_0", {10, 1, 96}}};
std::map<std::string, std::vector<int>> max_input_shape = { std::map<std::string, std::vector<int>> max_input_shape = {
{"x", {1, 3, 32, 2000}}, {"x", {1, 3, 32, 2000}}, {"lstm_0.tmp_0", {1000, 1, 96}}};
{"lstm_0.tmp_0", {1000, 1, 96}}};
std::map<std::string, std::vector<int>> opt_input_shape = { std::map<std::string, std::vector<int>> opt_input_shape = {
{"x", {1, 3, 32, 320}}, {"x", {1, 3, 32, 320}}, {"lstm_0.tmp_0", {25, 1, 96}}};
{"lstm_0.tmp_0", {25, 1, 96}}};
config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape, config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
opt_input_shape); opt_input_shape);
...@@ -168,7 +169,7 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) { ...@@ -168,7 +169,7 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) {
config.SwitchIrOptim(true); config.SwitchIrOptim(true);
config.EnableMemoryOptim(); config.EnableMemoryOptim();
// config.DisableGlogInfo(); // config.DisableGlogInfo();
this->predictor_ = CreatePredictor(config); this->predictor_ = CreatePredictor(config);
} }
......
- [端侧部署](#端侧部署)
- [1. 准备环境](#1-准备环境)
- [运行准备](#运行准备)
- [1.1 准备交叉编译环境](#11-准备交叉编译环境)
- [1.2 准备预测库](#12-准备预测库)
- [2 开始运行](#2-开始运行)
- [2.1 模型优化](#21-模型优化)
- [2.2 与手机联调](#22-与手机联调)
- [注意:](#注意)
- [FAQ](#faq)
# 端侧部署 # 端侧部署
本教程将介绍基于[Paddle Lite](https://github.com/PaddlePaddle/Paddle-Lite) 在移动端部署PaddleOCR超轻量中文检测、识别模型的详细步骤。 本教程将介绍基于[Paddle Lite](https://github.com/PaddlePaddle/Paddle-Lite) 在移动端部署PaddleOCR超轻量中文检测、识别模型的详细步骤。
...@@ -26,17 +37,17 @@ Paddle Lite是飞桨轻量化推理引擎,为手机、IOT端提供高效推理 ...@@ -26,17 +37,17 @@ Paddle Lite是飞桨轻量化推理引擎,为手机、IOT端提供高效推理
| 平台 | 预测库下载链接 | | 平台 | 预测库下载链接 |
|---|---| |---|---|
|Android|[arm7](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.9/inference_lite_lib.android.armv7.gcc.c++_shared.with_extra.with_cv.tar.gz) / [arm8](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.9/inference_lite_lib.android.armv8.gcc.c++_shared.with_extra.with_cv.tar.gz)| |Android|[arm7](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.10/inference_lite_lib.android.armv7.gcc.c++_shared.with_extra.with_cv.tar.gz) / [arm8](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.10/inference_lite_lib.android.armv8.gcc.c++_shared.with_extra.with_cv.tar.gz)|
|IOS|[arm7](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.9/inference_lite_lib.ios.armv7.with_cv.with_extra.with_log.tiny_publish.tar.gz) / [arm8](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.9/inference_lite_lib.ios.armv8.with_cv.with_extra.with_log.tiny_publish.tar.gz)| |IOS|[arm7](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.10/inference_lite_lib.ios.armv7.with_cv.with_extra.with_log.tiny_publish.tar.gz) / [arm8](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.10/inference_lite_lib.ios.armv8.with_cv.with_extra.with_log.tiny_publish.tar.gz)|
注:1. 上述预测库为PaddleLite 2.9分支编译得到,有关PaddleLite 2.9 详细信息可参考 [链接](https://github.com/PaddlePaddle/Paddle-Lite/releases/tag/v2.9) 。 注:1. 上述预测库为PaddleLite 2.10分支编译得到,有关PaddleLite 2.10 详细信息可参考 [链接](https://github.com/PaddlePaddle/Paddle-Lite/releases/tag/v2.10) 。
- 2. [推荐]编译Paddle-Lite得到预测库,Paddle-Lite的编译方式如下: - 2. [推荐]编译Paddle-Lite得到预测库,Paddle-Lite的编译方式如下:
``` ```
git clone https://github.com/PaddlePaddle/Paddle-Lite.git git clone https://github.com/PaddlePaddle/Paddle-Lite.git
cd Paddle-Lite cd Paddle-Lite
# 切换到Paddle-Lite release/v2.9 稳定分支 # 切换到Paddle-Lite release/v2.10 稳定分支
git checkout release/v2.9 git checkout release/v2.10
./lite/tools/build_android.sh --arch=armv8 --with_cv=ON --with_extra=ON ./lite/tools/build_android.sh --arch=armv8 --with_cv=ON --with_extra=ON
``` ```
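After `build_android.sh` finishes, the compiled prediction library is typically placed under the build output directory. The listing below is only a hint for locating it; the exact path can vary with the Paddle-Lite version and build options:

```
# Hypothetical location of the generated library (verify against your local build output)
ls Paddle-Lite/build.lite.android.armv8.gcc/inference_lite_lib.android.armv8/
# cxx/  demo/  java/  ...
```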
...@@ -85,8 +96,8 @@ Paddle-Lite 提供了多种策略来自动优化原始的模型,其中包括 ...@@ -85,8 +96,8 @@ Paddle-Lite 提供了多种策略来自动优化原始的模型,其中包括
|模型版本|模型简介|模型大小|检测模型|文本方向分类模型|识别模型|Paddle-Lite版本| |模型版本|模型简介|模型大小|检测模型|文本方向分类模型|识别模型|Paddle-Lite版本|
|---|---|---|---|---|---|---| |---|---|---|---|---|---|---|
|V2.0|超轻量中文OCR 移动端模型|7.8M|[下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_det_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_cls_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_rec_opt.nb)|v2.9| |PP-OCRv2|蒸馏版超轻量中文OCR移动端模型|11M|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_det_infer_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_infer_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_rec_infer_opt.nb)|v2.10|
|V2.0(slim)|超轻量中文OCR 移动端模型|3.3M|[下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_det_slim_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_rec_slim_opt.nb)|v2.9| |PP-OCRv2(slim)|蒸馏版超轻量中文OCR移动端模型|4.6M|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_det_slim_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_rec_slim_opt.nb)|v2.10|
如果直接使用上述表格中的模型进行部署,可略过下述步骤,直接阅读 [2.2节](#2.2与手机联调) 如果直接使用上述表格中的模型进行部署,可略过下述步骤,直接阅读 [2.2节](#2.2与手机联调)
...@@ -97,7 +108,7 @@ Paddle-Lite 提供了多种策略来自动优化原始的模型,其中包括 ...@@ -97,7 +108,7 @@ Paddle-Lite 提供了多种策略来自动优化原始的模型,其中包括
# 如果准备环境时已经clone了Paddle-Lite,则不用重新clone Paddle-Lite # 如果准备环境时已经clone了Paddle-Lite,则不用重新clone Paddle-Lite
git clone https://github.com/PaddlePaddle/Paddle-Lite.git git clone https://github.com/PaddlePaddle/Paddle-Lite.git
cd Paddle-Lite cd Paddle-Lite
git checkout release/v2.9 git checkout release/v2.10
# 启动编译 # 启动编译
./lite/tools/build.sh build_optimize_tool ./lite/tools/build.sh build_optimize_tool
``` ```
...@@ -123,15 +134,15 @@ cd build.opt/lite/api/ ...@@ -123,15 +134,15 @@ cd build.opt/lite/api/
下面以PaddleOCR的超轻量中文模型为例,介绍使用编译好的opt文件完成inference模型到Paddle-Lite优化模型的转换。 下面以PaddleOCR的超轻量中文模型为例,介绍使用编译好的opt文件完成inference模型到Paddle-Lite优化模型的转换。
``` ```
# 【推荐】 下载PaddleOCR V2.0版本的中英文 inference模型 # 【推荐】 下载 PP-OCRv2版本的中英文 inference模型
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_det_slim_infer.tar && tar xf ch_ppocr_mobile_v2.0_det_slim_infer.tar wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_slim_quant_infer.tar && tar xf ch_PP-OCRv2_det_slim_quant_infer.tar
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_rec_slim_infer.tar && tar xf ch_ppocr_mobile_v2.0_rec_slim_infer.tar wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_slim_quant_infer.tar && tar xf ch_PP-OCRv2_rec_slim_quant_infer.tar
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_cls_slim_infer.tar && tar xf ch_ppocr_mobile_v2.0_cls_slim_infer.tar wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_cls_slim_infer.tar && tar xf ch_ppocr_mobile_v2.0_cls_slim_infer.tar
# 转换V2.0检测模型 # 转换检测模型
./opt --model_file=./ch_ppocr_mobile_v2.0_det_slim_infer/inference.pdmodel --param_file=./ch_ppocr_mobile_v2.0_det_slim_infer/inference.pdiparams --optimize_out=./ch_ppocr_mobile_v2.0_det_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer ./opt --model_file=./ch_PP-OCRv2_det_slim_quant_infer/inference.pdmodel --param_file=./ch_PP-OCRv2_det_slim_quant_infer/inference.pdiparams --optimize_out=./ch_PP-OCRv2_det_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
# 转换V2.0识别模型 # 转换识别模型
./opt --model_file=./ch_ppocr_mobile_v2.0_rec_slim_infer/inference.pdmodel --param_file=./ch_ppocr_mobile_v2.0_rec_slim_infer/inference.pdiparams --optimize_out=./ch_ppocr_mobile_v2.0_rec_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer ./opt --model_file=./ch_PP-OCRv2_rec_slim_quant_infer/inference.pdmodel --param_file=./ch_PP-OCRv2_rec_slim_quant_infer/inference.pdiparams --optimize_out=./ch_PP-OCRv2_rec_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
# 转换V2.0方向分类器模型 # 转换方向分类器模型
./opt --model_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdmodel --param_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdiparams --optimize_out=./ch_ppocr_mobile_v2.0_cls_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer ./opt --model_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdmodel --param_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdiparams --optimize_out=./ch_ppocr_mobile_v2.0_cls_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
``` ```
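A quick way to confirm the conversions succeeded is to list the generated files; the `opt` tool appends the `.nb` suffix to each `--optimize_out` path (a sketch based on the commands above):

```
ls ./*.nb
# Expected: ch_PP-OCRv2_det_slim_opt.nb  ch_PP-OCRv2_rec_slim_opt.nb  ch_ppocr_mobile_v2.0_cls_slim_opt.nb
```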
...@@ -186,15 +197,15 @@ wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_cls ...@@ -186,15 +197,15 @@ wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_cls
``` ```
准备测试图像,以`PaddleOCR/doc/imgs/11.jpg`为例,将测试的图像复制到`demo/cxx/ocr/debug/`文件夹下。 准备测试图像,以`PaddleOCR/doc/imgs/11.jpg`为例,将测试的图像复制到`demo/cxx/ocr/debug/`文件夹下。
准备lite opt工具优化后的模型文件,比如使用`ch_ppocr_mobile_v2.0_det_slim_opt.nb,ch_ppocr_mobile_v2.0_rec_slim_opt.nb, ch_ppocr_mobile_v2.0_cls_slim_opt.nb`,模型文件放置在`demo/cxx/ocr/debug/`文件夹下。 准备lite opt工具优化后的模型文件,比如使用`ch_PP-OCRv2_det_slim_opt.nb, ch_PP-OCRv2_rec_slim_opt.nb, ch_ppocr_mobile_v2.0_cls_slim_opt.nb`,模型文件放置在`demo/cxx/ocr/debug/`文件夹下。
执行完成后,ocr文件夹下将有如下文件格式: 执行完成后,ocr文件夹下将有如下文件格式:
``` ```
demo/cxx/ocr/ demo/cxx/ocr/
|-- debug/ |-- debug/
| |--ch_ppocr_mobile_v2.0_det_slim_opt.nb 优化后的检测模型文件 | |--ch_PP-OCRv2_det_slim_opt.nb 优化后的检测模型文件
| |--ch_ppocr_mobile_v2.0_rec_slim_opt.nb 优化后的识别模型文件 | |--ch_PP-OCRv2_rec_slim_opt.nb 优化后的识别模型文件
| |--ch_ppocr_mobile_v2.0_cls_slim_opt.nb 优化后的文字方向分类器模型文件 | |--ch_ppocr_mobile_v2.0_cls_slim_opt.nb 优化后的文字方向分类器模型文件
| |--11.jpg 待测试图像 | |--11.jpg 待测试图像
| |--ppocr_keys_v1.txt 中文字典文件 | |--ppocr_keys_v1.txt 中文字典文件
...@@ -250,7 +261,7 @@ use_direction_classify 0 # 是否使用方向分类器,0表示不使用,1 ...@@ -250,7 +261,7 @@ use_direction_classify 0 # 是否使用方向分类器,0表示不使用,1
export LD_LIBRARY_PATH=${PWD}:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=${PWD}:$LD_LIBRARY_PATH
# 开始使用,ocr_db_crnn可执行文件的使用方式为: # 开始使用,ocr_db_crnn可执行文件的使用方式为:
# ./ocr_db_crnn 检测模型文件 方向分类器模型文件 识别模型文件 测试图像路径 字典文件路径 # ./ocr_db_crnn 检测模型文件 方向分类器模型文件 识别模型文件 测试图像路径 字典文件路径
./ocr_db_crnn ch_ppocr_mobile_v2.0_det_slim_opt.nb ch_ppocr_mobile_v2.0_rec_slim_opt.nb ch_ppocr_mobile_v2.0_cls_slim_opt.nb ./11.jpg ppocr_keys_v1.txt ./ocr_db_crnn ch_PP-OCRv2_det_slim_opt.nb ch_PP-OCRv2_rec_slim_opt.nb ch_ppocr_mobile_v2.0_cls_slim_opt.nb ./11.jpg ppocr_keys_v1.txt
``` ```
如果对代码做了修改,则需要重新编译并push到手机上。 如果对代码做了修改,则需要重新编译并push到手机上。
......
- [Tutorial of PaddleOCR Mobile deployment](#tutorial-of-paddleocr-mobile-deployment)
- [1. Preparation](#1-preparation)
- [Preparation environment](#preparation-environment)
- [1.1 Prepare the cross-compilation environment](#11-prepare-the-cross-compilation-environment)
- [1.2 Prepare Paddle-Lite library](#12-prepare-paddle-lite-library)
- [2 Run](#2-run)
- [2.1 Inference Model Optimization](#21-inference-model-optimization)
- [2.2 Run optimized model on Phone](#22-run-optimized-model-on-phone)
- [注意:](#注意)
- [FAQ](#faq)
# Tutorial of PaddleOCR Mobile deployment # Tutorial of PaddleOCR Mobile deployment
This tutorial will introduce how to use [Paddle Lite](https://github.com/PaddlePaddle/Paddle-Lite) to deploy PaddleOCR ultra-lightweight Chinese and English detection models on mobile phones. This tutorial will introduce how to use [Paddle Lite](https://github.com/PaddlePaddle/Paddle-Lite) to deploy PaddleOCR ultra-lightweight Chinese and English detection models on mobile phones.
...@@ -28,17 +39,17 @@ There are two ways to obtain the Paddle-Lite library: ...@@ -28,17 +39,17 @@ There are two ways to obtain the Paddle-Lite library:
| Platform | Paddle-Lite library download link | | Platform | Paddle-Lite library download link |
|---|---| |---|---|
|Android|[arm7](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.9/inference_lite_lib.android.armv7.gcc.c++_shared.with_extra.with_cv.tar.gz) / [arm8](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.9/inference_lite_lib.android.armv8.gcc.c++_shared.with_extra.with_cv.tar.gz)| |Android|[arm7](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.10/inference_lite_lib.android.armv7.gcc.c++_shared.with_extra.with_cv.tar.gz) / [arm8](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.10/inference_lite_lib.android.armv8.gcc.c++_shared.with_extra.with_cv.tar.gz)|
|IOS|[arm7](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.9/inference_lite_lib.ios.armv7.with_cv.with_extra.with_log.tiny_publish.tar.gz) / [arm8](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.9/inference_lite_lib.ios.armv8.with_cv.with_extra.with_log.tiny_publish.tar.gz)| |IOS|[arm7](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.10/inference_lite_lib.ios.armv7.with_cv.with_extra.with_log.tiny_publish.tar.gz) / [arm8](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.10/inference_lite_lib.ios.armv8.with_cv.with_extra.with_log.tiny_publish.tar.gz)|
Note: 1. The above Paddle-Lite library is compiled from the Paddle-Lite 2.9 branch. For more information about Paddle-Lite 2.9, please refer to [link](https://github.com/PaddlePaddle/Paddle-Lite/releases/tag/v2.9). Note: 1. The above Paddle-Lite library is compiled from the Paddle-Lite 2.10 branch. For more information about Paddle-Lite 2.10, please refer to [link](https://github.com/PaddlePaddle/Paddle-Lite/releases/tag/v2.10).
- 2. [Recommended] Compile Paddle-Lite to get the prediction library. The compilation method of Paddle-Lite is as follows: - 2. [Recommended] Compile Paddle-Lite to get the prediction library. The compilation method of Paddle-Lite is as follows:
``` ```
git clone https://github.com/PaddlePaddle/Paddle-Lite.git git clone https://github.com/PaddlePaddle/Paddle-Lite.git
cd Paddle-Lite cd Paddle-Lite
# Switch to Paddle-Lite release/v2.8 stable branch # Switch to Paddle-Lite release/v2.10 stable branch
git checkout release/v2.8 git checkout release/v2.10
./lite/tools/build_android.sh --arch=armv8 --with_cv=ON --with_extra=ON ./lite/tools/build_android.sh --arch=armv8 --with_cv=ON --with_extra=ON
``` ```
...@@ -87,10 +98,10 @@ The following table also provides a series of models that can be deployed on mob ...@@ -87,10 +98,10 @@ The following table also provides a series of models that can be deployed on mob
|Version|Introduction|Model size|Detection model|Text Direction model|Recognition model|Paddle-Lite branch| |Version|Introduction|Model size|Detection model|Text Direction model|Recognition model|Paddle-Lite branch|
|---|---|---|---|---|---|---| |---|---|---|---|---|---|---|
|V2.0|extra-lightweight chinese OCR optimized model|7.8M|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_det_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_cls_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_rec_opt.nb)|v2.9| |PP-OCRv2|extra-lightweight chinese OCR optimized model|11M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_det_infer_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_infer_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_rec_infer_opt.nb)|v2.10|
|V2.0(slim)|extra-lightweight chinese OCR optimized model|3.3M|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_det_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_rec_slim_opt.nb)|v2.9| |PP-OCRv2(slim)|extra-lightweight chinese OCR optimized model|4.6M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_det_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_rec_slim_opt.nb)|v2.10|
If you directly use the model in the above table for deployment, you can skip the following steps and directly read [Section 2.2](#2.2 Run optimized model on Phone). If you directly use the model in the above table for deployment, you can skip the following steps and directly read [Section 2.2](#2.2-Run-optimized-model-on-Phone).
If the model to be deployed is not in the above table, you need to follow the steps below to obtain the optimized model. If the model to be deployed is not in the above table, you need to follow the steps below to obtain the optimized model.
...@@ -98,7 +109,7 @@ The `opt` tool can be obtained by compiling Paddle Lite. ...@@ -98,7 +109,7 @@ The `opt` tool can be obtained by compiling Paddle Lite.
``` ```
git clone https://github.com/PaddlePaddle/Paddle-Lite.git git clone https://github.com/PaddlePaddle/Paddle-Lite.git
cd Paddle-Lite cd Paddle-Lite
git checkout release/v2.9 git checkout release/v2.10
./lite/tools/build.sh build_optimize_tool ./lite/tools/build.sh build_optimize_tool
``` ```
...@@ -124,22 +135,22 @@ cd build.opt/lite/api/ ...@@ -124,22 +135,22 @@ cd build.opt/lite/api/
The following takes the ultra-lightweight Chinese model of PaddleOCR as an example to introduce the use of the compiled opt file to complete the conversion of the inference model to the Paddle-Lite optimized model The following takes the ultra-lightweight Chinese model of PaddleOCR as an example to introduce the use of the compiled opt file to complete the conversion of the inference model to the Paddle-Lite optimized model
``` ```
# [Recommendation] Download the Chinese and English inference model of PaddleOCR V2.0 # [Recommendation] Download the Chinese and English inference model of PP-OCRv2
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_det_slim_infer.tar && tar xf ch_ppocr_mobile_v2.0_det_slim_infer.tar wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_slim_quant_infer.tar && tar xf ch_PP-OCRv2_det_slim_quant_infer.tar
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_rec_slim_infer.tar && tar xf ch_ppocr_mobile_v2.0_rec_slim_infer.tar wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_slim_quant_infer.tar && tar xf ch_PP-OCRv2_rec_slim_quant_infer.tar
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_cls_slim_infer.tar && tar xf ch_ppocr_mobile_v2.0_cls_slim_infer.tar wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_cls_slim_infer.tar && tar xf ch_ppocr_mobile_v2.0_cls_slim_infer.tar
# Convert V2.0 detection model # Convert detection model
./opt --model_file=./ch_ppocr_mobile_v2.0_det_slim_infer/inference.pdmodel --param_file=./ch_ppocr_mobile_v2.0_det_slim_infer/inference.pdiparams --optimize_out=./ch_ppocr_mobile_v2.0_det_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer ./opt --model_file=./ch_PP-OCRv2_det_slim_quant_infer/inference.pdmodel --param_file=./ch_PP-OCRv2_det_slim_quant_infer/inference.pdiparams --optimize_out=./ch_PP-OCRv2_det_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
# Convert V2.0 recognition model # Convert recognition model
./opt --model_file=./ch_ppocr_mobile_v2.0_rec_slim_infer/inference.pdmodel --param_file=./ch_ppocr_mobile_v2.0_rec_slim_infer/inference.pdiparams --optimize_out=./ch_ppocr_mobile_v2.0_rec_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer ./opt --model_file=./ch_PP-OCRv2_rec_slim_quant_infer/inference.pdmodel --param_file=./ch_PP-OCRv2_rec_slim_quant_infer/inference.pdiparams --optimize_out=./ch_PP-OCRv2_rec_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
# Convert V2.0 angle classifier model # Convert angle classifier model
./opt --model_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdmodel --param_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdiparams --optimize_out=./ch_ppocr_mobile_v2.0_cls_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer ./opt --model_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdmodel --param_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdiparams --optimize_out=./ch_ppocr_mobile_v2.0_cls_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
``` ```
After the conversion is successful, there will be more files ending with `.nb` in the inference model directory, which is the successfully converted model file. After the conversion is successful, there will be more files ending with `.nb` in the inference model directory, which is the successfully converted model file.
<a name="2.2 Run optimized model on Phone"></a> <a name="2.2-Run-optimized-model-on-Phone"></a>
### 2.2 Run optimized model on Phone ### 2.2 Run optimized model on Phone
Some preparatory work is required first. Some preparatory work is required first.
...@@ -194,8 +205,8 @@ The structure of the OCR demo is as follows after the above command is executed: ...@@ -194,8 +205,8 @@ The structure of the OCR demo is as follows after the above command is executed:
``` ```
demo/cxx/ocr/ demo/cxx/ocr/
|-- debug/ |-- debug/
| |--ch_ppocr_mobile_v2.0_det_slim_opt.nb Detection model | |--ch_PP-OCRv2_det_slim_opt.nb Detection model
| |--ch_ppocr_mobile_v2.0_rec_slim_opt.nb Recognition model | |--ch_PP-OCRv2_rec_slim_opt.nb Recognition model
| |--ch_ppocr_mobile_v2.0_cls_slim_opt.nb Text direction classification model | |--ch_ppocr_mobile_v2.0_cls_slim_opt.nb Text direction classification model
| |--11.jpg Image for OCR | |--11.jpg Image for OCR
| |--ppocr_keys_v1.txt Dictionary file | |--ppocr_keys_v1.txt Dictionary file
...@@ -249,7 +260,7 @@ After the above steps are completed, you can use adb to push the file to the pho ...@@ -249,7 +260,7 @@ After the above steps are completed, you can use adb to push the file to the pho
export LD_LIBRARY_PATH=${PWD}:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=${PWD}:$LD_LIBRARY_PATH
# The use of ocr_db_crnn is: # The use of ocr_db_crnn is:
# ./ocr_db_crnn Detection model file Orientation classifier model file Recognition model file Test image path Dictionary file path # ./ocr_db_crnn Detection model file Orientation classifier model file Recognition model file Test image path Dictionary file path
./ocr_db_crnn ch_ppocr_mobile_v2.0_det_opt.nb ch_ppocr_mobile_v2.0_rec_opt.nb ch_ppocr_mobile_v2.0_cls_opt.nb ./11.jpg ppocr_keys_v1.txt ./ocr_db_crnn ch_PP-OCRv2_det_slim_opt.nb ch_PP-OCRv2_rec_slim_opt.nb ch_ppocr_mobile_v2.0_cls_opt.nb ./11.jpg ppocr_keys_v1.txt
``` ```
If you modify the code, you need to recompile and push to the phone. If you modify the code, you need to recompile and push to the phone.
......
# paddle2onnx 模型转化与预测 # paddle2onnx 模型转化与预测
本章节介绍 PaddleOCR 模型如何转化为 ONNX 模型,并基于 ONNX 引擎预测。 本章节介绍 PaddleOCR 模型如何转化为 ONNX 模型,并基于 ONNXRuntime 引擎预测。
## 1. 环境准备 ## 1. 环境准备
需要准备 Paddle2ONNX 模型转化环境,和 ONNX 模型预测环境 需要准备 PaddleOCR、Paddle2ONNX 模型转化环境,和 ONNXRuntime 预测环境
### PaddleOCR
克隆PaddleOCR的仓库,使用release/2.4分支,并进行安装,由于PaddleOCR仓库比较大,git clone速度比较慢,所以本教程已下载
```
git clone -b release/2.4 https://github.com/PaddlePaddle/PaddleOCR.git
cd PaddleOCR && python3.7 setup.py install
```
### Paddle2ONNX ### Paddle2ONNX
...@@ -16,7 +25,7 @@ Paddle2ONNX 支持将 PaddlePaddle 模型格式转化到 ONNX 模型格式,算 ...@@ -16,7 +25,7 @@ Paddle2ONNX 支持将 PaddlePaddle 模型格式转化到 ONNX 模型格式,算
python3.7 -m pip install paddle2onnx python3.7 -m pip install paddle2onnx
``` ```
- 安装 ONNX - 安装 ONNXRuntime
``` ```
# 建议安装 1.9.0 版本,可根据环境更换版本号 # 建议安装 1.9.0 版本,可根据环境更换版本号
python3.7 -m pip install onnxruntime==1.9.0 python3.7 -m pip install onnxruntime==1.9.0
...@@ -30,11 +39,17 @@ python3.7 -m pip install onnxruntime==1.9.0 ...@@ -30,11 +39,17 @@ python3.7 -m pip install onnxruntime==1.9.0
有两种方式获取Paddle静态图模型:在 [model_list](../../doc/doc_ch/models_list.md) 中下载PaddleOCR提供的预测模型; 有两种方式获取Paddle静态图模型:在 [model_list](../../doc/doc_ch/models_list.md) 中下载PaddleOCR提供的预测模型;
参考[模型导出说明](../../doc/doc_ch/inference.md#训练模型转inference模型)把训练好的权重转为 inference_model。 参考[模型导出说明](../../doc/doc_ch/inference.md#训练模型转inference模型)把训练好的权重转为 inference_model。
以 ppocr 检测模型为例: 以 ppocr 中文检测、识别、分类模型为例:
``` ```
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar wget -nc -P ./inference https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar
cd ./inference && tar xf ch_ppocr_mobile_v2.0_det_infer.tar && cd .. cd ./inference && tar xf ch_PP-OCRv2_det_infer.tar && cd ..
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar
cd ./inference && tar xf ch_PP-OCRv2_rec_infer.tar && cd ..
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar
cd ./inference && tar xf ch_ppocr_mobile_v2.0_cls_infer.tar && cd ..
``` ```
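If wget/tar are not available (for example on Windows), the same three archives can be fetched and unpacked with a short Python sketch; the URLs are exactly the ones listed above.
```python
# Minimal sketch: download and unpack the three inference models without wget/tar.
import tarfile
import urllib.request
from pathlib import Path

urls = [
    "https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar",
    "https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar",
    "https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar",
]
Path("inference").mkdir(exist_ok=True)
for url in urls:
    tar_path = Path("inference") / url.rsplit("/", 1)[-1]
    if not tar_path.exists():
        urllib.request.urlretrieve(url, tar_path)   # skip re-downloading existing archives
    with tarfile.open(tar_path) as tf:
        tf.extractall("inference")
```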
- 模型转换 - 模型转换
...@@ -42,35 +57,160 @@ cd ./inference && tar xf ch_ppocr_mobile_v2.0_det_infer.tar && cd .. ...@@ -42,35 +57,160 @@ cd ./inference && tar xf ch_ppocr_mobile_v2.0_det_infer.tar && cd ..
使用 Paddle2ONNX 将Paddle静态图模型转换为ONNX模型格式: 使用 Paddle2ONNX 将Paddle静态图模型转换为ONNX模型格式:
``` ```
paddle2onnx --model_dir=./inference/ch_ppocr_mobile_v2.0_det_infer/ \ paddle2onnx --model_dir ./inference/ch_PP-OCRv2_det_infer \
--model_filename=inference.pdmodel \ --model_filename inference.pdmodel \
--params_filename=inference.pdiparams \ --params_filename inference.pdiparams \
--save_file=./inference/det_mobile_onnx/model.onnx \ --save_file ./inference/det_onnx/model.onnx \
--opset_version=10 \ --opset_version 10 \
--input_shape_dict="{'x': [-1, 3, -1, -1]}" \ --input_shape_dict="{'x':[-1,3,-1,-1]}" \
--enable_onnx_checker=True --enable_onnx_checker True
paddle2onnx --model_dir ./inference/ch_PP-OCRv2_rec_infer \
--model_filename inference.pdmodel \
--params_filename inference.pdiparams \
--save_file ./inference/rec_onnx/model.onnx \
--opset_version 10 \
--input_shape_dict="{'x':[-1,3,-1,-1]}" \
--enable_onnx_checker True
paddle2onnx --model_dir ./inference/ch_ppocr_mobile_v2.0_cls_infer \
--model_filename inference.pdmodel \
--params_filename inference.pdiparams \
--save_file ./inference/cls_onnx/model.onnx \
--opset_version 10 \
--input_shape_dict="{'x':[-1,3,-1,-1]}" \
--enable_onnx_checker True
``` ```
执行完毕后,ONNX 模型会被保存在 `./inference/det_mobile_onnx/` 路径下 执行完毕后,ONNX 模型会被分别保存在 `./inference/det_onnx/``./inference/rec_onnx/``./inference/cls_onnx/`路径下
* 注意:对于OCR模型,转化过程中必须采用动态shape的形式,即加入选项--input_shape_dict="{'x': [-1, 3, -1, -1]}",否则预测结果可能与直接使用Paddle预测有细微不同。 * 注意:对于OCR模型,转化过程中必须采用动态shape的形式,即加入选项--input_shape_dict="{'x': [-1, 3, -1, -1]}",否则预测结果可能与直接使用Paddle预测有细微不同。
另外,以下几个模型暂不支持转换为 ONNX 模型: 另外,以下几个模型暂不支持转换为 ONNX 模型:
NRTR、SAR、RARE、SRN NRTR、SAR、RARE、SRN
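To double-check that the exported model really kept dynamic input shapes (per the note above), a quick sketch using the `onnx` package (install it with `pip install onnx` if it is not already present):
```python
import onnx

model = onnx.load("./inference/det_onnx/model.onnx")
onnx.checker.check_model(model)                      # raises if the graph is structurally invalid
dims = model.graph.input[0].type.tensor_type.shape.dim
print([(d.dim_value, d.dim_param) for d in dims])    # dynamic dims show up as 0 / a symbolic name
```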
## 3. onnx 预测 ## 3. 推理预测
以中文OCR模型为例,使用 ONNXRuntime 预测可执行如下命令:
```
python3.7 tools/infer/predict_system.py --use_gpu=False --use_onnx=True \
--det_model_dir=./inference/det_onnx/model.onnx \
--rec_model_dir=./inference/rec_onnx/model.onnx \
--cls_model_dir=./inference/cls_onnx/model.onnx \
--image_dir=./deploy/lite/imgs/lite_demo.png
```
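For reference, the ONNX path can also be exercised directly with ONNXRuntime. The sketch below runs only the exported detection model; the preprocessing is deliberately simplified for illustration (the full DB pipeline lives in tools/infer/predict_det.py).
```python
import cv2
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("./inference/det_onnx/model.onnx")
img = cv2.imread("./deploy/lite/imgs/lite_demo.png").astype("float32")
h, w = img.shape[:2]
nh, nw = (h + 31) // 32 * 32, (w + 31) // 32 * 32      # pad H/W up to multiples of 32
padded = np.zeros((nh, nw, 3), dtype="float32")
padded[:h, :w, :] = img
x = (padded / 255.0 - 0.5) / 0.5                        # simplified normalization, illustration only
x = x.transpose(2, 0, 1)[np.newaxis, :]                 # NCHW, batch size 1
prob_map = sess.run(None, {sess.get_inputs()[0].name: x})[0]
print(prob_map.shape)                                   # per-pixel text probability map
```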
以中文OCR模型为例,使用 Paddle Inference 预测可执行如下命令:
```
python3.7 tools/infer/predict_system.py --use_gpu=False \
--cls_model_dir=./inference/ch_ppocr_mobile_v2.0_cls_infer \
--rec_model_dir=./inference/ch_PP-OCRv2_rec_infer \
--det_model_dir=./inference/ch_PP-OCRv2_det_infer \
--image_dir=./deploy/lite/imgs/lite_demo.png
```
执行命令后在终端会打印出预测的识别信息,并在 `./inference_results/` 下保存可视化结果。
ONNXRuntime 执行效果:
<div align="center">
<img src="./images/lite_demo_onnx.png" width=800">
</div>
Paddle Inference 执行效果:
<div align="center">
<img src="./images/lite_demo_paddle.png" width=800">
</div>
以检测模型为例,使用 ONNX 预测可执行如下命令:
使用 ONNXRuntime 预测,终端输出:
``` ```
python3.7 ../../tools/infer/predict_det.py --use_gpu=False --use_onnx=True \ [2022/02/22 17:48:27] root DEBUG: dt_boxes num : 38, elapse : 0.043187856674194336
--det_model_dir=./inference/det_mobile_onnx/model.onnx \ [2022/02/22 17:48:27] root DEBUG: rec_res num : 38, elapse : 0.592170000076294
--image_dir=../../doc/imgs/1.jpg [2022/02/22 17:48:27] root DEBUG: 0 Predict time of ./deploy/lite/imgs/lite_demo.png: 0.642s
[2022/02/22 17:48:27] root DEBUG: The, 0.984
[2022/02/22 17:48:27] root DEBUG: visualized, 0.882
[2022/02/22 17:48:27] root DEBUG: etect18片, 0.720
[2022/02/22 17:48:27] root DEBUG: image saved in./vis.jpg, 0.947
[2022/02/22 17:48:27] root DEBUG: 纯臻营养护发素0.993604, 0.996
[2022/02/22 17:48:27] root DEBUG: 产品信息/参数, 0.922
[2022/02/22 17:48:27] root DEBUG: 0.992728, 0.914
[2022/02/22 17:48:27] root DEBUG: (45元/每公斤,100公斤起订), 0.926
[2022/02/22 17:48:27] root DEBUG: 0.97417, 0.977
[2022/02/22 17:48:27] root DEBUG: 每瓶22元,1000瓶起订)0.993976, 0.962
[2022/02/22 17:48:27] root DEBUG: 【品牌】:代加工方式/0EMODM, 0.945
[2022/02/22 17:48:27] root DEBUG: 0.985133, 0.980
[2022/02/22 17:48:27] root DEBUG: 【品名】:纯臻营养护发素, 0.921
[2022/02/22 17:48:27] root DEBUG: 0.995007, 0.883
[2022/02/22 17:48:27] root DEBUG: 【产品编号】:YM-X-30110.96899, 0.955
[2022/02/22 17:48:27] root DEBUG: 【净含量】:220ml, 0.943
[2022/02/22 17:48:27] root DEBUG: Q.996577, 0.932
[2022/02/22 17:48:27] root DEBUG: 【适用人群】:适合所有肤质, 0.913
[2022/02/22 17:48:27] root DEBUG: 0.995842, 0.969
[2022/02/22 17:48:27] root DEBUG: 【主要成分】:鲸蜡硬脂醇、燕麦B-葡聚, 0.883
[2022/02/22 17:48:27] root DEBUG: 0.961928, 0.964
[2022/02/22 17:48:27] root DEBUG: 10, 0.812
[2022/02/22 17:48:27] root DEBUG: 糖、椰油酰胺丙基甜菜碱、泛醒, 0.866
[2022/02/22 17:48:27] root DEBUG: 0.925898, 0.943
[2022/02/22 17:48:27] root DEBUG: (成品包材), 0.974
[2022/02/22 17:48:27] root DEBUG: 0.972573, 0.961
[2022/02/22 17:48:27] root DEBUG: 【主要功能】:可紧致头发磷层,从而达到, 0.936
[2022/02/22 17:48:27] root DEBUG: 0.994448, 0.952
[2022/02/22 17:48:27] root DEBUG: 13, 0.998
[2022/02/22 17:48:27] root DEBUG: 即时持久改善头发光泽的效果,给干燥的头, 0.994
[2022/02/22 17:48:27] root DEBUG: 0.990198, 0.975
[2022/02/22 17:48:27] root DEBUG: 14, 0.977
[2022/02/22 17:48:27] root DEBUG: 发足够的滋养, 0.991
[2022/02/22 17:48:27] root DEBUG: 0.997668, 0.918
[2022/02/22 17:48:27] root DEBUG: 花费了0.457335秒, 0.901
[2022/02/22 17:48:27] root DEBUG: The visualized image saved in ./inference_results/lite_demo.png
[2022/02/22 17:48:27] root INFO: The predict total time is 0.7003889083862305
``` ```
执行命令后在终端会打印出预测的检测框坐标,并在 `./inference_results/` 下保存可视化结果。 使用 Paddle Inference 预测,终端输出:
``` ```
root INFO: 1.jpg [[[291, 295], [334, 292], [348, 844], [305, 847]], [[344, 296], [379, 294], [387, 669], [353, 671]]] [2022/02/22 17:47:25] root DEBUG: dt_boxes num : 38, elapse : 0.11791276931762695
The predict time of ../../doc/imgs/1.jpg: 0.06162881851196289 [2022/02/22 17:47:27] root DEBUG: rec_res num : 38, elapse : 2.6206860542297363
The visualized image saved in ./inference_results/det_res_1.jpg [2022/02/22 17:47:27] root DEBUG: 0 Predict time of ./deploy/lite/imgs/lite_demo.png: 2.746s
[2022/02/22 17:47:27] root DEBUG: The, 0.984
[2022/02/22 17:47:27] root DEBUG: visualized, 0.882
[2022/02/22 17:47:27] root DEBUG: etect18片, 0.720
[2022/02/22 17:47:27] root DEBUG: image saved in./vis.jpg, 0.947
[2022/02/22 17:47:27] root DEBUG: 纯臻营养护发素0.993604, 0.996
[2022/02/22 17:47:27] root DEBUG: 产品信息/参数, 0.922
[2022/02/22 17:47:27] root DEBUG: 0.992728, 0.914
[2022/02/22 17:47:27] root DEBUG: (45元/每公斤,100公斤起订), 0.926
[2022/02/22 17:47:27] root DEBUG: 0.97417, 0.977
[2022/02/22 17:47:27] root DEBUG: 每瓶22元,1000瓶起订)0.993976, 0.962
[2022/02/22 17:47:27] root DEBUG: 【品牌】:代加工方式/0EMODM, 0.945
[2022/02/22 17:47:27] root DEBUG: 0.985133, 0.980
[2022/02/22 17:47:27] root DEBUG: 【品名】:纯臻营养护发素, 0.921
[2022/02/22 17:47:27] root DEBUG: 0.995007, 0.883
[2022/02/22 17:47:27] root DEBUG: 【产品编号】:YM-X-30110.96899, 0.955
[2022/02/22 17:47:27] root DEBUG: 【净含量】:220ml, 0.943
[2022/02/22 17:47:27] root DEBUG: Q.996577, 0.932
[2022/02/22 17:47:27] root DEBUG: 【适用人群】:适合所有肤质, 0.913
[2022/02/22 17:47:27] root DEBUG: 0.995842, 0.969
[2022/02/22 17:47:27] root DEBUG: 【主要成分】:鲸蜡硬脂醇、燕麦B-葡聚, 0.883
[2022/02/22 17:47:27] root DEBUG: 0.961928, 0.964
[2022/02/22 17:47:27] root DEBUG: 10, 0.812
[2022/02/22 17:47:27] root DEBUG: 糖、椰油酰胺丙基甜菜碱、泛醒, 0.866
[2022/02/22 17:47:27] root DEBUG: 0.925898, 0.943
[2022/02/22 17:47:27] root DEBUG: (成品包材), 0.974
[2022/02/22 17:47:27] root DEBUG: 0.972573, 0.961
[2022/02/22 17:47:27] root DEBUG: 【主要功能】:可紧致头发磷层,从而达到, 0.936
[2022/02/22 17:47:27] root DEBUG: 0.994448, 0.952
[2022/02/22 17:47:27] root DEBUG: 13, 0.998
[2022/02/22 17:47:27] root DEBUG: 即时持久改善头发光泽的效果,给干燥的头, 0.994
[2022/02/22 17:47:27] root DEBUG: 0.990198, 0.975
[2022/02/22 17:47:27] root DEBUG: 14, 0.977
[2022/02/22 17:47:27] root DEBUG: 发足够的滋养, 0.991
[2022/02/22 17:47:27] root DEBUG: 0.997668, 0.918
[2022/02/22 17:47:27] root DEBUG: 花费了0.457335秒, 0.901
[2022/02/22 17:47:27] root DEBUG: The visualized image saved in ./inference_results/lite_demo.png
[2022/02/22 17:47:27] root INFO: The predict total time is 2.8338775634765625
``` ```
...@@ -34,35 +34,26 @@ The introduction and tutorial of Paddle Serving service deployment framework ref ...@@ -34,35 +34,26 @@ The introduction and tutorial of Paddle Serving service deployment framework ref
PaddleOCR operating environment and Paddle Serving operating environment are needed. PaddleOCR operating environment and Paddle Serving operating environment are needed.
1. Please prepare PaddleOCR operating environment reference [link](../../doc/doc_ch/installation.md). 1. Please prepare PaddleOCR operating environment reference [link](../../doc/doc_ch/installation.md).
Download the corresponding paddle whl package according to the environment, it is recommended to install version 2.0.1. Download the corresponding paddlepaddle whl package according to the environment; version 2.2.2 is recommended.
2. The steps of PaddleServing operating environment prepare are as follows: 2. The steps of PaddleServing operating environment prepare are as follows:
Install serving which used to start the service
```
pip3 install paddle-serving-server==0.6.1 # for CPU
pip3 install paddle-serving-server-gpu==0.6.1 # for GPU
# Other GPU environments need to confirm the environment and then choose to execute the following commands
pip3 install paddle-serving-server-gpu==0.6.1.post101 # GPU with CUDA10.1 + TensorRT6
pip3 install paddle-serving-server-gpu==0.6.1.post11 # GPU with CUDA11 + TensorRT7
```
3. Install the client to send requests to the service
```bash ```bash
# 安装serving,用于启动服务 # Install serving which used to start the service
wget https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-0.7.0.post102-py3-none-any.whl wget https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-0.7.0.post102-py3-none-any.whl
pip3 install paddle_serving_server_gpu-0.7.0.post102-py3-none-any.whl pip3 install paddle_serving_server_gpu-0.7.0.post102-py3-none-any.whl
# 如果是cuda10.1环境,可以使用下面的命令安装paddle-serving-server
# Install paddle-serving-server for cuda10.1
# wget https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-0.7.0.post101-py3-none-any.whl # wget https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-0.7.0.post101-py3-none-any.whl
# pip3 install paddle_serving_server_gpu-0.7.0.post101-py3-none-any.whl # pip3 install paddle_serving_server_gpu-0.7.0.post101-py3-none-any.whl
# 安装client,用于向服务发送请求 # Install the client which is used to send requests to the service
wget https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_client-0.7.0-cp37-none-any.whl wget https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_client-0.7.0-cp37-none-any.whl
pip3 install paddle_serving_client-0.7.0-cp37-none-any.whl pip3 install paddle_serving_client-0.7.0-cp37-none-any.whl
# 安装serving-app # Install serving-app
wget https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_app-0.7.0-py3-none-any.whl wget https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_app-0.7.0-py3-none-any.whl
pip3 install paddle_serving_app-0.7.0-py3-none-any.whl pip3 install paddle_serving_app-0.7.0-py3-none-any.whl
``` ```
...@@ -87,27 +78,27 @@ Then, you can use installed paddle_serving_client tool to convert inference mode ...@@ -87,27 +78,27 @@ Then, you can use installed paddle_serving_client tool to convert inference mode
python3 -m paddle_serving_client.convert --dirname ./ch_PP-OCRv2_det_infer/ \ python3 -m paddle_serving_client.convert --dirname ./ch_PP-OCRv2_det_infer/ \
--model_filename inference.pdmodel \ --model_filename inference.pdmodel \
--params_filename inference.pdiparams \ --params_filename inference.pdiparams \
--serving_server ./ppocrv2_det_serving/ \ --serving_server ./ppocr_det_mobile_2.0_serving/ \
--serving_client ./ppocrv2_det_client/ --serving_client ./ppocr_det_mobile_2.0_client/
# Recognition model conversion # Recognition model conversion
python3 -m paddle_serving_client.convert --dirname ./ch_PP-OCRv2_rec_infer/ \ python3 -m paddle_serving_client.convert --dirname ./ch_PP-OCRv2_rec_infer/ \
--model_filename inference.pdmodel \ --model_filename inference.pdmodel \
--params_filename inference.pdiparams \ --params_filename inference.pdiparams \
--serving_server ./ppocrv2_rec_serving/ \ --serving_server ./ppocr_rec_mobile_2.0_serving/ \
--serving_client ./ppocrv2_rec_client/ --serving_client ./ppocr_rec_mobile_2.0_client/
``` ```
After the detection model is converted, there will be additional folders of `ppocr_det_mobile_2.0_serving` and `ppocr_det_mobile_2.0_client` in the current folder, with the following format: After the detection model is converted, there will be additional folders of `ppocr_det_mobile_2.0_serving` and `ppocr_det_mobile_2.0_client` in the current folder, with the following format:
``` ```
|- ppocrv2_det_serving/ |- ppocr_det_mobile_2.0_serving/
|- __model__ |- __model__
|- __params__ |- __params__
|- serving_server_conf.prototxt |- serving_server_conf.prototxt
|- serving_server_conf.stream.prototxt |- serving_server_conf.stream.prototxt
|- ppocrv2_det_client |- ppocr_det_mobile_2.0_client
|- serving_client_conf.prototxt |- serving_client_conf.prototxt
|- serving_client_conf.stream.prototxt |- serving_client_conf.stream.prototxt
......
...@@ -31,7 +31,7 @@ PaddleOCR提供2种服务部署方式: ...@@ -31,7 +31,7 @@ PaddleOCR提供2种服务部署方式:
需要准备PaddleOCR的运行环境和Paddle Serving的运行环境。 需要准备PaddleOCR的运行环境和Paddle Serving的运行环境。
- 准备PaddleOCR的运行环境[链接](../../doc/doc_ch/installation.md) - 准备PaddleOCR的运行环境[链接](../../doc/doc_ch/installation.md)
根据环境下载对应的paddle whl包,推荐安装2.0.1版本 根据环境下载对应的paddlepaddle whl包,推荐安装2.2.2版本
- 准备PaddleServing的运行环境,步骤如下 - 准备PaddleServing的运行环境,步骤如下
...@@ -75,26 +75,26 @@ wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar ...@@ -75,26 +75,26 @@ wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar
python3 -m paddle_serving_client.convert --dirname ./ch_PP-OCRv2_det_infer/ \ python3 -m paddle_serving_client.convert --dirname ./ch_PP-OCRv2_det_infer/ \
--model_filename inference.pdmodel \ --model_filename inference.pdmodel \
--params_filename inference.pdiparams \ --params_filename inference.pdiparams \
--serving_server ./ppocrv2_det_serving/ \ --serving_server ./ppocr_det_mobile_2.0_serving/ \
--serving_client ./ppocrv2_det_client/ --serving_client ./ppocr_det_mobile_2.0_client/
# 转换识别模型 # 转换识别模型
python3 -m paddle_serving_client.convert --dirname ./ch_PP-OCRv2_rec_infer/ \ python3 -m paddle_serving_client.convert --dirname ./ch_PP-OCRv2_rec_infer/ \
--model_filename inference.pdmodel \ --model_filename inference.pdmodel \
--params_filename inference.pdiparams \ --params_filename inference.pdiparams \
--serving_server ./ppocrv2_rec_serving/ \ --serving_server ./ppocr_rec_mobile_2.0_serving/ \
--serving_client ./ppocrv2_rec_client/ --serving_client ./ppocr_rec_mobile_2.0_client/
``` ```
检测模型转换完成后,会在当前文件夹多出`ppocrv2_det_serving``ppocrv2_det_client`的文件夹,具备如下格式: 检测模型转换完成后,会在当前文件夹多出`ppocr_det_mobile_2.0_serving``ppocr_det_mobile_2.0_client`的文件夹,具备如下格式:
``` ```
|- ppocrv2_det_serving/ |- ppocr_det_mobile_2.0_serving/
|- __model__ |- __model__
|- __params__ |- __params__
|- serving_server_conf.prototxt |- serving_server_conf.prototxt
|- serving_server_conf.stream.prototxt |- serving_server_conf.stream.prototxt
|- ppocrv2_det_client |- ppocr_det_mobile_2.0_client
|- serving_client_conf.prototxt |- serving_client_conf.prototxt
|- serving_client_conf.stream.prototxt |- serving_client_conf.stream.prototxt
......
...@@ -34,7 +34,7 @@ op: ...@@ -34,7 +34,7 @@ op:
client_type: local_predictor client_type: local_predictor
#det模型路径 #det模型路径
model_config: ./ppocrv2_det_serving model_config: ./ppocr_det_mobile_2.0_serving
#Fetch结果列表,以client_config中fetch_var的alias_name为准 #Fetch结果列表,以client_config中fetch_var的alias_name为准
fetch_list: ["save_infer_model/scale_0.tmp_1"] fetch_list: ["save_infer_model/scale_0.tmp_1"]
...@@ -60,7 +60,7 @@ op: ...@@ -60,7 +60,7 @@ op:
client_type: local_predictor client_type: local_predictor
#rec模型路径 #rec模型路径
model_config: ./ppocrv2_rec_serving model_config: ./ppocr_rec_mobile_2.0_serving
#Fetch结果列表,以client_config中fetch_var的alias_name为准 #Fetch结果列表,以client_config中fetch_var的alias_name为准
fetch_list: ["save_infer_model/scale_0.tmp_1"] fetch_list: ["save_infer_model/scale_0.tmp_1"]
......
...@@ -433,3 +433,54 @@ class OCRReader(object): ...@@ -433,3 +433,54 @@ class OCRReader(object):
text = self.label_ops.decode( text = self.label_ops.decode(
preds_idx, preds_prob, is_remove_duplicate=True) preds_idx, preds_prob, is_remove_duplicate=True)
return text return text
from argparse import ArgumentParser, RawDescriptionHelpFormatter
import yaml
class ArgsParser(ArgumentParser):
def __init__(self):
super(ArgsParser, self).__init__(
formatter_class=RawDescriptionHelpFormatter)
self.add_argument("-c", "--config", help="configuration file to use")
self.add_argument(
"-o", "--opt", nargs='+', help="set configuration options")
def parse_args(self, argv=None):
args = super(ArgsParser, self).parse_args(argv)
assert args.config is not None, \
"Please specify --config=configure_file_path."
args.conf_dict = self._parse_opt(args.opt, args.config)
print("args config:", args.conf_dict)
return args
def _parse_helper(self, v):
if v.replace(".", "", 1).isdigit():
if "." in v:
v = float(v)
else:
v = int(v)
elif v == "True" or v == "False":
v = (v == "True")
return v
def _parse_opt(self, opts, conf_path):
f = open(conf_path)
config = yaml.load(f, Loader=yaml.Loader)
if not opts:
return config
for s in opts:
s = s.strip()
k, v = s.split('=')
v = self._parse_helper(v)
print(k,v, type(v))
cur = config
parent = cur
for kk in k.split("."):
if kk not in cur:
cur[kk] = {}
parent = cur
cur = cur[kk]
else:
parent = cur
cur = cur[kk]
parent[k.split(".")[-1]] = v
return config
\ No newline at end of file
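A hypothetical usage sketch of the new `ArgsParser`: each `-o` entry is a dotted YAML path whose value is merged into the loaded config. It assumes a `config.yml` in the working directory shaped like the serving config shown earlier (`op.det` / `op.rec` sections).
```python
# Hypothetical example: override nested config.yml values from the command line.
from ocr_reader import ArgsParser

args = ArgsParser().parse_args(
    ["-c", "config.yml", "-o", "op.det.concurrency=2", "op.rec.local_service_conf.devices=0"])
print(args.conf_dict["op"]["det"]["concurrency"])   # -> 2, parsed to int by _parse_helper
```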
...@@ -18,7 +18,7 @@ import numpy as np ...@@ -18,7 +18,7 @@ import numpy as np
import cv2 import cv2
import base64 import base64
# from paddle_serving_app.reader import OCRReader # from paddle_serving_app.reader import OCRReader
from ocr_reader import OCRReader, DetResizeForTest from ocr_reader import OCRReader, DetResizeForTest, ArgsParser
from paddle_serving_app.reader import Sequential, ResizeByFactor from paddle_serving_app.reader import Sequential, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
...@@ -73,5 +73,6 @@ class OcrService(WebService): ...@@ -73,5 +73,6 @@ class OcrService(WebService):
uci_service = OcrService(name="ocr") uci_service = OcrService(name="ocr")
uci_service.prepare_pipeline_config("config.yml") FLAGS = ArgsParser().parse_args()
uci_service.prepare_pipeline_config(yml_dict=FLAGS.conf_dict)
uci_service.run_service() uci_service.run_service()
...@@ -18,7 +18,7 @@ import numpy as np ...@@ -18,7 +18,7 @@ import numpy as np
import cv2 import cv2
import base64 import base64
# from paddle_serving_app.reader import OCRReader # from paddle_serving_app.reader import OCRReader
from ocr_reader import OCRReader, DetResizeForTest from ocr_reader import OCRReader, DetResizeForTest, ArgsParser
from paddle_serving_app.reader import Sequential, ResizeByFactor from paddle_serving_app.reader import Sequential, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose from paddle_serving_app.reader import Div, Normalize, Transpose
...@@ -82,5 +82,6 @@ class OcrService(WebService): ...@@ -82,5 +82,6 @@ class OcrService(WebService):
uci_service = OcrService(name="ocr") uci_service = OcrService(name="ocr")
uci_service.prepare_pipeline_config("config.yml") FLAGS = ArgsParser().parse_args()
uci_service.prepare_pipeline_config(yml_dict=FLAGS.conf_dict)
uci_service.run_service() uci_service.run_service()
...@@ -349,7 +349,7 @@ A:PaddleOCR已完成Windows和Mac系统适配,运行时注意两点: ...@@ -349,7 +349,7 @@ A:PaddleOCR已完成Windows和Mac系统适配,运行时注意两点:
#### Q:训练文字识别模型,真实数据有30w,合成数据有500w,需要做样本均衡吗? #### Q:训练文字识别模型,真实数据有30w,合成数据有500w,需要做样本均衡吗?
A:需要,一般需要保证一个batch中真实数据样本和合成数据样本的比例是1:1~1:3左右效果比较理想。如果合成数据过大,会过拟合到合成数据,预测效果往往不佳。还有一种启发性的尝试是可以先用大量合成数据训练一个base模型,然后再用真实数据微调,在一些简单场景效果也是会有提升的。 A:需要,一般需要保证一个batch中真实数据样本和合成数据样本的比例是5:1~10:1左右效果比较理想。如果合成数据过大,会过拟合到合成数据,预测效果往往不佳。还有一种启发性的尝试是可以先用大量合成数据训练一个base模型,然后再用真实数据微调,在一些简单场景效果也是会有提升的。
#### Q: 当训练数据量少时,如何获取更多的数据? #### Q: 当训练数据量少时,如何获取更多的数据?
......
# 两阶段算法 # 两阶段算法
- [两阶段算法](#-----) - [两阶段算法](#两阶段算法)
* [1. 算法介绍](#1) - [1. 算法介绍](#1-算法介绍)
+ [1.1 文本检测算法](#11) - [1.1 文本检测算法](#11-文本检测算法)
+ [1.2 文本识别算法](#12) - [1.2 文本识别算法](#12-文本识别算法)
* [2. 模型训练](#2) - [2. 模型训练](#2-模型训练)
* [3. 模型推理](#3) - [3. 模型推理](#3-模型推理)
<a name="1"></a> <a name="1"></a>
...@@ -21,6 +21,7 @@ PaddleOCR开源的文本检测算法列表: ...@@ -21,6 +21,7 @@ PaddleOCR开源的文本检测算法列表:
- [x] EAST([paper](https://arxiv.org/abs/1704.03155))[1] - [x] EAST([paper](https://arxiv.org/abs/1704.03155))[1]
- [x] SAST([paper](https://arxiv.org/abs/1908.05498))[4] - [x] SAST([paper](https://arxiv.org/abs/1908.05498))[4]
- [x] PSENet([paper](https://arxiv.org/abs/1903.12473v2)) - [x] PSENet([paper](https://arxiv.org/abs/1903.12473v2))
- [x] FCENet([paper](https://arxiv.org/abs/2104.10442))
在ICDAR2015文本检测公开数据集上,算法效果如下: 在ICDAR2015文本检测公开数据集上,算法效果如下:
|模型|骨干网络|precision|recall|Hmean|下载链接| |模型|骨干网络|precision|recall|Hmean|下载链接|
...@@ -39,6 +40,12 @@ PaddleOCR开源的文本检测算法列表: ...@@ -39,6 +40,12 @@ PaddleOCR开源的文本检测算法列表:
| --- | --- | --- | --- | --- | --- | | --- | --- | --- | --- | --- | --- |
|SAST|ResNet50_vd|89.63%|78.44%|83.66%|[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_totaltext_v2.0_train.tar)| |SAST|ResNet50_vd|89.63%|78.44%|83.66%|[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_totaltext_v2.0_train.tar)|
在CTW1500文本检测公开数据集上,算法效果如下:
|模型|骨干网络|precision|recall|Hmean|下载链接|
| --- | --- | --- | --- | --- | --- |
|FCE|ResNet50_dcn|88.39%|82.18%|85.27%|[训练模型](https://paddleocr.bj.bcebos.com/contribution/det_r50_dcn_fce_ctw_v2.0_train.tar)|
**说明:** SAST模型训练额外加入了icdar2013、icdar2017、COCO-Text、ArT等公开数据集进行调优。PaddleOCR用到的经过整理格式的英文公开数据集下载: **说明:** SAST模型训练额外加入了icdar2013、icdar2017、COCO-Text、ArT等公开数据集进行调优。PaddleOCR用到的经过整理格式的英文公开数据集下载:
* [百度云地址](https://pan.baidu.com/s/12cPnZcVuV1zn5DOd4mqjVw) (提取码: 2bpi) * [百度云地址](https://pan.baidu.com/s/12cPnZcVuV1zn5DOd4mqjVw) (提取码: 2bpi)
* [Google Drive下载地址](https://drive.google.com/drive/folders/1ll2-XEVyCQLpJjawLDiRlvo_i4BqHCJe?usp=sharing) * [Google Drive下载地址](https://drive.google.com/drive/folders/1ll2-XEVyCQLpJjawLDiRlvo_i4BqHCJe?usp=sharing)
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
* [2.1 启动训练](#21-----) * [2.1 启动训练](#21-----)
* [2.2 断点训练](#22-----) * [2.2 断点训练](#22-----)
* [2.3 更换Backbone 训练](#23---backbone---) * [2.3 更换Backbone 训练](#23---backbone---)
* [2.4 知识蒸馏训练](#24---distill---)
- [3. 模型评估与预测](#3--------) - [3. 模型评估与预测](#3--------)
* [3.1 指标评估](#31-----) * [3.1 指标评估](#31-----)
* [3.2 测试检测效果](#32-------) * [3.2 测试检测效果](#32-------)
...@@ -182,6 +183,15 @@ args1: args1 ...@@ -182,6 +183,15 @@ args1: args1
**注意**:如果要更换网络的其他模块,可以参考[文档](./add_new_algorithm.md)。 **注意**:如果要更换网络的其他模块,可以参考[文档](./add_new_algorithm.md)。
<a name="24---distill---"></a>
## 2.4 知识蒸馏训练
PaddleOCR支持了基于知识蒸馏的检测模型训练过程,更多内容可以参考[知识蒸馏说明文档](./knowledge_distillation.md)。
<a name="3--------"></a> <a name="3--------"></a>
# 3. 模型评估与预测 # 3. 模型评估与预测
......
...@@ -6,13 +6,14 @@ ...@@ -6,13 +6,14 @@
> 3. 本文档提供的是PPOCR自研模型列表,更多基于公开数据集的算法介绍与预训练模型可以参考:[算法概览文档](./algorithm_overview.md)。 > 3. 本文档提供的是PPOCR自研模型列表,更多基于公开数据集的算法介绍与预训练模型可以参考:[算法概览文档](./algorithm_overview.md)。
- [1. 文本检测模型](#文本检测模型) - [PP-OCR系列模型列表(V2.1,2021年9月6日更新)](#pp-ocr系列模型列表v212021年9月6日更新)
- [2. 文本识别模型](#文本识别模型) - [1. 文本检测模型](#1-文本检测模型)
- [2.1 中文识别模型](#中文识别模型) - [2. 文本识别模型](#2-文本识别模型)
- [2.2 英文识别模型](#英文识别模型) - [2.1 中文识别模型](#21-中文识别模型)
- [2.3 多语言识别模型](#多语言识别模型) - [2.2 英文识别模型](#22-英文识别模型)
- [3. 文本方向分类模型](#文本方向分类模型) - [2.3 多语言识别模型(更多语言持续更新中...)](#23-多语言识别模型更多语言持续更新中)
- [4. Paddle-Lite 模型](#Paddle-Lite模型) - [3. 文本方向分类模型](#3-文本方向分类模型)
- [4. Paddle-Lite 模型](#4-paddle-lite-模型)
PaddleOCR提供的可下载模型包括`推理模型``训练模型``预训练模型``slim模型`,模型区别说明如下: PaddleOCR提供的可下载模型包括`推理模型``训练模型``预训练模型``slim模型`,模型区别说明如下:
...@@ -100,6 +101,8 @@ PaddleOCR提供的可下载模型包括`推理模型`、`训练模型`、`预训 ...@@ -100,6 +101,8 @@ PaddleOCR提供的可下载模型包括`推理模型`、`训练模型`、`预训
|模型版本|模型简介|模型大小|检测模型|文本方向分类模型|识别模型|Paddle-Lite版本| |模型版本|模型简介|模型大小|检测模型|文本方向分类模型|识别模型|Paddle-Lite版本|
|---|---|---|---|---|---|---| |---|---|---|---|---|---|---|
|PP-OCRv2|蒸馏版超轻量中文OCR移动端模型|11M|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_det_infer_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_infer_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_rec_infer_opt.nb)|v2.10|
|PP-OCRv2(slim)|蒸馏版超轻量中文OCR移动端模型|4.6M|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_det_slim_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_rec_slim_opt.nb)|v2.10|
|PP-OCRv2|蒸馏版超轻量中文OCR移动端模型|11M|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_cls_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer_opt.nb)|v2.9| |PP-OCRv2|蒸馏版超轻量中文OCR移动端模型|11M|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_cls_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer_opt.nb)|v2.9|
|PP-OCRv2(slim)|蒸馏版超轻量中文OCR移动端模型|4.9M|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_slim_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_slim_opt.nb)|v2.9| |PP-OCRv2(slim)|蒸馏版超轻量中文OCR移动端模型|4.9M|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_slim_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_slim_opt.nb)|v2.9|
|V2.0|ppocr_v2.0超轻量中文OCR移动端模型|7.8M|[下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_det_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_cls_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_rec_opt.nb)|v2.9| |V2.0|ppocr_v2.0超轻量中文OCR移动端模型|7.8M|[下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_det_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_cls_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_rec_opt.nb)|v2.9|
......
# PaddleOCR快速开始 # PaddleOCR快速开始
- [1. 安装PaddleOCR whl包](#1)
- [PaddleOCR快速开始](#paddleocr) - [2. 便捷使用](#2)
- [2.1 命令行使用](#21)
+ [1. 安装PaddleOCR whl包](#1)
* [2. 便捷使用](#2)
+ [2.1 命令行使用](#21)
- [2.1.1 中英文模型](#211) - [2.1.1 中英文模型](#211)
- [2.1.2 多语言模型](#212) - [2.1.2 多语言模型](#212)
- [2.1.3 版面分析](#213) - [2.1.3 版面分析](#213)
+ [2.2 Python脚本使用](#22) - [2.2 Python脚本使用](#22)
- [2.2.1 中英文与多语言使用](#221) - [2.2.1 中英文与多语言使用](#221)
- [2.2.2 版面分析](#222) - [2.2.2 版面分析](#222)
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
- [2.1 数据增强](#数据增强) - [2.1 数据增强](#数据增强)
- [2.2 通用模型训练](#通用模型训练) - [2.2 通用模型训练](#通用模型训练)
- [2.3 多语言模型训练](#多语言模型训练) - [2.3 多语言模型训练](#多语言模型训练)
- [2.4 知识蒸馏训练](#知识蒸馏训练)
- [3 评估](#评估) - [3 评估](#评估)
- [4 预测](#预测) - [4 预测](#预测)
- [5 转Inference模型测试](#Inference) - [5 转Inference模型测试](#Inference)
...@@ -368,6 +369,13 @@ Eval: ...@@ -368,6 +369,13 @@ Eval:
label_file_list: ["./train_data/french_val.txt"] label_file_list: ["./train_data/french_val.txt"]
... ...
``` ```
<a name="知识蒸馏训练"></a>
### 2.4 知识蒸馏训练
PaddleOCR支持了基于知识蒸馏的文本识别模型训练过程,更多内容可以参考[知识蒸馏说明文档](./knowledge_distillation.md)
<a name="评估"></a> <a name="评估"></a>
## 3 评估 ## 3 评估
......
...@@ -9,6 +9,7 @@ This section uses the icdar2015 dataset as an example to introduce the training, ...@@ -9,6 +9,7 @@ This section uses the icdar2015 dataset as an example to introduce the training,
* [2.1 Start Training](#21-start-training) * [2.1 Start Training](#21-start-training)
* [2.2 Load Trained Model and Continue Training](#22-load-trained-model-and-continue-training) * [2.2 Load Trained Model and Continue Training](#22-load-trained-model-and-continue-training)
* [2.3 Training with New Backbone](#23-training-with-new-backbone) * [2.3 Training with New Backbone](#23-training-with-new-backbone)
* [2.4 Training with knowledge distillation](#24)
- [3. Evaluation and Test](#3-evaluation-and-test) - [3. Evaluation and Test](#3-evaluation-and-test)
* [3.1 Evaluation](#31-evaluation) * [3.1 Evaluation](#31-evaluation)
* [3.2 Test](#32-test) * [3.2 Test](#32-test)
...@@ -174,6 +175,11 @@ After adding the four-part modules of the network, you only need to configure th ...@@ -174,6 +175,11 @@ After adding the four-part modules of the network, you only need to configure th
**NOTE**: More details about replacing the Backbone and other modules can be found in [doc](add_new_algorithm_en.md). **NOTE**: More details about replacing the Backbone and other modules can be found in [doc](add_new_algorithm_en.md).
### 2.4 Training with knowledge distillation
Knowledge distillation is supported in PaddleOCR for text detection training process. For more details, please refer to [doc](./knowledge_distillation_en.md).
## 3. Evaluation and Test ## 3. Evaluation and Test
### 3.1 Evaluation ### 3.1 Evaluation
......
...@@ -94,6 +94,8 @@ For more supported languages, please refer to : [Multi-language model](./multi_l ...@@ -94,6 +94,8 @@ For more supported languages, please refer to : [Multi-language model](./multi_l
## 4. Paddle-Lite Model ## 4. Paddle-Lite Model
|Version|Introduction|Model size|Detection model|Text Direction model|Recognition model|Paddle-Lite branch| |Version|Introduction|Model size|Detection model|Text Direction model|Recognition model|Paddle-Lite branch|
|---|---|---|---|---|---|---| |---|---|---|---|---|---|---|
|PP-OCRv2|extra-lightweight chinese OCR optimized model|11M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_det_infer_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_infer_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_rec_infer_opt.nb)|v2.10|
|PP-OCRv2(slim)|extra-lightweight chinese OCR optimized model|4.6M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_det_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_rec_slim_opt.nb)|v2.10|
|PP-OCRv2|extra-lightweight chinese OCR optimized model|11M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_cls_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer_opt.nb)|v2.9| |PP-OCRv2|extra-lightweight chinese OCR optimized model|11M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_cls_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer_opt.nb)|v2.9|
|PP-OCRv2(slim)|extra-lightweight chinese OCR optimized model|4.9M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_slim_opt.nb)|v2.9| |PP-OCRv2(slim)|extra-lightweight chinese OCR optimized model|4.9M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_slim_opt.nb)|v2.9|
|V2.0|ppocr_v2.0 extra-lightweight chinese OCR optimized model|7.8M|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_det_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_cls_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_rec_opt.nb)|v2.9| |V2.0|ppocr_v2.0 extra-lightweight chinese OCR optimized model|7.8M|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_det_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_cls_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_rec_opt.nb)|v2.9|
......
# PaddleOCR Quick Start # PaddleOCR Quick Start
[PaddleOCR Quick Start](#paddleocr-quick-start)
+ [1. Install PaddleOCR Whl Package](#1-install-paddleocr-whl-package) + [1. Install PaddleOCR Whl Package](#1-install-paddleocr-whl-package)
* [2. Easy-to-Use](#2-easy-to-use) * [2. Easy-to-Use](#2-easy-to-use)
+ [2.1 Use by Command Line](#21-use-by-command-line) + [2.1 Use by Command Line](#21-use-by-command-line)
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
- [2.1 Data Augmentation](#Data_Augmentation) - [2.1 Data Augmentation](#Data_Augmentation)
- [2.2 General Training](#Training) - [2.2 General Training](#Training)
- [2.3 Multi-language Training](#Multi_language) - [2.3 Multi-language Training](#Multi_language)
- [2.4 Training with Knowledge Distillation](#kd)
- [3. Evaluation](#EVALUATION) - [3. Evaluation](#EVALUATION)
...@@ -361,6 +362,12 @@ Eval: ...@@ -361,6 +362,12 @@ Eval:
... ...
``` ```
<a name="kd"></a>
### 2.4 Training with Knowledge Distillation
Knowledge distillation is supported in PaddleOCR for text recognition training process. For more details, please refer to [doc](./knowledge_distillation_en.md).
<a name="EVALUATION"></a> <a name="EVALUATION"></a>
## 3. Evaluation ## 3. Evaluation
......
...@@ -22,7 +22,8 @@ from .make_shrink_map import MakeShrinkMap ...@@ -22,7 +22,8 @@ from .make_shrink_map import MakeShrinkMap
from .random_crop_data import EastRandomCropData, RandomCropImgMask from .random_crop_data import EastRandomCropData, RandomCropImgMask
from .make_pse_gt import MakePseGt from .make_pse_gt import MakePseGt
from .rec_img_aug import RecAug, RecResizeImg, ClsResizeImg, SRNRecResizeImg, NRTRRecResizeImg, SARRecResizeImg from .rec_img_aug import RecAug, RecResizeImg, ClsResizeImg, \
SRNRecResizeImg, NRTRRecResizeImg, SARRecResizeImg, PRENResizeImg
from .randaugment import RandAugment from .randaugment import RandAugment
from .copy_paste import CopyPaste from .copy_paste import CopyPaste
from .ColorJitter import ColorJitter from .ColorJitter import ColorJitter
...@@ -36,6 +37,9 @@ from .gen_table_mask import * ...@@ -36,6 +37,9 @@ from .gen_table_mask import *
from .vqa import * from .vqa import *
from .fce_aug import *
from .fce_targets import FCENetTargets
def transform(data, ops=None): def transform(data, ops=None):
""" transform """ """ transform """
......
...@@ -785,6 +785,53 @@ class SARLabelEncode(BaseRecLabelEncode): ...@@ -785,6 +785,53 @@ class SARLabelEncode(BaseRecLabelEncode):
return [self.padding_idx] return [self.padding_idx]
class PRENLabelEncode(BaseRecLabelEncode):
def __init__(self,
max_text_length,
character_dict_path,
use_space_char=False,
**kwargs):
super(PRENLabelEncode, self).__init__(
max_text_length, character_dict_path, use_space_char)
def add_special_char(self, dict_character):
padding_str = '<PAD>' # 0
end_str = '<EOS>' # 1
unknown_str = '<UNK>' # 2
dict_character = [padding_str, end_str, unknown_str] + dict_character
self.padding_idx = 0
self.end_idx = 1
self.unknown_idx = 2
return dict_character
def encode(self, text):
if len(text) == 0 or len(text) >= self.max_text_len:
return None
if self.lower:
text = text.lower()
text_list = []
for char in text:
if char not in self.dict:
text_list.append(self.unknown_idx)
else:
text_list.append(self.dict[char])
text_list.append(self.end_idx)
if len(text_list) < self.max_text_len:
text_list += [self.padding_idx] * (
self.max_text_len - len(text_list))
return text_list
def __call__(self, data):
text = data['label']
encoded_text = self.encode(text)
if encoded_text is None:
return None
data['label'] = np.array(encoded_text)
return data
class VQATokenLabelEncode(object): class VQATokenLabelEncode(object):
""" """
Label encode for NLP VQA methods Label encode for NLP VQA methods
......
...@@ -23,14 +23,20 @@ import sys ...@@ -23,14 +23,20 @@ import sys
import six import six
import cv2 import cv2
import numpy as np import numpy as np
import math
class DecodeImage(object): class DecodeImage(object):
""" decode image """ """ decode image """
def __init__(self, img_mode='RGB', channel_first=False, **kwargs): def __init__(self,
img_mode='RGB',
channel_first=False,
ignore_orientation=False,
**kwargs):
self.img_mode = img_mode self.img_mode = img_mode
self.channel_first = channel_first self.channel_first = channel_first
self.ignore_orientation = ignore_orientation
def __call__(self, data): def __call__(self, data):
img = data['image'] img = data['image']
...@@ -41,6 +47,10 @@ class DecodeImage(object): ...@@ -41,6 +47,10 @@ class DecodeImage(object):
assert type(img) is bytes and len( assert type(img) is bytes and len(
img) > 0, "invalid input 'img' in DecodeImage" img) > 0, "invalid input 'img' in DecodeImage"
img = np.frombuffer(img, dtype='uint8') img = np.frombuffer(img, dtype='uint8')
if self.ignore_orientation:
img = cv2.imdecode(img, cv2.IMREAD_IGNORE_ORIENTATION |
cv2.IMREAD_COLOR)
else:
img = cv2.imdecode(img, 1) img = cv2.imdecode(img, 1)
if img is None: if img is None:
return None return None
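A small usage sketch for the new `ignore_orientation` switch (the image path is illustrative; any local JPEG works):
```python
from ppocr.data.imaug.operators import DecodeImage

decode_op = DecodeImage(img_mode='BGR', ignore_orientation=True)
with open('./doc/imgs/1.jpg', 'rb') as f:
    data = {'image': f.read()}
print(decode_op(data)['image'].shape)   # decoded without applying the EXIF orientation flag
```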
...@@ -156,6 +166,44 @@ class KeepKeys(object): ...@@ -156,6 +166,44 @@ class KeepKeys(object):
return data_list return data_list
class Pad(object):
def __init__(self, size=None, size_div=32, **kwargs):
if size is not None and not isinstance(size, (int, list, tuple)):
raise TypeError("Type of target_size is invalid. Now is {}".format(
type(size)))
if isinstance(size, int):
size = [size, size]
self.size = size
self.size_div = size_div
def __call__(self, data):
img = data['image']
img_h, img_w = img.shape[0], img.shape[1]
if self.size:
resize_h2, resize_w2 = self.size
assert (
img_h < resize_h2 and img_w < resize_w2
), '(h, w) of target size should be greater than (img_h, img_w)'
else:
resize_h2 = max(
int(math.ceil(img.shape[0] / self.size_div) * self.size_div),
self.size_div)
resize_w2 = max(
int(math.ceil(img.shape[1] / self.size_div) * self.size_div),
self.size_div)
img = cv2.copyMakeBorder(
img,
0,
resize_h2 - img_h,
0,
resize_w2 - img_w,
cv2.BORDER_CONSTANT,
value=0)
data['image'] = img
return data
class Resize(object): class Resize(object):
def __init__(self, size=(640, 640), **kwargs): def __init__(self, size=(640, 640), **kwargs):
self.size = size self.size = size
......
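A quick sketch of the new `Pad` transform with its default `size_div=32`; the shapes below are just worked numbers:
```python
import numpy as np
from ppocr.data.imaug.operators import Pad

pad_op = Pad(size_div=32)                      # no fixed size: pad up to the next multiple of 32
sample = {'image': np.zeros((601, 455, 3), dtype=np.float32)}
print(pad_op(sample)['image'].shape)           # (608, 480, 3): 601 -> 608, 455 -> 480
```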
...@@ -141,6 +141,25 @@ class SARRecResizeImg(object): ...@@ -141,6 +141,25 @@ class SARRecResizeImg(object):
return data return data
class PRENResizeImg(object):
def __init__(self, image_shape, **kwargs):
"""
According to the original paper's implementation, this is a hard resize.
You may want to adapt it so it better fits your task.
"""
self.dst_h, self.dst_w = image_shape
def __call__(self, data):
img = data['image']
resized_img = cv2.resize(
img, (self.dst_w, self.dst_h), interpolation=cv2.INTER_LINEAR)
resized_img = resized_img.transpose((2, 0, 1)) / 255
resized_img -= 0.5
resized_img /= 0.5
data['image'] = resized_img.astype(np.float32)
return data
def resize_norm_img_sar(img, image_shape, width_downsample_ratio=0.25): def resize_norm_img_sar(img, image_shape, width_downsample_ratio=0.25):
imgC, imgH, imgW_min, imgW_max = image_shape imgC, imgH, imgW_min, imgW_max = image_shape
h = img.shape[0] h = img.shape[0]
......
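A usage sketch for `PRENResizeImg`; the target shape here is illustrative rather than taken from a specific PREN config:
```python
import numpy as np
from ppocr.data.imaug.rec_img_aug import PRENResizeImg

resize_op = PRENResizeImg(image_shape=[64, 256])   # (dst_h, dst_w)
sample = {'image': np.random.randint(0, 255, (48, 160, 3), dtype=np.uint8)}
out = resize_op(sample)['image']
print(out.shape, out.dtype)                        # (3, 64, 256) float32, values scaled to [-1, 1]
```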
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
import numpy as np import numpy as np
import os import os
import json
import random import random
import traceback import traceback
from paddle.io import Dataset from paddle.io import Dataset
......
...@@ -24,6 +24,7 @@ from .det_db_loss import DBLoss ...@@ -24,6 +24,7 @@ from .det_db_loss import DBLoss
from .det_east_loss import EASTLoss from .det_east_loss import EASTLoss
from .det_sast_loss import SASTLoss from .det_sast_loss import SASTLoss
from .det_pse_loss import PSELoss from .det_pse_loss import PSELoss
from .det_fce_loss import FCELoss
# rec loss # rec loss
from .rec_ctc_loss import CTCLoss from .rec_ctc_loss import CTCLoss
...@@ -32,6 +33,7 @@ from .rec_srn_loss import SRNLoss ...@@ -32,6 +33,7 @@ from .rec_srn_loss import SRNLoss
from .rec_nrtr_loss import NRTRLoss from .rec_nrtr_loss import NRTRLoss
from .rec_sar_loss import SARLoss from .rec_sar_loss import SARLoss
from .rec_aster_loss import AsterLoss from .rec_aster_loss import AsterLoss
from .rec_pren_loss import PRENLoss
# cls loss # cls loss
from .cls_loss import ClsLoss from .cls_loss import ClsLoss
...@@ -55,10 +57,10 @@ from .vqa_token_layoutlm_loss import VQASerTokenLayoutLMLoss ...@@ -55,10 +57,10 @@ from .vqa_token_layoutlm_loss import VQASerTokenLayoutLMLoss
def build_loss(config): def build_loss(config):
support_dict = [ support_dict = [
'DBLoss', 'PSELoss', 'EASTLoss', 'SASTLoss', 'CTCLoss', 'ClsLoss', 'DBLoss', 'PSELoss', 'EASTLoss', 'SASTLoss', 'FCELoss', 'CTCLoss',
'AttentionLoss', 'SRNLoss', 'PGLoss', 'CombinedLoss', 'NRTRLoss', 'ClsLoss', 'AttentionLoss', 'SRNLoss', 'PGLoss', 'CombinedLoss',
'TableAttentionLoss', 'SARLoss', 'AsterLoss', 'SDMGRLoss', 'NRTRLoss', 'TableAttentionLoss', 'SARLoss', 'AsterLoss', 'SDMGRLoss',
'VQASerTokenLayoutLMLoss', 'LossFromOutput' 'VQASerTokenLayoutLMLoss', 'LossFromOutput', 'PRENLoss'
] ]
config = copy.deepcopy(config) config = copy.deepcopy(config)
module_name = config.pop('name') module_name = config.pop('name')
......
...@@ -95,9 +95,15 @@ class DMLLoss(nn.Layer): ...@@ -95,9 +95,15 @@ class DMLLoss(nn.Layer):
self.act = None self.act = None
self.use_log = use_log self.use_log = use_log
self.jskl_loss = KLJSLoss(mode="js") self.jskl_loss = KLJSLoss(mode="js")
def _kldiv(self, x, target):
eps = 1.0e-10
loss = target * (paddle.log(target + eps) - x)
# batch mean loss
loss = paddle.sum(loss) / loss.shape[0]
return loss
def forward(self, out1, out2): def forward(self, out1, out2):
if self.act is not None: if self.act is not None:
out1 = self.act(out1) out1 = self.act(out1)
...@@ -106,9 +112,8 @@ class DMLLoss(nn.Layer): ...@@ -106,9 +112,8 @@ class DMLLoss(nn.Layer):
# for recognition distillation, log is needed for feature map # for recognition distillation, log is needed for feature map
log_out1 = paddle.log(out1) log_out1 = paddle.log(out1)
log_out2 = paddle.log(out2) log_out2 = paddle.log(out2)
loss = (F.kl_div( loss = (
log_out1, out2, reduction='batchmean') + F.kl_div( self._kldiv(log_out1, out2) + self._kldiv(log_out2, out1)) / 2.0
log_out2, out1, reduction='batchmean')) / 2.0
else: else:
# for detection distillation log is not needed # for detection distillation log is not needed
loss = self.jskl_loss(out1, out2) loss = self.jskl_loss(out1, out2)
......
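The hunk above swaps `F.kl_div(..., reduction='batchmean')` for a hand-rolled `_kldiv`; a quick numerical sanity check (assuming PaddlePaddle is installed) that the two agree up to the added epsilon:
```python
import paddle
import paddle.nn.functional as F

p = F.softmax(paddle.rand([4, 10]), axis=-1)
q = F.softmax(paddle.rand([4, 10]), axis=-1)
eps = 1.0e-10
manual = paddle.sum(q * (paddle.log(q + eps) - paddle.log(p))) / p.shape[0]   # _kldiv(log(p), q)
builtin = F.kl_div(paddle.log(p), q, reduction='batchmean')
print(float(manual), float(builtin))   # the two values should match closely
```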
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is adapted from:
https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textdet/losses/fce_loss.py
"""
import numpy as np
from paddle import nn
import paddle
import paddle.nn.functional as F
from functools import partial
def multi_apply(func, *args, **kwargs):
pfunc = partial(func, **kwargs) if kwargs else func
map_results = map(pfunc, *args)
return tuple(map(list, zip(*map_results)))
class FCELoss(nn.Layer):
"""The class for implementing FCENet loss
FCENet(CVPR2021): Fourier Contour Embedding for Arbitrary-shaped
Text Detection
[https://arxiv.org/abs/2104.10442]
Args:
fourier_degree (int) : The maximum Fourier transform degree k.
num_sample (int) : The number of sampling points used in the regression
loss. If it is too small, FCENet tends to overfit.
ohem_ratio (float): the negative/positive ratio in OHEM.
"""
def __init__(self, fourier_degree, num_sample, ohem_ratio=3.):
super().__init__()
self.fourier_degree = fourier_degree
self.num_sample = num_sample
self.ohem_ratio = ohem_ratio
def forward(self, preds, labels):
assert isinstance(preds, dict)
preds = preds['levels']
p3_maps, p4_maps, p5_maps = labels[1:]
assert p3_maps[0].shape[0] == 4 * self.fourier_degree + 5,\
'fourier degree not equal in FCEhead and FCEtarget'
# to tensor
gts = [p3_maps, p4_maps, p5_maps]
for idx, maps in enumerate(gts):
gts[idx] = paddle.to_tensor(np.stack(maps))
losses = multi_apply(self.forward_single, preds, gts)
loss_tr = paddle.to_tensor(0.).astype('float32')
loss_tcl = paddle.to_tensor(0.).astype('float32')
loss_reg_x = paddle.to_tensor(0.).astype('float32')
loss_reg_y = paddle.to_tensor(0.).astype('float32')
loss_all = paddle.to_tensor(0.).astype('float32')
for idx, loss in enumerate(losses):
loss_all += sum(loss)
if idx == 0:
loss_tr += sum(loss)
elif idx == 1:
loss_tcl += sum(loss)
elif idx == 2:
loss_reg_x += sum(loss)
else:
loss_reg_y += sum(loss)
results = dict(
loss=loss_all,
loss_text=loss_tr,
loss_center=loss_tcl,
loss_reg_x=loss_reg_x,
loss_reg_y=loss_reg_y, )
return results
def forward_single(self, pred, gt):
cls_pred = paddle.transpose(pred[0], (0, 2, 3, 1))
reg_pred = paddle.transpose(pred[1], (0, 2, 3, 1))
gt = paddle.transpose(gt, (0, 2, 3, 1))
k = 2 * self.fourier_degree + 1
tr_pred = paddle.reshape(cls_pred[:, :, :, :2], (-1, 2))
tcl_pred = paddle.reshape(cls_pred[:, :, :, 2:], (-1, 2))
x_pred = paddle.reshape(reg_pred[:, :, :, 0:k], (-1, k))
y_pred = paddle.reshape(reg_pred[:, :, :, k:2 * k], (-1, k))
tr_mask = gt[:, :, :, :1].reshape([-1])
tcl_mask = gt[:, :, :, 1:2].reshape([-1])
train_mask = gt[:, :, :, 2:3].reshape([-1])
x_map = paddle.reshape(gt[:, :, :, 3:3 + k], (-1, k))
y_map = paddle.reshape(gt[:, :, :, 3 + k:], (-1, k))
tr_train_mask = (train_mask * tr_mask).astype('bool')
tr_train_mask2 = paddle.concat(
[tr_train_mask.unsqueeze(1), tr_train_mask.unsqueeze(1)], axis=1)
# tr loss
loss_tr = self.ohem(tr_pred, tr_mask, train_mask)
# tcl loss
loss_tcl = paddle.to_tensor(0.).astype('float32')
tr_neg_mask = tr_train_mask.logical_not()
tr_neg_mask2 = paddle.concat(
[tr_neg_mask.unsqueeze(1), tr_neg_mask.unsqueeze(1)], axis=1)
if tr_train_mask.sum().item() > 0:
loss_tcl_pos = F.cross_entropy(
tcl_pred.masked_select(tr_train_mask2).reshape([-1, 2]),
tcl_mask.masked_select(tr_train_mask).astype('int64'))
loss_tcl_neg = F.cross_entropy(
tcl_pred.masked_select(tr_neg_mask2).reshape([-1, 2]),
tcl_mask.masked_select(tr_neg_mask).astype('int64'))
loss_tcl = loss_tcl_pos + 0.5 * loss_tcl_neg
# regression loss
loss_reg_x = paddle.to_tensor(0.).astype('float32')
loss_reg_y = paddle.to_tensor(0.).astype('float32')
if tr_train_mask.sum().item() > 0:
weight = (tr_mask.masked_select(tr_train_mask.astype('bool'))
.astype('float32') + tcl_mask.masked_select(
tr_train_mask.astype('bool')).astype('float32')) / 2
weight = weight.reshape([-1, 1])
ft_x, ft_y = self.fourier2poly(x_map, y_map)
ft_x_pre, ft_y_pre = self.fourier2poly(x_pred, y_pred)
dim = ft_x.shape[1]
tr_train_mask3 = paddle.concat(
[tr_train_mask.unsqueeze(1) for i in range(dim)], axis=1)
loss_reg_x = paddle.mean(weight * F.smooth_l1_loss(
ft_x_pre.masked_select(tr_train_mask3).reshape([-1, dim]),
ft_x.masked_select(tr_train_mask3).reshape([-1, dim]),
reduction='none'))
loss_reg_y = paddle.mean(weight * F.smooth_l1_loss(
ft_y_pre.masked_select(tr_train_mask3).reshape([-1, dim]),
ft_y.masked_select(tr_train_mask3).reshape([-1, dim]),
reduction='none'))
return loss_tr, loss_tcl, loss_reg_x, loss_reg_y
def ohem(self, predict, target, train_mask):
pos = (target * train_mask).astype('bool')
neg = ((1 - target) * train_mask).astype('bool')
pos2 = paddle.concat([pos.unsqueeze(1), pos.unsqueeze(1)], axis=1)
neg2 = paddle.concat([neg.unsqueeze(1), neg.unsqueeze(1)], axis=1)
n_pos = pos.astype('float32').sum()
if n_pos.item() > 0:
loss_pos = F.cross_entropy(
predict.masked_select(pos2).reshape([-1, 2]),
target.masked_select(pos).astype('int64'),
reduction='sum')
loss_neg = F.cross_entropy(
predict.masked_select(neg2).reshape([-1, 2]),
target.masked_select(neg).astype('int64'),
reduction='none')
n_neg = min(
int(neg.astype('float32').sum().item()),
int(self.ohem_ratio * n_pos.astype('float32')))
else:
loss_pos = paddle.to_tensor(0.)
loss_neg = F.cross_entropy(
predict.masked_select(neg2).reshape([-1, 2]),
target.masked_select(neg).astype('int64'),
reduction='none')
n_neg = 100
if len(loss_neg) > n_neg:
loss_neg, _ = paddle.topk(loss_neg, n_neg)
return (loss_pos + loss_neg.sum()) / (n_pos + n_neg).astype('float32')
def fourier2poly(self, real_maps, imag_maps):
"""Transform Fourier coefficient maps to polygon maps.
Args:
real_maps (tensor): A map composed of the real parts of the
Fourier coefficients, whose shape is (-1, 2k+1)
imag_maps (tensor):A map composed of the imag parts of the
Fourier coefficients, whose shape is (-1, 2k+1)
Returns
x_maps (tensor): A map composed of the x value of the polygon
represented by n sample points (xn, yn), whose shape is (-1, n)
y_maps (tensor): A map composed of the y value of the polygon
represented by n sample points (xn, yn), whose shape is (-1, n)
"""
k_vect = paddle.arange(
-self.fourier_degree, self.fourier_degree + 1,
dtype='float32').reshape([-1, 1])
i_vect = paddle.arange(
0, self.num_sample, dtype='float32').reshape([1, -1])
transform_matrix = 2 * np.pi / self.num_sample * paddle.matmul(k_vect,
i_vect)
x1 = paddle.einsum('ak, kn-> an', real_maps,
paddle.cos(transform_matrix))
x2 = paddle.einsum('ak, kn-> an', imag_maps,
paddle.sin(transform_matrix))
y1 = paddle.einsum('ak, kn-> an', real_maps,
paddle.sin(transform_matrix))
y2 = paddle.einsum('ak, kn-> an', imag_maps,
paddle.cos(transform_matrix))
x_maps = x1 - x2
y_maps = y1 + y2
return x_maps, y_maps
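A small sanity check for `fourier2poly` (a sketch; it assumes a PaddlePaddle version with `paddle.einsum` and tensor item assignment): with only the c_{+1} coefficient set, the reconstructed contour is a circle.
```python
import paddle
from ppocr.losses.det_fce_loss import FCELoss

loss_fn = FCELoss(fourier_degree=5, num_sample=50)
k = 2 * 5 + 1
real = paddle.zeros([1, k])
imag = paddle.zeros([1, k])
real[0, 5 + 1] = 10.0                            # only c_{+1} != 0 -> circle of radius 10 at origin
x, y = loss_fn.fourier2poly(real, imag)
print(float((x ** 2 + y ** 2).sqrt().mean()))    # ~10.0: all sampled points lie on that circle
```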
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from paddle import nn
class PRENLoss(nn.Layer):
def __init__(self, **kwargs):
super(PRENLoss, self).__init__()
# note: 0 is padding idx
self.loss_func = nn.CrossEntropyLoss(reduction='mean', ignore_index=0)
def forward(self, predicts, batch):
loss = self.loss_func(predicts, batch[1].astype('int64'))
return {'loss': loss}
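A usage sketch for `PRENLoss`; the tensor shapes are illustrative, and the only assumption taken from the code is that `batch[1]` carries the label ids with index 0 as padding:
```python
import paddle
from ppocr.losses.rec_pren_loss import PRENLoss

loss_fn = PRENLoss()
logits = paddle.rand([2, 40, 100])           # (batch, max_text_len, num_classes), illustrative
labels = paddle.randint(1, 100, [2, 40])     # label ids; 0 would be treated as padding and ignored
print(loss_fn(logits, [None, labels]))       # -> {'loss': Tensor(...)}
```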
...@@ -21,7 +21,7 @@ import copy ...@@ -21,7 +21,7 @@ import copy
__all__ = ["build_metric"] __all__ = ["build_metric"]
from .det_metric import DetMetric from .det_metric import DetMetric, DetFCEMetric
from .rec_metric import RecMetric from .rec_metric import RecMetric
from .cls_metric import ClsMetric from .cls_metric import ClsMetric
from .e2e_metric import E2EMetric from .e2e_metric import E2EMetric
...@@ -34,7 +34,7 @@ from .vqa_token_re_metric import VQAReTokenMetric ...@@ -34,7 +34,7 @@ from .vqa_token_re_metric import VQAReTokenMetric
def build_metric(config): def build_metric(config):
support_dict = [ support_dict = [
"DetMetric", "RecMetric", "ClsMetric", "E2EMetric", "DetMetric", "DetFCEMetric", "RecMetric", "ClsMetric", "E2EMetric",
"DistillationMetric", "TableMetric", 'KIEMetric', 'VQASerTokenMetric', "DistillationMetric", "TableMetric", 'KIEMetric', 'VQASerTokenMetric',
'VQAReTokenMetric' 'VQAReTokenMetric'
] ]
......
...@@ -16,7 +16,7 @@ from __future__ import absolute_import ...@@ -16,7 +16,7 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
__all__ = ['DetMetric'] __all__ = ['DetMetric', 'DetFCEMetric']
from .eval_det_iou import DetectionIoUEvaluator from .eval_det_iou import DetectionIoUEvaluator
...@@ -55,7 +55,6 @@ class DetMetric(object): ...@@ -55,7 +55,6 @@ class DetMetric(object):
result = self.evaluator.evaluate_image(gt_info_list, det_info_list) result = self.evaluator.evaluate_image(gt_info_list, det_info_list)
self.results.append(result) self.results.append(result)
def get_metric(self): def get_metric(self):
""" """
return metrics { return metrics {
...@@ -71,3 +70,85 @@ class DetMetric(object): ...@@ -71,3 +70,85 @@ class DetMetric(object):
def reset(self): def reset(self):
self.results = [] # clear results self.results = [] # clear results
class DetFCEMetric(object):
def __init__(self, main_indicator='hmean', **kwargs):
self.evaluator = DetectionIoUEvaluator()
self.main_indicator = main_indicator
self.reset()
def __call__(self, preds, batch, **kwargs):
'''
batch: a list produced by dataloaders.
image: np.ndarray of shape (N, C, H, W).
ratio_list: np.ndarray of shape(N,2)
polygons: np.ndarray of shape (N, K, 4, 2), the polygons of objective regions.
ignore_tags: np.ndarray of shape (N, K), indicates whether a region is ignorable or not.
preds: a list of dict produced by post process
points: np.ndarray of shape (N, K, 4, 2), the polygons of objective regions.
'''
gt_polyons_batch = batch[2]
ignore_tags_batch = batch[3]
for pred, gt_polyons, ignore_tags in zip(preds, gt_polyons_batch,
ignore_tags_batch):
# prepare gt
gt_info_list = [{
'points': gt_polyon,
'text': '',
'ignore': ignore_tag
} for gt_polyon, ignore_tag in zip(gt_polyons, ignore_tags)]
# prepare det
det_info_list = [{
'points': det_polyon,
'text': '',
'score': score
} for det_polyon, score in zip(pred['points'], pred['scores'])]
for score_thr in self.results.keys():
det_info_list_thr = [
det_info for det_info in det_info_list
if det_info['score'] >= score_thr
]
result = self.evaluator.evaluate_image(gt_info_list,
det_info_list_thr)
self.results[score_thr].append(result)
def get_metric(self):
"""
return metrics {'hmean':0,
'thr 0.3':'precision: 0 recall: 0 hmean: 0',
'thr 0.4':'precision: 0 recall: 0 hmean: 0',
'thr 0.5':'precision: 0 recall: 0 hmean: 0',
'thr 0.6':'precision: 0 recall: 0 hmean: 0',
'thr 0.7':'precision: 0 recall: 0 hmean: 0',
'thr 0.8':'precision: 0 recall: 0 hmean: 0',
'thr 0.9':'precision: 0 recall: 0 hmean: 0',
}
"""
metircs = {}
hmean = 0
for score_thr in self.results.keys():
metirc = self.evaluator.combine_results(self.results[score_thr])
# for key, value in metirc.items():
# metircs['{}_{}'.format(key, score_thr)] = value
metirc_str = 'precision:{:.5f} recall:{:.5f} hmean:{:.5f}'.format(
metirc['precision'], metirc['recall'], metirc['hmean'])
metircs['thr {}'.format(score_thr)] = metirc_str
hmean = max(hmean, metirc['hmean'])
metircs['hmean'] = hmean
self.reset()
return metircs
def reset(self):
self.results = {
0.3: [],
0.4: [],
0.5: [],
0.6: [],
0.7: [],
0.8: [],
0.9: []
} # clear results
...@@ -30,9 +30,10 @@ def build_backbone(config, model_type): ...@@ -30,9 +30,10 @@ def build_backbone(config, model_type):
from .rec_resnet_31 import ResNet31 from .rec_resnet_31 import ResNet31
from .rec_resnet_aster import ResNet_ASTER from .rec_resnet_aster import ResNet_ASTER
from .rec_micronet import MicroNet from .rec_micronet import MicroNet
from .rec_efficientb3_pren import EfficientNetb3_PREN
support_dict = [ support_dict = [
'MobileNetV1Enhance', 'MobileNetV3', 'ResNet', 'ResNetFPN', 'MTB', 'MobileNetV1Enhance', 'MobileNetV3', 'ResNet', 'ResNetFPN', 'MTB',
"ResNet31", "ResNet_ASTER", 'MicroNet' "ResNet31", "ResNet_ASTER", 'MicroNet', 'EfficientNetb3_PREN'
] ]
elif model_type == "e2e": elif model_type == "e2e":
from .e2e_resnet_vd_pg import ResNet from .e2e_resnet_vd_pg import ResNet
......
...@@ -23,7 +23,12 @@ def build_neck(config): ...@@ -23,7 +23,12 @@ def build_neck(config):
from .pg_fpn import PGFPN from .pg_fpn import PGFPN
from .table_fpn import TableFPN from .table_fpn import TableFPN
from .fpn import FPN from .fpn import FPN
support_dict = ['FPN','DBFPN', 'EASTFPN', 'SASTFPN', 'SequenceEncoder', 'PGFPN', 'TableFPN'] from .fce_fpn import FCEFPN
from .pren_fpn import PRENFPN
support_dict = [
'FPN', 'FCEFPN', 'DBFPN', 'EASTFPN', 'SASTFPN', 'SequenceEncoder',
'PGFPN', 'TableFPN', 'PRENFPN'
]
module_name = config.pop('name') module_name = config.pop('name')
assert module_name in support_dict, Exception('neck only support {}'.format( assert module_name in support_dict, Exception('neck only support {}'.format(
......
...@@ -58,6 +58,7 @@ class OCRSystem(object): ...@@ -58,6 +58,7 @@ class OCRSystem(object):
self.table_layout = lp.PaddleDetectionLayoutModel( self.table_layout = lp.PaddleDetectionLayoutModel(
config_path=config_path, config_path=config_path,
model_path=model_path, model_path=model_path,
label_map=args.layout_label_map,
threshold=0.5, threshold=0.5,
enable_mkldnn=args.enable_mkldnn, enable_mkldnn=args.enable_mkldnn,
enforce_cpu=not args.use_gpu, enforce_cpu=not args.use_gpu,
......