提交 cf03889b 编写于 作者: W weishengyu
...@@ -274,6 +274,7 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -274,6 +274,7 @@ class MainWindow(QMainWindow, WindowMixin):
self.preButton.setIconSize(QSize(40, 100)) self.preButton.setIconSize(QSize(40, 100))
self.preButton.clicked.connect(self.openPrevImg) self.preButton.clicked.connect(self.openPrevImg)
self.preButton.setStyleSheet('border: none;') self.preButton.setStyleSheet('border: none;')
self.preButton.setShortcut('a')
self.iconlist = QListWidget() self.iconlist = QListWidget()
self.iconlist.setViewMode(QListView.IconMode) self.iconlist.setViewMode(QListView.IconMode)
self.iconlist.setFlow(QListView.TopToBottom) self.iconlist.setFlow(QListView.TopToBottom)
...@@ -289,12 +290,12 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -289,12 +290,12 @@ class MainWindow(QMainWindow, WindowMixin):
self.nextButton.setIconSize(QSize(40, 100)) self.nextButton.setIconSize(QSize(40, 100))
self.nextButton.setStyleSheet('border: none;') self.nextButton.setStyleSheet('border: none;')
self.nextButton.clicked.connect(self.openNextImg) self.nextButton.clicked.connect(self.openNextImg)
self.nextButton.setShortcut('d')
hlayout.addWidget(self.preButton) hlayout.addWidget(self.preButton)
hlayout.addWidget(self.iconlist) hlayout.addWidget(self.iconlist)
hlayout.addWidget(self.nextButton) hlayout.addWidget(self.nextButton)
# self.setLayout(hlayout)
iconListContainer = QWidget() iconListContainer = QWidget()
iconListContainer.setLayout(hlayout) iconListContainer.setLayout(hlayout)
...@@ -359,11 +360,6 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -359,11 +360,6 @@ class MainWindow(QMainWindow, WindowMixin):
opendir = action(getStr('openDir'), self.openDirDialog, opendir = action(getStr('openDir'), self.openDirDialog,
'Ctrl+u', 'open', getStr('openDir')) 'Ctrl+u', 'open', getStr('openDir'))
openNextImg = action(getStr('nextImg'), self.openNextImg,
'd', 'next', getStr('nextImgDetail'))
openPrevImg = action(getStr('prevImg'), self.openPrevImg,
'a', 'prev', getStr('prevImgDetail'))
save = action(getStr('save'), self.saveFile, save = action(getStr('save'), self.saveFile,
'Ctrl+V', 'verify', getStr('saveDetail'), enabled=False) 'Ctrl+V', 'verify', getStr('saveDetail'), enabled=False)
...@@ -371,7 +367,7 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -371,7 +367,7 @@ class MainWindow(QMainWindow, WindowMixin):
alcm = action(getStr('choosemodel'), self.autolcm, alcm = action(getStr('choosemodel'), self.autolcm,
'Ctrl+M', 'next', getStr('tipchoosemodel')) 'Ctrl+M', 'next', getStr('tipchoosemodel'))
deleteImg = action(getStr('deleteImg'), self.deleteImg, 'Ctrl+D', 'close', getStr('deleteImgDetail'), deleteImg = action(getStr('deleteImg'), self.deleteImg, 'Ctrl+Shift+D', 'close', getStr('deleteImgDetail'),
enabled=True) enabled=True)
resetAll = action(getStr('resetAll'), self.resetAll, None, 'resetall', getStr('resetAllDetail')) resetAll = action(getStr('resetAll'), self.resetAll, None, 'resetall', getStr('resetAllDetail'))
...@@ -388,7 +384,7 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -388,7 +384,7 @@ class MainWindow(QMainWindow, WindowMixin):
'w', 'new', getStr('crtBoxDetail'), enabled=False) 'w', 'new', getStr('crtBoxDetail'), enabled=False)
delete = action(getStr('delBox'), self.deleteSelectedShape, delete = action(getStr('delBox'), self.deleteSelectedShape,
'Delete', 'delete', getStr('delBoxDetail'), enabled=False) 'backspace', 'delete', getStr('delBoxDetail'), enabled=False)
copy = action(getStr('dupBox'), self.copySelectedShape, copy = action(getStr('dupBox'), self.copySelectedShape,
'Ctrl+C', 'copy', getStr('dupBoxDetail'), 'Ctrl+C', 'copy', getStr('dupBoxDetail'),
enabled=False) enabled=False)
...@@ -446,8 +442,11 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -446,8 +442,11 @@ class MainWindow(QMainWindow, WindowMixin):
reRec = action(getStr('reRecognition'), self.reRecognition, reRec = action(getStr('reRecognition'), self.reRecognition,
'Ctrl+Shift+R', 'reRec', getStr('reRecognition'), enabled=False) 'Ctrl+Shift+R', 'reRec', getStr('reRecognition'), enabled=False)
singleRere = action(getStr('singleRe'), self.singleRerecognition,
'Ctrl+R', 'reRec', getStr('singleRe'), enabled=False)
createpoly = action(getStr('creatPolygon'), self.createPolygon, createpoly = action(getStr('creatPolygon'), self.createPolygon,
'p', 'new', 'Creat Polygon', enabled=True) 'q', 'new', 'Creat Polygon', enabled=True)
saveRec = action(getStr('saveRec'), self.saveRecResult, saveRec = action(getStr('saveRec'), self.saveRecResult,
'', 'save', getStr('saveRec'), enabled=False) '', 'save', getStr('saveRec'), enabled=False)
...@@ -491,6 +490,7 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -491,6 +490,7 @@ class MainWindow(QMainWindow, WindowMixin):
icon='color', tip=getStr('shapeFillColorDetail'), icon='color', tip=getStr('shapeFillColorDetail'),
enabled=False) enabled=False)
# Label list context menu. # Label list context menu.
labelMenu = QMenu() labelMenu = QMenu()
addActions(labelMenu, (edit, delete)) addActions(labelMenu, (edit, delete))
...@@ -501,7 +501,6 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -501,7 +501,6 @@ class MainWindow(QMainWindow, WindowMixin):
# Draw squares/rectangles # Draw squares/rectangles
self.drawSquaresOption = QAction(getStr('drawSquares'), self) self.drawSquaresOption = QAction(getStr('drawSquares'), self)
self.drawSquaresOption.setShortcut('Ctrl+Shift+R')
self.drawSquaresOption.setCheckable(True) self.drawSquaresOption.setCheckable(True)
self.drawSquaresOption.setChecked(settings.get(SETTING_DRAW_SQUARE, False)) self.drawSquaresOption.setChecked(settings.get(SETTING_DRAW_SQUARE, False))
self.drawSquaresOption.triggered.connect(self.toogleDrawSquare) self.drawSquaresOption.triggered.connect(self.toogleDrawSquare)
...@@ -509,7 +508,7 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -509,7 +508,7 @@ class MainWindow(QMainWindow, WindowMixin):
# Store actions for further handling. # Store actions for further handling.
self.actions = struct(save=save, open=open, resetAll=resetAll, deleteImg=deleteImg, self.actions = struct(save=save, open=open, resetAll=resetAll, deleteImg=deleteImg,
lineColor=color1, create=create, delete=delete, edit=edit, copy=copy, lineColor=color1, create=create, delete=delete, edit=edit, copy=copy,
saveRec=saveRec, saveRec=saveRec, singleRere=singleRere,AutoRec=AutoRec,reRec=reRec,
createMode=createMode, editMode=editMode, createMode=createMode, editMode=editMode,
shapeLineColor=shapeLineColor, shapeFillColor=shapeFillColor, shapeLineColor=shapeLineColor, shapeFillColor=shapeFillColor,
zoom=zoom, zoomIn=zoomIn, zoomOut=zoomOut, zoomOrg=zoomOrg, zoom=zoom, zoomIn=zoomIn, zoomOut=zoomOut, zoomOrg=zoomOrg,
...@@ -518,9 +517,9 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -518,9 +517,9 @@ class MainWindow(QMainWindow, WindowMixin):
fileMenuActions=( fileMenuActions=(
open, opendir, saveLabel, resetAll, quit), open, opendir, saveLabel, resetAll, quit),
beginner=(), advanced=(), beginner=(), advanced=(),
editMenu=(createpoly, edit, copy, delete, editMenu=(createpoly, edit, copy, delete,singleRere,
None, color1, self.drawSquaresOption), None, color1, self.drawSquaresOption),
beginnerContext=(create, edit, copy, delete), beginnerContext=(create, edit, copy, delete, singleRere),
advancedContext=(createMode, editMode, edit, copy, advancedContext=(createMode, editMode, edit, copy,
delete, shapeLineColor, shapeFillColor), delete, shapeLineColor, shapeFillColor),
onLoadActive=( onLoadActive=(
...@@ -562,7 +561,7 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -562,7 +561,7 @@ class MainWindow(QMainWindow, WindowMixin):
zoomIn, zoomOut, zoomOrg, None, zoomIn, zoomOut, zoomOrg, None,
fitWindow, fitWidth)) fitWindow, fitWidth))
addActions(self.menus.autolabel, (alcm, None, help)) # addActions(self.menus.autolabel, (AutoRec, reRec, alcm, None, help)) #
self.menus.file.aboutToShow.connect(self.updateFileMenu) self.menus.file.aboutToShow.connect(self.updateFileMenu)
...@@ -572,6 +571,7 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -572,6 +571,7 @@ class MainWindow(QMainWindow, WindowMixin):
action('&Copy here', self.copyShape), action('&Copy here', self.copyShape),
action('&Move here', self.moveShape))) action('&Move here', self.moveShape)))
self.statusBar().showMessage('%s started.' % __appname__) self.statusBar().showMessage('%s started.' % __appname__)
self.statusBar().show() self.statusBar().show()
...@@ -919,6 +919,7 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -919,6 +919,7 @@ class MainWindow(QMainWindow, WindowMixin):
self.actions.edit.setEnabled(selected) self.actions.edit.setEnabled(selected)
self.actions.shapeLineColor.setEnabled(selected) self.actions.shapeLineColor.setEnabled(selected)
self.actions.shapeFillColor.setEnabled(selected) self.actions.shapeFillColor.setEnabled(selected)
self.actions.singleRere.setEnabled(selected)
def addLabel(self, shape): def addLabel(self, shape):
shape.paintLabel = self.displayLabelOption.isChecked() shape.paintLabel = self.displayLabelOption.isChecked()
...@@ -988,6 +989,19 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -988,6 +989,19 @@ class MainWindow(QMainWindow, WindowMixin):
self.updateComboBox() self.updateComboBox()
self.canvas.loadShapes(s) self.canvas.loadShapes(s)
def singleLabel(self, shape):
if shape is None:
# print('rm empty label')
return
item = self.shapesToItems[shape]
item.setText(shape.label)
self.updateComboBox()
# ADD:
item = self.shapesToItemsbox[shape]
item.setText(str([(int(p.x()), int(p.y())) for p in shape.points]))
self.updateComboBox()
def updateComboBox(self): def updateComboBox(self):
# Get the unique labels and add them to the Combobox. # Get the unique labels and add them to the Combobox.
itemsTextList = [str(self.labelList.item(i).text()) for i in range(self.labelList.count())] itemsTextList = [str(self.labelList.item(i).text()) for i in range(self.labelList.count())]
...@@ -1441,6 +1455,8 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -1441,6 +1455,8 @@ class MainWindow(QMainWindow, WindowMixin):
self.haveAutoReced = False self.haveAutoReced = False
self.AutoRecognition.setEnabled(True) self.AutoRecognition.setEnabled(True)
self.reRecogButton.setEnabled(True) self.reRecogButton.setEnabled(True)
self.actions.AutoRec.setEnabled(True)
self.actions.reRec.setEnabled(True)
self.actions.saveLabel.setEnabled(True) self.actions.saveLabel.setEnabled(True)
...@@ -1755,6 +1771,7 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -1755,6 +1771,7 @@ class MainWindow(QMainWindow, WindowMixin):
self.loadFile(self.filePath) # ADD self.loadFile(self.filePath) # ADD
self.haveAutoReced = True self.haveAutoReced = True
self.AutoRecognition.setEnabled(False) self.AutoRecognition.setEnabled(False)
self.actions.AutoRec.setEnabled(False)
self.setDirty() self.setDirty()
self.saveCacheLabel() self.saveCacheLabel()
...@@ -1794,6 +1811,27 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -1794,6 +1811,27 @@ class MainWindow(QMainWindow, WindowMixin):
else: else:
QMessageBox.information(self, "Information", "Draw a box!") QMessageBox.information(self, "Information", "Draw a box!")
def singleRerecognition(self):
img = cv2.imread(self.filePath)
shape = self.canvas.selectedShape
box = [[int(p.x()), int(p.y())] for p in shape.points]
assert len(box) == 4
img_crop = get_rotate_crop_image(img, np.array(box, np.float32))
if img_crop is None:
msg = 'Can not recognise the detection box in ' + self.filePath + '. Please change manually'
QMessageBox.information(self, "Information", msg)
return
result = self.ocr.ocr(img_crop, cls=True, det=False)
if result[0][0] is not '':
result.insert(0, box)
print('result in reRec is ', result)
if result[1][0] == shape.label:
print('label no change')
else:
shape.label = result[1][0]
self.singleLabel(shape)
self.setDirty()
print(box)
def autolcm(self): def autolcm(self):
vbox = QVBoxLayout() vbox = QVBoxLayout()
...@@ -1825,6 +1863,7 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -1825,6 +1863,7 @@ class MainWindow(QMainWindow, WindowMixin):
self.dialog.exec_() self.dialog.exec_()
if self.filePath: if self.filePath:
self.AutoRecognition.setEnabled(True) self.AutoRecognition.setEnabled(True)
self.actions.AutoRec.setEnabled(True)
def modelChoose(self): def modelChoose(self):
......
...@@ -6,6 +6,10 @@ PPOCRLabel is a semi-automatic graphic annotation tool suitable for OCR field. I ...@@ -6,6 +6,10 @@ PPOCRLabel is a semi-automatic graphic annotation tool suitable for OCR field. I
<img src="./data/gif/steps_en.gif" width="100%"/> <img src="./data/gif/steps_en.gif" width="100%"/>
### Recent Update
- 2020.12.18: Support re-recognition of a single label box (by [ninetailskim](https://github.com/ninetailskim) ), perfect shortcut keys.
## Installation ## Installation
### 1. Install PaddleOCR ### 1. Install PaddleOCR
...@@ -92,11 +96,30 @@ Therefore, if the recognition result has been manually changed before, it may ch ...@@ -92,11 +96,30 @@ Therefore, if the recognition result has been manually changed before, it may ch
## Explanation ## Explanation
### Shortcut keys
| Shortcut keys | Description |
| ---------------- | ------------------------------------------------ |
| Ctrl + shift + A | Automatically label all unchecked images |
| Ctrl + shift + R | Re-recognize all the labels of the current image |
| W | Create a rect box |
| Q | Create a four-points box |
| Ctrl + E | Edit label of the selected box |
| Ctrl + R | Re-recognize the selected box |
| Backspace | Delete the selected box |
| Ctrl + V | Check image |
| Ctrl + Shift + d | Delete image |
| D | Next image |
| A | Previous image |
| Ctrl++ | Zoom in |
| Ctrl-- | Zoom out |
| ↑→↓← | Move selected box |
### Built-in Model ### Built-in Model
- Default model: PPOCRLabel uses the Chinese and English ultra-lightweight OCR model in PaddleOCR by default, supports Chinese, English and number recognition, and multiple language detection. - Default model: PPOCRLabel uses the Chinese and English ultra-lightweight OCR model in PaddleOCR by default, supports Chinese, English and number recognition, and multiple language detection.
- Model language switching: Changing the built-in model language is supportable by clicking "PaddleOCR"-"Choose OCR Model" in the menu bar. Currently supported languages​include French, German, Korean, and Japanese. - Model language switching: Changing the built-in model language is supportable by clicking "PaddleOCR"-"Choose OCR Model" in the menu bar. Currently supported languages​include French, German, Korean, and Japanese.
For specific model download links, please refer to [PaddleOCR Model List](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_en/models_list_en.md#multilingual-recognition-modelupdating) For specific model download links, please refer to [PaddleOCR Model List](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_en/models_list_en.md#multilingual-recognition-modelupdating)
- Custom model: The model trained by users can be replaced by modifying PPOCRLabel.py in [PaddleOCR class instantiation](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/PPOCRLabel/PPOCRLabel.py#L110) referring [Custom Model Code](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_en/whl_en.md#use-custom-model) - Custom model: The model trained by users can be replaced by modifying PPOCRLabel.py in [PaddleOCR class instantiation](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/PPOCRLabel/PPOCRLabel.py#L110) referring [Custom Model Code](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_en/whl_en.md#use-custom-model)
......
...@@ -6,6 +6,10 @@ PPOCRLabel是一款适用于OCR领域的半自动化图形标注工具,使用p ...@@ -6,6 +6,10 @@ PPOCRLabel是一款适用于OCR领域的半自动化图形标注工具,使用p
<img src="./data/gif/steps.gif" width="100%"/> <img src="./data/gif/steps.gif" width="100%"/>
#### 近期更新
- 2020.12.18: 支持对单个标记框进行重新识别(by [ninetailskim](https://github.com/ninetailskim) ),完善快捷键。
## 安装 ## 安装
### 1. 安装PaddleOCR ### 1. 安装PaddleOCR
...@@ -72,6 +76,26 @@ python3 PPOCRLabel.py --lang ch ...@@ -72,6 +76,26 @@ python3 PPOCRLabel.py --lang ch
| crop_img | 识别数据。按照检测框切割后的图片。与rec_gt.txt同时产生。 | | crop_img | 识别数据。按照检测框切割后的图片。与rec_gt.txt同时产生。 |
## 说明 ## 说明
### 快捷键
| 快捷键 | 说明 |
| ---------------- | ---------------------------- |
| Ctrl + shift + A | 自动标注所有未确认过的图片 |
| Ctrl + shift + R | 对当前图片的所有标记重新识别 |
| W | 新建矩形框 |
| Q | 新建四点框 |
| Ctrl + E | 编辑所选框标签 |
| Ctrl + R | 重新识别所选标记 |
| Backspace | 删除所选框 |
| Ctrl + V | 确认本张图片标记 |
| Ctrl + Shift + d | 删除本张图片 |
| D | 下一张图片 |
| A | 上一张图片 |
| Ctrl++ | 缩小 |
| Ctrl-- | 放大 |
| ↑→↓← | 移动标记框 |
### 内置模型 ### 内置模型
- 默认模型:PPOCRLabel默认使用PaddleOCR中的中英文超轻量OCR模型,支持中英文与数字识别,多种语言检测。 - 默认模型:PPOCRLabel默认使用PaddleOCR中的中英文超轻量OCR模型,支持中英文与数字识别,多种语言检测。
......
...@@ -46,8 +46,9 @@ class Worker(QThread): ...@@ -46,8 +46,9 @@ class Worker(QThread):
chars = res[1][0] chars = res[1][0]
cond = res[1][1] cond = res[1][1]
posi = res[0] posi = res[0]
strs += "Transcription: " + chars + " Probability: " + str( strs += "Transcription: " + chars + " Probability: " + str(cond) + \
cond) + " Location: " + json.dumps(posi) + '\n' " Location: " + json.dumps(posi) +'\n'
# Sending large amounts of data repeatedly through pyqtSignal may affect the program efficiency
self.listValue.emit(strs) self.listValue.emit(strs)
self.mainThread.result_dic = self.result_dic self.mainThread.result_dic = self.result_dic
self.mainThread.filePath = Imgpath self.mainThread.filePath = Imgpath
......
此差异已折叠。
...@@ -94,4 +94,5 @@ ok=确认 ...@@ -94,4 +94,5 @@ ok=确认
autolabeling=自动标注中 autolabeling=自动标注中
hideBox=隐藏所有标注 hideBox=隐藏所有标注
showBox=显示所有标注 showBox=显示所有标注
saveLabel=保存标记结果 saveLabel=保存标记结果
\ No newline at end of file singleRe=重识别此区块
\ No newline at end of file
saveAsDetail=將標籤保存到其他文件
changeSaveDir=改變存放目錄
openFile=開啟檔案
shapeLineColorDetail=更改線條顏色
resetAll=重置
crtBox=創建區塊
crtBoxDetail=畫一個區塊
dupBoxDetail=複製區塊
verifyImg=驗證圖像
zoominDetail=放大
verifyImgDetail=驗證圖像
saveDetail=將標籤存到
openFileDetail=打開圖像
fitWidthDetail=調整到窗口寬度
tutorial=YouTube教學
editLabel=編輯標籤
openAnnotationDetail=打開標籤文件
quit=結束
shapeFillColorDetail=更改填充顏色
closeCurDetail=關閉目前檔案
closeCur=關閉
deleteImg=刪除圖像
deleteImgDetail=刪除目前圖像
fitWin=調整到跟窗口一樣大小
delBox=刪除選取區塊
boxLineColorDetail=選擇框線顏色
originalsize=原始大小
resetAllDetail=重設所有設定
zoomoutDetail=畫面放大
save=儲存
saveAs=另存為
fitWinDetail=縮放到窗口一樣
openDir=開啟目錄
copyPrevBounding=複製當前圖像中的上一個邊界框
showHide=顯示/隱藏標籤
changeSaveFormat=更改儲存格式
shapeFillColor=填充顏色
quitApp=離開本程式
dupBox=複製區塊
delBoxDetail=刪除區塊
zoomin=放大畫面
info=資訊
openAnnotation=開啟標籤
prevImgDetail=上一個圖像
fitWidth=縮放到跟畫面一樣寬
zoomout=縮小畫面
changeSavedAnnotationDir=更改預設標籤存的目錄
nextImgDetail=下一個圖像
originalsizeDetail=放大到原始大小
prevImg=上一個圖像
tutorialDetail=顯示示範內容
shapeLineColor=形狀線條顏色
boxLineColor=日期分隔線顏色
editLabelDetail=修改所選區塊的標籤
nextImg=下一張圖片
useDefaultLabel=使用預設標籤
useDifficult=有難度的
boxLabelText=區塊的標籤
labels=標籤
autoSaveMode=自動儲存模式
singleClsMode=單一類別模式
displayLabel=顯示類別
fileList=檔案清單
files=檔案
iconList=XX
icon=XX
advancedMode=進階模式
advancedModeDetail=切到進階模式
showAllBoxDetail=顯示所有區塊
hideAllBoxDetail=隱藏所有區塊
...@@ -94,4 +94,5 @@ ok=OK ...@@ -94,4 +94,5 @@ ok=OK
autolabeling=Automatic Labeling autolabeling=Automatic Labeling
hideBox=Hide All Box hideBox=Hide All Box
showBox=Show All Box showBox=Show All Box
saveLabel=Save Label saveLabel=Save Label
\ No newline at end of file singleRe=Re-recognition RectBox
\ No newline at end of file
...@@ -69,12 +69,14 @@ fusion_generator: ...@@ -69,12 +69,14 @@ fusion_generator:
1. You can run `tools/synth_image` and generate the demo image, which is saved in the current folder. 1. You can run `tools/synth_image` and generate the demo image, which is saved in the current folder.
```python ```python
python3 -m tools.synth_image -c configs/config.yml --style_image examples/style_images/2.jpg --text_corpus PaddleOCR --language en python3 tools/synth_image.py -c configs/config.yml --style_image examples/style_images/2.jpg --text_corpus PaddleOCR --language en
``` ```
* Note 1: The language options is correspond to the corpus. Currently, the tool only supports English, Simplified Chinese and Korean. * Note 1: The language options is correspond to the corpus. Currently, the tool only supports English, Simplified Chinese and Korean.
* Note 2: Synth-Text is mainly used to generate images for OCR recognition models. * Note 2: Synth-Text is mainly used to generate images for OCR recognition models.
So the height of style images should be around 32 pixels. Images in other sizes may behave poorly. So the height of style images should be around 32 pixels. Images in other sizes may behave poorly.
* Note 3: You can modify `use_gpu` in `configs/config.yml` to determine whether to use GPU for prediction.
For example, enter the following image and corpus `PaddleOCR`. For example, enter the following image and corpus `PaddleOCR`.
...@@ -139,9 +141,10 @@ We provide a general dataset containing Chinese, English and Korean (50,000 imag ...@@ -139,9 +141,10 @@ We provide a general dataset containing Chinese, English and Korean (50,000 imag
2. You can run the following command to start synthesis task: 2. You can run the following command to start synthesis task:
``` bash ``` bash
python -m tools.synth_dataset.py -c configs/dataset_config.yml python3 tools/synth_dataset.py -c configs/dataset_config.yml
``` ```
We also provide example corpus and images in `examples` folder.
We also provide example corpus and images in `examples` folder.
<div align="center"> <div align="center">
<img src="examples/style_images/1.jpg" width="300"> <img src="examples/style_images/1.jpg" width="300">
<img src="examples/style_images/2.jpg" width="300"> <img src="examples/style_images/2.jpg" width="300">
......
...@@ -61,11 +61,12 @@ fusion_generator: ...@@ -61,11 +61,12 @@ fusion_generator:
输入一张风格图和一段文字语料,运行tools/synth_image,合成单张图片,结果图像保存在当前目录下: 输入一张风格图和一段文字语料,运行tools/synth_image,合成单张图片,结果图像保存在当前目录下:
```python ```python
python3 -m tools.synth_image -c configs/config.yml --style_image examples/style_images/2.jpg --text_corpus PaddleOCR --language en python3 tools/synth_image.py -c configs/config.yml --style_image examples/style_images/2.jpg --text_corpus PaddleOCR --language en
``` ```
* 注1:语言选项和语料相对应,目前该工具只支持英文、简体中文和韩语。 * 注1:语言选项和语料相对应,目前该工具只支持英文、简体中文和韩语。
* 注2:Style-Text生成的数据主要应用于OCR识别场景。基于当前PaddleOCR识别模型的设计,我们主要支持高度在32左右的风格图像。 * 注2:Style-Text生成的数据主要应用于OCR识别场景。基于当前PaddleOCR识别模型的设计,我们主要支持高度在32左右的风格图像。
如果输入图像尺寸相差过多,效果可能不佳。 如果输入图像尺寸相差过多,效果可能不佳。
* 注3:可以通过修改配置文件中的`use_gpu`(true或者false)参数来决定是否使用GPU进行预测。
例如,输入如下图片和语料"PaddleOCR": 例如,输入如下图片和语料"PaddleOCR":
...@@ -127,7 +128,7 @@ python3 -m tools.synth_image -c configs/config.yml --style_image examples/style_ ...@@ -127,7 +128,7 @@ python3 -m tools.synth_image -c configs/config.yml --style_image examples/style_
2. 运行`tools/synth_dataset`合成数据: 2. 运行`tools/synth_dataset`合成数据:
``` bash ``` bash
python -m tools.synth_dataset -c configs/dataset_config.yml python tools/synth_dataset.py -c configs/dataset_config.yml
``` ```
我们在examples目录下提供了样例图片和语料。 我们在examples目录下提供了样例图片和语料。
<div align="center"> <div align="center">
......
...@@ -28,6 +28,7 @@ class StyleTextRecPredictor(object): ...@@ -28,6 +28,7 @@ class StyleTextRecPredictor(object):
], "Generator {} not supported.".format(algorithm) ], "Generator {} not supported.".format(algorithm)
use_gpu = config["Global"]['use_gpu'] use_gpu = config["Global"]['use_gpu']
check_gpu(use_gpu) check_gpu(use_gpu)
paddle.set_device('gpu' if use_gpu else 'cpu')
self.logger = get_logger() self.logger = get_logger()
self.generator = getattr(style_text_rec, algorithm)(config) self.generator = getattr(style_text_rec, algorithm)(config)
self.height = config["Global"]["image_height"] self.height = config["Global"]["image_height"]
......
...@@ -11,6 +11,14 @@ ...@@ -11,6 +11,14 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import os
import sys
__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
from engine.synthesisers import DatasetSynthesiser from engine.synthesisers import DatasetSynthesiser
......
...@@ -16,13 +16,13 @@ import cv2 ...@@ -16,13 +16,13 @@ import cv2
import sys import sys
import glob import glob
from utils.config import ArgsParser
from engine.synthesisers import ImageSynthesiser
__dir__ = os.path.dirname(os.path.abspath(__file__)) __dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__) sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '..'))) sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
from utils.config import ArgsParser
from engine.synthesisers import ImageSynthesiser
def synth_image(): def synth_image():
args = ArgsParser().parse_args() args = ArgsParser().parse_args()
......
...@@ -107,10 +107,10 @@ make inference_lib_dist ...@@ -107,10 +107,10 @@ make inference_lib_dist
For more compilation parameter options, please refer to the official website of the Paddle C++ inference library:[https://www.paddlepaddle.org.cn/documentation/docs/en/advanced_guide/inference_deployment/inference/build_and_install_lib_en.html](https://www.paddlepaddle.org.cn/documentation/docs/en/advanced_guide/inference_deployment/inference/build_and_install_lib_en.html). For more compilation parameter options, please refer to the official website of the Paddle C++ inference library:[https://www.paddlepaddle.org.cn/documentation/docs/en/advanced_guide/inference_deployment/inference/build_and_install_lib_en.html](https://www.paddlepaddle.org.cn/documentation/docs/en/advanced_guide/inference_deployment/inference/build_and_install_lib_en.html).
* After the compilation process, you can see the following files in the folder of `build/fluid_inference_install_dir/`. * After the compilation process, you can see the following files in the folder of `build/paddle_inference_install_dir/`.
``` ```
build/fluid_inference_install_dir/ build/paddle_inference_install_dir/
|-- CMakeCache.txt |-- CMakeCache.txt
|-- paddle |-- paddle
|-- third_party |-- third_party
......
...@@ -81,14 +81,14 @@ void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img, ...@@ -81,14 +81,14 @@ void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img,
else if (resize_h / 32 < 1 + 1e-5) else if (resize_h / 32 < 1 + 1e-5)
resize_h = 32; resize_h = 32;
else else
resize_h = (resize_h / 32 - 1) * 32; resize_h = (resize_h / 32) * 32;
if (resize_w % 32 == 0) if (resize_w % 32 == 0)
resize_w = resize_w; resize_w = resize_w;
else if (resize_w / 32 < 1 + 1e-5) else if (resize_w / 32 < 1 + 1e-5)
resize_w = 32; resize_w = 32;
else else
resize_w = (resize_w / 32 - 1) * 32; resize_w = (resize_w / 32) * 32;
cv::resize(img, resize_img, cv::Size(resize_w, resize_h)); cv::resize(img, resize_img, cv::Size(resize_w, resize_h));
......
...@@ -11,7 +11,7 @@ max_side_len 960 ...@@ -11,7 +11,7 @@ max_side_len 960
det_db_thresh 0.3 det_db_thresh 0.3
det_db_box_thresh 0.5 det_db_box_thresh 0.5
det_db_unclip_ratio 2.0 det_db_unclip_ratio 2.0
det_model_dir ./inference/ch__ppocr_mobile_v2.0_det_infer/ det_model_dir ./inference/ch_ppocr_mobile_v2.0_det_infer/
# cls config # cls config
use_angle_cls 0 use_angle_cls 0
......
...@@ -117,7 +117,7 @@ python3 tools/eval.py -c configs/cls/cls_mv3.yml -o Global.checkpoints={path/to/ ...@@ -117,7 +117,7 @@ python3 tools/eval.py -c configs/cls/cls_mv3.yml -o Global.checkpoints={path/to/
``` ```
# 预测分类结果 # 预测分类结果
python3 tools/infer_cls.py -c configs/cls/cls_mv3.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/ch/word_1.jpg python3 tools/infer_cls.py -c configs/cls/cls_mv3.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/ch/word_1.jpg
``` ```
预测图片: 预测图片:
......
...@@ -120,16 +120,16 @@ python3 tools/eval.py -c configs/det/det_mv3_db.yml -o Global.checkpoints="{pat ...@@ -120,16 +120,16 @@ python3 tools/eval.py -c configs/det/det_mv3_db.yml -o Global.checkpoints="{pat
测试单张图像的检测效果 测试单张图像的检测效果
```shell ```shell
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.checkpoints="./output/det_db/best_accuracy" python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false
``` ```
测试DB模型时,调整后处理阈值, 测试DB模型时,调整后处理阈值,
```shell ```shell
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.checkpoints="./output/det_db/best_accuracy" PostProcess.box_thresh=0.6 PostProcess.unclip_ratio=1.5 python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false PostProcess.box_thresh=0.6 PostProcess.unclip_ratio=1.5
``` ```
测试文件夹下所有图像的检测效果 测试文件夹下所有图像的检测效果
```shell ```shell
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/" Global.checkpoints="./output/det_db/best_accuracy" python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false
``` ```
...@@ -245,7 +245,10 @@ python3 tools/infer/predict_det.py --det_algorithm="SAST" --image_dir="./doc/img ...@@ -245,7 +245,10 @@ python3 tools/infer/predict_det.py --det_algorithm="SAST" --image_dir="./doc/img
超轻量中文识别模型推理,可以执行如下命令: 超轻量中文识别模型推理,可以执行如下命令:
``` ```
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --rec_model_dir="./inference/rec_crnn/" # 下载超轻量中文识别模型:
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar
tar xf ch_ppocr_mobile_v2.0_rec_infer.tar
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --rec_model_dir="ch_ppocr_mobile_v2.0_rec_infer"
``` ```
![](../imgs_words/ch/word_4.jpg) ![](../imgs_words/ch/word_4.jpg)
...@@ -266,7 +269,6 @@ Predicts of ./doc/imgs_words/ch/word_4.jpg:('实力活力', 0.98458153) ...@@ -266,7 +269,6 @@ Predicts of ./doc/imgs_words/ch/word_4.jpg:('实力活力', 0.98458153)
``` ```
python3 tools/export_model.py -c configs/rec/rec_r34_vd_none_bilstm_ctc.yml -o Global.pretrained_model=./rec_r34_vd_none_bilstm_ctc_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/rec_crnn python3 tools/export_model.py -c configs/rec/rec_r34_vd_none_bilstm_ctc.yml -o Global.pretrained_model=./rec_r34_vd_none_bilstm_ctc_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/rec_crnn
``` ```
CRNN 文本识别模型推理,可以执行如下命令: CRNN 文本识别模型推理,可以执行如下命令:
...@@ -327,7 +329,10 @@ Predicts of ./doc/imgs_words/korean/1.jpg:('바탕으로', 0.9948904) ...@@ -327,7 +329,10 @@ Predicts of ./doc/imgs_words/korean/1.jpg:('바탕으로', 0.9948904)
方向分类模型推理,可以执行如下命令: 方向分类模型推理,可以执行如下命令:
``` ```
python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --cls_model_dir="./inference/cls/" # 下载超轻量中文方向分类器模型:
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar
tar xf ch_ppocr_mobile_v2.0_cls_infer.tar
python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --cls_model_dir="ch_ppocr_mobile_v2.0_cls_infer"
``` ```
![](../imgs_words/ch/word_1.jpg) ![](../imgs_words/ch/word_1.jpg)
......
...@@ -324,7 +324,6 @@ Eval: ...@@ -324,7 +324,6 @@ Eval:
评估数据集可以通过 `configs/rec/rec_icdar15_train.yml` 修改Eval中的 `label_file_path` 设置。 评估数据集可以通过 `configs/rec/rec_icdar15_train.yml` 修改Eval中的 `label_file_path` 设置。
*注意* 评估时必须确保配置文件中 infer_img 字段为空
``` ```
# GPU 评估, Global.checkpoints 为待测权重 # GPU 评估, Global.checkpoints 为待测权重
python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy
...@@ -342,7 +341,7 @@ python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec ...@@ -342,7 +341,7 @@ python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec
``` ```
# 预测英文结果 # 预测英文结果
python3 tools/infer_rec.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/en/word_1.png python3 tools/infer_rec.py -c configs/rec/rec_icdar15_train.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/en/word_1.png
``` ```
预测图片: 预测图片:
...@@ -361,7 +360,7 @@ infer_img: doc/imgs_words/en/word_1.png ...@@ -361,7 +360,7 @@ infer_img: doc/imgs_words/en/word_1.png
``` ```
# 预测中文结果 # 预测中文结果
python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/ch/word_1.jpg python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/ch/word_1.jpg
``` ```
预测图片: 预测图片:
......
...@@ -119,7 +119,7 @@ Use `Global.infer_img` to specify the path of the predicted picture or folder, a ...@@ -119,7 +119,7 @@ Use `Global.infer_img` to specify the path of the predicted picture or folder, a
``` ```
# Predict English results # Predict English results
python3 tools/infer_cls.py -c configs/cls/cls_mv3.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words_en/word_10.png python3 tools/infer_cls.py -c configs/cls/cls_mv3.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words_en/word_10.png
``` ```
Input image: Input image:
......
...@@ -113,16 +113,16 @@ python3 tools/eval.py -c configs/det/det_mv3_db.yml -o Global.checkpoints="{pat ...@@ -113,16 +113,16 @@ python3 tools/eval.py -c configs/det/det_mv3_db.yml -o Global.checkpoints="{pat
Test the detection result on a single image: Test the detection result on a single image:
```shell ```shell
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.checkpoints="./output/det_db/best_accuracy" python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false
``` ```
When testing the DB model, adjust the post-processing threshold: When testing the DB model, adjust the post-processing threshold:
```shell ```shell
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.checkpoints="./output/det_db/best_accuracy" PostProcess.box_thresh=0.6 PostProcess.unclip_ratio=1.5 python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false PostProcess.box_thresh=0.6 PostProcess.unclip_ratio=1.5
``` ```
Test the detection result on all images in the folder: Test the detection result on all images in the folder:
```shell ```shell
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/" Global.checkpoints="./output/det_db/best_accuracy" python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false
``` ```
...@@ -255,15 +255,18 @@ The following will introduce the lightweight Chinese recognition model inference ...@@ -255,15 +255,18 @@ The following will introduce the lightweight Chinese recognition model inference
For lightweight Chinese recognition model inference, you can execute the following commands: For lightweight Chinese recognition model inference, you can execute the following commands:
``` ```
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --rec_model_dir="./inference/rec_crnn/" # download CRNN text recognition inference model
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar
tar xf ch_ppocr_mobile_v2.0_rec_infer.tar
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_10.png" --rec_model_dir="ch_ppocr_mobile_v2.0_rec_infer"
``` ```
![](../imgs_words/ch/word_4.jpg) ![](../imgs_words_en/word_10.png)
After executing the command, the prediction results (recognized text and score) of the above image will be printed on the screen. After executing the command, the prediction results (recognized text and score) of the above image will be printed on the screen.
```bash ```bash
Predicts of ./doc/imgs_words/ch/word_4.jpg:('实力活力', 0.98458153) Predicts of ./doc/imgs_words_en/word_10.png:('PAIN', 0.9897658)
``` ```
<a name="CTC-BASED_RECOGNITION"></a> <a name="CTC-BASED_RECOGNITION"></a>
...@@ -339,7 +342,12 @@ For angle classification model inference, you can execute the following commands ...@@ -339,7 +342,12 @@ For angle classification model inference, you can execute the following commands
``` ```
python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words_en/word_10.png" --cls_model_dir="./inference/cls/" python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words_en/word_10.png" --cls_model_dir="./inference/cls/"
``` ```
```
# download text angle class inference model:
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar
tar xf ch_ppocr_mobile_v2.0_cls_infer.tar
python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words_en/word_10.png" --cls_model_dir="ch_ppocr_mobile_v2.0_cls_infer"
```
![](../imgs_words_en/word_10.png) ![](../imgs_words_en/word_10.png)
After executing the command, the prediction results (classification angle and score) of the above image will be printed on the screen. After executing the command, the prediction results (classification angle and score) of the above image will be printed on the screen.
......
...@@ -317,11 +317,11 @@ Eval: ...@@ -317,11 +317,11 @@ Eval:
<a name="EVALUATION"></a> <a name="EVALUATION"></a>
### EVALUATION ### EVALUATION
The evaluation data set can be modified via `configs/rec/rec_icdar15_reader.yml` setting of `label_file_path` in EvalReader. The evaluation dataset can be set by modifying the `Eval.dataset.label_file_list` field in the `configs/rec/rec_icdar15_train.yml` file.
``` ```
# GPU evaluation, Global.checkpoints is the weight to be tested # GPU evaluation, Global.checkpoints is the weight to be tested
python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_icdar15_reader.yml -o Global.checkpoints={path/to/weights}/best_accuracy python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy
``` ```
<a name="PREDICTION"></a> <a name="PREDICTION"></a>
...@@ -336,7 +336,7 @@ The default prediction picture is stored in `infer_img`, and the weight is speci ...@@ -336,7 +336,7 @@ The default prediction picture is stored in `infer_img`, and the weight is speci
``` ```
# Predict English results # Predict English results
python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.checkpoints={path/to/weights}/best_accuracy TestReader.infer_img=doc/imgs_words/en/word_1.jpg python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/en/word_1.jpg
``` ```
Input image: Input image:
...@@ -354,7 +354,7 @@ The configuration file used for prediction must be consistent with the training. ...@@ -354,7 +354,7 @@ The configuration file used for prediction must be consistent with the training.
``` ```
# Predict Chinese results # Predict Chinese results
python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.checkpoints={path/to/weights}/best_accuracy TestReader.infer_img=doc/imgs_words/ch/word_1.jpg python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/ch/word_1.jpg
``` ```
Input image: Input image:
......
doc/joinus.PNG

271.9 KB | W: | H:

doc/joinus.PNG

211.9 KB | W: | H:

doc/joinus.PNG
doc/joinus.PNG
doc/joinus.PNG
doc/joinus.PNG
  • 2-up
  • Swipe
  • Onion skin
...@@ -262,8 +262,8 @@ class PaddleOCR(predict_system.TextSystem): ...@@ -262,8 +262,8 @@ class PaddleOCR(predict_system.TextSystem):
logger.error('rec_algorithm must in {}'.format(SUPPORT_REC_MODEL)) logger.error('rec_algorithm must in {}'.format(SUPPORT_REC_MODEL))
sys.exit(0) sys.exit(0)
postprocess_params.rec_char_dict_path = Path( postprocess_params.rec_char_dict_path = str(
__file__).parent / postprocess_params.rec_char_dict_path Path(__file__).parent / postprocess_params.rec_char_dict_path)
# init det_model and rec_model # init det_model and rec_model
super().__init__(postprocess_params) super().__init__(postprocess_params)
......
...@@ -32,7 +32,7 @@ setup( ...@@ -32,7 +32,7 @@ setup(
package_dir={'paddleocr': ''}, package_dir={'paddleocr': ''},
include_package_data=True, include_package_data=True,
entry_points={"console_scripts": ["paddleocr= paddleocr.paddleocr:main"]}, entry_points={"console_scripts": ["paddleocr= paddleocr.paddleocr:main"]},
version='2.0.1', version='2.0.2',
install_requires=requirements, install_requires=requirements,
license='Apache License 2.0', license='Apache License 2.0',
description='Awesome OCR toolkits based on PaddlePaddle (8.6M ultra-lightweight pre-trained model, support training and deployment among server, mobile, embeded and IoT devices', description='Awesome OCR toolkits based on PaddlePaddle (8.6M ultra-lightweight pre-trained model, support training and deployment among server, mobile, embeded and IoT devices',
......
...@@ -35,6 +35,7 @@ logger = get_logger() ...@@ -35,6 +35,7 @@ logger = get_logger()
class TextDetector(object): class TextDetector(object):
def __init__(self, args): def __init__(self, args):
self.args = args
self.det_algorithm = args.det_algorithm self.det_algorithm = args.det_algorithm
self.use_zero_copy_run = args.use_zero_copy_run self.use_zero_copy_run = args.use_zero_copy_run
pre_process_list = [{ pre_process_list = [{
...@@ -70,6 +71,9 @@ class TextDetector(object): ...@@ -70,6 +71,9 @@ class TextDetector(object):
postprocess_params["cover_thresh"] = args.det_east_cover_thresh postprocess_params["cover_thresh"] = args.det_east_cover_thresh
postprocess_params["nms_thresh"] = args.det_east_nms_thresh postprocess_params["nms_thresh"] = args.det_east_nms_thresh
elif self.det_algorithm == "SAST": elif self.det_algorithm == "SAST":
pre_process_list[0] = {
'DetResizeForTest': {'resize_long': args.det_limit_side_len}
}
postprocess_params['name'] = 'SASTPostProcess' postprocess_params['name'] = 'SASTPostProcess'
postprocess_params["score_thresh"] = args.det_sast_score_thresh postprocess_params["score_thresh"] = args.det_sast_score_thresh
postprocess_params["nms_thresh"] = args.det_sast_nms_thresh postprocess_params["nms_thresh"] = args.det_sast_nms_thresh
......
...@@ -33,6 +33,7 @@ def parse_args(): ...@@ -33,6 +33,7 @@ def parse_args():
parser.add_argument("--use_gpu", type=str2bool, default=True) parser.add_argument("--use_gpu", type=str2bool, default=True)
parser.add_argument("--ir_optim", type=str2bool, default=True) parser.add_argument("--ir_optim", type=str2bool, default=True)
parser.add_argument("--use_tensorrt", type=str2bool, default=False) parser.add_argument("--use_tensorrt", type=str2bool, default=False)
parser.add_argument("--use_fp16", type=str2bool, default=False)
parser.add_argument("--gpu_mem", type=int, default=8000) parser.add_argument("--gpu_mem", type=int, default=8000)
# params for text detector # params for text detector
...@@ -46,7 +47,7 @@ def parse_args(): ...@@ -46,7 +47,7 @@ def parse_args():
parser.add_argument("--det_db_thresh", type=float, default=0.3) parser.add_argument("--det_db_thresh", type=float, default=0.3)
parser.add_argument("--det_db_box_thresh", type=float, default=0.5) parser.add_argument("--det_db_box_thresh", type=float, default=0.5)
parser.add_argument("--det_db_unclip_ratio", type=float, default=1.6) parser.add_argument("--det_db_unclip_ratio", type=float, default=1.6)
parser.add_argument("--max_batch_size", type=int, default=10)
# EAST parmas # EAST parmas
parser.add_argument("--det_east_score_thresh", type=float, default=0.8) parser.add_argument("--det_east_score_thresh", type=float, default=0.8)
parser.add_argument("--det_east_cover_thresh", type=float, default=0.1) parser.add_argument("--det_east_cover_thresh", type=float, default=0.1)
...@@ -62,7 +63,7 @@ def parse_args(): ...@@ -62,7 +63,7 @@ def parse_args():
parser.add_argument("--rec_model_dir", type=str) parser.add_argument("--rec_model_dir", type=str)
parser.add_argument("--rec_image_shape", type=str, default="3, 32, 320") parser.add_argument("--rec_image_shape", type=str, default="3, 32, 320")
parser.add_argument("--rec_char_type", type=str, default='ch') parser.add_argument("--rec_char_type", type=str, default='ch')
parser.add_argument("--rec_batch_num", type=int, default=6) parser.add_argument("--rec_batch_num", type=int, default=1)
parser.add_argument("--max_text_length", type=int, default=25) parser.add_argument("--max_text_length", type=int, default=25)
parser.add_argument( parser.add_argument(
"--rec_char_dict_path", "--rec_char_dict_path",
...@@ -78,7 +79,7 @@ def parse_args(): ...@@ -78,7 +79,7 @@ def parse_args():
parser.add_argument("--cls_model_dir", type=str) parser.add_argument("--cls_model_dir", type=str)
parser.add_argument("--cls_image_shape", type=str, default="3, 48, 192") parser.add_argument("--cls_image_shape", type=str, default="3, 48, 192")
parser.add_argument("--label_list", type=list, default=['0', '180']) parser.add_argument("--label_list", type=list, default=['0', '180'])
parser.add_argument("--cls_batch_num", type=int, default=30) parser.add_argument("--cls_batch_num", type=int, default=6)
parser.add_argument("--cls_thresh", type=float, default=0.9) parser.add_argument("--cls_thresh", type=float, default=0.9)
parser.add_argument("--enable_mkldnn", type=str2bool, default=False) parser.add_argument("--enable_mkldnn", type=str2bool, default=False)
...@@ -113,6 +114,11 @@ def create_predictor(args, mode, logger): ...@@ -113,6 +114,11 @@ def create_predictor(args, mode, logger):
if args.use_gpu: if args.use_gpu:
config.enable_use_gpu(args.gpu_mem, 0) config.enable_use_gpu(args.gpu_mem, 0)
if args.use_tensorrt:
config.enable_tensorrt_engine(
precision_mode=AnalysisConfig.Precision.Half
if args.use_fp16 else AnalysisConfig.Precision.Float32,
max_batch_size=args.max_batch_size)
else: else:
config.disable_gpu() config.disable_gpu()
config.set_cpu_math_library_num_threads(6) config.set_cpu_math_library_num_threads(6)
......
...@@ -332,7 +332,7 @@ def eval(model, valid_dataloader, post_process_class, eval_class): ...@@ -332,7 +332,7 @@ def eval(model, valid_dataloader, post_process_class, eval_class):
return metirc return metirc
def preprocess(): def preprocess(is_train=False):
FLAGS = ArgsParser().parse_args() FLAGS = ArgsParser().parse_args()
config = load_config(FLAGS.config) config = load_config(FLAGS.config)
merge_config(FLAGS.opt) merge_config(FLAGS.opt)
...@@ -350,15 +350,17 @@ def preprocess(): ...@@ -350,15 +350,17 @@ def preprocess():
device = paddle.set_device(device) device = paddle.set_device(device)
config['Global']['distributed'] = dist.get_world_size() != 1 config['Global']['distributed'] = dist.get_world_size() != 1
if is_train:
# save_config # save_config
save_model_dir = config['Global']['save_model_dir'] save_model_dir = config['Global']['save_model_dir']
os.makedirs(save_model_dir, exist_ok=True) os.makedirs(save_model_dir, exist_ok=True)
with open(os.path.join(save_model_dir, 'config.yml'), 'w') as f: with open(os.path.join(save_model_dir, 'config.yml'), 'w') as f:
yaml.dump(dict(config), f, default_flow_style=False, sort_keys=False) yaml.dump(
dict(config), f, default_flow_style=False, sort_keys=False)
logger = get_logger( log_file = '{}/train.log'.format(save_model_dir)
name='root', log_file='{}/train.log'.format(save_model_dir)) else:
log_file = None
logger = get_logger(name='root', log_file=log_file)
if config['Global']['use_visualdl']: if config['Global']['use_visualdl']:
from visualdl import LogWriter from visualdl import LogWriter
vdl_writer_path = '{}/vdl/'.format(save_model_dir) vdl_writer_path = '{}/vdl/'.format(save_model_dir)
......
...@@ -110,6 +110,6 @@ def test_reader(config, device, logger): ...@@ -110,6 +110,6 @@ def test_reader(config, device, logger):
if __name__ == '__main__': if __name__ == '__main__':
config, device, logger, vdl_writer = program.preprocess() config, device, logger, vdl_writer = program.preprocess(is_train=True)
main(config, device, logger, vdl_writer) main(config, device, logger, vdl_writer)
# test_reader(config, device, logger) # test_reader(config, device, logger)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册