提交 cf03889b 编写于 作者: W weishengyu
......@@ -274,6 +274,7 @@ class MainWindow(QMainWindow, WindowMixin):
self.preButton.setIconSize(QSize(40, 100))
self.preButton.clicked.connect(self.openPrevImg)
self.preButton.setStyleSheet('border: none;')
self.preButton.setShortcut('a')
self.iconlist = QListWidget()
self.iconlist.setViewMode(QListView.IconMode)
self.iconlist.setFlow(QListView.TopToBottom)
......@@ -289,12 +290,12 @@ class MainWindow(QMainWindow, WindowMixin):
self.nextButton.setIconSize(QSize(40, 100))
self.nextButton.setStyleSheet('border: none;')
self.nextButton.clicked.connect(self.openNextImg)
self.nextButton.setShortcut('d')
hlayout.addWidget(self.preButton)
hlayout.addWidget(self.iconlist)
hlayout.addWidget(self.nextButton)
# self.setLayout(hlayout)
iconListContainer = QWidget()
iconListContainer.setLayout(hlayout)
......@@ -359,11 +360,6 @@ class MainWindow(QMainWindow, WindowMixin):
opendir = action(getStr('openDir'), self.openDirDialog,
'Ctrl+u', 'open', getStr('openDir'))
openNextImg = action(getStr('nextImg'), self.openNextImg,
'd', 'next', getStr('nextImgDetail'))
openPrevImg = action(getStr('prevImg'), self.openPrevImg,
'a', 'prev', getStr('prevImgDetail'))
save = action(getStr('save'), self.saveFile,
'Ctrl+V', 'verify', getStr('saveDetail'), enabled=False)
......@@ -371,7 +367,7 @@ class MainWindow(QMainWindow, WindowMixin):
alcm = action(getStr('choosemodel'), self.autolcm,
'Ctrl+M', 'next', getStr('tipchoosemodel'))
deleteImg = action(getStr('deleteImg'), self.deleteImg, 'Ctrl+D', 'close', getStr('deleteImgDetail'),
deleteImg = action(getStr('deleteImg'), self.deleteImg, 'Ctrl+Shift+D', 'close', getStr('deleteImgDetail'),
enabled=True)
resetAll = action(getStr('resetAll'), self.resetAll, None, 'resetall', getStr('resetAllDetail'))
......@@ -388,7 +384,7 @@ class MainWindow(QMainWindow, WindowMixin):
'w', 'new', getStr('crtBoxDetail'), enabled=False)
delete = action(getStr('delBox'), self.deleteSelectedShape,
'Delete', 'delete', getStr('delBoxDetail'), enabled=False)
'backspace', 'delete', getStr('delBoxDetail'), enabled=False)
copy = action(getStr('dupBox'), self.copySelectedShape,
'Ctrl+C', 'copy', getStr('dupBoxDetail'),
enabled=False)
......@@ -446,8 +442,11 @@ class MainWindow(QMainWindow, WindowMixin):
reRec = action(getStr('reRecognition'), self.reRecognition,
'Ctrl+Shift+R', 'reRec', getStr('reRecognition'), enabled=False)
singleRere = action(getStr('singleRe'), self.singleRerecognition,
'Ctrl+R', 'reRec', getStr('singleRe'), enabled=False)
createpoly = action(getStr('creatPolygon'), self.createPolygon,
'p', 'new', 'Creat Polygon', enabled=True)
'q', 'new', 'Creat Polygon', enabled=True)
saveRec = action(getStr('saveRec'), self.saveRecResult,
'', 'save', getStr('saveRec'), enabled=False)
......@@ -491,6 +490,7 @@ class MainWindow(QMainWindow, WindowMixin):
icon='color', tip=getStr('shapeFillColorDetail'),
enabled=False)
# Label list context menu.
labelMenu = QMenu()
addActions(labelMenu, (edit, delete))
......@@ -501,7 +501,6 @@ class MainWindow(QMainWindow, WindowMixin):
# Draw squares/rectangles
self.drawSquaresOption = QAction(getStr('drawSquares'), self)
self.drawSquaresOption.setShortcut('Ctrl+Shift+R')
self.drawSquaresOption.setCheckable(True)
self.drawSquaresOption.setChecked(settings.get(SETTING_DRAW_SQUARE, False))
self.drawSquaresOption.triggered.connect(self.toogleDrawSquare)
......@@ -509,7 +508,7 @@ class MainWindow(QMainWindow, WindowMixin):
# Store actions for further handling.
self.actions = struct(save=save, open=open, resetAll=resetAll, deleteImg=deleteImg,
lineColor=color1, create=create, delete=delete, edit=edit, copy=copy,
saveRec=saveRec,
saveRec=saveRec, singleRere=singleRere,AutoRec=AutoRec,reRec=reRec,
createMode=createMode, editMode=editMode,
shapeLineColor=shapeLineColor, shapeFillColor=shapeFillColor,
zoom=zoom, zoomIn=zoomIn, zoomOut=zoomOut, zoomOrg=zoomOrg,
......@@ -518,9 +517,9 @@ class MainWindow(QMainWindow, WindowMixin):
fileMenuActions=(
open, opendir, saveLabel, resetAll, quit),
beginner=(), advanced=(),
editMenu=(createpoly, edit, copy, delete,
editMenu=(createpoly, edit, copy, delete,singleRere,
None, color1, self.drawSquaresOption),
beginnerContext=(create, edit, copy, delete),
beginnerContext=(create, edit, copy, delete, singleRere),
advancedContext=(createMode, editMode, edit, copy,
delete, shapeLineColor, shapeFillColor),
onLoadActive=(
......@@ -562,7 +561,7 @@ class MainWindow(QMainWindow, WindowMixin):
zoomIn, zoomOut, zoomOrg, None,
fitWindow, fitWidth))
addActions(self.menus.autolabel, (alcm, None, help)) #
addActions(self.menus.autolabel, (AutoRec, reRec, alcm, None, help)) #
self.menus.file.aboutToShow.connect(self.updateFileMenu)
......@@ -572,6 +571,7 @@ class MainWindow(QMainWindow, WindowMixin):
action('&Copy here', self.copyShape),
action('&Move here', self.moveShape)))
self.statusBar().showMessage('%s started.' % __appname__)
self.statusBar().show()
......@@ -919,6 +919,7 @@ class MainWindow(QMainWindow, WindowMixin):
self.actions.edit.setEnabled(selected)
self.actions.shapeLineColor.setEnabled(selected)
self.actions.shapeFillColor.setEnabled(selected)
self.actions.singleRere.setEnabled(selected)
def addLabel(self, shape):
shape.paintLabel = self.displayLabelOption.isChecked()
......@@ -988,6 +989,19 @@ class MainWindow(QMainWindow, WindowMixin):
self.updateComboBox()
self.canvas.loadShapes(s)
def singleLabel(self, shape):
    """Refresh the list entries that belong to a single shape.

    The item mapped to *shape* in ``self.shapesToItems`` receives the
    shape's label as text, and the item mapped to it in
    ``self.shapesToItemsbox`` receives the string form of the shape's
    corner points truncated to integers. ``self.updateComboBox()`` is
    invoked after each of the two updates.

    Args:
        shape: The shape whose entries should be refreshed. ``None`` is
            accepted and makes the call a no-op.
    """
    if shape is not None:
        # Entry holding the label text.
        label_entry = self.shapesToItems[shape]
        label_entry.setText(shape.label)
        self.updateComboBox()

        # Entry holding the box coordinates as a list of (x, y) int tuples.
        box_entry = self.shapesToItemsbox[shape]
        corners = []
        for point in shape.points:
            corners.append((int(point.x()), int(point.y())))
        box_entry.setText(str(corners))
        self.updateComboBox()
def updateComboBox(self):
# Get the unique labels and add them to the Combobox.
itemsTextList = [str(self.labelList.item(i).text()) for i in range(self.labelList.count())]
......@@ -1441,6 +1455,8 @@ class MainWindow(QMainWindow, WindowMixin):
self.haveAutoReced = False
self.AutoRecognition.setEnabled(True)
self.reRecogButton.setEnabled(True)
self.actions.AutoRec.setEnabled(True)
self.actions.reRec.setEnabled(True)
self.actions.saveLabel.setEnabled(True)
......@@ -1755,6 +1771,7 @@ class MainWindow(QMainWindow, WindowMixin):
self.loadFile(self.filePath) # ADD
self.haveAutoReced = True
self.AutoRecognition.setEnabled(False)
self.actions.AutoRec.setEnabled(False)
self.setDirty()
self.saveCacheLabel()
......@@ -1794,6 +1811,27 @@ class MainWindow(QMainWindow, WindowMixin):
else:
QMessageBox.information(self, "Information", "Draw a box!")
def singleRerecognition(self):
    """Re-run text recognition on the currently selected box only.

    The selected shape's four corner points are used to crop the region
    out of the image at ``self.filePath``. The crop is passed to
    ``self.ocr.ocr`` with detection disabled (``det=False``). When the
    returned transcription is a non-empty string that differs from the
    shape's current label, the label is replaced. Afterwards the shape's
    list entries are refreshed via ``singleLabel`` and the document is
    marked dirty.

    Returns early without changing anything when no shape is selected or
    when the crop could not be produced (an information dialog is shown in
    the latter case).

    Raises:
        AssertionError: If the selected shape does not have exactly four
            points.
    """
    shape = self.canvas.selectedShape
    if shape is None:
        # Nothing is selected, so there is no box to recognise.
        return

    img = cv2.imread(self.filePath)
    box = [[int(p.x()), int(p.y())] for p in shape.points]
    # The crop helper is called with a four-point box.
    assert len(box) == 4
    img_crop = get_rotate_crop_image(img, np.array(box, np.float32))
    if img_crop is None:
        msg = 'Can not recognise the detection box in ' + self.filePath + '. Please change manually'
        QMessageBox.information(self, "Information", msg)
        return

    result = self.ocr.ocr(img_crop, cls=True, det=False)
    # Compare by value: `is not ''` tests object identity against a literal,
    # which is implementation-dependent and a SyntaxWarning on Python 3.8+.
    if result[0][0] != '':
        # After the insert, index 0 is the box and index 1 the recognition
        # entry whose first element is the text.
        result.insert(0, box)
        print('result in reRec is ', result)
        if result[1][0] == shape.label:
            print('label no change')
        else:
            shape.label = result[1][0]
    # NOTE(review): the refresh below is assumed to run unconditionally
    # (also when the label did not change) - confirm against the original
    # indentation of this method.
    self.singleLabel(shape)
    self.setDirty()
    print(box)
def autolcm(self):
vbox = QVBoxLayout()
......@@ -1825,6 +1863,7 @@ class MainWindow(QMainWindow, WindowMixin):
self.dialog.exec_()
if self.filePath:
self.AutoRecognition.setEnabled(True)
self.actions.AutoRec.setEnabled(True)
def modelChoose(self):
......
......@@ -6,6 +6,10 @@ PPOCRLabel is a semi-automatic graphic annotation tool suitable for OCR field. I
<img src="./data/gif/steps_en.gif" width="100%"/>
### Recent Update
- 2020.12.18: Support re-recognition of a single label box (by [ninetailskim](https://github.com/ninetailskim)), and improve the shortcut keys.
## Installation
### 1. Install PaddleOCR
......@@ -92,6 +96,25 @@ Therefore, if the recognition result has been manually changed before, it may ch
## Explanation
### Shortcut keys
| Shortcut keys | Description |
| ---------------- | ------------------------------------------------ |
| Ctrl + shift + A | Automatically label all unchecked images |
| Ctrl + shift + R | Re-recognize all the labels of the current image |
| W | Create a rect box |
| Q | Create a four-points box |
| Ctrl + E | Edit label of the selected box |
| Ctrl + R | Re-recognize the selected box |
| Backspace | Delete the selected box |
| Ctrl + V | Check image |
| Ctrl + Shift + d | Delete image |
| D | Next image |
| A | Previous image |
| Ctrl++ | Zoom in |
| Ctrl-- | Zoom out |
| ↑→↓← | Move selected box |
### Built-in Model
- Default model: PPOCRLabel uses the Chinese and English ultra-lightweight OCR model in PaddleOCR by default, supports Chinese, English and number recognition, and multiple language detection.
......
......@@ -6,6 +6,10 @@ PPOCRLabel是一款适用于OCR领域的半自动化图形标注工具,使用p
<img src="./data/gif/steps.gif" width="100%"/>
#### 近期更新
- 2020.12.18: 支持对单个标记框进行重新识别(by [ninetailskim](https://github.com/ninetailskim) ),完善快捷键。
## 安装
### 1. 安装PaddleOCR
......@@ -72,6 +76,26 @@ python3 PPOCRLabel.py --lang ch
| crop_img | 识别数据。按照检测框切割后的图片。与rec_gt.txt同时产生。 |
## 说明
### 快捷键
| 快捷键 | 说明 |
| ---------------- | ---------------------------- |
| Ctrl + shift + A | 自动标注所有未确认过的图片 |
| Ctrl + shift + R | 对当前图片的所有标记重新识别 |
| W | 新建矩形框 |
| Q | 新建四点框 |
| Ctrl + E | 编辑所选框标签 |
| Ctrl + R | 重新识别所选标记 |
| Backspace | 删除所选框 |
| Ctrl + V | 确认本张图片标记 |
| Ctrl + Shift + d | 删除本张图片 |
| D | 下一张图片 |
| A | 上一张图片 |
| Ctrl++ | 放大 |
| Ctrl-- | 缩小 |
| ↑→↓← | 移动标记框 |
### 内置模型
- 默认模型:PPOCRLabel默认使用PaddleOCR中的中英文超轻量OCR模型,支持中英文与数字识别,多种语言检测。
......
......@@ -46,8 +46,9 @@ class Worker(QThread):
chars = res[1][0]
cond = res[1][1]
posi = res[0]
strs += "Transcription: " + chars + " Probability: " + str(
cond) + " Location: " + json.dumps(posi) + '\n'
strs += "Transcription: " + chars + " Probability: " + str(cond) + \
" Location: " + json.dumps(posi) +'\n'
# Sending large amounts of data repeatedly through pyqtSignal may affect the program efficiency
self.listValue.emit(strs)
self.mainThread.result_dic = self.result_dic
self.mainThread.filePath = Imgpath
......
此差异已折叠。
......@@ -95,3 +95,4 @@ autolabeling=自动标注中
hideBox=隐藏所有标注
showBox=显示所有标注
saveLabel=保存标记结果
singleRe=重识别此区块
\ No newline at end of file
saveAsDetail=將標籤保存到其他文件
changeSaveDir=改變存放目錄
openFile=開啟檔案
shapeLineColorDetail=更改線條顏色
resetAll=重置
crtBox=創建區塊
crtBoxDetail=畫一個區塊
dupBoxDetail=複製區塊
verifyImg=驗證圖像
zoominDetail=放大
verifyImgDetail=驗證圖像
saveDetail=將標籤存到
openFileDetail=打開圖像
fitWidthDetail=調整到窗口寬度
tutorial=YouTube教學
editLabel=編輯標籤
openAnnotationDetail=打開標籤文件
quit=結束
shapeFillColorDetail=更改填充顏色
closeCurDetail=關閉目前檔案
closeCur=關閉
deleteImg=刪除圖像
deleteImgDetail=刪除目前圖像
fitWin=調整到跟窗口一樣大小
delBox=刪除選取區塊
boxLineColorDetail=選擇框線顏色
originalsize=原始大小
resetAllDetail=重設所有設定
zoomoutDetail=畫面縮小
save=儲存
saveAs=另存為
fitWinDetail=縮放到窗口一樣
openDir=開啟目錄
copyPrevBounding=複製當前圖像中的上一個邊界框
showHide=顯示/隱藏標籤
changeSaveFormat=更改儲存格式
shapeFillColor=填充顏色
quitApp=離開本程式
dupBox=複製區塊
delBoxDetail=刪除區塊
zoomin=放大畫面
info=資訊
openAnnotation=開啟標籤
prevImgDetail=上一個圖像
fitWidth=縮放到跟畫面一樣寬
zoomout=縮小畫面
changeSavedAnnotationDir=更改預設標籤存的目錄
nextImgDetail=下一個圖像
originalsizeDetail=放大到原始大小
prevImg=上一個圖像
tutorialDetail=顯示示範內容
shapeLineColor=形狀線條顏色
boxLineColor=日期分隔線顏色
editLabelDetail=修改所選區塊的標籤
nextImg=下一張圖片
useDefaultLabel=使用預設標籤
useDifficult=有難度的
boxLabelText=區塊的標籤
labels=標籤
autoSaveMode=自動儲存模式
singleClsMode=單一類別模式
displayLabel=顯示類別
fileList=檔案清單
files=檔案
iconList=XX
icon=XX
advancedMode=進階模式
advancedModeDetail=切到進階模式
showAllBoxDetail=顯示所有區塊
hideAllBoxDetail=隱藏所有區塊
......@@ -95,3 +95,4 @@ autolabeling=Automatic Labeling
hideBox=Hide All Box
showBox=Show All Box
saveLabel=Save Label
singleRe=Re-recognition RectBox
\ No newline at end of file
......@@ -69,12 +69,14 @@ fusion_generator:
1. You can run `tools/synth_image` and generate the demo image, which is saved in the current folder.
```python
python3 -m tools.synth_image -c configs/config.yml --style_image examples/style_images/2.jpg --text_corpus PaddleOCR --language en
python3 tools/synth_image.py -c configs/config.yml --style_image examples/style_images/2.jpg --text_corpus PaddleOCR --language en
```
* Note 1: The language option must correspond to the corpus. Currently, the tool only supports English, Simplified Chinese and Korean.
* Note 2: Style-Text is mainly used to generate images for OCR recognition models,
so the height of style images should be around 32 pixels. Images of other sizes may produce poor results.
* Note 3: You can modify `use_gpu` in `configs/config.yml` to determine whether to use GPU for prediction.
For example, enter the following image and corpus `PaddleOCR`.
......@@ -139,8 +141,9 @@ We provide a general dataset containing Chinese, English and Korean (50,000 imag
2. You can run the following command to start synthesis task:
``` bash
python -m tools.synth_dataset.py -c configs/dataset_config.yml
python3 tools/synth_dataset.py -c configs/dataset_config.yml
```
We also provide example corpus and images in `examples` folder.
<div align="center">
<img src="examples/style_images/1.jpg" width="300">
......
......@@ -61,11 +61,12 @@ fusion_generator:
输入一张风格图和一段文字语料,运行tools/synth_image,合成单张图片,结果图像保存在当前目录下:
```python
python3 -m tools.synth_image -c configs/config.yml --style_image examples/style_images/2.jpg --text_corpus PaddleOCR --language en
python3 tools/synth_image.py -c configs/config.yml --style_image examples/style_images/2.jpg --text_corpus PaddleOCR --language en
```
* 注1:语言选项和语料相对应,目前该工具只支持英文、简体中文和韩语。
* 注2:Style-Text生成的数据主要应用于OCR识别场景。基于当前PaddleOCR识别模型的设计,我们主要支持高度在32左右的风格图像。
如果输入图像尺寸相差过多,效果可能不佳。
* 注3:可以通过修改配置文件中的`use_gpu`(true或者false)参数来决定是否使用GPU进行预测。
例如,输入如下图片和语料"PaddleOCR":
......@@ -127,7 +128,7 @@ python3 -m tools.synth_image -c configs/config.yml --style_image examples/style_
2. 运行`tools/synth_dataset`合成数据:
``` bash
python -m tools.synth_dataset -c configs/dataset_config.yml
python tools/synth_dataset.py -c configs/dataset_config.yml
```
我们在examples目录下提供了样例图片和语料。
<div align="center">
......
......@@ -28,6 +28,7 @@ class StyleTextRecPredictor(object):
], "Generator {} not supported.".format(algorithm)
use_gpu = config["Global"]['use_gpu']
check_gpu(use_gpu)
paddle.set_device('gpu' if use_gpu else 'cpu')
self.logger = get_logger()
self.generator = getattr(style_text_rec, algorithm)(config)
self.height = config["Global"]["image_height"]
......
......@@ -11,6 +11,14 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
from engine.synthesisers import DatasetSynthesiser
......
......@@ -16,13 +16,13 @@ import cv2
import sys
import glob
from utils.config import ArgsParser
from engine.synthesisers import ImageSynthesiser
__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
from utils.config import ArgsParser
from engine.synthesisers import ImageSynthesiser
def synth_image():
args = ArgsParser().parse_args()
......
......@@ -107,10 +107,10 @@ make inference_lib_dist
For more compilation parameter options, please refer to the official website of the Paddle C++ inference library:[https://www.paddlepaddle.org.cn/documentation/docs/en/advanced_guide/inference_deployment/inference/build_and_install_lib_en.html](https://www.paddlepaddle.org.cn/documentation/docs/en/advanced_guide/inference_deployment/inference/build_and_install_lib_en.html).
* After the compilation process, you can see the following files in the folder of `build/fluid_inference_install_dir/`.
* After the compilation process, you can see the following files in the folder of `build/paddle_inference_install_dir/`.
```
build/fluid_inference_install_dir/
build/paddle_inference_install_dir/
|-- CMakeCache.txt
|-- paddle
|-- third_party
......
......@@ -81,14 +81,14 @@ void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img,
else if (resize_h / 32 < 1 + 1e-5)
resize_h = 32;
else
resize_h = (resize_h / 32 - 1) * 32;
resize_h = (resize_h / 32) * 32;
if (resize_w % 32 == 0)
resize_w = resize_w;
else if (resize_w / 32 < 1 + 1e-5)
resize_w = 32;
else
resize_w = (resize_w / 32 - 1) * 32;
resize_w = (resize_w / 32) * 32;
cv::resize(img, resize_img, cv::Size(resize_w, resize_h));
......
......@@ -11,7 +11,7 @@ max_side_len 960
det_db_thresh 0.3
det_db_box_thresh 0.5
det_db_unclip_ratio 2.0
det_model_dir ./inference/ch__ppocr_mobile_v2.0_det_infer/
det_model_dir ./inference/ch_ppocr_mobile_v2.0_det_infer/
# cls config
use_angle_cls 0
......
......@@ -117,7 +117,7 @@ python3 tools/eval.py -c configs/cls/cls_mv3.yml -o Global.checkpoints={path/to/
```
# 预测分类结果
python3 tools/infer_cls.py -c configs/cls/cls_mv3.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/ch/word_1.jpg
python3 tools/infer_cls.py -c configs/cls/cls_mv3.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/ch/word_1.jpg
```
预测图片:
......
......@@ -120,16 +120,16 @@ python3 tools/eval.py -c configs/det/det_mv3_db.yml -o Global.checkpoints="{pat
测试单张图像的检测效果
```shell
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.checkpoints="./output/det_db/best_accuracy"
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false
```
测试DB模型时,调整后处理阈值,
```shell
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.checkpoints="./output/det_db/best_accuracy" PostProcess.box_thresh=0.6 PostProcess.unclip_ratio=1.5
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false PostProcess.box_thresh=0.6 PostProcess.unclip_ratio=1.5
```
测试文件夹下所有图像的检测效果
```shell
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/" Global.checkpoints="./output/det_db/best_accuracy"
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false
```
......@@ -245,7 +245,10 @@ python3 tools/infer/predict_det.py --det_algorithm="SAST" --image_dir="./doc/img
超轻量中文识别模型推理,可以执行如下命令:
```
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --rec_model_dir="./inference/rec_crnn/"
# 下载超轻量中文识别模型:
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar
tar xf ch_ppocr_mobile_v2.0_rec_infer.tar
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --rec_model_dir="ch_ppocr_mobile_v2.0_rec_infer"
```
![](../imgs_words/ch/word_4.jpg)
......@@ -266,7 +269,6 @@ Predicts of ./doc/imgs_words/ch/word_4.jpg:('实力活力', 0.98458153)
```
python3 tools/export_model.py -c configs/rec/rec_r34_vd_none_bilstm_ctc.yml -o Global.pretrained_model=./rec_r34_vd_none_bilstm_ctc_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/rec_crnn
```
CRNN 文本识别模型推理,可以执行如下命令:
......@@ -327,7 +329,10 @@ Predicts of ./doc/imgs_words/korean/1.jpg:('바탕으로', 0.9948904)
方向分类模型推理,可以执行如下命令:
```
python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --cls_model_dir="./inference/cls/"
# 下载超轻量中文方向分类器模型:
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar
tar xf ch_ppocr_mobile_v2.0_cls_infer.tar
python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --cls_model_dir="ch_ppocr_mobile_v2.0_cls_infer"
```
![](../imgs_words/ch/word_1.jpg)
......
......@@ -324,7 +324,6 @@ Eval:
评估数据集可以通过 `configs/rec/rec_icdar15_train.yml` 修改Eval中的 `label_file_path` 设置。
*注意* 评估时必须确保配置文件中 infer_img 字段为空
```
# GPU 评估, Global.checkpoints 为待测权重
python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy
......@@ -342,7 +341,7 @@ python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec
```
# 预测英文结果
python3 tools/infer_rec.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/en/word_1.png
python3 tools/infer_rec.py -c configs/rec/rec_icdar15_train.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/en/word_1.png
```
预测图片:
......@@ -361,7 +360,7 @@ infer_img: doc/imgs_words/en/word_1.png
```
# 预测中文结果
python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/ch/word_1.jpg
python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/ch/word_1.jpg
```
预测图片:
......
......@@ -119,7 +119,7 @@ Use `Global.infer_img` to specify the path of the predicted picture or folder, a
```
# Predict English results
python3 tools/infer_cls.py -c configs/cls/cls_mv3.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words_en/word_10.png
python3 tools/infer_cls.py -c configs/cls/cls_mv3.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words_en/word_10.png
```
Input image:
......
......@@ -113,16 +113,16 @@ python3 tools/eval.py -c configs/det/det_mv3_db.yml -o Global.checkpoints="{pat
Test the detection result on a single image:
```shell
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.checkpoints="./output/det_db/best_accuracy"
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false
```
When testing the DB model, adjust the post-processing threshold:
```shell
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.checkpoints="./output/det_db/best_accuracy" PostProcess.box_thresh=0.6 PostProcess.unclip_ratio=1.5
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false PostProcess.box_thresh=0.6 PostProcess.unclip_ratio=1.5
```
Test the detection result on all images in the folder:
```shell
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/" Global.checkpoints="./output/det_db/best_accuracy"
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false
```
......@@ -255,15 +255,18 @@ The following will introduce the lightweight Chinese recognition model inference
For lightweight Chinese recognition model inference, you can execute the following commands:
```
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --rec_model_dir="./inference/rec_crnn/"
# download CRNN text recognition inference model
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar
tar xf ch_ppocr_mobile_v2.0_rec_infer.tar
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_10.png" --rec_model_dir="ch_ppocr_mobile_v2.0_rec_infer"
```
![](../imgs_words/ch/word_4.jpg)
![](../imgs_words_en/word_10.png)
After executing the command, the prediction results (recognized text and score) of the above image will be printed on the screen.
```bash
Predicts of ./doc/imgs_words/ch/word_4.jpg:('实力活力', 0.98458153)
Predicts of ./doc/imgs_words_en/word_10.png:('PAIN', 0.9897658)
```
<a name="CTC-BASED_RECOGNITION"></a>
......@@ -339,7 +342,12 @@ For angle classification model inference, you can execute the following commands
```
python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words_en/word_10.png" --cls_model_dir="./inference/cls/"
```
```
# download text angle class inference model:
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar
tar xf ch_ppocr_mobile_v2.0_cls_infer.tar
python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words_en/word_10.png" --cls_model_dir="ch_ppocr_mobile_v2.0_cls_infer"
```
![](../imgs_words_en/word_10.png)
After executing the command, the prediction results (classification angle and score) of the above image will be printed on the screen.
......
......@@ -317,11 +317,11 @@ Eval:
<a name="EVALUATION"></a>
### EVALUATION
The evaluation data set can be modified via `configs/rec/rec_icdar15_reader.yml` setting of `label_file_path` in EvalReader.
The evaluation dataset can be set by modifying the `Eval.dataset.label_file_list` field in the `configs/rec/rec_icdar15_train.yml` file.
```
# GPU evaluation, Global.checkpoints is the weight to be tested
python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_icdar15_reader.yml -o Global.checkpoints={path/to/weights}/best_accuracy
python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy
```
<a name="PREDICTION"></a>
......@@ -336,7 +336,7 @@ The default prediction picture is stored in `infer_img`, and the weight is speci
```
# Predict English results
python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.checkpoints={path/to/weights}/best_accuracy TestReader.infer_img=doc/imgs_words/en/word_1.jpg
python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/en/word_1.jpg
```
Input image:
......@@ -354,7 +354,7 @@ The configuration file used for prediction must be consistent with the training.
```
# Predict Chinese results
python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.checkpoints={path/to/weights}/best_accuracy TestReader.infer_img=doc/imgs_words/ch/word_1.jpg
python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/ch/word_1.jpg
```
Input image:
......
doc/joinus.PNG

271.9 KB | W: | H:

doc/joinus.PNG

211.9 KB | W: | H:

doc/joinus.PNG
doc/joinus.PNG
doc/joinus.PNG
doc/joinus.PNG
  • 2-up
  • Swipe
  • Onion skin
......@@ -262,8 +262,8 @@ class PaddleOCR(predict_system.TextSystem):
logger.error('rec_algorithm must in {}'.format(SUPPORT_REC_MODEL))
sys.exit(0)
postprocess_params.rec_char_dict_path = Path(
__file__).parent / postprocess_params.rec_char_dict_path
postprocess_params.rec_char_dict_path = str(
Path(__file__).parent / postprocess_params.rec_char_dict_path)
# init det_model and rec_model
super().__init__(postprocess_params)
......
......@@ -32,7 +32,7 @@ setup(
package_dir={'paddleocr': ''},
include_package_data=True,
entry_points={"console_scripts": ["paddleocr= paddleocr.paddleocr:main"]},
version='2.0.1',
version='2.0.2',
install_requires=requirements,
license='Apache License 2.0',
description='Awesome OCR toolkits based on PaddlePaddle (8.6M ultra-lightweight pre-trained model, support training and deployment among server, mobile, embeded and IoT devices',
......
......@@ -35,6 +35,7 @@ logger = get_logger()
class TextDetector(object):
def __init__(self, args):
self.args = args
self.det_algorithm = args.det_algorithm
self.use_zero_copy_run = args.use_zero_copy_run
pre_process_list = [{
......@@ -70,6 +71,9 @@ class TextDetector(object):
postprocess_params["cover_thresh"] = args.det_east_cover_thresh
postprocess_params["nms_thresh"] = args.det_east_nms_thresh
elif self.det_algorithm == "SAST":
pre_process_list[0] = {
'DetResizeForTest': {'resize_long': args.det_limit_side_len}
}
postprocess_params['name'] = 'SASTPostProcess'
postprocess_params["score_thresh"] = args.det_sast_score_thresh
postprocess_params["nms_thresh"] = args.det_sast_nms_thresh
......
......@@ -33,6 +33,7 @@ def parse_args():
parser.add_argument("--use_gpu", type=str2bool, default=True)
parser.add_argument("--ir_optim", type=str2bool, default=True)
parser.add_argument("--use_tensorrt", type=str2bool, default=False)
parser.add_argument("--use_fp16", type=str2bool, default=False)
parser.add_argument("--gpu_mem", type=int, default=8000)
# params for text detector
......@@ -46,7 +47,7 @@ def parse_args():
parser.add_argument("--det_db_thresh", type=float, default=0.3)
parser.add_argument("--det_db_box_thresh", type=float, default=0.5)
parser.add_argument("--det_db_unclip_ratio", type=float, default=1.6)
parser.add_argument("--max_batch_size", type=int, default=10)
# EAST parmas
parser.add_argument("--det_east_score_thresh", type=float, default=0.8)
parser.add_argument("--det_east_cover_thresh", type=float, default=0.1)
......@@ -62,7 +63,7 @@ def parse_args():
parser.add_argument("--rec_model_dir", type=str)
parser.add_argument("--rec_image_shape", type=str, default="3, 32, 320")
parser.add_argument("--rec_char_type", type=str, default='ch')
parser.add_argument("--rec_batch_num", type=int, default=6)
parser.add_argument("--rec_batch_num", type=int, default=1)
parser.add_argument("--max_text_length", type=int, default=25)
parser.add_argument(
"--rec_char_dict_path",
......@@ -78,7 +79,7 @@ def parse_args():
parser.add_argument("--cls_model_dir", type=str)
parser.add_argument("--cls_image_shape", type=str, default="3, 48, 192")
parser.add_argument("--label_list", type=list, default=['0', '180'])
parser.add_argument("--cls_batch_num", type=int, default=30)
parser.add_argument("--cls_batch_num", type=int, default=6)
parser.add_argument("--cls_thresh", type=float, default=0.9)
parser.add_argument("--enable_mkldnn", type=str2bool, default=False)
......@@ -113,6 +114,11 @@ def create_predictor(args, mode, logger):
if args.use_gpu:
config.enable_use_gpu(args.gpu_mem, 0)
if args.use_tensorrt:
config.enable_tensorrt_engine(
precision_mode=AnalysisConfig.Precision.Half
if args.use_fp16 else AnalysisConfig.Precision.Float32,
max_batch_size=args.max_batch_size)
else:
config.disable_gpu()
config.set_cpu_math_library_num_threads(6)
......
......@@ -332,7 +332,7 @@ def eval(model, valid_dataloader, post_process_class, eval_class):
return metirc
def preprocess():
def preprocess(is_train=False):
FLAGS = ArgsParser().parse_args()
config = load_config(FLAGS.config)
merge_config(FLAGS.opt)
......@@ -350,15 +350,17 @@ def preprocess():
device = paddle.set_device(device)
config['Global']['distributed'] = dist.get_world_size() != 1
if is_train:
# save_config
save_model_dir = config['Global']['save_model_dir']
os.makedirs(save_model_dir, exist_ok=True)
with open(os.path.join(save_model_dir, 'config.yml'), 'w') as f:
yaml.dump(dict(config), f, default_flow_style=False, sort_keys=False)
logger = get_logger(
name='root', log_file='{}/train.log'.format(save_model_dir))
yaml.dump(
dict(config), f, default_flow_style=False, sort_keys=False)
log_file = '{}/train.log'.format(save_model_dir)
else:
log_file = None
logger = get_logger(name='root', log_file=log_file)
if config['Global']['use_visualdl']:
from visualdl import LogWriter
vdl_writer_path = '{}/vdl/'.format(save_model_dir)
......
......@@ -110,6 +110,6 @@ def test_reader(config, device, logger):
if __name__ == '__main__':
config, device, logger, vdl_writer = program.preprocess()
config, device, logger, vdl_writer = program.preprocess(is_train=True)
main(config, device, logger, vdl_writer)
# test_reader(config, device, logger)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册