未验证 提交 e77b1f0f 编写于 作者: W WuZhe 提交者: GitHub

Merge branch 'dygraph' into dygraph

......@@ -61,7 +61,7 @@ from combobox import ComboBox
from libs.constants import *
from libs.utils import *
from libs.settings import Settings
from libs.shape import Shape, DEFAULT_LINE_COLOR, DEFAULT_FILL_COLOR
from libs.shape import Shape, DEFAULT_LINE_COLOR, DEFAULT_FILL_COLOR,DEFAULT_LOCK_COLOR
from libs.stringBundle import StringBundle
from libs.canvas import Canvas
from libs.zoomWidget import ZoomWidget
......@@ -126,7 +126,7 @@ class MainWindow(QMainWindow, WindowMixin):
self.labelHist = []
self.lastOpenDir = None
self.result_dic = []
self.result_dic_locked = []
self.changeFileFolder = False
self.haveAutoReced = False
self.labelFile = None
......@@ -395,6 +395,7 @@ class MainWindow(QMainWindow, WindowMixin):
delete = action(getStr('delBox'), self.deleteSelectedShape,
'backspace', 'delete', getStr('delBoxDetail'), enabled=False)
copy = action(getStr('dupBox'), self.copySelectedShape,
'Ctrl+C', 'copy', getStr('dupBoxDetail'),
enabled=False)
......@@ -405,6 +406,7 @@ class MainWindow(QMainWindow, WindowMixin):
showAll = action(getStr('showBox'), partial(self.togglePolygons, True),
'Ctrl+A', 'hide', getStr('showAllBoxDetail'),
enabled=False)
help = action(getStr('tutorial'), self.showTutorialDialog, None, 'help', getStr('tutorialDetail'))
showInfo = action(getStr('info'), self.showInfoDialog, None, 'help', getStr('info'))
......@@ -476,6 +478,10 @@ class MainWindow(QMainWindow, WindowMixin):
undo = action(getStr("undo"), self.undoShapeEdit,
'Ctrl+Z', "undo", getStr("undo"), enabled=False)
lock = action(getStr("lockBox"), self.lockSelectedShape,
None, "lock", getStr("lockBoxDetail"),
enabled=False)
self.editButton.setDefaultAction(edit)
self.newButton.setDefaultAction(create)
......@@ -538,13 +544,13 @@ class MainWindow(QMainWindow, WindowMixin):
fitWindow=fitWindow, fitWidth=fitWidth,
zoomActions=zoomActions, saveLabel=saveLabel,
undo=undo, undoLastPoint=undoLastPoint,open_dataset_dir=open_dataset_dir,
rotateLeft=rotateLeft,rotateRight=rotateRight,
rotateLeft=rotateLeft,rotateRight=rotateRight,lock=lock,
fileMenuActions=(
opendir, open_dataset_dir, saveLabel, resetAll, quit),
beginner=(), advanced=(),
editMenu=(createpoly, edit, copy, delete,singleRere,None, undo, undoLastPoint,
None, rotateLeft, rotateRight, None, color1, self.drawSquaresOption),
beginnerContext=(create, edit, copy, delete, singleRere, rotateLeft, rotateRight,),
None, rotateLeft, rotateRight, None, color1, self.drawSquaresOption,lock),
beginnerContext=(create, edit, copy, delete, singleRere, rotateLeft, rotateRight,lock),
advancedContext=(createMode, editMode, edit, copy,
delete, shapeLineColor, shapeFillColor),
onLoadActive=(
......@@ -998,6 +1004,7 @@ class MainWindow(QMainWindow, WindowMixin):
self.actions.delete.setEnabled(n_selected)
self.actions.copy.setEnabled(n_selected)
self.actions.edit.setEnabled(n_selected == 1)
self.actions.lock.setEnabled(n_selected)
def addLabel(self, shape):
shape.paintLabel = self.displayLabelOption.isChecked()
......@@ -1041,7 +1048,7 @@ class MainWindow(QMainWindow, WindowMixin):
def loadLabels(self, shapes):
s = []
for label, points, line_color, fill_color, difficult in shapes:
shape = Shape(label=label)
shape = Shape(label=label,line_color=line_color)
for x, y in points:
# Ensure the labels are within the bounds of the image. If not, fix them.
......@@ -1051,6 +1058,7 @@ class MainWindow(QMainWindow, WindowMixin):
shape.addPoint(QPointF(x, y))
shape.difficult = difficult
#shape.locked = False
shape.close()
s.append(shape)
......@@ -1063,10 +1071,12 @@ class MainWindow(QMainWindow, WindowMixin):
# shape.fill_color = QColor(*fill_color)
# else:
# shape.fill_color = generateColorByText(label)
self.addLabel(shape)
self.updateComboBox()
self.canvas.loadShapes(s)
def singleLabel(self, shape):
if shape is None:
......@@ -1106,10 +1116,9 @@ class MainWindow(QMainWindow, WindowMixin):
difficult=s.difficult) # bool
shapes = [] if mode == 'Auto' else \
[format_shape(shape) for shape in self.canvas.shapes]
[format_shape(shape) for shape in self.canvas.shapes if shape.line_color != DEFAULT_LOCK_COLOR]
# Can add differrent annotation formats here
for box in self.result_dic:
for box in self.result_dic :
trans_dic = {"label": box[1][0], "points": box[0], 'difficult': False}
if trans_dic["label"] == "" and mode == 'Auto':
continue
......@@ -1120,7 +1129,6 @@ class MainWindow(QMainWindow, WindowMixin):
for box in shapes:
trans_dic.append({"transcription": box['label'], "points": box['points'], 'difficult': box['difficult']})
self.PPlabel[annotationFilePath] = trans_dic
if mode == 'Auto':
self.Cachelabel[annotationFilePath] = trans_dic
......@@ -1313,6 +1321,7 @@ class MainWindow(QMainWindow, WindowMixin):
# unicodeFilePath = os.path.abspath(unicodeFilePath)
# Tzutalin 20160906 : Add file list and dock to move faster
# Highlight the file item
if unicodeFilePath and self.fileListWidget.count() > 0:
if unicodeFilePath in self.mImgList:
index = self.mImgList.index(unicodeFilePath)
......@@ -1322,6 +1331,7 @@ class MainWindow(QMainWindow, WindowMixin):
###
self.iconlist.clear()
self.additems5(None)
for i in range(5):
item_tooltip = self.iconlist.item(i).toolTip()
# print(i,"---",item_tooltip)
......@@ -1340,7 +1350,6 @@ class MainWindow(QMainWindow, WindowMixin):
if unicodeFilePath and os.path.exists(unicodeFilePath):
self.canvas.verified = False
cvimg = cv2.imdecode(np.fromfile(unicodeFilePath, dtype=np.uint8), 1)
height, width, depth = cvimg.shape
cvimg = cv2.cvtColor(cvimg, cv2.COLOR_BGR2RGB)
......@@ -1361,16 +1370,19 @@ class MainWindow(QMainWindow, WindowMixin):
else:
self.dirty = False
self.actions.save.setEnabled(True)
if len(self.canvas.lockedShapes) != 0:
self.actions.save.setEnabled(True)
self.setDirty()
self.canvas.setEnabled(True)
self.adjustScale(initial=True)
self.paintCanvas()
self.addRecentFile(self.filePath)
self.toggleActions(True)
self.showBoundingBoxFromPPlabel(filePath)
self.setWindowTitle(__appname__ + ' ' + filePath)
# Default : select last item if there is at least one item
if self.labelList.count():
self.labelList.setCurrentItem(self.labelList.item(self.labelList.count() - 1))
......@@ -1380,15 +1392,23 @@ class MainWindow(QMainWindow, WindowMixin):
return True
return False
def showBoundingBoxFromPPlabel(self, filePath):
width, height = self.image.width(), self.image.height()
imgidx = self.getImglabelidx(filePath)
if imgidx not in self.PPlabel.keys():
return
shapes = []
for box in self.PPlabel[imgidx]:
shapes.append((box['transcription'], box['points'], None, None, box['difficult']))
shapes =[]
#box['ratio'] of the shapes saved in lockedShapes contains the ratio of the
# four corner coordinates of the shapes to the height and width of the image
for box in self.canvas.lockedShapes:
if self.canvas.isInTheSameImage:
shapes.append((box['transcription'], [[s[0]*width,s[1]*height]for s in box['ratio']],
DEFAULT_LOCK_COLOR, None, box['difficult']))
else:
shapes.append(('锁定框:待检测', [[s[0]*width,s[1]*height]for s in box['ratio']],
DEFAULT_LOCK_COLOR, None, box['difficult']))
if imgidx in self.PPlabel.keys():
for box in self.PPlabel[imgidx]:
shapes.append((box['transcription'], box['points'], None, None, box['difficult']))
self.loadLabels(shapes)
self.canvas.verified = False
......@@ -1646,9 +1666,37 @@ class MainWindow(QMainWindow, WindowMixin):
else:
return fullFilePath
return ''
def saveLockedShapes(self):
self.canvas.lockedShapes = []
self.canvas.selectedShapes = []
for s in self.canvas.shapes:
if s.line_color == DEFAULT_LOCK_COLOR:
self.canvas.selectedShapes.append(s)
self.lockSelectedShape()
for s in self.canvas.shapes:
if s.line_color == DEFAULT_LOCK_COLOR:
self.canvas.selectedShapes.remove(s)
self.canvas.shapes.remove(s)
def _saveFile(self, annotationFilePath, mode='Manual'):
if len(self.canvas.lockedShapes) != 0:
self.saveLockedShapes()
if mode == 'Manual':
self.result_dic_locked = []
img = cv2.imread(self.filePath)
width, height = self.image.width(), self.image.height()
for shape in self.canvas.lockedShapes:
box = [[int(p[0]*width), int(p[1]*height)] for p in shape['ratio']]
assert len(box) == 4
result = [(shape['transcription'],1)]
result.insert(0, box)
self.result_dic_locked.append(result)
self.result_dic += self.result_dic_locked
self.result_dic_locked = []
if annotationFilePath and self.saveLabels(annotationFilePath, mode=mode):
self.setClean()
self.statusBar().showMessage('Saved to %s' % annotationFilePath)
......@@ -1663,13 +1711,13 @@ class MainWindow(QMainWindow, WindowMixin):
self.savePPlabel(mode='Auto')
self.fileListWidget.insertItem(int(currIndex), item)
self.openNextImg()
if not self.canvas.isInTheSameImage:
self.openNextImg()
self.actions.saveRec.setEnabled(True)
self.actions.saveLabel.setEnabled(True)
elif mode == 'Auto':
if annotationFilePath and self.saveLabels(annotationFilePath, mode=mode):
self.setClean()
self.statusBar().showMessage('Saved to %s' % annotationFilePath)
self.statusBar().show()
......@@ -1733,7 +1781,9 @@ class MainWindow(QMainWindow, WindowMixin):
if discardChanges == QMessageBox.No:
return True
elif discardChanges == QMessageBox.Yes:
self.canvas.isInTheSameImage = True
self.saveFile()
self.canvas.isInTheSameImage = False
return True
else:
return False
......@@ -1872,6 +1922,7 @@ class MainWindow(QMainWindow, WindowMixin):
# org_box = [dic['points'] for dic in self.PPlabel[self.getImglabelidx(self.filePath)]]
if self.canvas.shapes:
self.result_dic = []
self.result_dic_locked = [] # result_dic_locked stores the ocr result of self.canvas.lockedShapes
rec_flag = 0
for shape in self.canvas.shapes:
box = [[int(p.x()), int(p.y())] for p in shape.points]
......@@ -1883,21 +1934,32 @@ class MainWindow(QMainWindow, WindowMixin):
return
result = self.ocr.ocr(img_crop, cls=True, det=False)
if result[0][0] != '':
result.insert(0, box)
print('result in reRec is ', result)
self.result_dic.append(result)
if shape.line_color == DEFAULT_LOCK_COLOR:
shape.label = result[0][0]
result.insert(0, box)
self.result_dic_locked.append(result)
else:
result.insert(0, box)
self.result_dic.append(result)
else:
print('Can not recognise the box')
self.result_dic.append([box,(self.noLabelText,0)])
if self.noLabelText == shape.label or result[1][0] == shape.label:
print('label no change')
else:
rec_flag += 1
if len(self.result_dic) > 0 and rec_flag > 0:
if shape.line_color == DEFAULT_LOCK_COLOR:
shape.label = result[0][0]
self.result_dic_locked.append([box,(self.noLabelText,0)])
else:
self.result_dic.append([box,(self.noLabelText,0)])
try:
if self.noLabelText == shape.label or result[1][0] == shape.label:
print('label no change')
else:
rec_flag += 1
except IndexError as e:
print('Can not recognise the box')
if (len(self.result_dic) > 0 and rec_flag > 0)or self.canvas.lockedShapes:
self.canvas.isInTheSameImage = True
self.saveFile(mode='Auto')
self.loadFile(self.filePath)
self.canvas.isInTheSameImage = False
self.setDirty()
elif len(self.result_dic) == len(self.canvas.shapes) and rec_flag == 0:
QMessageBox.information(self, "Information", "The recognition result remains unchanged!")
......@@ -2107,6 +2169,44 @@ class MainWindow(QMainWindow, WindowMixin):
self.labelList.clearSelection()
self._noSelectionSlot = False
self.canvas.loadShapes(shapes, replace=replace)
print("loadShapes")#1
def lockSelectedShape(self):
"""lock the selsected shapes.
Add self.selectedShapes to lock self.canvas.lockedShapes,
which holds the ratio of the four coordinates of the locked shapes
to the width and height of the image
"""
width, height = self.image.width(), self.image.height()
def format_shape(s):
return dict(label=s.label, # str
line_color=s.line_color.getRgb(),
fill_color=s.fill_color.getRgb(),
ratio=[[int(p.x())/width, int(p.y())/height] for p in s.points], # QPonitF
# add chris
difficult=s.difficult) # bool
#lock
if len(self.canvas.lockedShapes) == 0:
for s in self.canvas.selectedShapes:
s.line_color = DEFAULT_LOCK_COLOR
s.locked = True
shapes = [format_shape(shape) for shape in self.canvas.selectedShapes]
trans_dic = []
for box in shapes:
trans_dic.append({"transcription": box['label'], "ratio": box['ratio'], 'difficult': box['difficult']})
self.canvas.lockedShapes = trans_dic
self.actions.save.setEnabled(True)
#unlock
else:
for s in self.canvas.shapes:
s.line_color = DEFAULT_LINE_COLOR
self.canvas.lockedShapes = []
self.result_dic_locked = []
self.setDirty()
self.actions.save.setEnabled(True)
def inverted(color):
......
......@@ -78,14 +78,14 @@ PPOCRLabel # run
```bash
cd PaddleOCR/PPOCRLabel
python3 setup.py bdist_wheel
pip3 install dist/PPOCRLabel-1.0.0-py2.py3-none-any.whl
pip3 install dist/PPOCRLabel-1.0.2-py2.py3-none-any.whl
```
#### 1.2.3 Run PPOCRLabel by Python Script
```bash
cd ./PPOCRLabel # Switch to the PPOCRLabel directory
python PPOCRLabel.py --lang ch
python PPOCRLabel.py
```
......
......@@ -78,7 +78,7 @@ PPOCRLabel --lang ch # 启动
```bash
cd PaddleOCR/PPOCRLabel
python3 setup.py bdist_wheel
pip3 install dist/PPOCRLabel-1.0.0-py2.py3-none-any.whl -i https://mirror.baidu.com/pypi/simple
pip3 install dist/PPOCRLabel-1.0.2-py2.py3-none-any.whl -i https://mirror.baidu.com/pypi/simple
```
#### 1.2.3 通过Python脚本运行PPOCRLabel
......
......@@ -87,6 +87,10 @@ class Canvas(QWidget):
#initialisation for panning
self.pan_initial_pos = QPoint()
#lockedshapes related
self.lockedShapes = []
self.isInTheSameImage = False
def setDrawingColor(self, qColor):
self.drawingLineColor = qColor
self.drawingRectColor = qColor
......
此差异已折叠。
......@@ -30,6 +30,7 @@ DEFAULT_SELECT_LINE_COLOR = QColor(255, 255, 255)
DEFAULT_SELECT_FILL_COLOR = QColor(0, 128, 255, 155)
DEFAULT_VERTEX_FILL_COLOR = QColor(0, 255, 0, 255)
DEFAULT_HVERTEX_FILL_COLOR = QColor(255, 0, 0)
DEFAULT_LOCK_COLOR = QColor(255, 0, 255)
MIN_Y_LABEL = 10
......@@ -57,7 +58,7 @@ class Shape(object):
self.selected = False
self.difficult = difficult
self.paintLabel = paintLabel
self.locked = False
self._highlightIndex = None
self._highlightMode = self.NEAR_VERTEX
self._highlightSettings = {
......
......@@ -60,7 +60,7 @@ class StringBundle:
def __createLookupFallbackList(self, localeStr):
resultPaths = []
basePath = "\strings" if os.name == 'nt' else ":/strings"
basePath = "\strings" if os.name == 'nt' else "/strings"
resultPaths.append(basePath)
if localeStr is not None:
# Don't follow standard BCP47. Simple fallback
......
......@@ -104,4 +104,6 @@ singleRe=Re-recognition RectBox
labelDialogOption=Pop-up Label Input Dialog
undo=Undo
undoLastPoint=Undo Last Point
autoSaveMode=Auto Export Label Mode
\ No newline at end of file
autoSaveMode=Auto Export Label Mode
lockBox=Lock selected box/Unlock all box
lockBoxDetail=Lock selected box/Unlock all box
\ No newline at end of file
......@@ -104,4 +104,6 @@ singleRe=重识别此区块
labelDialogOption=弹出标记输入框
undo=撤销
undoLastPoint=撤销上个点
autoSaveMode=自动导出标记结果
\ No newline at end of file
autoSaveMode=自动导出标记结果
lockBox=锁定框/解除锁定框
lockBoxDetail=若当前没有框处于锁定状态则锁定选中的框,若存在锁定框则解除所有锁定框的锁定状态
......@@ -33,7 +33,7 @@ setup(
package_dir={'PPOCRLabel': ''},
include_package_data=True,
entry_points={"console_scripts": ["PPOCRLabel= PPOCRLabel.PPOCRLabel:main"]},
version='1.0.0',
version='1.0.2',
install_requires=requirements,
license='Apache License 2.0',
description='PPOCRLabel is a semi-automatic graphic annotation tool suitable for OCR field, with built-in PPOCR model to automatically detect and re-recognize data. It is written in python3 and pyqt5, supporting rectangular box annotation and four-point annotation modes. Annotations can be directly used for the training of PPOCR detection and recognition models',
......
......@@ -39,7 +39,7 @@ PaddleOCR aims to create multilingual, awesome, leading, and practical OCR tools
- General PP-OCR server series models: detection (47.1M) + direction classifier (1.4M) + recognition (94.9M) = 143.4M
- Support Chinese, English, and digit recognition, vertical text recognition, and long text recognition
- Support multi-language recognition: about 80 languages like Korean, Japanese, German, French, etc
- document structurize system PP-Structure
- PP-Structure: a document structurize system
- support layout analysis and table recognition (support export to Excel)
- support key information extraction
- support DocVQA
......@@ -90,7 +90,7 @@ Mobile DEMO experience (based on EasyEdge and Paddle-Lite, supports iOS and Andr
| Model introduction | Model name | Recommended scene | Detection model | Direction classifier | Recognition model |
| ------------------------------------------------------------ | ---------------------------- | ----------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
| Chinese and English ultra-lightweight PP-OCRv2 model(11.6M) | ch_PP-OCRv2_xx |Mobile & Server|[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_distill_train.tar)| [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv2/ch/ch_PP-OCRv2_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_train.tar)|
| Chinese and English ultra-lightweight PP-OCRv2 model(11.6M) | ch_PP-OCRv2_xx |Mobile & Server|[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_distill_train.tar)| [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_train.tar)|
| Chinese and English ultra-lightweight PP-OCR model (9.4M) | ch_ppocr_mobile_v2.0_xx | Mobile & server |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar)|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar) |
| Chinese and English general PP-OCR model (143.4M) | ch_ppocr_server_v2.0_xx | Server |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_traingit.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_train.tar) |
......@@ -102,11 +102,11 @@ For a new language request, please refer to [Guideline for new language_requests
## Tutorials
- [Environment Preparation](./doc/doc_en/environment_en.md)
- [Quick Start](./doc/doc_en/quickstart_en.md)
- [PaddleOCR Overview and Installation](./doc/doc_en/paddleOCR_overview_en.md)
- [PaddleOCR Overview and Project Clone](./doc/doc_en/paddleOCR_overview_en.md)
- PP-OCR Industry Landing: from Training to Deployment
- [PP-OCR Model and Configuration](./doc/doc_en/models_and_config_en.md)
- [PP-OCR Model Zoo](./doc/doc_en/models_en.md)
- [PP-OCR Model Download](./doc/doc_en/models_list_en.md)
- [Python Inference for PP-OCR Model Library](./doc/doc_en/inference_ppocr_en.md)
- [Python Inference for PP-OCR Model Zoo](./doc/doc_en/inference_ppocr_en.md)
- [PP-OCR Training](./doc/doc_en/training_en.md)
- [Text Detection](./doc/doc_en/detection_en.md)
- [Text Recognition](./doc/doc_en/recognition_en.md)
......
......@@ -54,8 +54,7 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力
- 加入社区:微信扫描下方二维码加入官方交流群,与各行各业开发者充分交流,期待您的加入。
- 社区贡献:[社区贡献](./doc/doc_ch/thirdparty.md)文档中包含了社区用户**使用PaddleOCR开发的各种工具、应用**以及**为PaddleOCR贡献的功能、优化的文档与代码**等,是官方为社区开发者打造的荣誉墙、也是帮助优质项目宣传的广播站。如果您的OCR项目未被收集在文档中,可根据文档说明与我们联系。最新社区贡献可查看[此处](#社区贡献)
- 社区常规赛:作为社区贡献的具体承载形式,社区常规赛是面向OCR开发者的积分赛事。首届社区常规赛与《动手学OCR · 十讲》课程联合推广,课程详情可参考[链接](https://aistudio.baidu.com/aistudio/course/introduce/25207),课程奖励与作业说明可参考[链接](https://github.com/PaddlePaddle/PaddleOCR/issues/4982)
- 社区常规赛:作为社区贡献的具体承载形式,社区常规赛是面向OCR开发者的积分赛事。首届社区常规赛与[《动手学OCR · 十讲》课程](https://aistudio.baidu.com/aistudio/course/introduce/25207)联合推广。社区常规赛的赛题详情与报名方法可参考[链接](https://github.com/PaddlePaddle/PaddleOCR/issues/4982)
<div align="center">
<img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/dygraph/doc/joinus.PNG" width = "200" height = "200" />
......@@ -64,22 +63,33 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力
## 零代码体验
- 在线网站体验:超轻量PP-OCR mobile模型体验地址:https://www.paddlepaddle.org.cn/hub/scene/ocr
- 移动端:[安装包DEMO下载地址](https://ai.baidu.com/easyedge/app/openSource?from=paddlelite)(基于EasyEdge和Paddle-Lite, 支持iOS和Android系统)
<a name="模型下载"></a>
## PP-OCR系列模型列表(更新中)
| 模型简介 | 模型名称 | 推荐场景 | 检测模型 | 方向分类器 | 识别模型 |
| ------------------------------------- | ----------------------- | --------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
| 中英文超轻量PP-OCRv2模型(13.0M) | ch_PP-OCRv2_xx | 移动端&服务器端 | [推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_distill_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_train.tar) |
| 中英文超轻量PP-OCR mobile模型(9.4M) | ch_ppocr_mobile_v2.0_xx | 移动端&服务器端 | [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_pre.tar) |
| 中英文通用PP-OCR server模型(143.4M) | ch_ppocr_server_v2.0_xx | 服务器端 | [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_pre.tar) |
更多模型下载(包括多语言),可以参考[PP-OCR 系列模型下载](./doc/doc_ch/models_list.md)
## 文档教程
- [运行环境准备](./doc/doc_ch/environment.md)
- [快速开始(中英文/多语言/文档分析)](./doc/doc_ch/quickstart.md)
- [PaddleOCR全景图与项目克隆](./doc/doc_ch/paddleOCR_overview.md)
- PP-OCR产业落地:从训练到部署
- [PP-OCR模型与配置文件](./doc/doc_ch/models_and_config.md)
- [PP-OCR模型](./doc/doc_ch/models.md)
- [PP-OCR模型下载](./doc/doc_ch/models_list.md)
- [PP-OCR模型库快速推理](./doc/doc_ch/inference_ppocr.md)
- [PP-OCR模型训练](./doc/doc_ch/training.md)
- [文本检测](./doc/doc_ch/detection.md)
- [文本识别](./doc/doc_ch/recognition.md)
- [文本方向分类器](./doc/doc_ch/angle_class.md)
- [知识蒸馏](./doc/doc_ch/knowledge_distillation.md)
- [配置文件内容与生成](./doc/doc_ch/config.md)
- PP-OCR模型推理部署
- [基于C++预测引擎推理](./deploy/cpp_infer/readme.md)
......@@ -121,7 +131,7 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力
</div>
[1] PP-OCR是一个实用的超轻量OCR系统。主要由DB文本检测、检测框矫正和CRNN文本识别三部分组成。该系统从骨干网络选择和调整、预测头部的设计、数据增强、学习率变换策略、正则化参数选择、预训练模型使用以及模型自动裁剪量化8个方面,采用19个有效策略,对各个模块的模型进行效果调优和瘦身(如绿框所示),最终得到整体大小为3.5M的超轻量中英文OCR和2.8M的英文数字OCR。更多细节请参考PP-OCR技术方案 https://arxiv.org/abs/2009.09941
[2] PP-OCRv2在PP-OCR的基础上,进一步在5个方面重点优化,检测模型采用CML协同互学习知识蒸馏策略和CopyPaste数据增广策略;识别模型采用LCNet轻量级骨干网络、UDML 改进知识蒸馏策略和Enhanced CTC loss损失函数改进(如上图红框所示),进一步在推理速度和预测效果上取得明显提升。更多细节请参考PP-OCRv2[技术报告](https://arxiv.org/abs/2109.03144)
[2] PP-OCRv2在PP-OCR的基础上,进一步在5个方面重点优化,检测模型采用CML协同互学习知识蒸馏策略和CopyPaste数据增广策略;识别模型采用LCNet轻量级骨干网络、UDML 改进知识蒸馏策略和[Enhanced CTC loss](./doc/doc_ch/enhanced_ctc_loss.md)损失函数改进(如上图红框所示),进一步在推理速度和预测效果上取得明显提升。更多细节请参考PP-OCRv2[技术报告](https://arxiv.org/abs/2109.03144)
<a name="效果展示"></a>
......
......@@ -21,6 +21,7 @@ Architecture:
model_type: det
Models:
Teacher:
pretrained: ./pretrain_models/ch_ppocr_server_v2.0_det_train/best_accuracy
freeze_params: true
return_all_feats: false
model_type: det
......@@ -36,6 +37,7 @@ Architecture:
name: DBHead
k: 50
Student:
pretrained:
freeze_params: false
return_all_feats: false
model_type: det
......@@ -52,6 +54,7 @@ Architecture:
name: DBHead
k: 50
Student2:
pretrained:
freeze_params: false
return_all_feats: false
model_type: det
......
......@@ -18,6 +18,7 @@ Global:
Architecture:
name: DistillationModel
algorithm: Distillation
model_type: det
Models:
Student:
pretrained: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
......
......@@ -18,6 +18,7 @@ Global:
Architecture:
name: DistillationModel
algorithm: Distillation
model_type: det
Models:
Student:
pretrained: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
......
......@@ -18,8 +18,8 @@ python3.7 -m pip install paddle2onnx
- 安装 ONNX
```
# 建议安装 1.4.0 版本,可根据环境更换版本号
python3.7 -m pip install onnxruntime==1.4.0
# 建议安装 1.9.0 版本,可根据环境更换版本号
python3.7 -m pip install onnxruntime==1.9.0
```
## 2. 模型转换
......@@ -47,13 +47,15 @@ paddle2onnx --model_dir=./inference/ch_ppocr_mobile_v2.0_det_infer/ \
--params_filename=inference.pdiparams \
--save_file=./inference/det_mobile_onnx/model.onnx \
--opset_version=10 \
--input_shape_dict="{'x': [-1, 3, -1, -1]}" \
--enable_onnx_checker=True
```
执行完毕后,ONNX 模型会被保存在 `./inference/det_mobile_onnx/` 路径下
* 注意:以下几个模型暂不支持转换为 ONNX 模型:
NRTR、SAR、RARE、SRN
* 注意:对于OCR模型,转化过程中必须采用动态shape的形式,即加入选项--input_shape_dict="{'x': [-1, 3, -1, -1]}",否则预测结果可能与直接使用Paddle预测有细微不同。
另外,以下几个模型暂不支持转换为 ONNX 模型:
NRTR、SAR、RARE、SRN
## 3. onnx 预测
......@@ -72,5 +74,3 @@ root INFO: 1.jpg [[[291, 295], [334, 292], [348, 844], [305, 847]], [[344, 296]
The predict time of ../../doc/imgs/1.jpg: 0.06162881851196289
The visualized image saved in ./inference_results/det_res_1.jpg
```
* 注意:ONNX暂时不支持变长预测,需要将输入resize到固定输入,预测结果可能与直接使用Paddle预测有细微不同。
......@@ -57,7 +57,7 @@ PaddleOCR基于动态图开源的文本识别算法列表:
- [x] SAR([paper](https://arxiv.org/abs/1811.00751v2))
- [x] SEED([paper](https://arxiv.org/pdf/2005.10977.pdf))
参考[DTRB][3](https://arxiv.org/abs/1904.01906)文字识别训练和评估流程,使用MJSynth和SynthText两个文字识别数据集训练,在IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE数据集上进行评估,算法效果如下:
参考[DTRB](https://arxiv.org/abs/1904.01906)[3]文字识别训练和评估流程,使用MJSynth和SynthText两个文字识别数据集训练,在IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE数据集上进行评估,算法效果如下:
|模型|骨干网络|Avg Accuracy|模型存储命名|下载链接|
|---|---|---|---|---|
......
......@@ -146,14 +146,14 @@ PaddleOCR欢迎大家向repo中积极贡献代码,下面给出一些贡献代
-`远程仓库` Clone到本地
```
# 拉取develop分支的代码
# 拉取dygraph分支的代码
git clone https://github.com/{your_name}/PaddleOCR.git -b dygraph
cd PaddleOCR
```
> 多数情况下clone失败是由于网络原因,请稍后重试或配置代理
#### 3.2.2 和 `远程仓库` 建立连接
#### 3.2.2 通过Token方式登录与建立连接
首先查看当前 `远程仓库` 的信息。
......@@ -163,7 +163,24 @@ git remote -v
# origin https://github.com/{your_name}/PaddleOCR.git (push)
```
只有clone的 `远程仓库` 的信息,也就是自己用户名下的 PaddleOCR,接下来我们创建一个原始 PaddleOCR 仓库的远程主机,命名为 upstream。
只有clone的 `远程仓库` 的信息,也就是自己用户名下的 PaddleOCR。由于Github的登录方式变化,需要通过Token的方式重新配置 `远程仓库` 的地址。生成Token的方式如下:
1. 找到个人访问令牌(token):在Github页面右上角点击自己的头像,然后依次选择 Settings --> Developer settings --> Personal access tokens
2. 点击 Generate new token:在Note中填入token名称,例如’paddle‘。在Select scopes选择repo(必选)、admin:repo_hook、delete_repo等,可根据自身需要勾选。然后点击Generate token生成token。最后复制生成的token。
删除原始的origin配置
```
git remote rm origin
```
将remote分支改成 `https://oauth2:{token}@github.com/{your_name}/PaddleOCR.git`。例如:如果token值为12345,你的用户名为PPOCR,则运行下方命令
```
git remote add origin https://oauth2:12345@github.com/PPOCR/PaddleOCR.git
```
这样我们就与自己的 `远程仓库` 建立了连接。接下来我们创建一个原始 PaddleOCR 仓库的远程主机,命名为 upstream。
```
git remote add upstream https://github.com/PaddlePaddle/PaddleOCR.git
......@@ -172,8 +189,8 @@ git remote add upstream https://github.com/PaddlePaddle/PaddleOCR.git
使用 `git remote -v` 查看当前 `远程仓库` 的信息,输出如下,发现包括了origin和upstream 2个 `远程仓库`
```
origin https://github.com/{your_name}/PaddleOCR.git (fetch)
origin https://github.com/{your_name}/PaddleOCR.git (push)
origin https://oauth2:{token}@github.com/{your_name}/PaddleOCR.git (fetch)
origin https://oauth2:{token}@github.com/{your_name}/PaddleOCR.git (push)
upstream https://github.com/PaddlePaddle/PaddleOCR.git (fetch)
upstream https://github.com/PaddlePaddle/PaddleOCR.git (push)
```
......@@ -182,21 +199,22 @@ upstream https://github.com/PaddlePaddle/PaddleOCR.git (push)
#### 3.2.3 创建本地分支
可以基于当前分支创建新的本地分支,命令如下
首先获取 upstream 的最新代码,然后基于上游仓库 (upstream)的dygraph创建new_branch分支
```
git checkout -b new_branch
git fetch upstream
git checkout -b new_branch upstream/dygraph
```
也可以基于远程或者上游的分支创建新的分支,命令如下。
```
# 基于用户远程仓库(origin)的develop创建new_branch分支
git checkout -b new_branch origin/develop
# 基于上游远程仓库(upstream)的develop创建new_branch分支
# 如果需要从upstream创建新的分支,需要首先使用git fetch upstream获取上游代码
git checkout -b new_branch upstream/develop
```
> 如果对于新Fork的PaddleOCR项目,用户远程仓库(origin)与上游(upstream)仓库的分支更新情况相同,也可以基于origin仓库的默认分支或指定分支创建新的本地分支,命令如下。
>
> ```
> # 基于用户远程仓库(origin)的dygraph创建new_branch分支
> git checkout -b new_branch origin/dygraph
>
> # 基于用户远程仓库(origin)的默认分支创建new_branch分支
> git checkout -b new_branch
> ```
最终会显示切换到新的分支,输出信息如下
......@@ -205,6 +223,8 @@ Branch new_branch set up to track remote branch develop from upstream.
Switched to a new branch 'new_branch'
```
切换分支之后即可在此分支上进行文件改动
#### 3.2.4 使用pre-commit勾子
Paddle 开发人员使用 pre-commit 工具来管理 Git 预提交钩子。 它可以帮助我们格式化源代码(C++,Python),在提交(commit)前自动检查一些基本事宜(如每个文件只有一个 EOL,Git 中不要添加大文件等)。
......@@ -234,23 +254,15 @@ pre-commit
![img](../precommit_pass.png)
使用下面的命令完成提交。
提交修改,并写明修改内容("your commit info")
```
git commit -m "your commit info"
git commit -m "your commit info"
```
#### 3.2.6 保持本地仓库最新
获取 upstream 的最新代码并更新当前分支。这里的upstream来自于2.2节的`和远程仓库建立连接`部分。
#### 3.2.6 Push到远程仓库
```
git fetch upstream
# 如果是希望提交到其他分支,则需要从upstream的其他分支pull代码,这里是develop
git pull upstream develop
```
#### 3.2.7 push到远程仓库
使用push命令将修改的commit提交到 `远程仓库`
```
git push origin new_branch
......@@ -258,7 +270,7 @@ git push origin new_branch
#### 3.2.7 提交Pull Request
点击new pull request,选择本地分支和目标分支,如下图所示。在PR的描述说明中,填写该PR所完成的功能。接下来等待review,如果有需要修改的地方,参照上述步骤更新 origin 中的对应分支即可。
打开自己的远程仓库界面,选择提交的分支。点击new pull request或contribute进入PR界面。选择本地分支和目标分支,如下图所示。在PR的描述说明中,填写该PR所完成的功能。接下来等待review,如果有需要修改的地方,参照上述步骤更新 origin 中的对应分支即可。
![banner](../pr.png)
......@@ -285,8 +297,8 @@ git push origin new_branch
- 删除本地分支
```
# 切换到develop分支,否则无法删除当前分支
git checkout develop
# 切换到dygraph分支,否则无法删除当前分支
git checkout dygraph
# 删除new_branch分支
git branch -D new_branch
......@@ -310,7 +322,6 @@ git push origin new_branch
- 请注意每个commit的名称:应能反映当前commit的内容,不能太随意。
3)如果解决了某个Issue的问题,请在该Pull Request的第一个评论框中加上:fix #issue_number,这样当该Pull Request被合并后,会自动关闭对应的Issue。关键词包括:close, closes, closed, fix, fixes, fixed, resolve, resolves, resolved,请选择合适的词汇。详细可参考[Closing issues via commit messages](https://help.github.com/articles/closing-issues-via-commit-messages)
此外,在回复评审人意见时,请您遵守以下约定:
......
......@@ -78,11 +78,11 @@ json.dumps编码前的图像标注信息是包含多个字典的list,字典中
cd PaddleOCR/
# 根据backbone的不同选择下载对应的预训练模型
# 下载MobileNetV3的预训练模型
wget -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams
wget -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/pretrained/MobileNetV3_large_x0_5_pretrained.pdparams
# 或,下载ResNet18_vd的预训练模型
wget -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet18_vd_pretrained.pdparams
wget -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/pretrained/ResNet18_vd_pretrained.pdparams
# 或,下载ResNet50_vd的预训练模型
wget -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_ssld_pretrained.pdparams
wget -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/pretrained/ResNet50_vd_ssld_pretrained.pdparams
```
<a name="2-----"></a>
......
......@@ -527,7 +527,6 @@ PostProcess:
关于`DistillationDBPostProcess`更加具体的实现可以参考: [db_postprocess.py](../../ppocr/postprocess/db_postprocess.py#L195)
<a name="224"></a>
#### 2.2.4 蒸馏指标计算
......
# PP-OCR模型与配置文件
PP-OCR模型与配置文件一章主要补充一些OCR模型的基本概念、配置文件的内容与作用以便对模型后续的参数调整和训练中拥有更好的体验。
本章包含三个部分,首先在[PP-OCR模型下载](./models_list.md)中解释PP-OCR模型的类型概念,并提供所有模型的下载链接。然后在[配置文件内容与生成](./config.md)中详细说明调整PP-OCR模型所需的参数。最后的[模型库快速使用](./inference_ppocr.md)是对第一节PP-OCR模型库使用方法的介绍,可以通过Python推理引擎快速利用丰富的模型库模型获得测试结果。
------
下面我们首先了解一些OCR相关的基本概念:
- [1. OCR 简要介绍](#1-ocr-----)
* [1.1 OCR 检测模型基本概念](#11-ocr---------)
* [1.2 OCR 识别模型基本概念](#12-ocr---------)
* [1.3 PP-OCR模型](#13-pp-ocr--)
<a name="1-ocr-----"></a>
## 1. OCR 简要介绍
本节简要介绍OCR检测模型、识别模型的基本概念,并介绍PaddleOCR的PP-OCR模型。
OCR(Optical Character Recognition,光学字符识别)目前是文字识别的统称,已不限于文档或书本文字识别,更包括识别自然场景下的文字,又可以称为STR(Scene Text Recognition)。
OCR文字识别一般包括两个部分,文本检测和文本识别;文本检测首先利用检测算法检测到图像中的文本行;然后检测到的文本行用识别算法去识别到具体文字。
<a name="11-ocr---------"></a>
### 1.1 OCR 检测模型基本概念
文本检测就是要定位图像中的文字区域,然后通常以边界框的形式将单词或文本行标记出来。传统的文字检测算法多是通过手工提取特征的方式,特点是速度快,简单场景效果好,但是面对自然场景,效果会大打折扣。当前多是采用深度学习方法来做。
基于深度学习的文本检测算法可以大致分为以下几类:
1. 基于目标检测的方法;一般是预测得到文本框后,通过NMS筛选得到最终文本框,多是四点文本框,对弯曲文本场景效果不理想。典型算法为EAST、Text Box等方法。
2. 基于分割的方法;将文本行当成分割目标,然后通过分割结果构建外接文本框,可以处理弯曲文本,对于文本交叉场景问题效果不理想。典型算法为DB、PSENet等方法。
3. 混合目标检测和分割的方法;
<a name="12-ocr---------"></a>
### 1.2 OCR 识别模型基本概念
OCR识别算法的输入数据一般是文本行,背景信息不多,文字占据主要部分,识别算法目前可以分为两类算法:
1. 基于CTC的方法;即识别算法的文字预测模块是基于CTC的,常用的算法组合为CNN+RNN+CTC。目前也有一些算法尝试在网络中加入transformer模块等等。
2. 基于Attention的方法;即识别算法的文字预测模块是基于Attention的,常用算法组合是CNN+RNN+Attention。
<a name="13-pp-ocr--"></a>
### 1.3 PP-OCR模型
PaddleOCR 中集成了很多OCR算法,文本检测算法有DB、EAST、SAST等等,文本识别算法有CRNN、RARE、StarNet、Rosetta、SRN等算法。
其中PaddleOCR针对中英文自然场景通用OCR,推出了PP-OCR系列模型,PP-OCR模型由DB+CRNN算法组成,利用海量中文数据训练加上模型调优方法,在中文场景上具备较高的文本检测识别能力。并且PaddleOCR推出了高精度超轻量PP-OCRv2模型,检测模型仅3M,识别模型仅8.5M,利用[PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim)的模型量化方法,可以在保持精度不降低的情况下,将检测模型压缩到0.8M,识别压缩到3M,更加适用于移动端部署场景。
......@@ -143,8 +143,10 @@ PaddleOCR主要聚焦通用OCR,如果有垂类需求,您可以用PaddleOCR+
具体的训练教程可点击下方链接跳转:
\- [文本检测模型训练](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.3/doc/doc_ch/detection.md)
- [文本检测模型训练](./detection.md)
\- [文本识别模型训练](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.3/doc/doc_ch/recognition.md)
- [文本识别模型训练](./recognition.md)
- [文本方向分类器训练](./angle_class.md)
- [知识蒸馏](./knowledge_distillation.md)
\- [文本方向分类器训练](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.3/doc/doc_ch/angle_class.md)
......@@ -67,11 +67,11 @@ And the responding download link of backbone pretrain weights can be found in (h
```shell
cd PaddleOCR/
# Download the pre-trained model of MobileNetV3
wget -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams
wget -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/pretrained/MobileNetV3_large_x0_5_pretrained.pdparams
# or, download the pre-trained model of ResNet18_vd
wget -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet18_vd_pretrained.pdparams
wget -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/pretrained/ResNet18_vd_pretrained.pdparams
# or, download the pre-trained model of ResNet50_vd
wget -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_ssld_pretrained.pdparams
wget -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/pretrained/ResNet50_vd_ssld_pretrained.pdparams
```
......
# PP-OCR Model and Configuration
The chapter on PP-OCR model and configuration file mainly adds some basic concepts of OCR model and the content and role of configuration file to have a better experience in the subsequent parameter adjustment and training of the model.
This chapter contains three parts. Firstly, [PP-OCR Model Download](. /models_list_en.md) explains the concept of PP-OCR model types and provides links to download all models. Then in [Yml Configuration](. /config_en.md) details the parameters needed to fine-tune the PP-OCR models. The final [Python Inference for PP-OCR Model Library](. /inference_ppocr_en.md) is an introduction to the use of the PP-OCR model library in the first section, which can quickly utilize the rich model library models to obtain test results through the Python inference engine.
------
Let's first understand some basic concepts.
- [INTRODUCTION ABOUT OCR](#introduction-about-ocr)
* [BASIC CONCEPTS OF OCR DETECTION MODEL](#basic-concepts-of-ocr-detection-model)
* [Basic concepts of OCR recognition model](#basic-concepts-of-ocr-recognition-model)
* [PP-OCR model](#pp-ocr-model)
* [And a table of contents](#and-a-table-of-contents)
* [On the right](#on-the-right)
## 1. INTRODUCTION ABOUT OCR
This section briefly introduces the basic concepts of OCR detection model and recognition model, and introduces PaddleOCR's PP-OCR model.
OCR (Optical Character Recognition, Optical Character Recognition) is currently the general term for text recognition. It is not limited to document or book text recognition, but also includes recognizing text in natural scenes. It can also be called STR (Scene Text Recognition).
OCR text recognition generally includes two parts, text detection and text recognition. The text detection module first uses detection algorithms to detect text lines in the image. And then the recognition algorithm to identify the specific text in the text line.
### 1.1 BASIC CONCEPTS OF OCR DETECTION MODEL
Text detection can locate the text area in the image, and then usually mark the word or text line in the form of a bounding box. Traditional text detection algorithms mostly extract features manually, which are characterized by fast speed and good effect in simple scenes, but the effect will be greatly reduced when faced with natural scenes. Currently, deep learning methods are mostly used.
Text detection algorithms based on deep learning can be roughly divided into the following categories:
1. Method based on target detection. Generally, after the text box is predicted, the final text box is filtered through NMS, which is mostly four-point text box, which is not ideal for curved text scenes. Typical algorithms are methods such as EAST and Text Box.
2. Method based on text segmentation. The text line is regarded as the segmentation target, and then the external text box is constructed through the segmentation result, which can handle curved text, and the effect is not ideal for the text cross scene problem. Typical algorithms are DB, PSENet and other methods.
3. Hybrid target detection and segmentation method.
### 1.2 Basic concepts of OCR recognition model
The input of the OCR recognition algorithm is generally text lines images which has less background information, and the text information occupies the main part. The recognition algorithm can be divided into two types of algorithms:
1. CTC-based method. The text prediction module of the recognition algorithm is based on CTC, and the commonly used algorithm combination is CNN+RNN+CTC. There are also some algorithms that try to add transformer modules to the network and so on.
2. Attention-based method. The text prediction module of the recognition algorithm is based on Attention, and the commonly used algorithm combination is CNN+RNN+Attention.
### 1.3 PP-OCR model
PaddleOCR integrates many OCR algorithms, text detection algorithms include DB, EAST, SAST, etc., text recognition algorithms include CRNN, RARE, StarNet, Rosetta, SRN and other algorithms.
Among them, PaddleOCR has released the PP-OCR series model for the general OCR in Chinese and English natural scenes. The PP-OCR model is composed of the DB+CRNN algorithm. It uses massive Chinese data training and model tuning methods to have high text detection and recognition capabilities in Chinese scenes. And PaddleOCR has launched a high-precision and ultra-lightweight PP-OCRv2 model. The detection model is only 3M, and the recognition model is only 8.5M. Using [PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim)'s model quantification method, the detection model can be compressed to 0.8M without reducing the accuracy. The recognition is compressed to 3M, which is more suitable for mobile deployment scenarios.
doc/joinus.PNG

188.5 KB | W: | H:

doc/joinus.PNG

193.0 KB | W: | H:

doc/joinus.PNG
doc/joinus.PNG
doc/joinus.PNG
doc/joinus.PNG
  • 2-up
  • Swipe
  • Onion skin
......@@ -344,7 +344,7 @@ class KieLabelEncode(object):
max_num = 300
temp_bboxes = np.zeros([max_num, 4])
h, _ = bboxes.shape
temp_bboxes[:h, :h] = bboxes
temp_bboxes[:h, :] = bboxes
temp_relations = np.zeros([max_num, max_num, 5])
temp_relations[:h, :h, :] = relations
......
......@@ -23,7 +23,6 @@ import sys
import six
import cv2
import numpy as np
import fasttext
class DecodeImage(object):
......@@ -136,6 +135,7 @@ class ToCHWImage(object):
class Fasttext(object):
def __init__(self, path="None", **kwargs):
import fasttext
self.fast_model = fasttext.load_model(path)
def __call__(self, data):
......
......@@ -111,13 +111,16 @@ def load_pretrained_params(model, path):
params = paddle.load(path + '.pdparams')
state_dict = model.state_dict()
new_state_dict = {}
for k1, k2 in zip(state_dict.keys(), params.keys()):
if list(state_dict[k1].shape) == list(params[k2].shape):
new_state_dict[k1] = params[k2]
for k1 in params.keys():
if k1 not in state_dict.keys():
logger.warning("The pretrained params {} not in model".format(k1))
else:
logger.warning(
"The shape of model params {} {} not matched with loaded params {} {} !".
format(k1, state_dict[k1].shape, k2, params[k2].shape))
if list(state_dict[k1].shape) == list(params[k1].shape):
new_state_dict[k1] = params[k1]
else:
logger.warning(
"The shape of model params {} {} not matched with loaded params {} {} !".
format(k1, state_dict[k1].shape, k1, params[k1].shape))
model.set_state_dict(new_state_dict)
logger.info("load pretrain successful from {}".format(path))
return model
......
......@@ -96,10 +96,7 @@ git clone https://gitee.com/paddlepaddle/PaddleOCR
- **(3)安装PaddleNLP**
```bash
# 需要使用PaddleNLP最新的代码版本进行安装
git clone https://github.com/PaddlePaddle/PaddleNLP -b develop
cd PaddleNLP
pip3 install -e .
pip3 install "paddlenlp>=2.2.1"
```
......
......@@ -3,7 +3,7 @@ model_name:PPOCRv2_ocr_det_kl
python:python3.7
Global.pretrained_model:null
Global.save_inference_dir:null
infer_model:./inference/ch_PP-OCRv2_det_infer/
infer_model:./inference/ch_PP-OCRv2_det_infer
infer_export:deploy/slim/quantization/quant_kl.py -c configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml -o
infer_quant:True
inference:tools/infer/predict_det.py
......
......@@ -13,7 +13,7 @@ inference:tools/infer/predict_rec.py
--rec_batch_num:1
--use_tensorrt:False|True
--precision:int8
--det_model_dir:
--rec_model_dir:
--image_dir:./inference/rec_inference
null:null
--benchmark:True
......
......@@ -43,7 +43,7 @@ inference:tools/infer/predict_det.py
--cpu_threads:1|6
--rec_batch_num:1
--use_tensorrt:False
--precision:fp32|fp16|int8
--precision:fp32|int8
--det_model_dir:
--image_dir:./inference/ch_det_data_50/all-sum-510/
null:null
......
......@@ -43,7 +43,7 @@ inference:tools/infer/predict_det.py
--cpu_threads:1|6
--rec_batch_num:1
--use_tensorrt:False
--precision:fp32|fp16|int8
--precision:fp32|int8
--det_model_dir:
--image_dir:./inference/ch_det_data_50/all-sum-510/
null:null
......
......@@ -179,7 +179,7 @@ elif [ ${MODE} = "whole_infer" ];then
cd ./inference/ && tar xf rec_r34_vd_tps_bilstm_ctc_v2.0_train.tar && cd ../
fi
if [ ${model_name} == "ch_ppocr_server_v2.0_rec" ]; then
wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/ch_ppocr_server_v2.0_rec_train.tar --no-check-certificate
wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_train.tar --no-check-certificate
cd ./inference/ && tar xf ch_ppocr_server_v2.0_rec_train.tar && cd ../
fi
if [ ${model_name} == "ch_ppocr_mobile_v2.0_rec" ]; then
......@@ -239,18 +239,23 @@ fi
if [ ${MODE} = "klquant_whole_infer" ]; then
wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015_lite.tar --no-check-certificate
cd ./train_data/ && tar xf icdar2015_lite.tar
ln -s ./icdar2015_lite ./icdar2015 && cd ../
cd ./train_data/ && tar xf icdar2015_lite.tar && rm -rf ./icdar2015 && ln -s ./icdar2015_lite ./icdar2015 && cd ../
if [ ${model_name} = "ch_ppocr_mobile_v2.0_det_KL" ]; then
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar --no-check-certificate
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar --no-check-certificate
cd ./inference && tar xf ch_ppocr_mobile_v2.0_det_infer.tar && tar xf ch_det_data_50.tar && cd ../
fi
if [ ${model_name} = "ch_PPOCRv2_det" ]; then
eval_model_name="ch_PP-OCRv2_det_infer"
if [ ${model_name} = "PPOCRv2_ocr_rec_kl" ]; then
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar --no-check-certificate
wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/rec_inference.tar --no-check-certificate
wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ic15_data.tar --no-check-certificate
cd ./train_data/ && tar xf ic15_data.tar && cd ../
cd ./inference && tar xf rec_inference.tar && tar xf ch_PP-OCRv2_rec_infer.tar && cd ../
fi
if [ ${model_name} = "PPOCRv2_ocr_det_kl" ]; then
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar --no-check-certificate
wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar --no-check-certificate
cd ./inference && tar xf ${eval_model_name}.tar && tar xf ch_det_data_50.tar && cd ../
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar --no-check-certificate
cd ./inference && tar xf ch_PP-OCRv2_det_infer.tar && tar xf ch_det_data_50.tar && cd ../
fi
if [ ${model_name} = "ch_ppocr_mobile_v2.0_rec_KL" ]; then
wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar --no-check-certificate
......
......@@ -183,7 +183,7 @@ function func_inference(){
if [[ ${precision} =~ "fp16" || ${precision} =~ "int8" ]] && [ ${use_trt} = "False" ]; then
continue
fi
if [[ ${use_trt} = "False" || ${precision} =~ "int8" ]] && [ ${_flag_quant} = "True" ]; then
if [[ ${use_trt} = "False" && ${precision} =~ "int8" ]] && [ ${_flag_quant} = "True" ]; then
continue
fi
for batch_size in ${batch_size_list[*]}; do
......@@ -227,7 +227,12 @@ if [ ${MODE} = "whole_infer" ] || [ ${MODE} = "klquant_whole_infer" ]; then
for infer_model in ${infer_model_dir_list[*]}; do
# run export
if [ ${infer_run_exports[Count]} != "null" ];then
save_infer_dir=$(dirname $infer_model)
if [ ${MODE} = "klquant_whole_infer" ]; then
save_infer_dir="${infer_model}_klquant"
fi
if [ ${MODE} = "whole_infer" ]; then
save_infer_dir="${infer_model}"
fi
set_export_weight=$(func_set_params "${export_weight}" "${infer_model}")
set_save_infer_key=$(func_set_params "${save_infer_key}" "${save_infer_dir}")
export_cmd="${python} ${infer_run_exports[Count]} ${set_export_weight} ${set_save_infer_key}"
......@@ -259,7 +264,6 @@ else
env=""
elif [ ${#gpu} -le 1 ];then
env="export CUDA_VISIBLE_DEVICES=${gpu}"
eval ${env}
elif [ ${#gpu} -le 15 ];then
IFS=","
array=(${gpu})
......@@ -280,6 +284,7 @@ else
set_amp_config=" "
fi
for trainer in ${trainer_list[*]}; do
eval ${env}
flag_quant=False
if [ ${trainer} = ${pact_key} ]; then
run_train=${pact_trainer}
......@@ -332,7 +337,6 @@ else
cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_pretrain} ${set_epoch} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config}"
fi
# run train
eval "unset CUDA_VISIBLE_DEVICES"
eval $cmd
status_check $? "${cmd}" "${status_log}"
......
......@@ -101,17 +101,22 @@ class TextDetector(object):
else:
logger.info("unknown det_algorithm:{}".format(self.det_algorithm))
sys.exit(0)
if self.use_onnx:
pre_process_list[0] = {
'DetResizeForTest': {
'image_shape': [640, 640]
}
}
self.preprocess_op = create_operators(pre_process_list)
self.postprocess_op = build_post_process(postprocess_params)
self.predictor, self.input_tensor, self.output_tensors, self.config = utility.create_predictor(
args, 'det', logger)
if self.use_onnx:
img_h, img_w = self.input_tensor.shape[2:]
if img_h is not None and img_w is not None and img_h > 0 and img_w > 0:
pre_process_list[0] = {
'DetResizeForTest': {
'image_shape': [img_h, img_w]
}
}
self.preprocess_op = create_operators(pre_process_list)
if args.benchmark:
import auto_log
pid = os.getpid()
......
......@@ -109,7 +109,10 @@ class TextRecognizer(object):
assert imgC == img.shape[2]
imgW = int((32 * max_wh_ratio))
if self.use_onnx:
imgW = 100
w = self.input_tensor.shape[3:][0]
if w is not None and w > 0:
imgW = w
h, w = img.shape[:2]
ratio = w / float(h)
if math.ceil(imgH * ratio) > imgW:
......
......@@ -15,6 +15,7 @@
import argparse
import os
import sys
import platform
import cv2
import numpy as np
import paddle
......@@ -313,6 +314,10 @@ def create_predictor(args, mode, logger):
def get_infer_gpuid():
sysstr = platform.system()
if sysstr == "Windows":
return 0
if not paddle.fluid.core.is_compiled_with_rocm():
cmd = "env | grep CUDA_VISIBLE_DEVICES"
else:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册