diff --git a/PPOCRLabel/PPOCRLabel.py b/PPOCRLabel/PPOCRLabel.py
index 98e2b1ef77f6fb63b49cd8d32ca87d3d783daf50..517714104d1cb62f3b0c03c34843595d85502417 100644
--- a/PPOCRLabel/PPOCRLabel.py
+++ b/PPOCRLabel/PPOCRLabel.py
@@ -11,64 +11,45 @@
# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
-#!/usr/bin/env python
+# !/usr/bin/env python
# -*- coding: utf-8 -*-
# pyrcc5 -o libs/resources.py resources.qrc
import argparse
import ast
import codecs
+import json
import os.path
import platform
import subprocess
import sys
from functools import partial
-from collections import defaultdict
-import json
-import cv2
-
+try:
+ from PyQt5 import QtCore, QtGui, QtWidgets
+ from PyQt5.QtGui import *
+ from PyQt5.QtCore import *
+ from PyQt5.QtWidgets import *
+except ImportError:
+    print("Please install PyQt5 first: pip install pyqt5")
+    sys.exit(1)
__dir__ = os.path.dirname(os.path.abspath(__file__))
-import numpy as np
-
-
sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))
sys.path.append(os.path.abspath(os.path.join(__dir__, '../PaddleOCR')))
sys.path.append("..")
from paddleocr import PaddleOCR
-
-try:
- from PyQt5 import QtCore, QtGui, QtWidgets
- from PyQt5.QtGui import *
- from PyQt5.QtCore import *
- from PyQt5.QtWidgets import *
-except ImportError:
- # needed for py3+qt4
- # Ref:
- # http://pyqt.sourceforge.net/Docs/PyQt4/incompatible_apis.html
- # http://stackoverflow.com/questions/21217399/pyqt4-qtcore-qvariant-object-instead-of-a-string
- if sys.version_info.major >= 3:
- import sip
-
- sip.setapi('QVariant', 2)
- from PyQt4.QtGui import *
- from PyQt4.QtCore import *
-
-from combobox import ComboBox
from libs.constants import *
from libs.utils import *
from libs.settings import Settings
-from libs.shape import Shape, DEFAULT_LINE_COLOR, DEFAULT_FILL_COLOR,DEFAULT_LOCK_COLOR
+from libs.shape import Shape, DEFAULT_LINE_COLOR, DEFAULT_FILL_COLOR, DEFAULT_LOCK_COLOR
from libs.stringBundle import StringBundle
from libs.canvas import Canvas
from libs.zoomWidget import ZoomWidget
from libs.autoDialog import AutoDialog
from libs.labelDialog import LabelDialog
from libs.colorDialog import ColorDialog
-from libs.toolBar import ToolBar
from libs.ustr import ustr
from libs.hashableQListWidgetItem import HashableQListWidgetItem
from libs.editinlist import EditInList
@@ -76,31 +57,19 @@ from libs.editinlist import EditInList
__appname__ = 'PPOCRLabel'
-class WindowMixin(object):
-
- def menu(self, title, actions=None):
- menu = self.menuBar().addMenu(title)
- if actions:
- addActions(menu, actions)
- return menu
-
- def toolbar(self, title, actions=None):
- toolbar = ToolBar(title)
- toolbar.setObjectName(u'%sToolBar' % title)
- # toolbar.setOrientation(Qt.Vertical)
- toolbar.setToolButtonStyle(Qt.ToolButtonTextUnderIcon)
- if actions:
- addActions(toolbar, actions)
- self.addToolBar(Qt.LeftToolBarArea, toolbar)
- return toolbar
-
-
-class MainWindow(QMainWindow, WindowMixin):
+class MainWindow(QMainWindow):
FIT_WINDOW, FIT_WIDTH, MANUAL_ZOOM = list(range(3))
- def __init__(self, lang="ch", gpu=False, defaultFilename=None, defaultPrefdefClassFile=None, defaultSaveDir=None):
+ def __init__(self,
+ lang="ch",
+ gpu=False,
+ default_filename=None,
+ default_predefined_class_file=None,
+ default_save_dir=None):
super(MainWindow, self).__init__()
self.setWindowTitle(__appname__)
+        self.setWindowState(Qt.WindowMaximized)  # start with the window maximized
+        self.activateWindow()  # bring PPOCRLabel to the front when it is activated
# Load setting in the main thread
self.settings = Settings()
@@ -110,11 +79,17 @@ class MainWindow(QMainWindow, WindowMixin):
# Load string bundle for i18n
if lang not in ['ch', 'en']:
lang = 'en'
- self.stringBundle = StringBundle.getBundle(localeStr='zh-CN' if lang=='ch' else 'en') # 'en'
+ self.stringBundle = StringBundle.getBundle(localeStr='zh-CN' if lang == 'ch' else 'en') # 'en'
getStr = lambda strId: self.stringBundle.getString(strId)
- self.defaultSaveDir = defaultSaveDir
- self.ocr = PaddleOCR(use_pdserving=False, use_angle_cls=True, det=True, cls=True, use_gpu=gpu, lang=lang, show_log=False)
+ self.defaultSaveDir = default_save_dir
+ self.ocr = PaddleOCR(use_pdserving=False,
+ use_angle_cls=True,
+ det=True,
+ cls=True,
+ use_gpu=gpu,
+ lang=lang,
+ show_log=False)
if os.path.exists('./data/paddle.png'):
result = self.ocr.ocr('./data/paddle.png', cls=True, det=True)
@@ -132,7 +107,6 @@ class MainWindow(QMainWindow, WindowMixin):
self.labelFile = None
self.currIndex = 0
-
# Whether we need to save or not.
self.dirty = False
@@ -142,7 +116,7 @@ class MainWindow(QMainWindow, WindowMixin):
self.screencast = "https://github.com/PaddlePaddle/PaddleOCR"
# Load predefined classes to the list
- self.loadPredefinedClasses(defaultPrefdefClassFile)
+ self.loadPredefinedClasses(default_predefined_class_file)
# Main widgets and related state.
self.labelDialog = LabelDialog(parent=self, listItem=self.labelHist)
@@ -158,7 +132,7 @@ class MainWindow(QMainWindow, WindowMixin):
self.PPreader = None
self.autoSaveNum = 5
- ################# file list ###############
+ # ================== File List ==================
self.fileListWidget = QListWidget()
self.fileListWidget.itemClicked.connect(self.fileitemDoubleClicked)
self.fileListWidget.setIconSize(QSize(25, 25))
@@ -178,12 +152,13 @@ class MainWindow(QMainWindow, WindowMixin):
fileListContainer = QWidget()
fileListContainer.setLayout(filelistLayout)
- self.filedock = QDockWidget(getStr('fileList'), self)
- self.filedock.setObjectName(getStr('files'))
- self.filedock.setWidget(fileListContainer)
- self.addDockWidget(Qt.LeftDockWidgetArea, self.filedock)
+ self.fileListName = getStr('fileList')
+ self.fileDock = QDockWidget(self.fileListName, self)
+ self.fileDock.setObjectName(getStr('files'))
+ self.fileDock.setWidget(fileListContainer)
+ self.addDockWidget(Qt.LeftDockWidgetArea, self.fileDock)
- ######## Right area ##########
+ # ================== Right Area ==================
listLayout = QVBoxLayout()
listLayout.setContentsMargins(0, 0, 0, 0)
@@ -199,7 +174,6 @@ class MainWindow(QMainWindow, WindowMixin):
self.DelButton = QToolButton()
self.DelButton.setToolButtonStyle(Qt.ToolButtonTextBesideIcon)
-
lefttoptoolbox = QHBoxLayout()
lefttoptoolbox.addWidget(self.newButton)
lefttoptoolbox.addWidget(self.reRecogButton)
@@ -207,36 +181,37 @@ class MainWindow(QMainWindow, WindowMixin):
lefttoptoolboxcontainer.setLayout(lefttoptoolbox)
listLayout.addWidget(lefttoptoolboxcontainer)
-
- ################## label list ####################
+ # ================== Label List ==================
# Create and add a widget for showing current label items
self.labelList = EditInList()
labelListContainer = QWidget()
labelListContainer.setLayout(listLayout)
- #self.labelList.itemActivated.connect(self.labelSelectionChanged)
self.labelList.itemSelectionChanged.connect(self.labelSelectionChanged)
self.labelList.clicked.connect(self.labelList.item_clicked)
+
# Connect to itemChanged to detect checkbox changes.
self.labelList.itemChanged.connect(self.labelItemChanged)
- self.labelListDock = QDockWidget(getStr('recognitionResult'),self)
+ self.labelListDockName = getStr('recognitionResult')
+ self.labelListDock = QDockWidget(self.labelListDockName, self)
self.labelListDock.setWidget(self.labelList)
self.labelListDock.setFeatures(QDockWidget.NoDockWidgetFeatures)
listLayout.addWidget(self.labelListDock)
- ################## detection box ####################
+ # ================== Detection Box ==================
self.BoxList = QListWidget()
- #self.BoxList.itemActivated.connect(self.boxSelectionChanged)
+ # self.BoxList.itemActivated.connect(self.boxSelectionChanged)
self.BoxList.itemSelectionChanged.connect(self.boxSelectionChanged)
self.BoxList.itemDoubleClicked.connect(self.editBox)
# Connect to itemChanged to detect checkbox changes.
self.BoxList.itemChanged.connect(self.boxItemChanged)
- self.BoxListDock = QDockWidget(getStr('detectionBoxposition'), self)
+ self.BoxListDockName = getStr('detectionBoxposition')
+ self.BoxListDock = QDockWidget(self.BoxListDockName, self)
self.BoxListDock.setWidget(self.BoxList)
self.BoxListDock.setFeatures(QDockWidget.NoDockWidgetFeatures)
listLayout.addWidget(self.BoxListDock)
- ############ lower right area ############
+ # ================== Lower Right Area ==================
leftbtmtoolbox = QHBoxLayout()
leftbtmtoolbox.addWidget(self.SaveButton)
leftbtmtoolbox.addWidget(self.DelButton)
@@ -248,26 +223,26 @@ class MainWindow(QMainWindow, WindowMixin):
self.dock.setObjectName(getStr('labels'))
self.dock.setWidget(labelListContainer)
+ # ================== Zoom Bar ==================
+ self.imageSlider = QSlider(Qt.Horizontal)
+ self.imageSlider.valueChanged.connect(self.CanvasSizeChange)
+ self.imageSlider.setMinimum(-9)
+ self.imageSlider.setMaximum(510)
+ self.imageSlider.setSingleStep(1)
+ self.imageSlider.setTickPosition(QSlider.TicksBelow)
+ self.imageSlider.setTickInterval(1)
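+        # The slider value feeds straight into the zoom widget (see CanvasSizeChange),
+        # and addZoom() writes the zoom value back, keeping slider and zoom in sync.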
- ########## zoom bar #########
- self.imgsplider = QSlider(Qt.Horizontal)
- self.imgsplider.valueChanged.connect(self.CanvasSizeChange)
- self.imgsplider.setMinimum(-150)
- self.imgsplider.setMaximum(150)
- self.imgsplider.setSingleStep(1)
- self.imgsplider.setTickPosition(QSlider.TicksBelow)
- self.imgsplider.setTickInterval(1)
op = QGraphicsOpacityEffect()
op.setOpacity(0.2)
- self.imgsplider.setGraphicsEffect(op)
- # self.imgsplider.setAttribute(Qt.WA_TranslucentBackground)
- self.imgsplider.setStyleSheet("background-color:transparent")
- self.imgsliderDock = QDockWidget(getStr('ImageResize'), self)
- self.imgsliderDock.setObjectName(getStr('IR'))
- self.imgsliderDock.setWidget(self.imgsplider)
- self.imgsliderDock.setFeatures(QDockWidget.DockWidgetFloatable)
- self.imgsliderDock.setAttribute(Qt.WA_TranslucentBackground)
- self.addDockWidget(Qt.RightDockWidgetArea, self.imgsliderDock)
+ self.imageSlider.setGraphicsEffect(op)
+
+ self.imageSlider.setStyleSheet("background-color:transparent")
+ self.imageSliderDock = QDockWidget(getStr('ImageResize'), self)
+ self.imageSliderDock.setObjectName(getStr('IR'))
+ self.imageSliderDock.setWidget(self.imageSlider)
+ self.imageSliderDock.setFeatures(QDockWidget.DockWidgetFloatable)
+ self.imageSliderDock.setAttribute(Qt.WA_TranslucentBackground)
+ self.addDockWidget(Qt.RightDockWidgetArea, self.imageSliderDock)
self.zoomWidget = ZoomWidget()
self.colorDialog = ColorDialog(parent=self)
@@ -275,13 +250,13 @@ class MainWindow(QMainWindow, WindowMixin):
self.msgBox = QMessageBox()
- ########## thumbnail #########
+ # ================== Thumbnail ==================
hlayout = QHBoxLayout()
m = (0, 0, 0, 0)
hlayout.setSpacing(0)
hlayout.setContentsMargins(*m)
self.preButton = QToolButton()
- self.preButton.setIcon(newIcon("prev",40))
+ self.preButton.setIcon(newIcon("prev", 40))
self.preButton.setIconSize(QSize(40, 100))
self.preButton.clicked.connect(self.openPrevImg)
self.preButton.setStyleSheet('border: none;')
@@ -291,10 +266,10 @@ class MainWindow(QMainWindow, WindowMixin):
self.iconlist.setFlow(QListView.TopToBottom)
self.iconlist.setSpacing(10)
self.iconlist.setIconSize(QSize(50, 50))
- self.iconlist.setMovement(False)
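+        # setMovement() expects a QListView.Movement value; Static keeps the
+        # thumbnail items from being dragged around by the user.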
+ self.iconlist.setMovement(QListView.Static)
self.iconlist.setResizeMode(QListView.Adjust)
self.iconlist.itemClicked.connect(self.iconitemDoubleClicked)
- self.iconlist.setStyleSheet("background-color:transparent; border: none;")
+ self.iconlist.setStyleSheet("QListWidget{ background-color:transparent; border: none;}")
self.iconlist.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
self.nextButton = QToolButton()
self.nextButton.setIcon(newIcon("next", 40))
@@ -307,12 +282,11 @@ class MainWindow(QMainWindow, WindowMixin):
hlayout.addWidget(self.iconlist)
hlayout.addWidget(self.nextButton)
-
iconListContainer = QWidget()
iconListContainer.setLayout(hlayout)
iconListContainer.setFixedHeight(100)
- ########### Canvas ###########
+ # ================== Canvas ==================
self.canvas = Canvas(parent=self)
self.canvas.zoomRequest.connect(self.zoomRequest)
self.canvas.setDrawingShapeToSquare(settings.get(SETTING_DRAW_SQUARE, False))
@@ -335,32 +309,17 @@ class MainWindow(QMainWindow, WindowMixin):
centerLayout = QVBoxLayout()
centerLayout.setContentsMargins(0, 0, 0, 0)
centerLayout.addWidget(scroll)
- #centerLayout.addWidget(self.icondock)
- centerLayout.addWidget(iconListContainer,0,Qt.AlignCenter)
- centercontainer = QWidget()
- centercontainer.setLayout(centerLayout)
-
- # self.scrolldock = QDockWidget('WorkSpace',self)
- # self.scrolldock.setObjectName('WorkSpace')
- # self.scrolldock.setWidget(centercontainer)
- # self.scrolldock.setFeatures(QDockWidget.NoDockWidgetFeatures)
- # orititle = self.scrolldock.titleBarWidget()
- # tmpwidget = QWidget()
- # self.scrolldock.setTitleBarWidget(tmpwidget)
- # del orititle
- self.setCentralWidget(centercontainer) #self.scrolldock
- self.addDockWidget(Qt.RightDockWidgetArea, self.dock)
-
+ centerLayout.addWidget(iconListContainer, 0, Qt.AlignCenter)
+ centerContainer = QWidget()
+ centerContainer.setLayout(centerLayout)
- # self.filedock.setFeatures(QDockWidget.DockWidgetFloatable)
- self.filedock.setFeatures(self.filedock.features() ^ QDockWidget.DockWidgetFloatable)
-
- self.dockFeatures = QDockWidget.DockWidgetClosable | QDockWidget.DockWidgetFloatable
- self.dock.setFeatures(self.dock.features() ^ self.dockFeatures)
+ self.setCentralWidget(centerContainer)
+ self.addDockWidget(Qt.RightDockWidgetArea, self.dock)
- self.filedock.setFeatures(QDockWidget.NoDockWidgetFeatures)
+ self.dock.setFeatures(QDockWidget.DockWidgetClosable | QDockWidget.DockWidgetFloatable)
+ self.fileDock.setFeatures(QDockWidget.NoDockWidgetFeatures)
- ###### Actions #######
+ # ================== Actions ==================
action = partial(newAction, self)
quit = action(getStr('quit'), self.close,
'Ctrl+Q', 'quit', getStr('quitApp'))
@@ -369,13 +328,13 @@ class MainWindow(QMainWindow, WindowMixin):
'Ctrl+u', 'open', getStr('openDir'))
open_dataset_dir = action(getStr('openDatasetDir'), self.openDatasetDirDialog,
- 'Ctrl+p', 'open', getStr('openDatasetDir'), enabled=False)
+ 'Ctrl+p', 'open', getStr('openDatasetDir'), enabled=False)
save = action(getStr('save'), self.saveFile,
'Ctrl+V', 'verify', getStr('saveDetail'), enabled=False)
alcm = action(getStr('choosemodel'), self.autolcm,
- 'Ctrl+M', 'next', getStr('tipchoosemodel'))
+ 'Ctrl+M', 'next', getStr('tipchoosemodel'))
deleteImg = action(getStr('deleteImg'), self.deleteImg, 'Ctrl+Shift+D', 'close', getStr('deleteImgDetail'),
enabled=True)
@@ -394,8 +353,8 @@ class MainWindow(QMainWindow, WindowMixin):
'w', 'objects', getStr('crtBoxDetail'), enabled=False)
delete = action(getStr('delBox'), self.deleteSelectedShape,
- 'backspace', 'delete', getStr('delBoxDetail'), enabled=False)
-
+ 'Alt+X', 'delete', getStr('delBoxDetail'), enabled=False)
+
copy = action(getStr('dupBox'), self.copySelectedShape,
'Ctrl+C', 'copy', getStr('dupBoxDetail'),
enabled=False)
@@ -406,7 +365,6 @@ class MainWindow(QMainWindow, WindowMixin):
showAll = action(getStr('showBox'), partial(self.togglePolygons, True),
'Ctrl+A', 'hide', getStr('showAllBoxDetail'),
enabled=False)
-
help = action(getStr('tutorial'), self.showTutorialDialog, None, 'help', getStr('tutorialDetail'))
showInfo = action(getStr('info'), self.showInfoDialog, None, 'help', getStr('info'))
@@ -448,12 +406,12 @@ class MainWindow(QMainWindow, WindowMixin):
'Ctrl+E', 'edit', getStr('editLabelDetail'),
enabled=False)
- ######## New actions #######
+ # ================== New Actions ==================
AutoRec = action(getStr('autoRecognition'), self.autoRecognition,
- '', 'Auto', getStr('autoRecognition'), enabled=False)
+ '', 'Auto', getStr('autoRecognition'), enabled=False)
reRec = action(getStr('reRecognition'), self.reRecognition,
- 'Ctrl+Shift+R', 'reRec', getStr('reRecognition'), enabled=False)
+ 'Ctrl+Shift+R', 'reRec', getStr('reRecognition'), enabled=False)
singleRere = action(getStr('singleRe'), self.singleRerecognition,
'Ctrl+R', 'reRec', getStr('singleRe'), enabled=False)
@@ -462,23 +420,23 @@ class MainWindow(QMainWindow, WindowMixin):
'q', 'new', getStr('creatPolygon'), enabled=True)
saveRec = action(getStr('saveRec'), self.saveRecResult,
- '', 'save', getStr('saveRec'), enabled=False)
+ '', 'save', getStr('saveRec'), enabled=False)
- saveLabel = action(getStr('saveLabel'), self.saveLabelFile, #
- 'Ctrl+S', 'save', getStr('saveLabel'), enabled=False)
+        saveLabel = action(getStr('saveLabel'), self.saveLabelFile,
+ 'Ctrl+S', 'save', getStr('saveLabel'), enabled=False)
undoLastPoint = action(getStr("undoLastPoint"), self.canvas.undoLastPoint,
'Ctrl+Z', "undo", getStr("undoLastPoint"), enabled=False)
- rotateLeft = action(getStr("rotateLeft"), partial(self.rotateImgAction,1),
- 'Ctrl+Alt+L', "rotateLeft", getStr("rotateLeft"), enabled=False)
+ rotateLeft = action(getStr("rotateLeft"), partial(self.rotateImgAction, 1),
+ 'Ctrl+Alt+L', "rotateLeft", getStr("rotateLeft"), enabled=False)
- rotateRight = action(getStr("rotateRight"), partial(self.rotateImgAction,-1),
- 'Ctrl+Alt+R', "rotateRight", getStr("rotateRight"), enabled=False)
+ rotateRight = action(getStr("rotateRight"), partial(self.rotateImgAction, -1),
+ 'Ctrl+Alt+R', "rotateRight", getStr("rotateRight"), enabled=False)
undo = action(getStr("undo"), self.undoShapeEdit,
'Ctrl+Z', "undo", getStr("undo"), enabled=False)
-
+
lock = action(getStr("lockBox"), self.lockSelectedShape,
None, "lock", getStr("lockBoxDetail"),
enabled=False)
@@ -492,7 +450,7 @@ class MainWindow(QMainWindow, WindowMixin):
# self.preButton.setDefaultAction(openPrevImg)
# self.nextButton.setDefaultAction(openNextImg)
- ############# Zoom layout ##############
+ # ================== Zoom layout ==================
zoomLayout = QHBoxLayout()
zoomLayout.addStretch()
self.zoominButton = QToolButton()
@@ -519,7 +477,6 @@ class MainWindow(QMainWindow, WindowMixin):
icon='color', tip=getStr('shapeFillColorDetail'),
enabled=False)
-
# Label list context menu.
labelMenu = QMenu()
addActions(labelMenu, (edit, delete))
@@ -535,39 +492,36 @@ class MainWindow(QMainWindow, WindowMixin):
self.drawSquaresOption.triggered.connect(self.toogleDrawSquare)
# Store actions for further handling.
- self.actions = struct(save=save, resetAll=resetAll, deleteImg=deleteImg,
+ self.actions = struct(save=save, resetAll=resetAll, deleteImg=deleteImg,
lineColor=color1, create=create, delete=delete, edit=edit, copy=copy,
- saveRec=saveRec, singleRere=singleRere,AutoRec=AutoRec,reRec=reRec,
+ saveRec=saveRec, singleRere=singleRere, AutoRec=AutoRec, reRec=reRec,
createMode=createMode, editMode=editMode,
shapeLineColor=shapeLineColor, shapeFillColor=shapeFillColor,
zoom=zoom, zoomIn=zoomIn, zoomOut=zoomOut, zoomOrg=zoomOrg,
fitWindow=fitWindow, fitWidth=fitWidth,
zoomActions=zoomActions, saveLabel=saveLabel,
- undo=undo, undoLastPoint=undoLastPoint,open_dataset_dir=open_dataset_dir,
- rotateLeft=rotateLeft,rotateRight=rotateRight,lock=lock,
- fileMenuActions=(
- opendir, open_dataset_dir, saveLabel, resetAll, quit),
+ undo=undo, undoLastPoint=undoLastPoint, open_dataset_dir=open_dataset_dir,
+ rotateLeft=rotateLeft, rotateRight=rotateRight, lock=lock,
+ fileMenuActions=(opendir, open_dataset_dir, saveLabel, resetAll, quit),
beginner=(), advanced=(),
- editMenu=(createpoly, edit, copy, delete,singleRere,None, undo, undoLastPoint,
- None, rotateLeft, rotateRight, None, color1, self.drawSquaresOption,lock),
- beginnerContext=(create, edit, copy, delete, singleRere, rotateLeft, rotateRight,lock),
+ editMenu=(createpoly, edit, copy, delete, singleRere, None, undo, undoLastPoint,
+ None, rotateLeft, rotateRight, None, color1, self.drawSquaresOption, lock),
+ beginnerContext=(create, edit, copy, delete, singleRere, rotateLeft, rotateRight, lock),
advancedContext=(createMode, editMode, edit, copy,
delete, shapeLineColor, shapeFillColor),
- onLoadActive=(
- create, createMode, editMode),
+ onLoadActive=(create, createMode, editMode),
onShapesPresent=(hideAll, showAll))
# menus
self.menus = struct(
- file=self.menu('&'+getStr('mfile')),
- edit=self.menu('&'+getStr('medit')),
- view=self.menu('&'+getStr('mview')),
+ file=self.menu('&' + getStr('mfile')),
+ edit=self.menu('&' + getStr('medit')),
+ view=self.menu('&' + getStr('mview')),
autolabel=self.menu('&PaddleOCR'),
- help=self.menu('&'+getStr('mhelp')),
+ help=self.menu('&' + getStr('mhelp')),
recentFiles=QMenu('Open &Recent'),
labelList=labelMenu)
-
self.lastLabel = None
# Add option to enable/disable labels being displayed at the top of bounding boxes
self.displayLabelOption = QAction(getStr('displayLabel'), self)
@@ -588,33 +542,30 @@ class MainWindow(QMainWindow, WindowMixin):
self.autoSaveOption.triggered.connect(self.autoSaveFunc)
addActions(self.menus.file,
- (opendir, open_dataset_dir, None, saveLabel, saveRec, self.autoSaveOption, None, resetAll, deleteImg, quit))
+ (opendir, open_dataset_dir, None, saveLabel, saveRec, self.autoSaveOption, None, resetAll, deleteImg,
+ quit))
- addActions(self.menus.help, (showKeys,showSteps, showInfo))
+ addActions(self.menus.help, (showKeys, showSteps, showInfo))
addActions(self.menus.view, (
self.displayLabelOption, self.labelDialogOption,
- None,
+ None,
hideAll, showAll, None,
zoomIn, zoomOut, zoomOrg, None,
fitWindow, fitWidth))
- addActions(self.menus.autolabel, (AutoRec, reRec, alcm, None, help)) #
+ addActions(self.menus.autolabel, (AutoRec, reRec, alcm, None, help))
self.menus.file.aboutToShow.connect(self.updateFileMenu)
# Custom context menu for the canvas widget:
addActions(self.canvas.menus[0], self.actions.beginnerContext)
- #addActions(self.canvas.menus[1], (
- # action('&Copy here', self.copyShape),
- # action('&Move here', self.moveShape)))
-
self.statusBar().showMessage('%s started.' % __appname__)
self.statusBar().show()
# Application state.
self.image = QImage()
- self.filePath = ustr(defaultFilename)
+ self.filePath = ustr(default_filename)
self.lastOpenDir = None
self.recentFiles = []
self.maxRecent = 7
@@ -625,7 +576,7 @@ class MainWindow(QMainWindow, WindowMixin):
# Add Chris
self.difficult = False
- ## Fix the compatible issue for qt4 and qt5. Convert the QStringList to python list
+ # Fix the compatible issue for qt4 and qt5. Convert the QStringList to python list
if settings.get(SETTING_RECENT_FILES):
if have_qstring():
recentFileQStringList = settings.get(SETTING_RECENT_FILES)
@@ -654,7 +605,6 @@ class MainWindow(QMainWindow, WindowMixin):
# Add chris
Shape.difficult = self.difficult
-
# ADD:
# Populate the File menu dynamically.
self.updateFileMenu()
@@ -678,6 +628,12 @@ class MainWindow(QMainWindow, WindowMixin):
if self.filePath and os.path.isdir(self.filePath):
self.openDirDialog(dirpath=self.filePath, silent=True)
+ def menu(self, title, actions=None):
+ menu = self.menuBar().addMenu(title)
+ if actions:
+ addActions(menu, actions)
+ return menu
+
def keyReleaseEvent(self, event):
if event.key() == Qt.Key_Control:
self.canvas.setDrawingShapeToSquare(False)
@@ -687,11 +643,9 @@ class MainWindow(QMainWindow, WindowMixin):
# Draw rectangle if Ctrl is pressed
self.canvas.setDrawingShapeToSquare(True)
-
def noShapes(self):
return not self.itemsToShapes
-
def populateModeActions(self):
self.canvas.menus[0].clear()
addActions(self.canvas.menus[0], self.actions.beginnerContext)
@@ -699,7 +653,6 @@ class MainWindow(QMainWindow, WindowMixin):
actions = (self.actions.create,) # if self.beginner() else (self.actions.createMode, self.actions.editMode)
addActions(self.menus.edit, actions + self.actions.editMenu)
-
def setDirty(self):
self.dirty = True
self.actions.save.setEnabled(True)
@@ -813,10 +766,11 @@ class MainWindow(QMainWindow, WindowMixin):
def rotateImgWarn(self):
if self.lang == 'ch':
- self.msgBox.warning (self, "提示", "\n 该图片已经有标注框,旋转操作会打乱标注,建议清除标注框后旋转。")
+ self.msgBox.warning(self, "提示", "\n 该图片已经有标注框,旋转操作会打乱标注,建议清除标注框后旋转。")
else:
- self.msgBox.warning (self, "Warn", "\n The picture already has a label box, and rotation will disrupt the label.\
- It is recommended to clear the label box and rotate it.")
+            self.msgBox.warning(self, "Warning", "\n The image already contains label boxes; "
+                                                 "rotating will disrupt the existing labels. "
+                                                 "It is recommended to clear the label boxes before rotating.")
def rotateImgAction(self, k=1, _value=False):
@@ -891,14 +845,13 @@ class MainWindow(QMainWindow, WindowMixin):
self.setDirty()
self.updateComboBox()
- ######## detection box related functions #######
-
+ # =================== detection box related functions ===================
def boxItemChanged(self, item):
shape = self.itemsToShapesbox[item]
box = ast.literal_eval(item.text())
# print('shape in labelItemChanged is',shape.points)
- if box != [(p.x(), p.y()) for p in shape.points]:
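+        # Compare against int coordinates, matching the integer values shown in the
+        # BoxList, so an unedited item is not mistaken for a changed box.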
+ if box != [(int(p.x()), int(p.y())) for p in shape.points]:
# shape.points = box
shape.points = [QPointF(p[0], p[1]) for p in box]
@@ -906,7 +859,7 @@ class MainWindow(QMainWindow, WindowMixin):
# shape.line_color = generateColorByText(shape.label)
self.setDirty()
else: # User probably changed item visibility
- self.canvas.setShapeVisible(shape, True)#item.checkState() == Qt.Checked
+ self.canvas.setShapeVisible(shape, True) # item.checkState() == Qt.Checked
def editBox(self): # ADD
if not self.canvas.editing():
@@ -956,11 +909,10 @@ class MainWindow(QMainWindow, WindowMixin):
def indexTo5Files(self, currIndex):
if currIndex < 2:
return self.mImgList[:5]
- elif currIndex > len(self.mImgList)-3:
+ elif currIndex > len(self.mImgList) - 3:
return self.mImgList[-5:]
else:
- return self.mImgList[currIndex - 2 : currIndex + 3]
-
+ return self.mImgList[currIndex - 2: currIndex + 3]
# Tzutalin 20160906 : Add file list and dock to move faster
def fileitemDoubleClicked(self, item=None):
@@ -980,9 +932,8 @@ class MainWindow(QMainWindow, WindowMixin):
self.loadFile(filename)
def CanvasSizeChange(self):
- if len(self.mImgList) > 0:
- self.zoomWidget.setValue(self.zoomWidgetValue + self.imgsplider.value())
-
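+        # React only while the slider itself has keyboard focus; programmatic updates
+        # from addZoom() do not focus the slider, so they are not echoed back here.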
+ if len(self.mImgList) > 0 and self.imageSlider.hasFocus():
+ self.zoomWidget.setValue(self.imageSlider.value())
def shapeSelectionChanged(self, selected_shapes):
self._noSelectionSlot = True
@@ -995,7 +946,7 @@ class MainWindow(QMainWindow, WindowMixin):
self.shapesToItems[shape].setSelected(True)
self.shapesToItemsbox[shape].setSelected(True)
- self.labelList.scrollToItem(self.currentItem()) # QAbstractItemView.EnsureVisible
+ self.labelList.scrollToItem(self.currentItem()) # QAbstractItemView.EnsureVisible
self.BoxList.scrollToItem(self.currentBox())
self._noSelectionSlot = False
@@ -1027,6 +978,10 @@ class MainWindow(QMainWindow, WindowMixin):
action.setEnabled(True)
self.updateComboBox()
+        # update the box/label counts shown in the dock titles
+ self.BoxListDock.setWindowTitle(self.BoxListDockName + f" ({self.BoxList.count()})")
+ self.labelListDock.setWindowTitle(self.labelListDockName + f" ({self.labelList.count()})")
+
def remLabels(self, shapes):
if shapes is None:
# print('rm empty label')
@@ -1048,7 +1003,7 @@ class MainWindow(QMainWindow, WindowMixin):
def loadLabels(self, shapes):
s = []
for label, points, line_color, fill_color, difficult in shapes:
- shape = Shape(label=label,line_color=line_color)
+ shape = Shape(label=label, line_color=line_color)
for x, y in points:
# Ensure the labels are within the bounds of the image. If not, fix them.
@@ -1058,7 +1013,7 @@ class MainWindow(QMainWindow, WindowMixin):
shape.addPoint(QPointF(x, y))
shape.difficult = difficult
- #shape.locked = False
+ # shape.locked = False
shape.close()
s.append(shape)
@@ -1071,12 +1026,11 @@ class MainWindow(QMainWindow, WindowMixin):
# shape.fill_color = QColor(*fill_color)
# else:
# shape.fill_color = generateColorByText(label)
-
+
self.addLabel(shape)
-
+
self.updateComboBox()
self.canvas.loadShapes(s)
-
def singleLabel(self, shape):
if shape is None:
@@ -1112,13 +1066,13 @@ class MainWindow(QMainWindow, WindowMixin):
line_color=s.line_color.getRgb(),
fill_color=s.fill_color.getRgb(),
points=[(int(p.x()), int(p.y())) for p in s.points], # QPonitF
- # add chris
+ # add chris
difficult=s.difficult) # bool
shapes = [] if mode == 'Auto' else \
[format_shape(shape) for shape in self.canvas.shapes if shape.line_color != DEFAULT_LOCK_COLOR]
# Can add differrent annotation formats here
- for box in self.result_dic :
+ for box in self.result_dic:
trans_dic = {"label": box[1][0], "points": box[0], 'difficult': False}
if trans_dic["label"] == "" and mode == 'Auto':
continue
@@ -1127,7 +1081,8 @@ class MainWindow(QMainWindow, WindowMixin):
try:
trans_dic = []
for box in shapes:
- trans_dic.append({"transcription": box['label'], "points": box['points'], 'difficult': box['difficult']})
+ trans_dic.append(
+ {"transcription": box['label'], "points": box['points'], 'difficult': box['difficult']})
self.PPlabel[annotationFilePath] = trans_dic
if mode == 'Auto':
self.Cachelabel[annotationFilePath] = trans_dic
@@ -1145,8 +1100,7 @@ class MainWindow(QMainWindow, WindowMixin):
for shape in self.canvas.copySelectedShape():
self.addLabel(shape)
# fix copy and delete
- #self.shapeSelectionChanged(True)
-
+ # self.shapeSelectionChanged(True)
def labelSelectionChanged(self):
if self._noSelectionSlot:
@@ -1160,10 +1114,9 @@ class MainWindow(QMainWindow, WindowMixin):
else:
self.canvas.deSelectShape()
-
def boxSelectionChanged(self):
if self._noSelectionSlot:
- #self.BoxList.scrollToItem(self.currentBox(), QAbstractItemView.PositionAtCenter)
+ # self.BoxList.scrollToItem(self.currentBox(), QAbstractItemView.PositionAtCenter)
return
if self.canvas.editing():
selected_shapes = []
@@ -1174,7 +1127,6 @@ class MainWindow(QMainWindow, WindowMixin):
else:
self.canvas.deSelectShape()
-
def labelItemChanged(self, item):
shape = self.itemsToShapes[item]
label = item.text()
@@ -1182,7 +1134,7 @@ class MainWindow(QMainWindow, WindowMixin):
shape.label = item.text()
# shape.line_color = generateColorByText(shape.label)
self.setDirty()
- elif not ((item.checkState()== Qt.Unchecked) ^ (not shape.difficult)):
+ elif not ((item.checkState() == Qt.Unchecked) ^ (not shape.difficult)):
shape.difficult = True if item.checkState() == Qt.Unchecked else False
self.setDirty()
else: # User probably changed item visibility
@@ -1208,7 +1160,7 @@ class MainWindow(QMainWindow, WindowMixin):
if text is not None:
self.prevLabelText = self.stringBundle.getString('tempLabel')
# generate_color = generateColorByText(text)
- shape = self.canvas.setLastLabel(text, None, None)#generate_color, generate_color
+ shape = self.canvas.setLastLabel(text, None, None) # generate_color, generate_color
self.addLabel(shape)
if self.beginner(): # Switch to edit mode.
self.canvas.setEditing(True)
@@ -1236,6 +1188,7 @@ class MainWindow(QMainWindow, WindowMixin):
def addZoom(self, increment=10):
self.setZoom(self.zoomWidget.value() + increment)
+        self.imageSlider.setValue(self.zoomWidget.value() + increment)  # keep the zoom slider in sync
def zoomRequest(self, delta):
# get the current scrollbar positions
@@ -1321,17 +1274,16 @@ class MainWindow(QMainWindow, WindowMixin):
# unicodeFilePath = os.path.abspath(unicodeFilePath)
# Tzutalin 20160906 : Add file list and dock to move faster
# Highlight the file item
-
+
if unicodeFilePath and self.fileListWidget.count() > 0:
if unicodeFilePath in self.mImgList:
index = self.mImgList.index(unicodeFilePath)
fileWidgetItem = self.fileListWidget.item(index)
print('unicodeFilePath is', unicodeFilePath)
fileWidgetItem.setSelected(True)
- ###
self.iconlist.clear()
self.additems5(None)
-
+
for i in range(5):
item_tooltip = self.iconlist.item(i).toolTip()
# print(i,"---",item_tooltip)
@@ -1382,12 +1334,21 @@ class MainWindow(QMainWindow, WindowMixin):
self.showBoundingBoxFromPPlabel(filePath)
self.setWindowTitle(__appname__ + ' ' + filePath)
-
+
# Default : select last item if there is at least one item
if self.labelList.count():
self.labelList.setCurrentItem(self.labelList.item(self.labelList.count() - 1))
self.labelList.item(self.labelList.count() - 1).setSelected(True)
+ # show file list image count
+ select_indexes = self.fileListWidget.selectedIndexes()
+ if len(select_indexes) > 0:
+ self.fileDock.setWindowTitle(self.fileListName + f" ({select_indexes[0].row() + 1}"
+ f"/{self.fileListWidget.count()})")
+        # update the box/label counts shown in the dock titles
+ self.BoxListDock.setWindowTitle(self.BoxListDockName + f" ({self.BoxList.count()})")
+ self.labelListDock.setWindowTitle(self.labelListDockName + f" ({self.labelList.count()})")
+
self.canvas.setFocus(True)
return True
return False
@@ -1395,24 +1356,23 @@ class MainWindow(QMainWindow, WindowMixin):
def showBoundingBoxFromPPlabel(self, filePath):
width, height = self.image.width(), self.image.height()
imgidx = self.getImglabelidx(filePath)
- shapes =[]
- #box['ratio'] of the shapes saved in lockedShapes contains the ratio of the
+ shapes = []
+ # box['ratio'] of the shapes saved in lockedShapes contains the ratio of the
# four corner coordinates of the shapes to the height and width of the image
for box in self.canvas.lockedShapes:
if self.canvas.isInTheSameImage:
- shapes.append((box['transcription'], [[s[0]*width,s[1]*height]for s in box['ratio']],
- DEFAULT_LOCK_COLOR, None, box['difficult']))
+ shapes.append((box['transcription'], [[s[0] * width, s[1] * height] for s in box['ratio']],
+ DEFAULT_LOCK_COLOR, None, box['difficult']))
else:
- shapes.append(('锁定框:待检测', [[s[0]*width,s[1]*height]for s in box['ratio']],
- DEFAULT_LOCK_COLOR, None, box['difficult']))
+ shapes.append(('锁定框:待检测', [[s[0] * width, s[1] * height] for s in box['ratio']],
+ DEFAULT_LOCK_COLOR, None, box['difficult']))
if imgidx in self.PPlabel.keys():
for box in self.PPlabel[imgidx]:
shapes.append((box['transcription'], box['points'], None, None, box['difficult']))
-
+
self.loadLabels(shapes)
self.canvas.verified = False
-
def validFilestate(self, filePath):
if filePath not in self.fileStatedict.keys():
return None
@@ -1423,7 +1383,7 @@ class MainWindow(QMainWindow, WindowMixin):
def resizeEvent(self, event):
if self.canvas and not self.image.isNull() \
- and self.zoomMode != self.MANUAL_ZOOM:
+ and self.zoomMode != self.MANUAL_ZOOM:
self.adjustScale()
super(MainWindow, self).resizeEvent(event)
@@ -1441,7 +1401,7 @@ class MainWindow(QMainWindow, WindowMixin):
"""Figure out the size of the pixmap in order to fit the main widget."""
e = 2.0 # So that no scrollbars are generated.
w1 = self.centralWidget().width() - e
- h1 = self.centralWidget().height() - e -110
+ h1 = self.centralWidget().height() - e - 110
a1 = w1 / h1
# Calculate a new scale value based on the pixmap's aspect ratio.
w2 = self.canvas.pixmap.width() - 0.0
@@ -1492,7 +1452,7 @@ class MainWindow(QMainWindow, WindowMixin):
def loadRecent(self, filename):
if self.mayContinue():
- print(filename,"======")
+ print(filename, "======")
self.loadFile(filename)
def scanAllImages(self, folderPath):
@@ -1507,8 +1467,6 @@ class MainWindow(QMainWindow, WindowMixin):
natural_sort(images, key=lambda x: x.lower())
return images
-
-
def openDirDialog(self, _value=False, dirpath=None, silent=False):
if not self.mayContinue():
return
@@ -1520,15 +1478,15 @@ class MainWindow(QMainWindow, WindowMixin):
defaultOpenDirPath = os.path.dirname(self.filePath) if self.filePath else '.'
if silent != True:
targetDirPath = ustr(QFileDialog.getExistingDirectory(self,
- '%s - Open Directory' % __appname__,
- defaultOpenDirPath,
- QFileDialog.ShowDirsOnly | QFileDialog.DontResolveSymlinks))
+ '%s - Open Directory' % __appname__,
+ defaultOpenDirPath,
+ QFileDialog.ShowDirsOnly | QFileDialog.DontResolveSymlinks))
else:
targetDirPath = ustr(defaultOpenDirPath)
self.lastOpenDir = targetDirPath
self.importDirImages(targetDirPath)
- def openDatasetDirDialog(self,):
+ def openDatasetDirDialog(self):
if self.lastOpenDir and os.path.exists(self.lastOpenDir):
if platform.system() == 'Windows':
os.startfile(self.lastOpenDir)
@@ -1540,12 +1498,13 @@ class MainWindow(QMainWindow, WindowMixin):
if self.lang == 'ch':
self.msgBox.warning(self, "提示", "\n 原文件夹已不存在,请从新选择数据集路径!")
else:
- self.msgBox.warning(self, "Warn", "\n The original folder no longer exists, please choose the data set path again!")
+                self.msgBox.warning(self, "Warning",
+                                    "\n The original folder no longer exists. Please choose the dataset path again!")
self.actions.open_dataset_dir.setEnabled(False)
defaultOpenDirPath = os.path.dirname(self.filePath) if self.filePath else '.'
- def importDirImages(self, dirpath, isDelete = False):
+ def importDirImages(self, dirpath, isDelete=False):
if not self.mayContinue() or not dirpath:
return
if self.defaultSaveDir and self.defaultSaveDir != dirpath:
@@ -1553,7 +1512,7 @@ class MainWindow(QMainWindow, WindowMixin):
if not isDelete:
self.loadFilestate(dirpath)
- self.PPlabelpath = dirpath+ '/Label.txt'
+ self.PPlabelpath = dirpath + '/Label.txt'
self.PPlabel = self.loadLabelFile(self.PPlabelpath)
self.Cachelabelpath = dirpath + '/Cache.cach'
self.Cachelabel = self.loadLabelFile(self.Cachelabelpath)
@@ -1562,7 +1521,6 @@ class MainWindow(QMainWindow, WindowMixin):
self.lastOpenDir = dirpath
self.dirname = dirpath
-
self.defaultSaveDir = dirpath
self.statusBar().showMessage('%s started. Annotation will be saved to %s' %
(__appname__, self.defaultSaveDir))
@@ -1596,7 +1554,8 @@ class MainWindow(QMainWindow, WindowMixin):
self.actions.rotateLeft.setEnabled(True)
self.actions.rotateRight.setEnabled(True)
-
+        self.fileListWidget.setCurrentRow(0)  # select the first image in the list
+        self.fileDock.setWindowTitle(self.fileListName + f" (1/{self.fileListWidget.count()})")  # show image count
def openPrevImg(self, _value=False):
if len(self.mImgList) <= 0:
@@ -1632,7 +1591,7 @@ class MainWindow(QMainWindow, WindowMixin):
else:
self.mImgList5 = self.indexTo5Files(currIndex)
if filename:
- print('file name in openNext is ',filename)
+ print('file name in openNext is ', filename)
self.loadFile(filename)
def updateFileListIcon(self, filename):
@@ -1644,30 +1603,6 @@ class MainWindow(QMainWindow, WindowMixin):
imgidx = self.getImglabelidx(self.filePath)
self._saveFile(imgidx, mode=mode)
-
- def saveFileAs(self, _value=False):
- assert not self.image.isNull(), "cannot save empty image"
- self._saveFile(self.saveFileDialog())
-
- def saveFileDialog(self, removeExt=True):
- caption = '%s - Choose File' % __appname__
- filters = 'File (*%s)' % LabelFile.suffix
- openDialogPath = self.currentPath()
- dlg = QFileDialog(self, caption, openDialogPath, filters)
- dlg.setDefaultSuffix(LabelFile.suffix[1:])
- dlg.setAcceptMode(QFileDialog.AcceptSave)
- filenameWithoutExtension = os.path.splitext(self.filePath)[0]
- dlg.selectFile(filenameWithoutExtension)
- dlg.setOption(QFileDialog.DontUseNativeDialog, False)
- if dlg.exec_():
- fullFilePath = ustr(dlg.selectedFiles()[0])
- if removeExt:
- return os.path.splitext(fullFilePath)[0] # Return file path without the extension.
- else:
- return fullFilePath
- return ''
-
-
def saveLockedShapes(self):
self.canvas.lockedShapes = []
self.canvas.selectedShapes = []
@@ -1680,7 +1615,6 @@ class MainWindow(QMainWindow, WindowMixin):
self.canvas.selectedShapes.remove(s)
self.canvas.shapes.remove(s)
-
def _saveFile(self, annotationFilePath, mode='Manual'):
if len(self.canvas.lockedShapes) != 0:
self.saveLockedShapes()
@@ -1690,9 +1624,9 @@ class MainWindow(QMainWindow, WindowMixin):
img = cv2.imread(self.filePath)
width, height = self.image.width(), self.image.height()
for shape in self.canvas.lockedShapes:
- box = [[int(p[0]*width), int(p[1]*height)] for p in shape['ratio']]
+ box = [[int(p[0] * width), int(p[1] * height)] for p in shape['ratio']]
assert len(box) == 4
- result = [(shape['transcription'],1)]
+ result = [(shape['transcription'], 1)]
result.insert(0, box)
self.result_dic_locked.append(result)
self.result_dic += self.result_dic_locked
@@ -1706,7 +1640,7 @@ class MainWindow(QMainWindow, WindowMixin):
item.setIcon(newIcon('done'))
self.fileStatedict[self.filePath] = 1
- if len(self.fileStatedict)%self.autoSaveNum ==0:
+ if len(self.fileStatedict) % self.autoSaveNum == 0:
self.saveFilestate()
self.savePPlabel(mode='Auto')
@@ -1739,8 +1673,8 @@ class MainWindow(QMainWindow, WindowMixin):
if platform.system() == 'Windows':
from win32com.shell import shell, shellcon
shell.SHFileOperation((0, shellcon.FO_DELETE, deletePath, None,
- shellcon.FOF_SILENT | shellcon.FOF_ALLOWUNDO | shellcon.FOF_NOCONFIRMATION,
- None, None))
+ shellcon.FOF_SILENT | shellcon.FOF_ALLOWUNDO | shellcon.FOF_NOCONFIRMATION,
+ None, None))
# linux
elif platform.system() == 'Linux':
cmd = 'trash ' + deletePath
@@ -1790,7 +1724,10 @@ class MainWindow(QMainWindow, WindowMixin):
def discardChangesDialog(self):
yes, no, cancel = QMessageBox.Yes, QMessageBox.No, QMessageBox.Cancel
- msg = u'You have unsaved changes, would you like to save them and proceed?\nClick "No" to undo all changes.'
+ if self.lang == 'ch':
+ msg = u'您有未保存的变更, 您想保存再继续吗?\n点击 "No" 丢弃所有未保存的变更.'
+ else:
+ msg = u'You have unsaved changes, would you like to save them and proceed?\nClick "No" to undo all changes.'
return QMessageBox.warning(self, u'Attention', msg, yes | no | cancel)
def errorMessage(self, title, message):
@@ -1817,6 +1754,8 @@ class MainWindow(QMainWindow, WindowMixin):
if self.noShapes():
for action in self.actions.onShapesPresent:
action.setEnabled(False)
+ self.BoxListDock.setWindowTitle(self.BoxListDockName + f" ({self.BoxList.count()})")
+ self.labelListDock.setWindowTitle(self.labelListDockName + f" ({self.labelList.count()})")
def chshapeLineColor(self):
color = self.colorDialog.getColor(self.lineColor, u'Choose line color',
@@ -1853,7 +1792,6 @@ class MainWindow(QMainWindow, WindowMixin):
else:
self.labelHist.append(line)
-
def togglePaintLabelsOption(self):
for shape in self.canvas.shapes:
shape.paintLabel = self.displayLabelOption.isChecked()
@@ -1882,7 +1820,7 @@ class MainWindow(QMainWindow, WindowMixin):
prelen = lentoken // 2
bfilename = prelen * " " + pfilename + (lentoken - prelen) * " "
# item = QListWidgetItem(QIcon(pix.scaled(100, 100, Qt.KeepAspectRatio, Qt.SmoothTransformation)),filename[:10])
- item = QListWidgetItem(QIcon(pix.scaled(100, 100, Qt.IgnoreAspectRatio, Qt.FastTransformation)),pfilename)
+ item = QListWidgetItem(QIcon(pix.scaled(100, 100, Qt.IgnoreAspectRatio, Qt.FastTransformation)), pfilename)
# item.setForeground(QBrush(Qt.white))
item.setToolTip(file)
self.iconlist.addItem(item)
@@ -1894,7 +1832,7 @@ class MainWindow(QMainWindow, WindowMixin):
self.iconlist.setMinimumWidth(owidth + 50)
def getImglabelidx(self, filePath):
- if platform.system()=='Windows':
+ if platform.system() == 'Windows':
spliter = '\\'
else:
spliter = '/'
@@ -1908,15 +1846,14 @@ class MainWindow(QMainWindow, WindowMixin):
uncheckedList = [i for i in self.mImgList if i not in self.fileStatedict.keys()]
self.autoDialog = AutoDialog(parent=self, ocr=self.ocr, mImgList=uncheckedList, lenbar=len(uncheckedList))
self.autoDialog.popUp()
- self.currIndex=len(self.mImgList)
- self.loadFile(self.filePath) # ADD
+ self.currIndex = len(self.mImgList) - 1
+ self.loadFile(self.filePath) # ADD
self.haveAutoReced = True
self.AutoRecognition.setEnabled(False)
self.actions.AutoRec.setEnabled(False)
self.setDirty()
self.saveCacheLabel()
-
def reRecognition(self):
img = cv2.imread(self.filePath)
# org_box = [dic['points'] for dic in self.PPlabel[self.getImglabelidx(self.filePath)]]
@@ -1945,24 +1882,27 @@ class MainWindow(QMainWindow, WindowMixin):
print('Can not recognise the box')
if shape.line_color == DEFAULT_LOCK_COLOR:
shape.label = result[0][0]
- self.result_dic_locked.append([box,(self.noLabelText,0)])
+ self.result_dic_locked.append([box, (self.noLabelText, 0)])
else:
- self.result_dic.append([box,(self.noLabelText,0)])
+ self.result_dic.append([box, (self.noLabelText, 0)])
try:
if self.noLabelText == shape.label or result[1][0] == shape.label:
print('label no change')
else:
rec_flag += 1
except IndexError as e:
- print('Can not recognise the box')
- if (len(self.result_dic) > 0 and rec_flag > 0)or self.canvas.lockedShapes:
- self.canvas.isInTheSameImage = True
+ print('Can not recognise the box')
+ if (len(self.result_dic) > 0 and rec_flag > 0) or self.canvas.lockedShapes:
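+            # Flag that the upcoming reload is of the same image so that locked shapes
+            # keep their transcriptions in showBoundingBoxFromPPlabel().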
+ self.canvas.isInTheSameImage = True
self.saveFile(mode='Auto')
self.loadFile(self.filePath)
self.canvas.isInTheSameImage = False
self.setDirty()
elif len(self.result_dic) == len(self.canvas.shapes) and rec_flag == 0:
- QMessageBox.information(self, "Information", "The recognition result remains unchanged!")
+ if self.lang == 'ch':
+ QMessageBox.information(self, "Information", "识别结果保持一致!")
+ else:
+ QMessageBox.information(self, "Information", "The recognition result remains unchanged!")
else:
print('Can not recgonise in ', self.filePath)
else:
@@ -2027,7 +1967,6 @@ class MainWindow(QMainWindow, WindowMixin):
self.AutoRecognition.setEnabled(True)
self.actions.AutoRec.setEnabled(True)
-
def modelChoose(self):
print(self.comboBox.currentText())
lg_idx = {'Chinese & English': 'ch', 'English': 'en', 'French': 'french', 'German': 'german',
@@ -2054,14 +1993,12 @@ class MainWindow(QMainWindow, WindowMixin):
self.actions.saveLabel.setEnabled(True)
self.actions.saveRec.setEnabled(True)
-
def saveFilestate(self):
with open(self.fileStatepath, 'w', encoding='utf-8') as f:
for key in self.fileStatedict:
f.write(key + '\t')
f.write(str(self.fileStatedict[key]) + '\n')
-
def loadLabelFile(self, labelpath):
labeldict = {}
if not os.path.exists(labelpath):
@@ -2080,8 +2017,7 @@ class MainWindow(QMainWindow, WindowMixin):
labeldict[file] = []
return labeldict
-
- def savePPlabel(self,mode='Manual'):
+ def savePPlabel(self, mode='Manual'):
savedfile = [self.getImglabelidx(i) for i in self.fileStatedict.keys()]
with open(self.PPlabelpath, 'w', encoding='utf-8') as f:
for key in self.PPlabel:
@@ -2089,8 +2025,11 @@ class MainWindow(QMainWindow, WindowMixin):
f.write(key + '\t')
f.write(json.dumps(self.PPlabel[key], ensure_ascii=False) + '\n')
- if mode=='Manual':
- msg = 'Images that have been checked are saved in '+ self.PPlabelpath
+ if mode == 'Manual':
+ if self.lang == 'ch':
+ msg = '已将检查过的图片标签保存在 ' + self.PPlabelpath + " 文件中"
+ else:
+ msg = 'Images that have been checked are saved in ' + self.PPlabelpath
QMessageBox.information(self, "Information", msg)
def saveCacheLabel(self):
@@ -2122,17 +2061,19 @@ class MainWindow(QMainWindow, WindowMixin):
for i, label in enumerate(self.PPlabel[idx]):
if label['difficult']: continue
img_crop = get_rotate_crop_image(img, np.array(label['points'], np.float32))
- img_name = os.path.splitext(os.path.basename(idx))[0] + '_crop_'+str(i)+'.jpg'
- cv2.imwrite(crop_img_dir+img_name, img_crop)
- f.write('crop_img/'+ img_name + '\t')
+ img_name = os.path.splitext(os.path.basename(idx))[0] + '_crop_' + str(i) + '.jpg'
+ cv2.imwrite(crop_img_dir + img_name, img_crop)
+ f.write('crop_img/' + img_name + '\t')
f.write(label['transcription'] + '\n')
except Exception as e:
ques_img.append(key)
- print("Can not read image ",e)
+ print("Can not read image ", e)
if ques_img:
- QMessageBox.information(self, "Information", "The following images can not be saved, "
- "please check the image path and labels.\n" + "".join(str(i)+'\n' for i in ques_img))
- QMessageBox.information(self, "Information", "Cropped images have been saved in "+str(crop_img_dir))
+ QMessageBox.information(self,
+ "Information",
+                                    "The following images cannot be saved, please check the image path and labels.\n"
+ + "".join(str(i) + '\n' for i in ques_img))
+ QMessageBox.information(self, "Information", "Cropped images have been saved in " + str(crop_img_dir))
def speedChoose(self):
if self.labelDialogOption.isChecked():
@@ -2145,14 +2086,14 @@ class MainWindow(QMainWindow, WindowMixin):
def autoSaveFunc(self):
if self.autoSaveOption.isChecked():
- self.autoSaveNum = 1 # Real auto_Save
+ self.autoSaveNum = 1 # Real auto_Save
try:
self.saveLabelFile()
except:
pass
print('The program will automatically save once after confirming an image')
else:
- self.autoSaveNum = 5 # Used for backup
+ self.autoSaveNum = 5 # Used for backup
print('The program will automatically save once after confirming 5 images (default)')
def undoShapeEdit(self):
@@ -2169,25 +2110,26 @@ class MainWindow(QMainWindow, WindowMixin):
self.labelList.clearSelection()
self._noSelectionSlot = False
self.canvas.loadShapes(shapes, replace=replace)
- print("loadShapes")#1
-
-
+ print("loadShapes") # 1
+
def lockSelectedShape(self):
- """lock the selsected shapes.
+ """lock the selected shapes.
Add self.selectedShapes to lock self.canvas.lockedShapes,
which holds the ratio of the four coordinates of the locked shapes
to the width and height of the image
"""
width, height = self.image.width(), self.image.height()
+
def format_shape(s):
return dict(label=s.label, # str
line_color=s.line_color.getRgb(),
fill_color=s.fill_color.getRgb(),
- ratio=[[int(p.x())/width, int(p.y())/height] for p in s.points], # QPonitF
- # add chris
+ ratio=[[int(p.x()) / width, int(p.y()) / height] for p in s.points], # QPonitF
+ # add chris
difficult=s.difficult) # bool
- #lock
+
+ # lock
if len(self.canvas.lockedShapes) == 0:
for s in self.canvas.selectedShapes:
s.line_color = DEFAULT_LOCK_COLOR
@@ -2199,7 +2141,7 @@ class MainWindow(QMainWindow, WindowMixin):
self.canvas.lockedShapes = trans_dic
self.actions.save.setEnabled(True)
- #unlock
+ # unlock
else:
for s in self.canvas.shapes:
s.line_color = DEFAULT_LINE_COLOR
@@ -2220,9 +2162,11 @@ def read(filename, default=None):
except:
return default
+
def str2bool(v):
return v.lower() in ("true", "t", "1")
+
def get_main_app(argv=[]):
"""
Standard boilerplate Qt application code.
@@ -2231,23 +2175,24 @@ def get_main_app(argv=[]):
app = QApplication(argv)
app.setApplicationName(__appname__)
app.setWindowIcon(newIcon("app"))
- # Tzutalin 201705+: Accept extra agruments to change predefined class file
- argparser = argparse.ArgumentParser()
- argparser.add_argument("--lang", type=str, default='en', nargs="?")
- argparser.add_argument("--gpu", type=str2bool, default=False, nargs="?")
- argparser.add_argument("--predefined_classes_file",
- default=os.path.join(os.path.dirname(__file__), "data", "predefined_classes.txt"),
- nargs="?")
- args = argparser.parse_args(argv[1:])
- # Usage : labelImg.py image predefClassFile saveDir
- win = MainWindow(lang=args.lang, gpu=args.gpu,
- defaultPrefdefClassFile=args.predefined_classes_file)
+ # Tzutalin 201705+: Accept extra arguments to change predefined class file
+ arg_parser = argparse.ArgumentParser()
+ arg_parser.add_argument("--lang", type=str, default='en', nargs="?")
+ arg_parser.add_argument("--gpu", type=str2bool, default=True, nargs="?")
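+    # GPU inference is enabled by default; pass "--gpu false" to run on the CPU.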
+ arg_parser.add_argument("--predefined_classes_file",
+ default=os.path.join(os.path.dirname(__file__), "data", "predefined_classes.txt"),
+ nargs="?")
+ args = arg_parser.parse_args(argv[1:])
+
+ win = MainWindow(lang=args.lang,
+ gpu=args.gpu,
+ default_predefined_class_file=args.predefined_classes_file)
win.show()
return app, win
def main():
- '''construct main app and run it'''
+ """construct main app and run it"""
app, _win = get_main_app(sys.argv)
return app.exec_()
@@ -2259,5 +2204,5 @@ if __name__ == '__main__':
output = os.system('pyrcc5 -o libs/resources.py resources.qrc')
assert output == 0, "operate the cmd have some problems ,please check whether there is a in the lib " \
"directory resources.py "
- import libs.resources
+
sys.exit(main())
diff --git a/PPOCRLabel/README.md b/PPOCRLabel/README.md
index e8634ef8c06feae1f0adffb22c5694084dab78cd..19e54ab14663ab86285a45680ac2b6421420e4d4 100644
--- a/PPOCRLabel/README.md
+++ b/PPOCRLabel/README.md
@@ -8,6 +8,8 @@ PPOCRLabel is a semi-automatic graphic annotation tool suitable for OCR field, w
### Recent Update
+- 2022.01:(by [PeterH0323](https://github.com/peterh0323) )
+  - Improve user experience: show the number of files and labels, optimize the interaction, and fix bugs such as inference running only on the CPU
- 2021.11.17:
- Support install and start PPOCRLabel through the whl package (by [d2623587501](https://github.com/d2623587501))
- Dataset segmentation: Divide the annotation file into training, verification and testing parts (refer to section 3.5 below, by [MrCuiHao](https://github.com/MrCuiHao))
@@ -110,7 +112,7 @@ python PPOCRLabel.py
6. Click 're-Recognition', model will rewrite ALL recognition results in ALL detection box[3].
-7. Double click the result in 'recognition result' list to manually change inaccurate recognition results.
+7. Single click the result in the 'recognition result' list to manually correct inaccurate recognition results.
8. **Click "Check", the image status will switch to "√",then the program automatically jump to the next.**
@@ -143,15 +145,17 @@ python PPOCRLabel.py
### 3.1 Shortcut keys
| Shortcut keys | Description |
-| ------------------------ | ------------------------------------------------ |
+|--------------------------|--------------------------------------------------|
| Ctrl + Shift + R | Re-recognize all the labels of the current image |
| W | Create a rect box |
| Q | Create a four-points box |
+| X | Rotate the box anti-clockwise |
+| C | Rotate the box clockwise |
| Ctrl + E | Edit label of the selected box |
| Ctrl + R | Re-recognize the selected box |
| Ctrl + C | Copy and paste the selected box |
| Ctrl + Left Mouse Button | Multi select the label box |
-| Backspace | Delete the selected box |
+| Alt + X | Delete the selected box |
| Ctrl + V | Check image |
| Ctrl + Shift + d | Delete image |
| D | Next image |
diff --git a/PPOCRLabel/README_ch.md b/PPOCRLabel/README_ch.md
index e1c391bc8637baa4adfa8852d805ed0f4bf04d6d..2226336631c68a892e3a7075b2dc8d65bccdf204 100644
--- a/PPOCRLabel/README_ch.md
+++ b/PPOCRLabel/README_ch.md
@@ -8,6 +8,8 @@ PPOCRLabel是一款适用于OCR领域的半自动化图形标注工具,内置P
#### 近期更新
+- 2022.01:(by [PeterH0323](https://github.com/peterh0323) )
+ - 提升用户体验:新增文件与标记数目提示、优化交互、修复gpu使用等问题
- 2021.11.17:
- 新增支持通过whl包安装和启动PPOCRLabel(by [d2623587501](https://github.com/d2623587501))
- 标注数据集切分:对标注数据进行训练、验证与测试集划分(参考下方3.5节,by [MrCuiHao](https://github.com/MrCuiHao))
@@ -102,7 +104,7 @@ python PPOCRLabel.py --lang ch
4. 手动标注:点击 “矩形标注”(推荐直接在英文模式下点击键盘中的 “W”),用户可对当前图片中模型未检出的部分进行手动绘制标记框。点击键盘Q,则使用四点标注模式(或点击“编辑” - “四点标注”),用户依次点击4个点后,双击左键表示标注完成。
5. 标记框绘制完成后,用户点击 “确认”,检测框会先被预分配一个 “待识别” 标签。
6. 重新识别:将图片中的所有检测画绘制/调整完成后,点击 “重新识别”,PPOCR模型会对当前图片中的**所有检测框**重新识别[3]。
-7. 内容更改:双击识别结果,对不准确的识别结果进行手动更改。
+7. 内容更改:单击识别结果,对不准确的识别结果进行手动更改。
8. **确认标记:点击 “确认”,图片状态切换为 “√”,跳转至下一张。**
9. 删除:点击 “删除图像”,图片将会被删除至回收站。
10. 导出结果:用户可以通过菜单中“文件-导出标记结果”手动导出,同时也可以点击“文件 - 自动导出标记结果”开启自动导出。手动确认过的标记将会被存放在所打开图片文件夹下的*Label.txt*中。在菜单栏点击 “文件” - "导出识别结果"后,会将此类图片的识别训练数据保存在*crop_img*文件夹下,识别标签保存在*rec_gt.txt*中[4]。
@@ -131,23 +133,25 @@ python PPOCRLabel.py --lang ch
### 3.1 快捷键
-| 快捷键 | 说明 |
-| ---------------- | ---------------------------- |
-| Ctrl + shift + R | 对当前图片的所有标记重新识别 |
-| W | 新建矩形框 |
-| Q | 新建四点框 |
-| Ctrl + E | 编辑所选框标签 |
-| Ctrl + R | 重新识别所选标记 |
+| 快捷键 | 说明 |
+|------------------|----------------|
+| Ctrl + shift + R | 对当前图片的所有标记重新识别 |
+| W | 新建矩形框 |
+| Q | 新建四点框 |
+| X | 框逆时针旋转 |
+| C | 框顺时针旋转 |
+| Ctrl + E | 编辑所选框标签 |
+| Ctrl + R | 重新识别所选标记 |
| Ctrl + C | 复制并粘贴选中的标记框 |
-| Ctrl + 鼠标左键 | 多选标记框 |
-| Backspace | 删除所选框 |
-| Ctrl + V | 确认本张图片标记 |
-| Ctrl + Shift + d | 删除本张图片 |
-| D | 下一张图片 |
-| A | 上一张图片 |
-| Ctrl++ | 缩小 |
-| Ctrl-- | 放大 |
-| ↑→↓← | 移动标记框 |
+| Ctrl + 鼠标左键 | 多选标记框 |
+| Alt + X | 删除所选框 |
+| Ctrl + V | 确认本张图片标记 |
+| Ctrl + Shift + d | 删除本张图片 |
+| D | 下一张图片 |
+| A | 上一张图片 |
+| Ctrl++ | 缩小 |
+| Ctrl-- | 放大 |
+| ↑→↓← | 移动标记框 |
### 3.2 内置模型
diff --git a/PPOCRLabel/combobox.py b/PPOCRLabel/combobox.py
deleted file mode 100644
index 8526b45392902e564364be27948517377217bbeb..0000000000000000000000000000000000000000
--- a/PPOCRLabel/combobox.py
+++ /dev/null
@@ -1,46 +0,0 @@
-# Copyright (c) <2015-Present> Tzutalin
-# Copyright (C) 2013 MIT, Computer Science and Artificial Intelligence Laboratory. Bryan Russell, Antonio Torralba,
-# William T. Freeman. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
-# associated documentation files (the "Software"), to deal in the Software without restriction, including without
-# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the
-# Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
-# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
-# the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
-# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
-# SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
-# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-
-import sys
-try:
- from PyQt5.QtWidgets import QWidget, QHBoxLayout, QComboBox
-except ImportError:
- # needed for py3+qt4
- # Ref:
- # http://pyqt.sourceforge.net/Docs/PyQt4/incompatible_apis.html
- # http://stackoverflow.com/questions/21217399/pyqt4-qtcore-qvariant-object-instead-of-a-string
- if sys.version_info.major >= 3:
- import sip
- sip.setapi('QVariant', 2)
- from PyQt4.QtGui import QWidget, QHBoxLayout, QComboBox
-
-
-class ComboBox(QWidget):
- def __init__(self, parent=None, items=[]):
- super(ComboBox, self).__init__(parent)
-
- layout = QHBoxLayout()
- self.cb = QComboBox()
- self.items = items
- self.cb.addItems(self.items)
-
- self.cb.currentIndexChanged.connect(parent.comboSelectionChanged)
-
- layout.addWidget(self.cb)
- self.setLayout(layout)
-
- def update_items(self, items):
- self.items = items
-
- self.cb.clear()
- self.cb.addItems(self.items)
diff --git a/PPOCRLabel/libs/autoDialog.py b/PPOCRLabel/libs/autoDialog.py
index 3374e92cc587baa7e8bab5c7d8e8dc34eb6366b6..189a590de851228e08d71f1dd2c00c823b9c2b0c 100644
--- a/PPOCRLabel/libs/autoDialog.py
+++ b/PPOCRLabel/libs/autoDialog.py
@@ -6,6 +6,8 @@ except ImportError:
from PyQt4.QtGui import *
from PyQt4.QtCore import *
+import time
+import datetime
import json
import cv2
import numpy as np
@@ -80,8 +82,9 @@ class AutoDialog(QDialog):
self.parent = parent
self.ocr = ocr
self.mImgList = mImgList
+ self.lender = lenbar
self.pb = QProgressBar()
- self.pb.setRange(0, lenbar)
+ self.pb.setRange(0, self.lender)
self.pb.setValue(0)
layout = QVBoxLayout()
@@ -108,10 +111,16 @@ class AutoDialog(QDialog):
self.thread_1.progressBarValue.connect(self.handleProgressBarSingal)
self.thread_1.listValue.connect(self.handleListWidgetSingal)
self.thread_1.endsignal.connect(self.handleEndsignalSignal)
+ self.time_start = time.time() # save start time
def handleProgressBarSingal(self, i):
self.pb.setValue(i)
+ # calculate time left of auto labeling
+ avg_time = (time.time() - self.time_start) / i # Use average time to prevent time fluctuations
+ time_left = str(datetime.timedelta(seconds=avg_time * (self.lender - i))).split(".")[0] # Remove microseconds
+ self.setWindowTitle("PPOCRLabel -- " + f"Time Left: {time_left}") # show
+
def handleListWidgetSingal(self, i):
self.listWidget.addItem(i)
titem = self.listWidget.item(self.listWidget.count() - 1)
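
The time-left estimate added to `handleProgressBarSingal` above is a simple average-time extrapolation. A minimal standalone sketch of the same calculation (plain function arguments stand in for the dialog's attributes and are illustrative only):

```python
import time
import datetime


def format_time_left(start_time, processed, total):
    """Estimate remaining time from the average time per processed image."""
    avg_time = (time.time() - start_time) / max(processed, 1)  # guard against division by zero
    seconds_left = avg_time * (total - processed)
    # str(timedelta) looks like "0:01:23.456789"; drop the microseconds part
    return str(datetime.timedelta(seconds=seconds_left)).split(".")[0]


if __name__ == "__main__":
    start = time.time()
    time.sleep(0.1)  # pretend one image took 0.1 s
    print(format_time_left(start, processed=1, total=100))
```
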
diff --git a/PPOCRLabel/libs/canvas.py b/PPOCRLabel/libs/canvas.py
index 6116f357d6efb91a5a9d9cdc6ba757fbd06df60e..8d257e6bd7e7a61d7c28e9787042c3eb9d42609f 100644
--- a/PPOCRLabel/libs/canvas.py
+++ b/PPOCRLabel/libs/canvas.py
@@ -11,19 +11,13 @@
# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
-try:
- from PyQt5.QtGui import *
- from PyQt5.QtCore import *
- from PyQt5.QtWidgets import *
-except ImportError:
- from PyQt4.QtGui import *
- from PyQt4.QtCore import *
-
-#from PyQt4.QtOpenGL import *
+import copy
+from PyQt5.QtCore import Qt, pyqtSignal, QPointF, QPoint
+from PyQt5.QtGui import QPainter, QBrush, QColor, QPixmap
+from PyQt5.QtWidgets import QWidget, QMenu, QApplication
from libs.shape import Shape
from libs.utils import distance
-import copy
CURSOR_DEFAULT = Qt.ArrowCursor
CURSOR_POINT = Qt.PointingHandCursor
@@ -31,8 +25,6 @@ CURSOR_DRAW = Qt.CrossCursor
CURSOR_MOVE = Qt.ClosedHandCursor
CURSOR_GRAB = Qt.OpenHandCursor
-# class Canvas(QGLWidget):
-
class Canvas(QWidget):
zoomRequest = pyqtSignal(int)
@@ -129,7 +121,6 @@ class Canvas(QWidget):
def selectedVertex(self):
return self.hVertex is not None
-
def mouseMoveEvent(self, ev):
"""Update line with last point and current coordinates."""
pos = self.transformPos(ev.pos())
@@ -333,7 +324,6 @@ class Canvas(QWidget):
self.movingShape = False
-
def endMove(self, copy=False):
assert self.selectedShapes and self.selectedShapesCopy
assert len(self.selectedShapesCopy) == len(self.selectedShapes)
@@ -410,7 +400,6 @@ class Canvas(QWidget):
self.selectionChanged.emit(shapes)
self.update()
-
def selectShapePoint(self, point, multiple_selection_mode):
"""Select the first shape created which contains this point."""
if self.selectedVertex(): # A vertex is marked for selection.
@@ -494,7 +483,6 @@ class Canvas(QWidget):
else:
shape.moveVertexBy(index, shiftPos)
-
def boundedMoveShape(self, shapes, pos):
if type(shapes).__name__ != 'list': shapes = [shapes]
if self.outOfPixmap(pos):
@@ -515,6 +503,7 @@ class Canvas(QWidget):
if dp:
for shape in shapes:
shape.moveBy(dp)
+ shape.close()
self.prevPoint = pos
return True
return False
@@ -728,6 +717,31 @@ class Canvas(QWidget):
self.moveOnePixel('Up')
elif key == Qt.Key_Down and self.selectedShapes:
self.moveOnePixel('Down')
+ elif key == Qt.Key_X and self.selectedShapes:
+ for i in range(len(self.selectedShapes)):
+ self.selectedShape = self.selectedShapes[i]
+ if self.rotateOutOfBound(0.01):
+ continue
+ self.selectedShape.rotate(0.01)
+ self.shapeMoved.emit()
+ self.update()
+
+ elif key == Qt.Key_C and self.selectedShapes:
+ for i in range(len(self.selectedShapes)):
+ self.selectedShape = self.selectedShapes[i]
+ if self.rotateOutOfBound(-0.01):
+ continue
+ self.selectedShape.rotate(-0.01)
+ self.shapeMoved.emit()
+ self.update()
+
+ def rotateOutOfBound(self, angle):
+ for shape in range(len(self.selectedShapes)):
+ self.selectedShape = self.selectedShapes[shape]
+ for i, p in enumerate(self.selectedShape.points):
+ if self.outOfPixmap(self.selectedShape.rotatePoint(p, angle)):
+ return True
+ return False
def moveOnePixel(self, direction):
# print(self.selectedShape.points)
diff --git a/PPOCRLabel/libs/editinlist.py b/PPOCRLabel/libs/editinlist.py
index c1b8249baeab81a22c22f6b183866954b2ee45de..79d2d3aa371ac076de513a4d52ea51b27c6e08f2 100644
--- a/PPOCRLabel/libs/editinlist.py
+++ b/PPOCRLabel/libs/editinlist.py
@@ -1,31 +1,29 @@
-import sys, time
-from PyQt5 import QtWidgets
-from PyQt5.QtGui import *
-from PyQt5.QtCore import *
-from PyQt5.QtWidgets import *
+# !/usr/bin/env python
+# -*- coding: utf-8 -*-
+from PyQt5.QtCore import QModelIndex
+from PyQt5.QtWidgets import QListWidget
+
class EditInList(QListWidget):
def __init__(self):
- super(EditInList,self).__init__()
- # click to edit
- self.clicked.connect(self.item_clicked)
+ super(EditInList, self).__init__()
+ self.edited_item = None
+
+ def item_clicked(self, modelindex: QModelIndex):
+ try:
+ if self.edited_item is not None:
+ self.closePersistentEditor(self.edited_item)
+ except:
+ self.edited_item = self.currentItem()
- def item_clicked(self, modelindex: QModelIndex) -> None:
- self.edited_item = self.currentItem()
- self.closePersistentEditor(self.edited_item)
- item = self.item(modelindex.row())
- # time.sleep(0.2)
- self.edited_item = item
- self.openPersistentEditor(item)
- # time.sleep(0.2)
- self.editItem(item)
+ self.edited_item = self.item(modelindex.row())
+ self.openPersistentEditor(self.edited_item)
+ self.editItem(self.edited_item)
def mouseDoubleClickEvent(self, event):
- # close edit
- for i in range(self.count()):
- self.closePersistentEditor(self.item(i))
+ pass
def leaveEvent(self, event):
# close edit
for i in range(self.count()):
- self.closePersistentEditor(self.item(i))
\ No newline at end of file
+ self.closePersistentEditor(self.item(i))
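
With the `clicked` connection removed from `__init__`, the owning widget is now expected to connect the signal itself. A minimal usage sketch (the list contents and the import path are assumptions for illustration):

```python
import sys
from PyQt5.QtWidgets import QApplication
from libs.editinlist import EditInList  # assumes PPOCRLabel/ is on sys.path

app = QApplication(sys.argv)
rec_list = EditInList()
rec_list.addItems(["text line 1", "text line 2"])
# a single click opens a persistent editor on the clicked row
rec_list.clicked.connect(rec_list.item_clicked)
rec_list.show()
sys.exit(app.exec_())
```
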
diff --git a/PPOCRLabel/libs/shape.py b/PPOCRLabel/libs/shape.py
index e2cdcb322790c9b6edd3c504405ad65097a7bc49..528b1102b010ceef8fa1057309e652010a91376d 100644
--- a/PPOCRLabel/libs/shape.py
+++ b/PPOCRLabel/libs/shape.py
@@ -10,19 +10,14 @@
# SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
-#!/usr/bin/python
+# !/usr/bin/python
# -*- coding: utf-8 -*-
+import math
+import sys
-
-try:
- from PyQt5.QtGui import *
- from PyQt5.QtCore import *
-except ImportError:
- from PyQt4.QtGui import *
- from PyQt4.QtCore import *
-
+from PyQt5.QtCore import QPointF
+from PyQt5.QtGui import QColor, QPen, QPainterPath, QFont
from libs.utils import distance
-import sys
DEFAULT_LINE_COLOR = QColor(0, 255, 0, 128)
DEFAULT_FILL_COLOR = QColor(255, 0, 0, 128)
@@ -59,6 +54,8 @@ class Shape(object):
self.difficult = difficult
self.paintLabel = paintLabel
self.locked = False
+ self.direction = 0
+ self.center = None
self._highlightIndex = None
self._highlightMode = self.NEAR_VERTEX
self._highlightSettings = {
@@ -74,7 +71,24 @@ class Shape(object):
# is used for drawing the pending line a different color.
self.line_color = line_color
+ def rotate(self, theta):
+ for i, p in enumerate(self.points):
+ self.points[i] = self.rotatePoint(p, theta)
+ self.direction -= theta
+ self.direction = self.direction % (2 * math.pi)
+
+ def rotatePoint(self, p, theta):
+ order = p - self.center
+ cosTheta = math.cos(theta)
+ sinTheta = math.sin(theta)
+ pResx = cosTheta * order.x() + sinTheta * order.y()
+ pResy = - sinTheta * order.x() + cosTheta * order.y()
+ pRes = QPointF(self.center.x() + pResx, self.center.y() + pResy)
+ return pRes
+
def close(self):
+ self.center = QPointF((self.points[0].x() + self.points[2].x()) / 2,
+ (self.points[0].y() + self.points[2].y()) / 2)
self._closed = True
def reachMaxPoints(self):
@@ -83,7 +97,9 @@ class Shape(object):
return False
def addPoint(self, point):
- if not self.reachMaxPoints(): # 4个点时发出close信号
+ if self.reachMaxPoints():
+ self.close()
+ else:
self.points.append(point)
def popPoint(self):
@@ -112,7 +128,7 @@ class Shape(object):
# Uncommenting the following line will draw 2 paths
# for the 1st vertex, and make it non-filled, which
# may be desirable.
- #self.drawVertex(vrtx_path, 0)
+ # self.drawVertex(vrtx_path, 0)
for i, p in enumerate(self.points):
line_path.lineTo(p)
@@ -136,9 +152,9 @@ class Shape(object):
font.setPointSize(8)
font.setBold(True)
painter.setFont(font)
- if(self.label == None):
+ if self.label is None:
self.label = ""
- if(min_y < MIN_Y_LABEL):
+ if min_y < MIN_Y_LABEL:
min_y += MIN_Y_LABEL
painter.drawText(min_x, min_y, self.label)
@@ -198,6 +214,8 @@ class Shape(object):
def copy(self):
shape = Shape("%s" % self.label)
shape.points = [p for p in self.points]
+ shape.center = self.center
+ shape.direction = self.direction
shape.fill = self.fill
shape.selected = self.selected
         shape._closed = self._closed
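
The `rotate`/`rotatePoint` methods introduced above apply a standard 2D rotation about the box center that `close()` now computes, and the X/C shortcuts in the canvas step the angle by 0.01 rad per press. A Qt-free sketch of the same math (plain tuples stand in for `QPointF`; for illustration only):

```python
import math


def rotate_point(point, center, theta):
    """Rotate a point about a center; mirrors Shape.rotatePoint (y grows downward on screen)."""
    dx, dy = point[0] - center[0], point[1] - center[1]
    cos_t, sin_t = math.cos(theta), math.sin(theta)
    return (center[0] + cos_t * dx + sin_t * dy,
            center[1] - sin_t * dx + cos_t * dy)


# rotate the four corners of a box about its center, as the X/C shortcuts do in 0.01 rad steps
box = [(0, 0), (4, 0), (4, 2), (0, 2)]
center = ((box[0][0] + box[2][0]) / 2, (box[0][1] + box[2][1]) / 2)
rotated = [rotate_point(p, center, 0.01) for p in box]
print(rotated)
```
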
diff --git a/README.md b/README.md
index 8002e2b03ef63afbeb4de435b8ce1960375c2bd5..8936fbaa27c92fc64a7098a9e79cc0fe923910fb 100644
--- a/README.md
+++ b/README.md
@@ -33,17 +33,17 @@ PaddleOCR aims to create multilingual, awesome, leading, and practical OCR tools
- [more](./doc/doc_en/update_en.md)
## Features
-- PP-OCR series of high-quality pre-trained models, comparable to commercial effects
+- PP-OCR - A series of high-quality pre-trained models, comparable to commercial products
- Ultra lightweight PP-OCRv2 series models: detection (3.1M) + direction classifier (1.4M) + recognition 8.5M) = 13.0M
- Ultra lightweight PP-OCR mobile series models: detection (3.0M) + direction classifier (1.4M) + recognition (5.0M) = 9.4M
- General PP-OCR server series models: detection (47.1M) + direction classifier (1.4M) + recognition (94.9M) = 143.4M
- Support Chinese, English, and digit recognition, vertical text recognition, and long text recognition
- - Support multi-language recognition: about 80 languages like Korean, Japanese, German, French, etc
+ - Support multi-lingual recognition: about 80 languages like Korean, Japanese, German, French, etc
- PP-Structure: a document structurize system
- - support layout analysis and table recognition (support export to Excel)
- - support key information extraction
- - support DocVQA
-- Rich toolkits related to the OCR areas
+ - Support layout analysis and table recognition (support export to Excel)
+ - Support key information extraction
+ - Support DocVQA
+- Rich OCR toolkit
- Semi-automatic data annotation tool, i.e., PPOCRLabel: support fast and efficient data annotation
- Data synthesis tool, i.e., Style-Text: easy to synthesize a large number of images which are similar to the target scene image
- Support user-defined training, provides rich predictive inference deployment solutions
@@ -62,7 +62,7 @@ The above pictures are the visualizations of the general ppocr_server model. For
## Community
-- Scan the QR code below with your Wechat, you can access to official technical exchange group. Look forward to your participation.
+- Scan the QR code below with your WeChat to join the official technical discussion group. Looking forward to your participation.
@@ -120,8 +120,8 @@ For a new language request, please refer to [Guideline for new language_requests
- [PP-Structure: Information Extraction](./ppstructure/README.md)
- [Layout Parser](./ppstructure/layout/README.md)
- [Table Recognition](./ppstructure/table/README.md)
- - [DocVQA](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.4/ppstructure/vqa)
- - [Key Information Extraction](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.4/ppstructure/docs/kie.md)
+ - [DocVQA](./ppstructure/vqa/README.md)
+ - [Key Information Extraction](./ppstructure/docs/kie.md)
- Academic Circles
- [Two-stage Algorithm](./doc/doc_en/algorithm_overview_en.md)
- [PGNet Algorithm](./doc/doc_en/pgnet_en.md)
diff --git a/README_ch.md b/README_ch.md
index d16b9c0ebff5b47238a7cc246edc4534e54f3efe..f39f3cb9acdb921773368170cae355bd9d29d4ce 100755
--- a/README_ch.md
+++ b/README_ch.md
@@ -99,8 +99,8 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力
- [PP-Structure信息提取](./ppstructure/README_ch.md)
- [版面分析](./ppstructure/layout/README_ch.md)
- [表格识别](./ppstructure/table/README_ch.md)
- - [DocVQA](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.4/ppstructure/vqa)
- - [关键信息提取](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.4/ppstructure/docs/kie.md)
+ - [DocVQA](./ppstructure/vqa/README_ch.md)
+ - [关键信息提取](./ppstructure/docs/kie.md)
- OCR学术圈
- [两阶段模型介绍与下载](./doc/doc_ch/algorithm_overview.md)
- [端到端PGNet算法](./doc/doc_ch/pgnet.md)
diff --git a/deploy/android_demo/app/src/main/java/com/baidu/paddle/lite/demo/ocr/Predictor.java b/deploy/android_demo/app/src/main/java/com/baidu/paddle/lite/demo/ocr/Predictor.java
index b474d8886a10746b8ac181085c62481dfe7a4229..8bcd79b95b322a38dcd56d6ffe3a203d3d1ea6ae 100644
--- a/deploy/android_demo/app/src/main/java/com/baidu/paddle/lite/demo/ocr/Predictor.java
+++ b/deploy/android_demo/app/src/main/java/com/baidu/paddle/lite/demo/ocr/Predictor.java
@@ -160,6 +160,7 @@ public class Predictor {
for (String content : contents) {
wordLabels.add(content);
}
+ wordLabels.add(" ");
Log.i(TAG, "Word label size: " + wordLabels.size());
} catch (Exception e) {
Log.e(TAG, e.getMessage());
diff --git a/deploy/cpp_infer/readme_en.md b/deploy/cpp_infer/readme_en.md
index 4daa73453507959ea10e21a7383d03d00aedf438..8c5a323af40e64f77e76cba23fd5c4408c643de5 100644
--- a/deploy/cpp_infer/readme_en.md
+++ b/deploy/cpp_infer/readme_en.md
@@ -1,9 +1,8 @@
# Server-side C++ Inference
-This chapter introduces the C++ deployment method of the PaddleOCR model, and the corresponding python predictive deployment method refers to [document](../../doc/doc_ch/inference.md).
-C++ is better than python in terms of performance calculation. Therefore, in most CPU and GPU deployment scenarios, C++ deployment is mostly used.
-This section will introduce how to configure the C++ environment and complete it in the Linux\Windows (CPU\GPU) environment
-PaddleOCR model deployment.
+This chapter introduces the C++ deployment steps of the PaddleOCR model. For the corresponding Python inference deployment, please refer to the [document](../../doc/doc_ch/inference.md).
+C++ performs better than Python. Therefore, C++ deployment is widely used in CPU and GPU deployment scenarios.
+This section will introduce how to configure the C++ environment and deploy PaddleOCR in a Linux (CPU/GPU) environment. For Windows deployment, please refer to the [Windows compilation guidelines](./docs/windows_vs2019_build.md).
## 1. Prepare the Environment
@@ -15,7 +14,7 @@ PaddleOCR model deployment.
### 1.1 Compile OpenCV
-* First of all, you need to download the source code compiled package in the Linux environment from the opencv official website. Taking opencv3.4.7 as an example, the download command is as follows.
+* First of all, you need to download the OpenCV source package for the Linux environment from the OpenCV official website. Taking OpenCV 3.4.7 as an example, the download command is as follows.
```bash
cd deploy/cpp_infer
@@ -23,9 +22,9 @@ wget https://paddleocr.bj.bcebos.com/libs/opencv/opencv-3.4.7.tar.gz
tar -xf opencv-3.4.7.tar.gz
```
-Finally, you can see the folder of `opencv-3.4.7/` in the current directory.
+Finally, you will see the folder `opencv-3.4.7/` in the current directory.
-* Compile opencv, the opencv source path (`root_path`) and installation path (`install_path`) should be set by yourself. Enter the opencv source code path and compile it in the following way.
+* To compile OpenCV, the OpenCV source path (`root_path`) and installation path (`install_path`) should be set by yourself. Enter the OpenCV source directory and compile it in the following way.
```shell
@@ -58,11 +57,11 @@ make -j
make install
```
-Among them, `root_path` is the downloaded opencv source code path, and `install_path` is the installation path of opencv. After `make install` is completed, the opencv header file and library file will be generated in this folder for later OCR source code compilation.
+In the above commands, `root_path` is the downloaded OpenCV source code path, and `install_path` is the installation path of OpenCV. After `make install` is completed, the OpenCV header files and library files will be generated in this folder for later OCR source code compilation.
-The final file structure under the opencv installation path is as follows.
+The final file structure under the OpenCV installation path is as follows.
```
opencv3/
@@ -79,20 +78,20 @@ opencv3/
#### 1.2.1 Direct download and installation
-[Paddle inference library official website](https://paddle-inference.readthedocs.io/en/latest/user_guides/download_lib.html). You can view and select the appropriate version of the inference library on the official website.
+[Paddle inference library official website](https://paddle-inference.readthedocs.io/en/latest/user_guides/download_lib.html). You can review and select the appropriate version of the inference library on the official website.
-* After downloading, use the following method to uncompress.
+* After downloading, use the following command to extract files.
```
tar -xf paddle_inference.tgz
```
-Finally you can see the following files in the folder of `paddle_inference/`.
+Finally, you will see the folder `paddle_inference/` in the current path.
-#### 1.2.2 Compile from the source code
-* If you want to get the latest Paddle inference library features, you can download the latest code from Paddle github repository and compile the inference library from the source code. It is recommended to download the inference library with paddle version greater than or equal to 2.0.1.
-* You can refer to [Paddle inference library] (https://www.paddlepaddle.org.cn/documentation/docs/en/advanced_guide/inference_deployment/inference/build_and_install_lib_en.html) to get the Paddle source code from github, and then compile To generate the latest inference library. The method of using git to access the code is as follows.
+#### 1.2.2 Compile the inference source code
+* If you want to get the latest Paddle inference library features, you can download the latest code from the Paddle GitHub repository and compile the inference library from the source code. It is recommended to use an inference library with Paddle version greater than or equal to 2.0.1.
+* You can refer to the [Paddle inference library](https://www.paddlepaddle.org.cn/documentation/docs/en/advanced_guide/inference_deployment/inference/build_and_install_lib_en.html) documentation to get the Paddle source code from GitHub, and then compile it to generate the latest inference library. The method of using git to access the code is as follows.
```shell
@@ -100,7 +99,7 @@ git clone https://github.com/PaddlePaddle/Paddle.git
git checkout develop
```
-* After entering the Paddle directory, the commands to compile the paddle inference library are as follows.
+* Enter the Paddle directory and run the following commands to compile the Paddle inference library.
```shell
rm -rf build
@@ -133,14 +132,14 @@ build/paddle_inference_install_dir/
|-- version.txt
```
-Among them, `paddle` is the Paddle library required for C++ prediction later, and `version.txt` contains the version information of the current inference library.
+`paddle` is the Paddle library required for C++ prediction later, and `version.txt` contains the version information of the current inference library.
## 2. Compile and Run the Demo
### 2.1 Export the inference model
-* You can refer to [Model inference](../../doc/doc_ch/inference.md),export the inference model. After the model is exported, assuming it is placed in the `inference` directory, the directory structure is as follows.
+* You can refer to [Model inference](../../doc/doc_ch/inference.md) and export the inference model. After the model is exported, assuming it is placed in the `inference` directory, the directory structure is as follows.
```
inference/
@@ -171,20 +170,28 @@ CUDA_LIB_DIR=your_cuda_lib_dir
CUDNN_LIB_DIR=your_cudnn_lib_dir
```
-`OPENCV_DIR` is the opencv installation path; `LIB_DIR` is the download (`paddle_inference` folder)
+`OPENCV_DIR` is the OpenCV installation path; `LIB_DIR` is the downloaded (`paddle_inference` folder)
or the generated Paddle inference library path (`build/paddle_inference_install_dir` folder);
-`CUDA_LIB_DIR` is the cuda library file path, in docker; it is `/usr/local/cuda/lib64`; `CUDNN_LIB_DIR` is the cudnn library file path, in docker it is `/usr/lib/x86_64-linux-gnu/`.
+`CUDA_LIB_DIR` is the CUDA library file path; in Docker it is `/usr/local/cuda/lib64`. `CUDNN_LIB_DIR` is the cuDNN library file path; in Docker it is `/usr/lib/x86_64-linux-gnu/`.
* After the compilation is completed, an executable file named `ppocr` will be generated in the `build` folder.
### Run the demo
-Execute the built executable file:
+Execute the built executable file:
```shell
./build/ppocr
[--param1] [--param2] [...]
```
-Here, `mode` is a required parameter,and the value range is ['det', 'rec', 'system'], representing using detection only, using recognition only and using the end-to-end system respectively. Specifically,
+`mode` is a required parameter, and the valid values are:
+
+| mode value | Model used |
+| --- | --- |
+| det | Detection only |
+| rec | Recognition only |
+| system | End-to-end system |
+
+Specifically,
##### 1. run det demo:
```shell
@@ -214,9 +221,9 @@ Here, `mode` is a required parameter,and the value range is ['det', 'rec', 'sy
--image_dir=../../doc/imgs/12.jpg
```
-More parameters are as follows,
+More parameters are as follows,
-- common parameters
+- Common parameters
|parameter|data type|default|meaning|
| --- | --- | --- | --- |
@@ -226,7 +233,7 @@ More parameters are as follows,
|cpu_math_library_num_threads|int|10|Number of threads when using CPU inference. When machine cores is enough, the large the value, the faster the inference speed|
|use_mkldnn|bool|true|Whether to use mkdlnn library|
-- detection related parameters
+- Detection related parameters
|parameter|data type|default|meaning|
| --- | --- | --- | --- |
@@ -238,7 +245,7 @@ More parameters are as follows,
|use_polygon_score|bool|false|Whether to use polygon box to calculate bbox score, false means to use rectangle box to calculate. Use rectangular box to calculate faster, and polygonal box more accurate for curved text area.|
|visualize|bool|true|Whether to visualize the results,when it is set as true, The prediction result will be save in the image file `./ocr_vis.png`.|
-- classifier related parameters
+- Classifier related parameters
|parameter|data type|default|meaning|
| --- | --- | --- | --- |
@@ -246,7 +253,7 @@ More parameters are as follows,
|cls_model_dir|string|-|Address of direction classifier inference model|
|cls_thresh|float|0.9|Score threshold of the direction classifier|
-- recogniton related parameters
+- Recognition related parameters
|parameter|data type|default|meaning|
| --- | --- | --- | --- |
@@ -265,4 +272,4 @@ The detection results will be shown on the screen, which is as follows.
### 2.3 Notes
-* Paddle2.0.0 inference model library is recommended for this toturial.
+* Paddle 2.0.0 inference model library is recommended for this tutorial.
diff --git a/deploy/docker/hubserving/README.md b/deploy/docker/hubserving/README.md
index d4db277ffbeaf1efba18c0caef550404e08f2e85..a9cfc898105de146b32cb44eddcb85ddfe74687b 100644
--- a/deploy/docker/hubserving/README.md
+++ b/deploy/docker/hubserving/README.md
@@ -1,9 +1,9 @@
English | [简体中文](README_cn.md)
## Introduction
-Many users hope package the PaddleOCR service into a docker image, so that it can be quickly released and used in the docker or k8s environment.
+Many users hope to package the PaddleOCR service into a Docker image, so that it can be quickly released and used in a Docker or K8s environment.
-This page provides some standardized code to achieve this goal. You can quickly publish the PaddleOCR project into a callable Restful API service through the following steps. (At present, the deployment based on the HubServing mode is implemented first, and author plans to increase the deployment of the PaddleServing mode in the futrue)
+This page provides some standardized code to achieve this goal. You can quickly publish the PaddleOCR project into a callable RESTful API service through the following steps. (At present, the deployment based on the HubServing mode is implemented first, and the author plans to add support for PaddleServing mode deployment in the future.)
## 1. Prerequisites
@@ -14,7 +14,7 @@ c. NVIDIA Container Toolkit(GPU,Docker 19.03+ can skip this)
d. cuDNN 7.6+(GPU)
## 2. Build Image
-a. Goto Dockerfile directory(ps:Need to distinguish between cpu and gpu version, the following takes cpu as an example, gpu version needs to replace the keyword)
+a. Go to the Dockerfile directory (PS: you need to distinguish between the CPU and GPU versions; the following takes CPU as an example, and for the GPU version you need to replace the keyword accordingly)
```
cd deploy/docker/hubserving/cpu
```
@@ -42,13 +42,13 @@ docker logs -f paddle_ocr
```
## 4. Test
-a. Calculate the Base64 encoding of the picture to be recognized (if you just test, you can use a free online tool, like:https://freeonlinetools24.com/base64-image/)
+a. Calculate the Base64 encoding of the picture to be recognized (for testing purposes, you can use a free online tool such as https://freeonlinetools24.com/base64-image/ )
b. Post a service request(sample request in sample_request.txt)
```
curl -H "Content-Type:application/json" -X POST --data "{\"images\": [\"Input image Base64 encode(need to delete the code 'data:image/jpg;base64,')\"]}" http://localhost:8868/predict/ocr_system
```
-c. Get resposne(If the call is successful, the following result will be returned)
+c. Get the response (if the call is successful, the following result will be returned)
```
{"msg":"","results":[[{"confidence":0.8403433561325073,"text":"约定","text_region":[[345,377],[641,390],[634,540],[339,528]]},{"confidence":0.8131805658340454,"text":"最终相遇","text_region":[[356,532],[624,530],[624,596],[356,598]]}]],"status":"0"}
```
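
The same request can also be issued from Python instead of curl; a minimal sketch mirroring the payload above (the image path is a placeholder, and the `requests` package is assumed to be installed):

```python
import base64
import json
import requests

# read and Base64-encode the image, matching the payload expected by /predict/ocr_system
with open("test.jpg", "rb") as f:
    img_b64 = base64.b64encode(f.read()).decode("utf-8")

resp = requests.post(
    "http://localhost:8868/predict/ocr_system",
    headers={"Content-Type": "application/json"},
    data=json.dumps({"images": [img_b64]}),
)
print(resp.json())
```
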
diff --git a/deploy/lite/readme_en.md b/deploy/lite/readme_en.md
index 2c04c5673b1d7afec99d4e1219a20462c55be203..65dd10b710ed23ceb6ba01cd8f29b1bd01f1cf09 100644
--- a/deploy/lite/readme_en.md
+++ b/deploy/lite/readme_en.md
@@ -1,8 +1,8 @@
# Tutorial of PaddleOCR Mobile deployment
-This tutorial will introduce how to use [Paddle Lite](https://github.com/PaddlePaddle/Paddle-Lite) to deploy paddleOCR ultra-lightweight Chinese and English detection models on mobile phones.
+This tutorial will introduce how to use [Paddle Lite](https://github.com/PaddlePaddle/Paddle-Lite) to deploy PaddleOCR ultra-lightweight Chinese and English detection models on mobile phones.
-paddle-lite is a lightweight inference engine for PaddlePaddle. It provides efficient inference capabilities for mobile phones and IoTs, and extensively integrates cross-platform hardware to provide lightweight deployment solutions for end-side deployment issues.
+Paddle Lite is a lightweight inference engine for PaddlePaddle. It provides efficient inference capabilities for mobile phones and IoT devices, and extensively integrates cross-platform hardware to provide lightweight deployment solutions for on-device deployment.
## 1. Preparation
diff --git a/deploy/pdserving/README_CN.md b/deploy/pdserving/README_CN.md
index 00024639b0b108225a0835499f62174b6618ae47..2652ddeb86ee16549cbad3cd205e26cf4ea5f01b 100644
--- a/deploy/pdserving/README_CN.md
+++ b/deploy/pdserving/README_CN.md
@@ -22,6 +22,7 @@ PaddleOCR提供2种服务部署方式:
- [环境准备](#环境准备)
- [模型转换](#模型转换)
- [Paddle Serving pipeline部署](#部署)
+- [Windows用户](#Windows用户)
- [FAQ](#FAQ)
@@ -187,9 +188,10 @@ python3 -m paddle_serving_client.convert --dirname ./ch_PP-OCRv2_rec_infer/ \
2021-05-13 03:42:36,979 chl2(In: ['rec'], Out: ['@DAGExecutor']) size[0/0]
```
-## WINDOWS用户
+
+## Windows用户
-Windows用户不能使用上述的启动方式,需要使用Web Service,详情参见[Windows平台使用Paddle Serving指导](https://github.com/PaddlePaddle/Serving/blob/develop/doc/WINDOWS_TUTORIAL_CN.md)
+Windows用户不能使用上述的启动方式,需要使用Web Service,详情参见[Windows平台使用Paddle Serving指导](https://github.com/PaddlePaddle/Serving/blob/develop/doc/Windows_Tutorial_CN.md)
**WINDOWS只能使用0.5.0版本的CPU模式**
diff --git a/deploy/slim/prune/README_en.md b/deploy/slim/prune/README_en.md
index fe9c5dcd6660757d11a884d4dbe077e95fad8afe..f0d652f249686c1d462cd2aa71f4766cf39e763e 100644
--- a/deploy/slim/prune/README_en.md
+++ b/deploy/slim/prune/README_en.md
@@ -28,14 +28,14 @@ python3 setup.py install
```
-### 2. Download Pretrain Model
+### 2. Download Pre-trained Model
Model prune needs to load pre-trained models.
PaddleOCR also provides a series of [models](../../../doc/doc_en/models_list_en.md). Developers can choose their own models or use their own models according to their needs.
### 3. Pruning sensitivity analysis
- After the pre-training model is loaded, sensitivity analysis is performed on each network layer of the model to understand the redundancy of each network layer, and save a sensitivity file which named: sen.pickle. After that, user could load the sensitivity file via the [methods provided by PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/paddleslim/prune/sensitive.py#L221) and determining the pruning ratio of each network layer automatically. For specific details of sensitivity analysis, see:[Sensitivity analysis](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/docs/zh_cn/tutorials/image_classification_sensitivity_analysis_tutorial.md)
+ After the pre-trained model is loaded, sensitivity analysis is performed on each network layer of the model to understand its redundancy, and a sensitivity file named `sen.pickle` is saved. After that, users can load the sensitivity file via the [methods provided by PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/paddleslim/prune/sensitive.py#L221) and determine the pruning ratio of each network layer automatically. For specific details of sensitivity analysis, see: [Sensitivity analysis](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/docs/zh_cn/tutorials/image_classification_sensitivity_analysis_tutorial.md)
The data format of sensitivity file:
sen.pickle(Dict){
'layer_weight_name_0': sens_of_each_ratio(Dict){'pruning_ratio_0': acc_loss, 'pruning_ratio_1': acc_loss}
@@ -47,7 +47,7 @@ PaddleOCR also provides a series of [models](../../../doc/doc_en/models_list_en.
'conv10_expand_weights': {0.1: 0.006509952684312718, 0.2: 0.01827734339798862, 0.3: 0.014528405644659832, 0.6: 0.06536008804270439, 0.8: 0.11798612250664964, 0.7: 0.12391408417493704, 0.4: 0.030615754498018757, 0.5: 0.047105205602406594}
'conv10_linear_weights': {0.1: 0.05113190831455035, 0.2: 0.07705573833558801, 0.3: 0.12096721757739311, 0.6: 0.5135061352930738, 0.8: 0.7908166677143281, 0.7: 0.7272187676899062, 0.4: 0.1819252083008504, 0.5: 0.3728054727792405}
}
- The function would return a dict after loading the sensitivity file. The keys of the dict are name of parameters in each layer. And the value of key is the information about pruning sensitivity of correspoding layer. In example, pruning 10% filter of the layer corresponding to conv10_expand_weights would lead to 0.65% degradation of model performance. The details could be seen at: [Sensitivity analysis](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/docs/zh_cn/algo/algo.md#2-%E5%8D%B7%E7%A7%AF%E6%A0%B8%E5%89%AA%E8%A3%81%E5%8E%9F%E7%90%86)
+ The function returns a dict after loading the sensitivity file. The keys of the dict are the names of the parameters in each layer, and the value of each key is the pruning sensitivity information of the corresponding layer. For example, pruning 10% of the filters of the layer corresponding to conv10_expand_weights would lead to a 0.65% degradation of model performance. The details can be seen at: [Sensitivity analysis](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/docs/zh_cn/algo/algo.md#2-%E5%8D%B7%E7%A7%AF%E6%A0%B8%E5%89%AA%E8%A3%81%E5%8E%9F%E7%90%86)
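
A quick way to inspect `sen.pickle` is to load it and read a layer's ratio-to-accuracy-loss map; a small sketch, assuming the file is a plain pickle of the dict shown above (the layer name is taken from the example):

```python
import pickle

# load the sensitivity file produced by the analysis step
with open("sen.pickle", "rb") as f:
    sens = pickle.load(f)

# keys are layer parameter names; values map pruning ratio -> accuracy loss
for ratio, acc_loss in sorted(sens["conv10_expand_weights"].items()):
    print(f"pruning ratio {ratio:.1f} -> accuracy loss {acc_loss:.4f}")
```
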
Enter the PaddleOCR root directory,perform sensitivity analysis on the model with the following command:
diff --git a/deploy/slim/quantization/README_en.md b/deploy/slim/quantization/README_en.md
index bf3e91d69a298a7aaa8da21c22c45e62713e50cf..4cafe5f44e48a479cf5b0e4209b8e335a7e4917d 100644
--- a/deploy/slim/quantization/README_en.md
+++ b/deploy/slim/quantization/README_en.md
@@ -1,7 +1,7 @@
## Introduction
-Generally, a more complex model would achive better performance in the task, but it also leads to some redundancy in the model.
+Generally, a more complex model would achieve better performance in the task, but it also leads to some redundancy in the model.
Quantization is a technique that reduces this redundancy by reducing the full precision data to a fixed number,
so as to reduce model calculation complexity and improve model inference performance.
@@ -31,14 +31,14 @@ python setup.py install
```
-### 2. Download Pretrain Model
-PaddleOCR provides a series of trained [models](../../../doc/doc_en/models_list_en.md).
+### 2. Download Pre-trained Model
+PaddleOCR provides a series of pre-trained [models](../../../doc/doc_en/models_list_en.md).
If the model to be quantified is not in the list, you need to follow the [Regular Training](../../../doc/doc_en/quickstart_en.md) method to get the trained model.
### 3. Quant-Aware Training
Quantization training includes offline quantization training and online quantization training.
-Online quantization training is more effective. It is necessary to load the pre-training model.
+Online quantization training is more effective. It is necessary to load the pre-trained model.
After the quantization strategy is defined, the model can be quantified.
The code for quantization training is located in `slim/quantization/quant.py`. For example, to train a detection model, the training instructions are as follows:
@@ -54,7 +54,7 @@ python deploy/slim/quantization/quant.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3
### 4. Export inference model
-After getting the model after pruning and finetuning we, can export it as inference_model for predictive deployment:
+Once we have obtained the pruned and fine-tuned model, we can export it as an inference model for the deployment of predictive tasks:
```bash
python deploy/slim/quantization/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.checkpoints=output/quant_model/best_accuracy Global.save_inference_dir=./output/quant_inference_model
diff --git a/doc/doc_ch/algorithm_overview.md b/doc/doc_ch/algorithm_overview.md
index 0fd2f5b754e87da0926e7b28d40790c2f26cc0f6..0db6c6f7ff97a743d3f947d0588639ba267d9fc4 100755
--- a/doc/doc_ch/algorithm_overview.md
+++ b/doc/doc_ch/algorithm_overview.md
@@ -61,18 +61,18 @@ PaddleOCR基于动态图开源的文本识别算法列表:
|模型|骨干网络|Avg Accuracy|模型存储命名|下载链接|
|---|---|---|---|---|
-|Rosetta|Resnet34_vd|80.9%|rec_r34_vd_none_none_ctc|[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_none_ctc_v2.0_train.tar)|
-|Rosetta|MobileNetV3|78.05%|rec_mv3_none_none_ctc|[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_none_ctc_v2.0_train.tar)|
-|CRNN|Resnet34_vd|82.76%|rec_r34_vd_none_bilstm_ctc|[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_bilstm_ctc_v2.0_train.tar)|
-|CRNN|MobileNetV3|79.97%|rec_mv3_none_bilstm_ctc|[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_bilstm_ctc_v2.0_train.tar)|
-|StarNet|Resnet34_vd|84.44%|rec_r34_vd_tps_bilstm_ctc|[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_tps_bilstm_ctc_v2.0_train.tar)|
-|StarNet|MobileNetV3|81.42%|rec_mv3_tps_bilstm_ctc|[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_tps_bilstm_ctc_v2.0_train.tar)|
-|RARE|MobileNetV3|82.5%|rec_mv3_tps_bilstm_att |[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_tps_bilstm_att_v2.0_train.tar)|
-|RARE|Resnet34_vd|83.6%|rec_r34_vd_tps_bilstm_att |[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_tps_bilstm_att_v2.0_train.tar)|
-|SRN|Resnet50_vd_fpn| 88.52% | rec_r50fpn_vd_none_srn | [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r50_vd_srn_train.tar) |
-|NRTR|NRTR_MTB| 84.3% | rec_mtb_nrtr | [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mtb_nrtr_train.tar) |
-|SAR|Resnet31| 87.2% | rec_r31_sar | [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.1/rec/rec_r31_sar_train.tar) |
-|SEED|Aster_Resnet| 85.2% | rec_resnet_stn_bilstm_att | [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.1/rec/rec_resnet_stn_bilstm_att.tar) |
+|Rosetta|Resnet34_vd|79.11%|rec_r34_vd_none_none_ctc|[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_none_ctc_v2.0_train.tar)|
+|Rosetta|MobileNetV3|75.80%|rec_mv3_none_none_ctc|[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_none_ctc_v2.0_train.tar)|
+|CRNN|Resnet34_vd|81.04%|rec_r34_vd_none_bilstm_ctc|[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_bilstm_ctc_v2.0_train.tar)|
+|CRNN|MobileNetV3|77.95%|rec_mv3_none_bilstm_ctc|[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_bilstm_ctc_v2.0_train.tar)|
+|StarNet|Resnet34_vd|82.85%|rec_r34_vd_tps_bilstm_ctc|[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_tps_bilstm_ctc_v2.0_train.tar)|
+|StarNet|MobileNetV3|79.28%|rec_mv3_tps_bilstm_ctc|[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_tps_bilstm_ctc_v2.0_train.tar)|
+|RARE|Resnet34_vd|83.98%|rec_r34_vd_tps_bilstm_att |[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_tps_bilstm_att_v2.0_train.tar)|
+|RARE|MobileNetV3|81.76%|rec_mv3_tps_bilstm_att |[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_tps_bilstm_att_v2.0_train.tar)|
+|SRN|Resnet50_vd_fpn| 86.31% | rec_r50fpn_vd_none_srn | [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r50_vd_srn_train.tar) |
+|NRTR|NRTR_MTB| 84.21% | rec_mtb_nrtr | [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mtb_nrtr_train.tar) |
+|SAR|Resnet31| 87.20% | rec_r31_sar | [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.1/rec/rec_r31_sar_train.tar) |
+|SEED|Aster_Resnet| 85.35% | rec_resnet_stn_bilstm_att | [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.1/rec/rec_resnet_stn_bilstm_att.tar) |
diff --git a/doc/doc_ch/android_demo.md b/doc/doc_ch/android_demo.md
index b6d9a7921b950a5a3c911802f0fa0ad40ab4d850..3b12308257c998387d0a95a46bcfdc7d8837caaf 100644
--- a/doc/doc_ch/android_demo.md
+++ b/doc/doc_ch/android_demo.md
@@ -14,12 +14,12 @@ Demo测试的时候使用的是NDK 20b版本,20版本以上均可以支持编
1. Start a new Android Studio project
- 在项目模版中选择 Native C++ 选择PaddleOCR/depoly/android_demo 路径
+ 在项目模版中选择 Native C++ 选择PaddleOCR/deploy/android_demo 路径
进入项目后会自动编译,第一次编译会花费较长的时间,建议添加代理加速下载。
**代理添加:**
-选择 Android Studio -> Perferences -> Appearance & Behavior -> System Settings -> HTTP Proxy -> Manual proxy configuration
+选择 Android Studio -> Preferences -> Appearance & Behavior -> System Settings -> HTTP Proxy -> Manual proxy configuration
![](../demo/proxy.png)
diff --git a/doc/doc_ch/code_and_doc.md b/doc/doc_ch/code_and_doc.md
index 5ef76914689138a197292efc3654c3e591144fa7..43e28d7ab920133e2d99a78a40ca4c6af667b020 100644
--- a/doc/doc_ch/code_and_doc.md
+++ b/doc/doc_ch/code_and_doc.md
@@ -16,7 +16,7 @@ PaddleOCR的Python代码遵循 [PEP8规范](https://www.python.org/dev/peps/pep-
- 空格
- - 空格应该加在逗号、分号、冒号前,而非他们的后面
+ - 空格应该加在逗号、分号、冒号后,而非他们的前面
```python
# 正确:
@@ -334,4 +334,4 @@ git push origin new_branch
2)如果评审意见比较多:
- 请给出总体的修改情况。
-- 请采用`start a review`进行回复,而非直接回复的方式。原因是每个回复都会发送一封邮件,会造成邮件灾难。
\ No newline at end of file
+- 请采用`start a review`进行回复,而非直接回复的方式。原因是每个回复都会发送一封邮件,会造成邮件灾难。
diff --git a/doc/doc_ch/knowledge_distillation.md b/doc/doc_ch/knowledge_distillation.md
index 1c33a6c1fe13671a0f95bde4db63d0b9946ca180..c8ac40486871d4b63c799c22704725781889bce4 100644
--- a/doc/doc_ch/knowledge_distillation.md
+++ b/doc/doc_ch/knowledge_distillation.md
@@ -37,7 +37,7 @@
无论是大模型蒸馏小模型,还是小模型之间互相学习,更新参数,他们本质上是都是不同模型之间输出或者特征图(feature map)之间的相互监督,区别仅在于 (1) 模型是否需要固定参数。(2) 模型是否需要加载预训练模型。
-对于大模型蒸馏小模型的情况,大模型一般需要加载预训练模型并固定参数;对于小模型之间互相蒸馏的情况,小模型一般都不加载预训练模型,参数也都是可学习的状态。
+对于大模型蒸馏小模型的情况,大模型一般需要加载预训练模型并固定参数;对于小模型之间互相蒸馏的情况,小模型一般都不加载预训练模型,参数也都是可学习的状态。
在知识蒸馏任务中,不只有2个模型之间进行蒸馏的情况,多个模型之间互相学习的情况也非常普遍。因此在知识蒸馏代码框架中,也有必要支持该种类别的蒸馏方法。
@@ -550,7 +550,7 @@ Metric:
- 采用ch_PP-OCRv2_det_cml.yml,采用cml蒸馏,同样Teacher模型设置为PaddleOCR提供的模型或者您训练好的大模型
- 采用ch_PP-OCRv2_det_dml.yml,采用DML的蒸馏,两个Student模型互蒸馏的方法,在PaddleOCR采用的数据集上大约有1.7%的精度提升。
-在具体finetune时,需要在网络结构的`pretrained`参数中设置要加载的预训练模型。
+在具体fine-tune时,需要在网络结构的`pretrained`参数中设置要加载的预训练模型。
在精度提升方面,cml的精度>dml的精度>distill蒸馏方法的精度。当数据量不足或者Teacher模型精度与Student精度相差不大的时候,这个结论或许会改变。
diff --git a/doc/doc_ch/thirdparty.md b/doc/doc_ch/thirdparty.md
index 7466a6edf40d533b95cb6124da9d347dbcf877a6..d317d139a66057e9957a5b6edb9fe2d59a35427e 100644
--- a/doc/doc_ch/thirdparty.md
+++ b/doc/doc_ch/thirdparty.md
@@ -16,22 +16,20 @@ PaddleOCR希望可以通过AI的力量助力任何一位有梦想的开发者实
### 1.1 基于PaddleOCR的社区项目
-- 【最新】 [FastOCRLabel](https://gitee.com/BaoJianQiang/FastOCRLabel):完整的C#版本标注工具 (@ [包建强](https://gitee.com/BaoJianQiang) )
-
-#### 1.1.1 通用工具
-
-- [DangoOCR离线版](https://github.com/PantsuDango/DangoOCR):通用型桌面级即时翻译工具 (@ [PantsuDango](https://github.com/PantsuDango))
-- [scr2txt](https://github.com/lstwzd/scr2txt):截屏转文字工具 (@ [lstwzd](https://github.com/lstwzd))
-- [AI Studio项目](https://aistudio.baidu.com/aistudio/projectdetail/1054614?channelType=0&channel=0):英文视频自动生成字幕( @ [叶月水狐](https://aistudio.baidu.com/aistudio/personalcenter/thirdview/322052))
-
-#### 1.1.2 垂类场景工具
-
-- [id_card_ocr](https://github.com/baseli/id_card_ocr):身份证复印件识别(@ [baseli](https://github.com/baseli))
-- [Paddle_Table_Image_Reader](https://github.com/thunder95/Paddle_Table_Image_Reader):能看懂表格图片的数据助手(@ [thunder95](https://github.com/thunder95]))
-
-#### 1.1.3 前后处理
-
-- [paddleOCRCorrectOutputs](https://github.com/yuranusduke/paddleOCRCorrectOutputs):获取OCR识别结果的key-value(@ [yuranusduke](https://github.com/yuranusduke))
+| 类别 | 项目 | 描述 | 开发者 |
+| -------- | ------------------------------------------------------------ | -------------------------- | ------------------------------------------------------------ |
+| 通用工具 | [FastOCRLabel](https://gitee.com/BaoJianQiang/FastOCRLabel) | 完整的C#版本标注GUI | [包建强](https://gitee.com/BaoJianQiang) |
+| 通用工具 | [DangoOCR离线版](https://github.com/PantsuDango/DangoOCR) | 通用型桌面级即时翻译GUI | [PantsuDango](https://github.com/PantsuDango) |
+| 通用工具 | [scr2txt](https://github.com/lstwzd/scr2txt) | 截屏转文字GUI | [lstwzd](https://github.com/lstwzd) |
+| 通用工具 | [ocr_sdk](https://github.com/mymagicpower/AIAS/blob/main/1_image_sdks/text_recognition/ocr_sdk) | OCR java SDK工具箱 | [Calvin](https://github.com/mymagicpower) |
+| 通用工具 | [iocr](https://github.com/mymagicpower/AIAS/blob/main/8_suite_hub/iocr) | IOCR 自定义模板识别(支持表格识别) | [Calvin](https://github.com/mymagicpower) |
+| 垂类工具 | [AI Studio项目](https://aistudio.baidu.com/aistudio/projectdetail/1054614?channelType=0&channel=0) | 英文视频自动生成字幕 | [叶月水狐](https://aistudio.baidu.com/aistudio/personalcenter/thirdview/322052) |
+| 垂类工具 | [id_card_ocr](https://github.com/baseli/id_card_ocr) | 身份证复印件识别 | [baseli](https://github.com/baseli) |
+| 垂类工具 | [Paddle_Table_Image_Reader](https://github.com/thunder95/Paddle_Table_Image_Reader) | 能看懂表格图片的数据助手 | [thunder95](https://github.com/thunder95) |
+| 前后处理 | [paddleOCRCorrectOutputs](https://github.com/yuranusduke/paddleOCRCorrectOutputs) | 获取OCR识别结果的key-value | [yuranusduke](https://github.com/yuranusduke) |
+|前处理| [optlab](https://github.com/GreatV/optlab) |OCR前处理工具箱,基于Qt和Leptonica。|[GreatV](https://github.com/GreatV)|
+|应用部署| [PaddleOCRSharp](https://github.com/raoyutian/PaddleOCRSharp) |PaddleOCR的.NET封装与应用部署。|[raoyutian](https://github.com/raoyutian/PaddleOCRSharp)|
+| 学术前沿模型训练与推理 | [AI Studio项目](https://aistudio.baidu.com/aistudio/projectdetail/3397137) | StarNet-MobileNetV3算法–中文训练 | [xiaoyangyang2](https://github.com/xiaoyangyang2) |
### 1.2 为PaddleOCR新增功能
@@ -67,17 +65,17 @@ PaddleOCR非常欢迎社区贡献以PaddleOCR为核心的各种服务、部署
如果您在使用PaddleOCR时遇到了代码bug、功能不符合预期等问题,可以为PaddleOCR贡献您的修改,其中:
-- Python代码规范可参考[附录1:Python代码规范](./code_and_doc.md/#附录1)。
+- Python代码规范可参考[附录1:Python代码规范](./code_and_doc.md#附录1)。
-- 提交代码前请再三确认不会引入新的bug,并在PR中描述优化点。如果该PR解决了某个issue,请在PR中连接到该issue。所有的PR都应该遵守附录3中的[3.2.10 提交代码的一些约定。](./code_and_doc.md/#提交代码的一些约定)
+- 提交代码前请再三确认不会引入新的bug,并在PR中描述优化点。如果该PR解决了某个issue,请在PR中连接到该issue。所有的PR都应该遵守附录3中的[3.2.10 提交代码的一些约定。](./code_and_doc.md#提交代码的一些约定)
-- 请在提交之前参考下方的[附录3:Pull Request说明](./code_and_doc.md/#附录3)。如果您对git的提交流程不熟悉,同样可以参考附录3的3.2节。
+- 请在提交之前参考下方的[附录3:Pull Request说明](./code_and_doc.md#附录3)。如果您对git的提交流程不熟悉,同样可以参考附录3的3.2节。
**最后请在PR的题目中加上标签`【third-party】` , 在说明中@Evezerest,拥有此标签的PR将会被高优处理**。
### 2.3 文档优化
-如果您在使用PaddleOCR时遇到了文档表述不清楚、描述缺失、链接失效等问题,可以为PaddleOCR贡献您的修改。文档书写规范请参考[附录2:文档规范](./code_and_doc.md/#附录2)。**最后请在PR的题目中加上标签`【third-party】` , 在说明中@Evezerest,拥有此标签的PR将会被高优处理。**
+如果您在使用PaddleOCR时遇到了文档表述不清楚、描述缺失、链接失效等问题,可以为PaddleOCR贡献您的修改。文档书写规范请参考[附录2:文档规范](./code_and_doc.md#附录2)。**最后请在PR的题目中加上标签`【third-party】` , 在说明中@Evezerest,拥有此标签的PR将会被高优处理。**
## 3. 更多贡献机会
diff --git a/doc/doc_ch/update.md b/doc/doc_ch/update.md
index de5cdaf2aa24aa4c32e81001cdccec1156ee8605..c4c870681c6ccb5ad7702101312e5dbe47e9cb85 100644
--- a/doc/doc_ch/update.md
+++ b/doc/doc_ch/update.md
@@ -9,7 +9,7 @@
- 2020.12.07 [FAQ](../../doc/doc_ch/FAQ.md)新增5个高频问题,总数124个,并且计划以后每周一都会更新,欢迎大家持续关注。
- 2020.11.25 更新半自动标注工具[PPOCRLabel](../../PPOCRLabel/README_ch.md),辅助开发者高效完成标注任务,输出格式与PP-OCR训练任务完美衔接。
- 2020.9.22 更新PP-OCR技术文章,https://arxiv.org/abs/2009.09941
-- 2020.9.19 更新超轻量压缩ppocr_mobile_slim系列模型,整体模型3.5M(详见PP-OCR Pipline),适合在移动端部署使用。
+- 2020.9.19 更新超轻量压缩ppocr_mobile_slim系列模型,整体模型3.5M(详见PP-OCR Pipeline),适合在移动端部署使用。
- 2020.9.17 更新超轻量ppocr_mobile系列和通用ppocr_server系列中英文ocr模型,媲美商业效果。
- 2020.9.17 更新[英文识别模型](./models_list.md#english-recognition-model)和[多语种识别模型](./models_list.md#english-recognition-model),已支持`德语、法语、日语、韩语`,更多语种识别模型将持续更新。
- 2020.8.26 更新OCR相关的84个常见问题及解答,具体参考[FAQ](./FAQ.md)
diff --git a/doc/doc_en/FAQ_en.md b/doc/doc_en/FAQ_en.md
index 25777be77b6393c09c38e3c319ca1bd50cc3b1e8..5cf82a78720d15ce5b0aac37c409921474923813 100644
--- a/doc/doc_en/FAQ_en.md
+++ b/doc/doc_en/FAQ_en.md
@@ -1,7 +1,7 @@
## FAQ
1. **Prediction error: got an unexpected keyword argument 'gradient_clip'**
-The installed version of paddle is incorrect. Currently, this project only supports paddle1.7, which will be adapted to 1.8 in the near future.
+The installed version of Paddle is incorrect. Currently, this project only supports Paddle 1.7, which will be adapted to 1.8 in the near future.
2. **Error when converting attention recognition model: KeyError: 'predict'**
Solved. Please update to the latest version of the code.
@@ -31,7 +31,7 @@ At present, PaddleOCR has opensourced two Chinese models, namely 8.6M ultra-ligh
|General Chinese OCR model|Resnet50_vd+Resnet34_vd|det_r50_vd_db.yml|rec_chinese_common_train.yml|
8. **Is there a plan to opensource a model that only recognizes numbers or only English + numbers?**
-It is not planned to opensource numbers only, numbers + English only, or other vertical text models. Paddleocr has opensourced a variety of detection and recognition algorithms for customized training. The two Chinese models are also based on the training output of the open-source algorithm library. You can prepare the data according to the tutorial, choose the appropriate configuration file, train yourselves, and we believe that you can get good result. If you have any questions during the training, you are welcome to open issues or ask in the communication group. We will answer them in time.
+It is not planned to opensource numbers only, numbers + English only, or other vertical text models. PaddleOCR has opensourced a variety of detection and recognition algorithms for customized training. The two Chinese models are also based on the training output of the open-source algorithm library. You can prepare the data according to the tutorial, choose the appropriate configuration file, train yourselves, and we believe that you can get good result. If you have any questions during the training, you are welcome to open issues or ask in the communication group. We will answer them in time.
9. **What is the training data used by the open-source model? Can it be opensourced?**
At present, the open source model, dataset and magnitude are as follows:
@@ -46,11 +46,11 @@ At present, the open source model, dataset and magnitude are as follows:
10. **Error in using the model with TPS module for prediction**
Error message: Input(X) dims[3] and Input(Grid) dims[2] should be equal, but received X dimension[3]\(108) != Grid dimension[2]\(100)
-Solution:TPS does not support variable shape. Please set --rec_image_shape='3,32,100' and --rec_char_type='en'
+Solution: TPS does not support variable shape. Please set --rec_image_shape='3,32,100' and --rec_char_type='en'
11. **Custom dictionary used during training, the recognition results show that words do not appear in the dictionary**
The used custom dictionary path is not set when making prediction. The solution is setting parameter `rec_char_dict_path` to the corresponding dictionary file.
12. **Results of cpp_infer and python_inference are very different**
-Versions of exprted inference model and inference libraray should be same. For example, on Windows platform, version of the inference libraray that PaddlePaddle provides is 1.8, but version of the inference model that PaddleOCR provides is 1.7, you should export model yourself(`tools/export_model.py`) on PaddlePaddle1.8 and then use the exported model for inference.
+The versions of the exported inference model and the inference library should be the same. For example, on the Windows platform, the version of the inference library that PaddlePaddle provides is 1.8, but the version of the inference model that PaddleOCR provides is 1.7, so you should export the model yourself (`tools/export_model.py`) on PaddlePaddle 1.8 and then use the exported model for inference.
diff --git a/doc/doc_en/algorithm_overview_en.md b/doc/doc_en/algorithm_overview_en.md
index 8d27613ba88660d760cfe1dcd3a1872ece1a51ad..3e94360653b17443536297f33c05e338656bd89b 100755
--- a/doc/doc_en/algorithm_overview_en.md
+++ b/doc/doc_en/algorithm_overview_en.md
@@ -67,20 +67,20 @@ Refer to [DTRB](https://arxiv.org/abs/1904.01906), the training and evaluation r
|Model|Backbone|Avg Accuracy|Module combination|Download link|
|---|---|---|---|---|
-|Rosetta|Resnet34_vd|80.9%|rec_r34_vd_none_none_ctc|[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_none_ctc_v2.0_train.tar)|
-|Rosetta|MobileNetV3|78.05%|rec_mv3_none_none_ctc|[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_none_ctc_v2.0_train.tar)|
-|CRNN|Resnet34_vd|82.76%|rec_r34_vd_none_bilstm_ctc|[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_bilstm_ctc_v2.0_train.tar)|
-|CRNN|MobileNetV3|79.97%|rec_mv3_none_bilstm_ctc|[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_bilstm_ctc_v2.0_train.tar)|
-|StarNet|Resnet34_vd|84.44%|rec_r34_vd_tps_bilstm_ctc|[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_tps_bilstm_ctc_v2.0_train.tar)|
-|StarNet|MobileNetV3|81.42%|rec_mv3_tps_bilstm_ctc|[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_tps_bilstm_ctc_v2.0_train.tar)|
-|RARE|MobileNetV3|82.5%|rec_mv3_tps_bilstm_att |[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_tps_bilstm_att_v2.0_train.tar)|
-|RARE|Resnet34_vd|83.6%|rec_r34_vd_tps_bilstm_att |[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_tps_bilstm_att_v2.0_train.tar)|
-|SRN|Resnet50_vd_fpn| 88.52% | rec_r50fpn_vd_none_srn |[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r50_vd_srn_train.tar)|
-|NRTR|NRTR_MTB| 84.3% | rec_mtb_nrtr | [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mtb_nrtr_train.tar) |
-|SAR|Resnet31| 87.2% | rec_r31_sar | [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/rec/rec_r31_sar_train.tar) |
-|SEED|Aster_Resnet| 85.2% | rec_resnet_stn_bilstm_att | [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/rec/rec_resnet_stn_bilstm_att.tar) |
-
-Please refer to the document for training guide and use of PaddleOCR
+|Rosetta|Resnet34_vd|79.11%|rec_r34_vd_none_none_ctc|[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_none_ctc_v2.0_train.tar)|
+|Rosetta|MobileNetV3|75.80%|rec_mv3_none_none_ctc|[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_none_ctc_v2.0_train.tar)|
+|CRNN|Resnet34_vd|81.04%|rec_r34_vd_none_bilstm_ctc|[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_bilstm_ctc_v2.0_train.tar)|
+|CRNN|MobileNetV3|77.95%|rec_mv3_none_bilstm_ctc|[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_bilstm_ctc_v2.0_train.tar)|
+|StarNet|Resnet34_vd|82.85%|rec_r34_vd_tps_bilstm_ctc|[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_tps_bilstm_ctc_v2.0_train.tar)|
+|StarNet|MobileNetV3|79.28%|rec_mv3_tps_bilstm_ctc|[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_tps_bilstm_ctc_v2.0_train.tar)|
+|RARE|Resnet34_vd|83.98%|rec_r34_vd_tps_bilstm_att |[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_tps_bilstm_att_v2.0_train.tar)|
+|RARE|MobileNetV3|81.76%|rec_mv3_tps_bilstm_att |[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_tps_bilstm_att_v2.0_train.tar)|
+|SRN|Resnet50_vd_fpn| 86.31% | rec_r50fpn_vd_none_srn |[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r50_vd_srn_train.tar)|
+|NRTR|NRTR_MTB| 84.21% | rec_mtb_nrtr | [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mtb_nrtr_train.tar) |
+|SAR|Resnet31| 87.20% | rec_r31_sar | [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/rec/rec_r31_sar_train.tar) |
+|SEED|Aster_Resnet| 85.35% | rec_resnet_stn_bilstm_att | [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/rec/rec_resnet_stn_bilstm_att.tar) |
+
+Please refer to the documentation for the training guide and usage of PaddleOCR
## 2. Training
diff --git a/doc/doc_en/android_demo_en.md b/doc/doc_en/android_demo_en.md
index bab4f5275c785d0dbbc1456ae29b98ff82cbae1a..fd962bb2911d952a4ec9919eebf1903daca323c7 100644
--- a/doc/doc_en/android_demo_en.md
+++ b/doc/doc_en/android_demo_en.md
@@ -20,7 +20,7 @@ File -> New ->New Project to create "Native C++" project
**Agent add:**
- Android Studio -> Perferences -> Appearance & Behavior -> System Settings -> HTTP Proxy -> Manual proxy configuration
+ Android Studio -> Preferences -> Appearance & Behavior -> System Settings -> HTTP Proxy -> Manual proxy configuration
![](../demo/proxy.png)
diff --git a/doc/doc_en/angle_class_en.md b/doc/doc_en/angle_class_en.md
index b7fcd63e070318d3aab37714a1213ad9f56cb6fc..00888fb04b70753d3dbdd785b3d46f286aad0315 100644
--- a/doc/doc_en/angle_class_en.md
+++ b/doc/doc_en/angle_class_en.md
@@ -92,7 +92,7 @@ python3 -m paddle.distributed.launch --gpus '0,1,2,3,4,5,6,7' tools/train.py -c
PaddleOCR provides a variety of data augmentation methods. If you want to add disturbance during training, Please uncomment the `RecAug` and `RandAugment` fields under `Train.dataset.transforms` in the configuration file.
-The default perturbation methods are: cvtColor, blur, jitter, Gasuss noise, random crop, perspective, color reverse, RandAugment.
+The default perturbation methods are: cvtColor, blur, jitter, Gauss noise, random crop, perspective, color reverse, RandAugment.
Except for RandAugment, each disturbance method is selected with a 50% probability during the training process. For specific code implementation, please refer to:
[rec_img_aug.py](../../ppocr/data/imaug/rec_img_aug.py)
diff --git a/doc/doc_en/code_and_doc.md b/doc/doc_en/code_and_doc.md
new file mode 100644
index 0000000000000000000000000000000000000000..f3ee769e7dd7184226f5385056f2ef04c5000dbf
--- /dev/null
+++ b/doc/doc_en/code_and_doc.md
@@ -0,0 +1,349 @@
+ - Appendix
+
+ This appendix covers the Python code specification, the document specification, and the Pull Request process. Please follow the relevant guidelines.
+
+ - [Appendix 1: Python Code Specification](#Appendix1)
+
+ - [Appendix 2: Document Specification](#Appendix2)
+
+ - [Appendix 3: Pull Request Description](#Appendix3)
+
+
+
+ ## Appendix 1: Python Code Specification
+
+ The Python code of PaddleOCR follows the [PEP8 Specification](https://www.python.org/dev/peps/pep-0008/); some of the key points are as follows
+
+ - Space
+
+ - Spaces should be added after commas, semicolons, colons, not before them
+
+ ```python
+ # true:
+ print(x, y)
+
+ # false:
+ print(x , y)
+ ```
+
+ - When specifying a keyword argument or a default parameter value in a function, do not put spaces around the `=` sign
+
+ ```python
+ # true:
+ def complex(real, imag=0.0)
+ # false:
+ def complex(real, imag = 0.0)
+ ```
+
+ - comment
+
+ - Inline comments: inline comments are indicated by the `#` sign. Two spaces should be left between the code and `#`, and one space between `#` and the comment, for example
+
+ ```python
+ x = x + 1 # Compensate for border
+ ```
+
+ - Functions and methods: The definition of each function should include the following:
+
+ - Function description: purpose, inputs and outputs of the function
+
+ - Args: Name and description of each parameter
+ - Returns: The meaning and type of the return value
+
+ ```python
+ def fetch_bigtable_rows(big_table, keys, other_silly_variable=None):
+ """Fetches rows from a Bigtable.
+
+ Retrieves rows pertaining to the given keys from the Table instance
+ represented by big_table. Silly things may happen if
+ other_silly_variable is not None.
+
+ Args:
+ big_table: An open Bigtable Table instance.
+ keys: A sequence of strings representing the key of each table row
+ to fetch.
+ other_silly_variable: Another optional variable, that has a much
+ longer name than the other args, and which does nothing.
+
+ Returns:
+ A dict mapping keys to the corresponding table row data
+ fetched. Each row is represented as a tuple of strings. For
+ example:
+
+ {'Serak': ('Rigel VII', 'Preparer'),
+ 'Zim': ('Irk', 'Invader'),
+ 'Lrrr': ('Omicron Persei 8', 'Emperor')}
+
+ If a key from the keys argument is missing from the dictionary,
+ then that row was not found in the table.
+ """
+ pass
+ ```
+
+
+
+ ## Appendix 2: Document Specification
+
+ ### 2.1 Overall Description
+
+ - Document Location: If you add new features, please extend the existing Markdown file and **do not re-create** a new one. If you don't know where to add it, you can submit the code PR first and then ask the official maintainers in the PR comments.
+
+ - New Markdown Document Name: Describe the content of the document in English, typically as a combination of lowercase letters and underscores, such as `add_new_algorithm.md`
+
+ - New Markdown Document Format: Table of Contents - Body - FAQ
+
+ > The table of contents can be generated with [this site](https://ecotrust-canada.github.io/markdown-toc/): copy in the MD contents to extract the headings automatically, and then add the corresponding anchors before each heading of the MD file.
+
+ - English and Chinese: Any changes or additions to the document need to be made in both Chinese and English documents.
+
+ ### 2.2 Format Specification
+
+ - Title format: Document headings follow the pattern of Arabic numerals with decimal points, a space, and then the title (for example, `2.1 XXXX`, `2. XXXX`)
+
+ - Code block: Commands that need to be run should be displayed in a code block, with the meaning of the command parameters described before the block. For example:
+
+ > Pipeline of detection + direction classification + recognition: vertical text can be recognized after setting the direction classifier parameter `--use_angle_cls true`.
+ >
+ > ```
+ > paddleocr --image_dir ./imgs/11.jpg --use_angle_cls true
+ > ```
+
+ - Variable References: If code variables or command parameters are referenced inline, they should be written as inline code, for example `--use_angle_cls true` above, with one space before and one space after.
+
+ - Uniform naming: e.g. PP-OCRv2, PP-OCR mobile, `paddleocr` whl package, PPOCRLabel, Paddle Lite, etc.
+
+ - Supplementary notes: Add supplementary notes using the quote format `>`.
+
+ - Picture: If a picture is added to a document, give it a name that describes its content and place it under `doc/`.
+
+ - Title: Capitalize the first letter of each word in the title.
+
+
+
+ ## Appendix 3: Pull Request Description
+
+ ### 3.1 PaddleOCR Branch Description
+
+ PaddleOCR will maintain the following two branches:
+
+ - release/x.x family branches: stable release branches, including the default branch. PaddleOCR creates a new release branch based on feature updates and the corresponding release version of Paddle. As versions iterate there will be more and more release/x.x branches, and the default branch is the latest release branch.
+ - dygraph branch: the development branch, which adapts to the dygraph (dynamic graph) version of Paddle and is mainly used to develop new functionality. If you need to do further development, choose the dygraph branch. To ensure that a release/x.x branch can be pulled from the dygraph branch when needed, the dygraph branch can only use APIs that are already available in the latest release branch of Paddle. That is, if a new API has been developed in the Paddle dygraph branch but has not yet appeared in a release branch, do not use it in PaddleOCR. In addition, performance optimization, parameter tuning and policy updates that do not involve APIs can be developed normally.
+
+ The historical branches of PaddleOCR will no longer be actively maintained. Considering that some users may still rely on them, these branches will be kept for now:
+
+ - develop branch: this branch was used for the development and testing of the static graph version and is compatible with Paddle >= 1.7. If you have special needs, you can still use this branch with older versions of Paddle, but apart from bug fixes the code will not be updated.
+
+ PaddleOCR welcomes you to actively contribute code to repo. Here are some basic processes for contributing code.
+
+ ### 3.2 PaddleOCR Code Submission Process And Specification
+
+ > If you are familiar with Git use, you can jump directly to [Some Conventions For Submitting Code in 3.2.10](#Some_conventions_for_submitting_code)
+
+ #### 3.2.1 Create Your `Remote Repo`
+
+ - On the PaddleOCR [GitHub home page](https://github.com/PaddlePaddle/PaddleOCR), click the `Fork` button in the upper right corner to create a `remote repo` under your own account, such as `https://github.com/{your_name}/PaddleOCR`.
+
+ ![banner](../banner.png)
+
+ - Clone `Remote repo`
+
+ ```
+ # pull the code of the dygraph branch
+ git clone https://github.com/{your_name}/PaddleOCR.git -b dygraph
+ cd PaddleOCR
+ ```
+
+ > Clone failures are mostly due to network problems; try again later or configure a proxy
+
+ #### 3.2.2 Login And Connect Using Token
+
+ Start by viewing the information for the current `remote repo`.
+
+ ```
+ git remote -v
+ # origin https://github.com/{your_name}/PaddleOCR.git (fetch)
+ # origin https://github.com/{your_name}/PaddleOCR.git (push)
+ ```
+
+ This only shows the information of the cloned `remote repo`, i.e. the PaddleOCR under your username. Due to the change in GitHub's authentication method, you need to reconfigure the `remote repo` address by means of a token. The token is generated as follows:
+
+ 1. Find Personal Access Tokens: Click on your avatar in the upper right corner of the GitHub page and choose Settings --> Developer settings --> Personal access tokens.
+
+ 2. Click Generate new token: Fill in the token name in Note, such as 'paddle'. In Select scopes, select repo (required), admin:repo_hook, delete_repo, etc.; check them according to your needs. Then click Generate token to generate the token, and finally copy the generated token.
+
+ Delete the original origin configuration
+
+ ```
+ git remote rm origin
+ ```
+
+ Change the remote repo address to `https://oauth2:{token}@github.com/{your_name}/PaddleOCR.git`. For example, if the token value is 12345 and your user name is PPOCR, run the following command
+
+ ```
+ git remote add origin https://oauth2:12345@github.com/PPOCR/PaddleOCR.git
+ ```
+
+ This establishes a connection to our own `remote repo`. Next, we add the original PaddleOCR repo as a remote named upstream.
+
+ ```
+ git remote add upstream https://github.com/PaddlePaddle/PaddleOCR.git
+ ```
+
+ Use `git remote -v` to view the current `remote repo` information. The output is as follows and includes both the origin and the upstream remote repos.
+
+ ```
+ origin https://github.com/{your_name}/PaddleOCR.git (fetch)
+ origin https://github.com/{your_name}/PaddleOCR.git (push)
+ upstream https://github.com/PaddlePaddle/PaddleOCR.git (fetch)
+ upstream https://github.com/PaddlePaddle/PaddleOCR.git (push)
+ ```
+
+ This is mainly to keep the local repository up to date when subsequent pull request (PR) submissions are made.
+
+ #### 3.2.3 Create Local Branch
+
+ First get the latest code of upstream, then create a new_branch branch based on the dygraph branch of the upstream repo (upstream).
+
+ ```
+ git fetch upstream
+ git checkout -b new_branch upstream/dygraph
+ ```
+
+ > For a newly forked PaddleOCR project, the user's remote repo (origin) has the same branches as the upstream repo (upstream), so you can also create a new local branch based on the default branch of the origin repo, or on a specified branch, with the following commands
+ >
+ > ```
+ > # Create new_branch branch on user remote repo (origin) based on develop branch
+ > git checkout -b new_branch origin/develop
+ > # Create new_branch branch based on upstream remote repo develop branch
+ > # If you need to create a new branch from upstream,
+ > # you need to first use git fetch upstream to get upstream code
+ > git checkout -b new_branch upstream/develop
+ > ```
+
+ After switching to the new branch, the following output is displayed.
+
+ ```
+ Branch new_branch set up to track remote branch develop from upstream.
+ Switched to a new branch 'new_branch'
+ ```
+
+ After switching branches, file changes can be made on this branch
+
+ #### 3.2.4 Use Pre-Commit Hook
+
+ Paddle developers use the pre-commit tool to manage Git pre-commit hooks. It helps us format the source code (C++, Python) and automatically check for basic issues (such as each file ending with a single newline, or not adding large files to Git) before committing.
+
+ The pre-commit checks are part of the unit tests in Travis-CI. A PR that does not satisfy the hooks cannot be merged into PaddleOCR. Install pre-commit first and run it in the current directory:
+
+ ```
+ pip install pre-commit
+ pre-commit install
+ ```
+
+ > 1. Paddle uses clang-format to adjust the C/C++ source code format. Make sure the `clang-format` version is above 3.8.
+ >
+ > 2. The yapf installed through `pip install pre-commit` is slightly different from the one installed through `conda install -c conda-forge pre-commit`; PaddleOCR developers use `pip install pre-commit`.
+
+ #### 3.2.5 Modify And Submit Code
+
+ If you make some changes to `README.md` in PaddleOCR, you can view the changed file through `git status` and then add the changed file using `git add`.
+
+ ```
+ git status # View change files
+ git add README.md
+ pre-commit
+ ```
+
+ Repeat these steps until the pre-commit format check passes, as shown below.
+
+ ![img](../precommit_pass.png)
+
+ Use the following command to complete the submission.
+
+ ```
+ git commit -m "your commit info"
+ ```
+
+ #### 3.2.6 Keep Local Repo Up To Date
+
+ Get the latest code of upstream and update the current branch. Here the upstream comes from section 3.2.2, `Login And Connect Using Token`.
+
+ ```
+ git fetch upstream
+ # If you want to commit to another branch, you need to pull code from another branch of upstream, here is develop
+ git pull upstream develop
+ ```
+
+ #### 3.2.7 Push To Remote Repo
+
+ ```
+ git push origin new_branch
+ ```
+
+ #### 3.2.7 Submit Pull Request
+
+ Click the new pull request button, select the local branch and the target branch as shown in the following figure, and fill in the description of the PR with the functions completed by this PR. Then wait for review; if you need to modify something, update the corresponding branch in origin following the steps above.
+
+ ![banner](../pr.png)
+
+ #### 3.2.8 Sign CLA Agreement And Pass Unit Tests
+
+ - Signing the CLA: When submitting a Pull Request to PaddlePaddle for the first time, you need to sign the CLA (Contributor License Agreement) to ensure that your code can be merged. Proceed as follows:
+
+ 1. Please check the Checks section in the PR, find license/cla, and click Details on the right to enter the CLA website
+
+ 2. Click Sign in with GitHub to agree on the CLA website; after clicking, it will jump back to your Pull Request page
+
+ #### 3.2.9 Delete Branch
+
+ - Remove remote branch
+
+ After the PR is merged into the main repo, you can delete the branch of the remote repo from the PR page.
+ You can also delete remote branches with `git push origin :branch_name`, for example:
+
+ ```
+ git push origin :new_branch
+ ```
+
+ - Delete local branch
+
+ ```
+ # Switch to the development branch, otherwise the current branch cannot be deleted
+ git checkout develop
+
+ # Delete the new_branch branch
+ git branch -D new_branch
+ ```
+
+
+
+ #### 3.2.10 Some Conventions For Submitting Code
+
+ In order for official maintainers to better focus on the code itself when reviewing it, please follow these conventions each time you submit your code:
+
+ 1) Please ensure that the unit tests in Travis-CI pass. If they do not, it indicates that there is a problem with the submitted code, and the official maintainers generally will not review it.
+
+ 2) Before submitting a Pull Request:
+
+ - Note the number of commits.
+
+ Reason: If you only modify one file but submit more than a dozen commits, each making only a few changes, it can be very confusing to the reviewer. The reviewer needs to look at each commit individually to see what changes have been made, and the changes between commits may even overlap one another.
+
+ Suggestion: Keep as few commits as possible each time you submit, and fold small follow-up changes into your last commit with `git commit --amend`. For multiple commits that have already been pushed to the remote repo, you can refer to [squash commits after push](https://stackoverflow.com/questions/5667884/how-to-squash-commits-in-git-after-they-have-been-pushed).
+
+ - Note the name of each commit: it should reflect the content of the current commit, not be too arbitrary.
+
+
+ 3) If you have solved an issue, add `fix #issue_number` in the first comment box of the Pull Request. This will automatically close the corresponding issue when the Pull Request is merged. Keywords include: close, closes, closed, fix, fixes, fixed, resolve, resolves, resolved; please choose the right keyword. For details, refer to [Closing issues via commit messages](https://help.github.com/articles/closing-issues-via-commit-messages).
+
+ In addition, in response to the reviewer's comments, you are requested to abide by the following conventions:
+
+ 1) Please respond to every review comment from the official maintainers; this helps strengthen the contribution culture of the open source community.
+
+ - If you agree with a review comment and have modified the code accordingly, simply reply Done.
+ - If you disagree with a review comment, please explain your reasons.
+
+ 2) If there are many review comments:
+
+ - Please give an overview of the changes.
+ - Please reply using `start a review` rather than replying to each comment directly. The reason is that every direct reply sends an e-mail, which can cause an e-mail flood.
diff --git a/doc/doc_en/community_contribution_en.md b/doc/doc_en/community_contribution_en.md
new file mode 100644
index 0000000000000000000000000000000000000000..43ce20c6d21824e690dcd5ea2059c15f844291c3
--- /dev/null
+++ b/doc/doc_en/community_contribution_en.md
@@ -0,0 +1,100 @@
+# COMMUNITY CONTRIBUTION
+
+Thank you for your support and interest in PaddleOCR. The goal of PaddleOCR is to build a professional, harmonious and supportive open source community with developers. This document presents existing community contributions, explanations for various contributions, and new opportunities and processes to make the contribution process more efficient and clear.
+
+PaddleOCR wants to help any developer with a dream realize their vision and enjoy the joy of creating value through the power of AI.
+
+---
+
+
+
+
+
+> The picture above shows PaddleOCR's current contributors, updated regularly.
+
+## 1. COMMUNITY CONTRIBUTION
+
+### 1.1 PaddleOCR BASED COMMUNITY PROJECT
+
+- 【The latest】 [FastOCRLabel](https://gitee.com/BaoJianQiang/FastOCRLabel): Complete C# version annotation tool (@ [包建强](https://gitee.com/BaoJianQiang) )
+
+#### 1.1.1 UNIVERSAL TOOL
+
+- [DangoOCR offline version](https://github.com/PantsuDango/DangoOCR):Universal desktop instant translation tool (@ [PantsuDango](https://github.com/PantsuDango))
+- [scr2txt](https://github.com/lstwzd/scr2txt):Screenshot to Text tool (@ [lstwzd](https://github.com/lstwzd))
+- [AI Studio project](https://aistudio.baidu.com/aistudio/projectdetail/1054614?channelType=0&channel=0):Automatically generates subtitles for English videos( @ [叶月水狐](https://aistudio.baidu.com/aistudio/personalcenter/thirdview/322052))
+
+#### 1.1.2 VERTICAL SCENE TOOLS
+
+- [id_card_ocr](https://github.com/baseli/id_card_ocr):Identification of copy of ID card(@ [baseli](https://github.com/baseli))
+- [Paddle_Table_Image_Reader](https://github.com/thunder95/Paddle_Table_Image_Reader): A data assistant that can read tables and pictures(@ [thunder95](https://github.com/thunder95))
+
+#### 1.1.3 PRE AND POST PROCESSING
+
+- [paddleOCRCorrectOutputs](https://github.com/yuranusduke/paddleOCRCorrectOutputs):Get the key-value of OCR recognition result (@ [yuranusduke](https://github.com/yuranusduke))
+
+### 1.2 NEW FEATURES FOR PaddleOCR
+
+- Thanks [authorfu](https://github.com/authorfu) for contributing the Android demo([#340](https://github.com/PaddlePaddle/PaddleOCR/pull/340)) and [xiadeye](https://github.com/xiadeye) for contributing the iOS demo code([#325](https://github.com/PaddlePaddle/PaddleOCR/pull/325)).
+- Thanks [tangmq](https://gitee.com/tangmq) for adding docker deployment service to PaddleOCR to support quick release of callable restful API services([#507](https://github.com/PaddlePaddle/PaddleOCR/pull/507)).
+- Thanks [lijinhan](https://github.com/lijinhan) for adding a Java Spring Boot demo to PaddleOCR that calls the OCR hubserving interface to complete OCR service deployment([#1027](https://github.com/PaddlePaddle/PaddleOCR/pull/1027)).
+- Thanks [Evezerest](https://github.com/Evezerest), [ninetailskim](https://github.com/ninetailskim), [edencfc](https://github.com/edencfc), [BeyondYourself](https://github.com/BeyondYourself), [1084667371](https://github.com/1084667371) for contributing complete code of [PPOCRLabel](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.3/PPOCRLabel/README_ch.md).
+
+### 1.3 CODE AND DOCUMENT OPTIMIZATION
+
+- Thanks [zhangxin](https://github.com/ZhangXinNan)([Blog](https://blog.csdn.net/sdlypyzq)) for contributing new visualization methods and adding .gitignore, and for handling the problem of manually setting the PYTHONPATH environment variable([#210](https://github.com/PaddlePaddle/PaddleOCR/pull/210)).
+- Thanks [lyl120117](https://github.com/lyl120117) for contributing code to print network structure([#304](https://github.com/PaddlePaddle/PaddleOCR/pull/304)).
+- Thanks [BeyondYourself](https://github.com/BeyondYourself) for making a lot of great suggestions for PaddleOCR and simplifying some of the code style of paddleocr ([so many commits](https://github.com/PaddlePaddle/PaddleOCR/commits?author=BeyondYourself)).
+- Thanks [Khanh Tran](https://github.com/xxxpsyduck) and [Karl Horky](https://github.com/karlhorky) for contributing improvements to the English documentation.
+
+### 1.4 MULTILINGUAL CORPUS
+
+- Thanks [xiangyubo](https://github.com/xiangyubo) for contributing a handwritten Chinese OCR dataset([#321](https://github.com/PaddlePaddle/PaddleOCR/pull/321)).
+- Thanks [Mejans](https://github.com/Mejans) for contributing dictionary and corpus of the new language Occitan to PaddleOCR([#954](https://github.com/PaddlePaddle/PaddleOCR/pull/954)).
+
+## 2. CONTRIBUTION ILLUSTRATING
+
+### 2.1 NEW FUNCTION CLASS
+
+PaddleOCR welcomes community contributions to various services, deployment examples and software applications built around PaddleOCR. Certified community contributions will be added to the community contribution table above to increase exposure for developers, which is also an honor for PaddleOCR. They include:
+
+- Project form: the project code certified by the official community should have good specifications and structure, and be equipped with a detailed README.md describing how to use the project. By adding a line `paddleocr` to requirements.txt, the project can be automatically listed in the Used by section of paddleocr.
+
+- Integration method: if it is an update to an existing PaddleOCR tool, it will be integrated into the main repo. If it expands PaddleOCR with new functionality, please contact the official team first to confirm whether the project will be integrated into the main repo. *Even if the new functionality is not integrated into the main repo, we will still increase the exposure of your personal project as a community contribution.*
+
+
+### 2.2 CODE OPTIMIZATION
+
+If you encounter code bugs and unexpected functions when using PaddleOCR, you can contribute your modifications to PaddleOCR, including:
+
+- For Python code specifications, refer to [Appendix 1: Python Code Specification](./code_and_doc.md/#Appendix1).
+
+- Before submitting the code, please double-check that no new bugs will be introduced, and describe the optimization points in the PR. If the PR solves an issue, please link to the issue in the PR. All PRs should comply with the requirements in [3.2.10 Some Conventions For Submitting Code](./code_and_doc.md/#Some_conventions_for_submitting_code).
+
+- Please refer to the points below before submitting. If you are not familiar with the git submission process, you can also refer to Section 3.2 of [Appendix 3: Pull Request Description](./code_and_doc.md/#Appendix3).
+
+**Finally, please add the label `[third-party]` in the title of the PR and @Evezerest in the description; PRs with this label will be treated with high priority.**
+
+### 2.3 DOCUMENT OPTIMIZATION
+
+If you encounter problems such as unclear descriptions, missing descriptions or invalid links when using PaddleOCR, you can contribute your modifications to PaddleOCR. For document writing specifications, please refer to [Appendix 2: Document Specification](./code_and_doc.md/#Appendix2). **Finally, please add the label `[third-party]` in the title of the PR and @Evezerest in the description; PRs with this label will be treated with high priority.**
+
+## 3. MORE CONTRIBUTION OPPORTUNITIES
+
+We encourage developers to use PaddleOCR to realize their ideas. We also list some valuable development directions based on our analysis, which are collected in the regular season of community projects.
+
+## 4. CONTACT US
+
+We very much welcome developers to contact us before they intend to contribute code, documents, corpus and other contents to PaddleOCR, which can greatly reduce the communication cost in the PR process. At the same time, if you find some ideas difficult to realize personally, we can also recruit like-minded developers for the project in the form of SIG. Projects funded through SIG channels will receive deep R & D support and operational resources (such as official account publicity, live broadcast lessons, etc.).
+
+Our recommended contribution process is:
+
+- Add the `[Third Party]` tag in the title of a GitHub issue, explain the problems encountered (and your ideas for solving them) or the functionality to be added, and wait for a reply from the person on duty. For example, `[Third Party] Contribute iOS examples to PaddleOCR`.
+- After communicating with us and confirming that the technical scheme or bugs and optimization points are correct, add functions or modify them accordingly, and the codes and documents shall comply with relevant specifications.
+- Submit a PR that links to the above issue and wait for review.
+
+## 5. THANKS AND FOLLOW-UP
+
+ - After the code is merged, the information will be updated in the first section of this document. The default link is your GitHub name and home page; if you need to change the home page, you can also contact us.
+ - New important function classes will be advertised in the user group and enjoy the honor of the open source community.
+ - **If you have a PaddleOCR based project that does not appear in the above list, follow `4. CONTACT US` .**
diff --git a/doc/doc_en/config_en.md b/doc/doc_en/config_en.md
index 9742c3d18ae52191106c5e08371cef1c09238bf6..eda1e13da956ab1eede72b97e62d76b915e02169 100644
--- a/doc/doc_en/config_en.md
+++ b/doc/doc_en/config_en.md
@@ -1,7 +1,7 @@
-# Configuration
+# Configuration
- [1. Optional Parameter List](#1-optional-parameter-list)
-- [2. Intorduction to Global Parameters of Configuration File](#2-intorduction-to-global-parameters-of-configuration-file)
+- [2. Introduction to Global Parameters of Configuration File](#2-introduction-to-global-parameters-of-configuration-file)
- [3. Multilingual Config File Generation](#3-multilingual-config-file-generation)
@@ -15,9 +15,9 @@ The following list can be viewed through `--help`
| -c | ALL | Specify configuration file to use | None | **Please refer to the parameter introduction for configuration file usage** |
| -o | ALL | set configuration options | None | Configuration using -o has higher priority than the configuration file selected with -c. E.g: -o Global.use_gpu=false |
-
+
-## 2. Intorduction to Global Parameters of Configuration File
+## 2. Introduction to Global Parameters of Configuration File
Take rec_chinese_lite_train_v2.0.yml as an example
### Global
@@ -30,7 +30,7 @@ Take rec_chinese_lite_train_v2.0.yml as an example
| print_batch_step | Set print log interval | 10 | \ |
| save_model_dir | Set model save path | output/{算法名称} | \ |
| save_epoch_step | Set model save interval | 3 | \ |
-| eval_batch_step | Set the model evaluation interval | 2000 or [1000, 2000] | runing evaluation every 2000 iters or evaluation is run every 2000 iterations after the 1000th iteration |
+| eval_batch_step | Set the model evaluation interval | 2000 or [1000, 2000] | running evaluation every 2000 iters or evaluation is run every 2000 iterations after the 1000th iteration |
| cal_metric_during_train | Set whether to evaluate the metric during the training process. At this time, the metric of the model under the current batch is evaluated | true | \ |
| load_static_weights | Set whether the pre-training model is saved in static graph mode (currently only required by the detection algorithm) | true | \ |
| pretrained_model | Set the path of the pre-trained model | ./pretrain_models/CRNN/best_accuracy | \ |
@@ -65,7 +65,7 @@ In PaddleOCR, the network is divided into four stages: Transform, Backbone, Neck
| Parameter | Use | Defaults | Note |
| :---------------------: | :---------------------: | :--------------: | :--------------------: |
| model_type | Network Type | rec | Currently support`rec`,`det`,`cls` |
-| algorithm | Model name | CRNN | See [algorithm_overview](./algorithm_overview.md) for the support list |
+| algorithm | Model name | CRNN | See [algorithm_overview](./algorithm_overview_en.md) for the support list |
| **Transform** | Set the transformation method | - | Currently only recognition algorithms are supported, see [ppocr/modeling/transform](../../ppocr/modeling/transform) for details |
| name | Transformation class name | TPS | Currently supports `TPS` |
| num_fiducial | Number of TPS control points | 20 | Ten on the top and bottom |
@@ -134,14 +134,14 @@ In PaddleOCR, the network is divided into four stages: Transform, Backbone, Neck
## 3. Multilingual Config File Generation
-PaddleOCR currently supports 80 (except Chinese) language recognition. A multi-language configuration file template is
+PaddleOCR currently supports recognition for 80 languages (besides Chinese). A multi-language configuration file template is
provided under the path `configs/rec/multi_languages`: [rec_multi_language_lite_train.yml](../../configs/rec/multi_language/rec_multi_language_lite_train.yml)。
-There are two ways to create the required configuration file::
+There are two ways to create the required configuration file:
1. Automatically generated by script
-[generate_multi_language_configs.py](../../configs/rec/multi_language/generate_multi_language_configs.py) Can help you generate configuration files for multi-language models
+Script [generate_multi_language_configs.py](../../configs/rec/multi_language/generate_multi_language_configs.py) can help you generate configuration files for multi-language models.
- Take Italian as an example, if your data is prepared in the following format:
```
@@ -196,21 +196,21 @@ Italian is made up of Latin letters, so after executing the command, you will ge
epoch_num: 500
...
character_dict_path: {path/of/dict} # path of dict
-
+
Train:
dataset:
name: SimpleDataSet
data_dir: train_data/ # root directory of training data
label_file_list: ["./train_data/train_list.txt"] # train label path
...
-
+
Eval:
dataset:
name: SimpleDataSet
data_dir: train_data/ # root directory of val data
label_file_list: ["./train_data/val_list.txt"] # val label path
...
-
+
```
diff --git a/doc/doc_en/customize_en.md b/doc/doc_en/customize_en.md
index fb47c14f3346e918f32950c8eec5ada76345ce59..80b8302238f61c6e6473d1e2359de30dbebcb929 100644
--- a/doc/doc_en/customize_en.md
+++ b/doc/doc_en/customize_en.md
@@ -22,7 +22,7 @@ For more details about data preparation and training tutorials, refer to the doc
PaddleOCR provides a concatenation tool for detection and recognition models, which can connect any trained detection model and any recognition model into a two-stage text recognition system. The input image goes through four main stages: text detection, text rectification, text recognition, and score filtering to output the text position and recognition results, and at the same time, you can choose to visualize the results.
-When performing prediction, you need to specify the path of a single image or a image folder through the parameter `image_dir`, the parameter `det_model_dir` specifies the path of detection model, and the parameter `rec_model_dir` specifies the path of recogniton model. The visualized results are saved to the `./inference_results` folder by default.
+When performing prediction, you need to specify the path of a single image or a image folder through the parameter `image_dir`, the parameter `det_model_dir` specifies the path of detection model, and the parameter `rec_model_dir` specifies the path of recognition model. The visualized results are saved to the `./inference_results` folder by default.
```
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/det/" --rec_model_dir="./inference/rec/"
diff --git a/doc/doc_en/detection_en.md b/doc/doc_en/detection_en.md
index 1be34b330f91b59060fc84af6f5ac44022da1a35..9f54dc06b9be16553518c301296e38e62cf1c8ec 100644
--- a/doc/doc_en/detection_en.md
+++ b/doc/doc_en/detection_en.md
@@ -4,7 +4,7 @@ This section uses the icdar2015 dataset as an example to introduce the training,
- [1. Data and Weights Preparation](#1-data-and-weights-preparatio)
* [1.1 Data Preparation](#11-data-preparation)
- * [1.2 Download Pretrained Model](#12-download-pretrained-model)
+ * [1.2 Download Pre-trained Model](#12-download-pretrained-model)
- [2. Training](#2-training)
* [2.1 Start Training](#21-start-training)
* [2.2 Load Trained Model and Continue Training](#22-load-trained-model-and-continue-training)
@@ -45,7 +45,7 @@ After decompressing the data set and downloading the annotation file, PaddleOCR/
└─ test_icdar2015_label.txt Test annotation of icdar dataset
```
-The provided annotation file format is as follow, seperated by "\t":
+The provided annotation file format is as follows, separated by "\t":
```
" Image file name Image annotation information encoded by json.dumps"
ch4_test_images/img_61.jpg [{"transcription": "MASA", "points": [[310, 104], [416, 141], [418, 216], [312, 179]]}, {...}]
@@ -59,10 +59,10 @@ The `points` in the dictionary represent the coordinates (x, y) of the four poin
If you want to train PaddleOCR on other datasets, please build the annotation file according to the above format.
-### 1.2 Download Pretrained Model
+### 1.2 Download Pre-trained Model
-First download the pretrained model. The detection model of PaddleOCR currently supports 3 backbones, namely MobileNetV3, ResNet18_vd and ResNet50_vd. You can use the model in [PaddleClas](https://github.com/PaddlePaddle/PaddleClas/tree/release/2.0/ppcls/modeling/architectures) to replace backbone according to your needs.
-And the responding download link of backbone pretrain weights can be found in (https://github.com/PaddlePaddle/PaddleClas/blob/release%2F2.0/README_cn.md#resnet%E5%8F%8A%E5%85%B6vd%E7%B3%BB%E5%88%97).
+First download the pre-trained model. The detection model of PaddleOCR currently supports 3 backbones, namely MobileNetV3, ResNet18_vd and ResNet50_vd. You can use the models in [PaddleClas](https://github.com/PaddlePaddle/PaddleClas/tree/release/2.0/ppcls/modeling/architectures) to replace the backbone according to your needs.
+The corresponding download links of the backbone pre-trained weights can be found at (https://github.com/PaddlePaddle/PaddleClas/blob/release%2F2.0/README_cn.md#resnet%E5%8F%8A%E5%85%B6vd%E7%B3%BB%E5%88%97).
```shell
cd PaddleOCR/
diff --git a/doc/doc_en/distributed_training.md b/doc/doc_en/distributed_training.md
index 519a42f0dc4b9bd4fa18f3f65019e4235282df92..2822ee5e4ea52720a458e4060d8a09be7b98846b 100644
--- a/doc/doc_en/distributed_training.md
+++ b/doc/doc_en/distributed_training.md
@@ -2,7 +2,7 @@
## Introduction
-The high performance of distributed training is one of the core advantages of PaddlePaddle. In the classification task, distributed training can achieve almost linear speedup ratio. Generally, OCR training task need massive training data. Such as recognition, ppocrv2.0 model is trained based on 1800W dataset, which is very time-consuming if using single machine. Therefore, the distributed training is used in paddleocr to speedup the training task. For more information about distributed training, please refer to [distributed training quick start tutorial](https://fleet-x.readthedocs.io/en/latest/paddle_fleet_rst/parameter_server/ps_quick_start.html).
+The high performance of distributed training is one of the core advantages of PaddlePaddle. In classification tasks, distributed training can achieve an almost linear speedup. Generally, OCR training tasks need massive amounts of training data; for example, the PP-OCR v2.0 recognition model is trained on an 18-million-sample dataset, which is very time-consuming on a single machine. Therefore, distributed training is used in PaddleOCR to speed up the training task. For more information about distributed training, please refer to the [distributed training quick start tutorial](https://fleet-x.readthedocs.io/en/latest/paddle_fleet_rst/parameter_server/ps_quick_start.html).
## Quick Start
@@ -35,7 +35,7 @@ python3 -m paddle.distributed.launch \
**Notice:**
* The IP addresses of different machines need to be separated by commas, which can be queried through `ifconfig` or `ipconfig`.
* Different machines need to be set to be secret free and can `ping` success with others directly, otherwise communication cannot establish between them.
-* The code, data and start command betweent different machines must be completely consistent and then all machines need to run start command. The first machine in the `ip_list` is set to `trainer0`, and so on.
+* The code, data and start command on different machines must be completely consistent, and then all machines need to run the start command. The first machine in the `ip_list` is set to `trainer0`, and so on.
## Performance comparison
diff --git a/doc/doc_en/enhanced_ctc_loss_en.md b/doc/doc_en/enhanced_ctc_loss_en.md
new file mode 100644
index 0000000000000000000000000000000000000000..908f79e412e2a00e4fec027befc8a1430c077e27
--- /dev/null
+++ b/doc/doc_en/enhanced_ctc_loss_en.md
@@ -0,0 +1,110 @@
+# Enhanced CTC Loss
+
+In OCR recognition, CRNN is a text recognition algorithm widely applied in the industry. In the training phase, it uses CTCLoss to calculate the network loss. In the inference phase, it uses CTCDecode to obtain the decoding result. Although the CRNN algorithm has been proven to achieve reliable recognition results in actual business, users have endless requirements for recognition accuracy. So how to improve the accuracy of text recognition? Taking CTCLoss as the starting point, this paper explores the improved fusion scheme of CTCLoss from three different perspectives: Hard Example Mining, Multi-task Learning, and Metric Learning. Based on the exploration, we propose EnhancedCTCLoss, which includes the following 3 components: Focal-CTC Loss, A-CTC Loss, C-CTC Loss.
+
+## 1. Focal-CTC Loss
+
+Focal Loss was proposed in the paper "[Focal Loss for Dense Object Detection](https://arxiv.org/abs/1708.02002)". It was originally introduced to address the severe imbalance between positive and negative samples in one-stage object detection. The loss reduces the weight of the large number of easy negative samples during training and can also be understood as a kind of hard example mining.
+The form of the loss function is as follows:
+
+FL(y') = -α * (1 - y')^γ * log(y')
+
+Among them, y' is the output of the activation function, and the value is between 0-1. It adds a modulation factor (1-y’)^γ and a balance factor α on the basis of the original cross-entropy loss. When α = 1, y = 1, the comparison between the loss function and the cross-entropy loss is shown in the following figure:
+
+(Figure: comparison between Focal Loss and the cross-entropy loss)
+
+As can be seen from the above figure, when γ > 0, the adjustment coefficient (1-y’)^γ gives smaller weight to the easy-to-classify sample loss, making the network pay more attention to the difficult and misclassified samples. The adjustment factor γ is used to adjust the rate at which the weight of simple samples decreases. When γ = 0, it is the cross-entropy loss function. When γ increases, the influence of the adjustment factor will also increase. Experiments revealed that 2 is the optimal value of γ. The balance factor α is used to balance the uneven proportions of the positive and negative samples. In the text, α is taken as 0.25.
+
+For the classic CTC algorithm, suppose that for a certain feature sequence (f1, f2, ..., ft) the probability that the CTC decoding result equals the label is y'; then the probability that the decoding result does not equal the label is (1-y'). It is not difficult to see that the CTCLoss value and y' have the following relationship:
+
+L_CTC = -log(y')
+
+Combining the idea of Focal Loss, assigning larger weights to difficult samples and smaller weights to simple samples can make the network focus more on the mining of difficult samples and further improve the accuracy of recognition. Therefore, we propose Focal-CTC Loss. Its definition is as follows:
+
+L_Focal-CTC = α * (1 - exp(-L_CTC))^γ * L_CTC
+
+In the experiment, the value of γ is 2, α = 1, see this for specific implementation: [rec_ctc_loss.py](../../ppocr/losses/rec_ctc_loss.py)
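+
+To make the weighting idea concrete, here is a minimal NumPy sketch of how a focal factor could be applied to per-sample CTC losses. The function name, shapes and batch reduction are assumptions for illustration only and do not reproduce the actual code in rec_ctc_loss.py:
+
+```python
+import numpy as np
+
+def focal_ctc(ctc_loss_per_sample, alpha=1.0, gamma=2.0):
+    """Re-weight per-sample CTC losses with a focal modulation factor."""
+    # Since L_CTC = -log(y'), the probability of the correct label is y' = exp(-L_CTC)
+    y_prime = np.exp(-ctc_loss_per_sample)
+    # Hard samples (small y') receive a large weight, easy samples a small one
+    weight = alpha * (1.0 - y_prime) ** gamma
+    return np.mean(weight * ctc_loss_per_sample)
+
+# Example: the third (hardest) sample dominates the weighted loss
+print(focal_ctc(np.array([0.1, 0.5, 3.0])))
+```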
+
+
+
+## 2. A-CTC Loss
+
+A-CTC Loss is short for CTC Loss + ACE Loss. Among them, ACE Loss was proposed by the paper, “[Aggregation Cross-Entropy for Sequence Recognition](https://arxiv.org/abs/1904.08364)”. Compared with CTCLoss, ACE Loss has the following two advantages:
++ ACE Loss can solve the recognition problem of 2-D text, while CTCLoss can only process 1-D text
++ ACE Loss is better than CTC loss in time complexity and space complexity
+
+The advantages and disadvantages of OCR recognition algorithms summarized in previous work are shown in the figure below:
+
+(Figure: advantages and disadvantages of OCR recognition algorithms)
+
+Although ACELoss does handle 2D predictions, as shown in the figure above, and has advantages in memory usage and inference speed, in practice, we found that using ACELoss alone, the recognition effect is not as good as CTCLoss. Consequently, we tried to combine CTCLoss and ACELoss, and CTCLoss is the mainstay while ACELoss acts as an auxiliary supervision loss. This attempt has achieved better results. On our internal experimental data set, compared to using CTCLoss alone, the recognition accuracy can be improved by about 1%.
+A_CTC Loss is defined as follows:
+
+L_A-CTC = L_CTC + λ * L_ACE
+
+In the experiment, λ = 0.1. See the ACE loss implementation code: [ace_loss.py](../../ppocr/losses/ace_loss.py)
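+
+Conceptually, the combination is just a weighted sum of the two losses; the short sketch below (hypothetical names, not the PaddleOCR implementation) illustrates it:
+
+```python
+def a_ctc_loss(ctc_loss, ace_loss, lam=0.1):
+    # CTC provides the main supervision, ACE acts as an auxiliary term
+    return ctc_loss + lam * ace_loss
+
+print(a_ctc_loss(ctc_loss=1.2, ace_loss=0.8))  # 1.2 + 0.1 * 0.8 = 1.28
+```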
+
+
+
+## 3. C-CTC Loss
+
+C-CTC Loss is short for CTC Loss + Center Loss. Among them, Center Loss was proposed by the paper, “[A Discriminative Feature Learning Approach for Deep Face Recognition](https://link.springer.com/chapter/10.1007/978-3-319-46478-7_31)“. It was first used in face recognition tasks to increase the distance between classes and reduce the distance within classes. It is an earlier and also widely used algorithm.
+
+In the task of Chinese OCR recognition, through the analysis of bad cases, we found that a major difficulty in Chinese recognition is that there are many similar characters, which are easy to confuse. From this, we considered whether we could borrow the idea of Metric Learning to increase the class spacing of similar characters and improve recognition accuracy. However, Metric Learning is mainly used in the field of image recognition, where the label of the training data is a fixed value; OCR recognition is essentially a sequence recognition task, and there is no explicit alignment between features and labels. Therefore, how to combine the two is still a direction worth exploring.
+
+By trying Arcmargin, Cosmargin and other methods, we finally found that Centerloss can help further improve the accuracy of recognition. C_CTC Loss is defined as follows:
+
+L_C-CTC = L_CTC + λ * L_Center
+
+In the experiment, we set λ=0.25. See the center_loss implementation code: [center_loss.py](../../ppocr/losses/center_loss.py)
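+
+As a rough sketch (assumed shapes and names, not the actual center_loss.py code), the center term penalizes the distance between each frame feature and the center of its predicted character, and is then added to the CTC loss:
+
+```python
+import numpy as np
+
+def center_term(features, indices, centers):
+    # features: (N, D) FC-layer inputs; indices: (N,) predicted character ids
+    diffs = features - centers[indices]
+    return np.mean(np.sum(diffs ** 2, axis=1))
+
+def c_ctc_loss(ctc_loss, center_loss_value, lam=0.25):
+    return ctc_loss + lam * center_loss_value
+```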
+
+It is worth mentioning that in C-CTC Loss, initializing the centers randomly does not bring a significant improvement. Our center initialization method is as follows (a small code sketch of this procedure is given after the list):
++ Based on the original CTCLoss, a network N is obtained by training
++ Select the training set, identify the completely correct part, and form the set G
++ Send each sample in G to the network, perform a forward calculation, and extract the correspondence between the input of the last FC layer (i.e. the feature) and the result of the argmax calculation (i.e. the index)
++ Aggregate features with the same index, calculate the average, and get the initial center of each character.
+
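+A minimal NumPy sketch of this aggregation step (array shapes and the function name are assumptions for illustration; in practice the extraction is done by the export_center.py command shown below):
+
+```python
+import numpy as np
+
+def init_centers(features, indices, num_classes):
+    """Average the last-FC-layer inputs that share the same argmax index."""
+    # features: (N, D) features of correctly recognized samples (set G)
+    # indices:  (N,)   argmax character indices aligned with the features
+    centers = np.zeros((num_classes, features.shape[1]))
+    for c in range(num_classes):
+        mask = indices == c
+        if mask.any():
+            centers[c] = features[mask].mean(axis=0)  # initial center of character c
+    return centers
+```
+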
+Taking the configuration file `configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml` as an example, the center extraction command is as follows:
+
+```
+python tools/export_center.py -c configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml -o Global.pretrained_model="./output/rec_mobile_pp-OCRv2/best_accuracy"
+```
+
+After running, `train_center.pkl` will be generated in the main directory of PaddleOCR.
+
+
+
+## 4. Experiment
+
+For the above three solutions, we conducted training and evaluation based on Baidu's internal data set. The experimental conditions are shown in the following table:
+
+| algorithm | Focal-CTC | A-CTC | C-CTC |
+| :-------- | :-------- | ----: | :---: |
+| gain | +0.3% | +0.7% | +1.7% |
+
+Based on the above experimental conclusions, we adopted the C-CTC strategy in PP-OCRv2. It is worth mentioning that, because PP-OCRv2 deals with the recognition task of 6625 Chinese characters, the character set is relatively large and there are many similar characters, so the C-CTC solution brings a significant improvement on this task. But if you switch to other OCR recognition tasks, the conclusion may be different. You can try Focal-CTC, A-CTC, C-CTC, and the combined solution EnhancedCTC. We believe it will bring different degrees of improvement.
+
+The unified combined plan is shown in the following file: [rec_enhanced_ctc_loss.py](../../ppocr/losses/rec_enhanced_ctc_loss.py)
\ No newline at end of file
diff --git a/doc/doc_en/environment_en.md b/doc/doc_en/environment_en.md
index 854db648fc255d36ec5c01c710a1d3cab53aaeb5..fc87f10c104628df0268bc6f8910c5914aeba225 100644
--- a/doc/doc_en/environment_en.md
+++ b/doc/doc_en/environment_en.md
@@ -4,9 +4,9 @@ Windows and Mac users are recommended to use Anaconda to build a Python environm
Recommended working environment:
- PaddlePaddle >= 2.0.0 (2.1.2)
-- python3.7
-- CUDA10.1 / CUDA10.2
-- CUDNN 7.6
+- Python 3.7
+- CUDA 10.1 / CUDA 10.2
+- cuDNN 7.6
* [1. Python Environment Setup](#1)
+ [1.1 Windows](#1.1)
@@ -25,7 +25,7 @@ Recommended working environment:
#### 1.1.1 Install Anaconda
-- Note: To use paddlepaddle you need to install python environment first, here we choose python integrated environment Anaconda toolkit
+- Note: To use PaddlePaddle you need to install python environment first, here we choose python integrated environment Anaconda toolkit
- Anaconda is a common python package manager
- After installing Anaconda, you can install the python environment, as well as numpy and other required toolkit environment.
@@ -44,19 +44,19 @@ Recommended working environment:
- - Check conda to add environment variables and ignore the warning that
+ - Check Conda to add environment variables and ignore the warning that
-#### 1.1.2 Opening the terminal and creating the conda environment
+#### 1.1.2 Opening the terminal and creating the Conda environment
- Open Anaconda Prompt terminal: bottom left Windows Start Menu -> Anaconda3 -> Anaconda Prompt start console
-- Create a new conda environment
+- Create a new Conda environment
```shell
# Enter the following command at the command line to create an environment named paddle_env
@@ -70,7 +70,7 @@ Recommended working environment:
-- To activate the conda environment you just created, enter the following command at the command line.
+- To activate the Conda environment you just created, enter the following command at the command line.
```shell
# Activate the paddle_env environment
@@ -91,7 +91,7 @@ The above anaconda environment and python environment are installed
#### 1.2.1 Installing Anaconda
-- Note: To use paddlepaddle you need to install the python environment first, here we choose the python integrated environment Anaconda toolkit
+- Note: To use PaddlePaddle you need to install the python environment first, here we choose the python integrated environment Anaconda toolkit
- Anaconda is a common python package manager
- After installing Anaconda, you can install the python environment, as well as numpy and other required toolkit environment
@@ -108,17 +108,17 @@ The above anaconda environment and python environment are installed
- Just follow the default settings, it will take a while to install
-- It is recommended to install a code editor such as vscode or pycharm
+- It is recommended to install a code editor such as VSCode or PyCharm
-#### 1.2.2 Open a terminal and create a conda environment
+#### 1.2.2 Open a terminal and create a Conda environment
- Open the terminal
- Press command and spacebar at the same time, type "terminal" in the focus search, double click to enter terminal
-- **Add conda to the environment variables**
+- **Add Conda to the environment variables**
- - Environment variables are added so that the system can recognize the conda command
+ - Environment variables are added so that the system can recognize the Conda command
- Open `~/.bash_profile` in the terminal by typing the following command.
@@ -126,7 +126,7 @@ The above anaconda environment and python environment are installed
vim ~/.bash_profile
```
- - Add conda as an environment variable in `~/.bash_profile`.
+ - Add Conda as an environment variable in `~/.bash_profile`.
```shell
# Press i first to enter edit mode
@@ -156,12 +156,12 @@ The above anaconda environment and python environment are installed
- When you are done, press `esc` to exit edit mode, then type `:wq!` and enter to save and exit
- - Verify that the conda command is recognized.
+ - Verify that the Conda command is recognized.
- Enter `source ~/.bash_profile` in the terminal to update the environment variables
- - Enter `conda info --envs` in the terminal again, if it shows that there is a base environment, then conda has been added to the environment variables
+ - Enter `conda info --envs` in the terminal again, if it shows that there is a base environment, then Conda has been added to the environment variables
-- Create a new conda environment
+- Create a new Conda environment
```shell
# Enter the following command at the command line to create an environment called paddle_env
@@ -175,7 +175,7 @@ The above anaconda environment and python environment are installed
-
-- To activate the conda environment you just created, enter the following command at the command line.
+- To activate the Conda environment you just created, enter the following command at the command line.
```shell
# Activate the paddle_env environment
@@ -198,7 +198,7 @@ Linux users can choose to run either Anaconda or Docker. If you are familiar wit
#### 1.3.1 Anaconda environment configuration
-- Note: To use paddlepaddle you need to install the python environment first, here we choose the python integrated environment Anaconda toolkit
+- Note: To use PaddlePaddle you need to install the python environment first, here we choose the python integrated environment Anaconda toolkit
- Anaconda is a common python package manager
- After installing Anaconda, you can install the python environment, as well as numpy and other required toolkit environment
@@ -214,9 +214,9 @@ Linux users can choose to run either Anaconda or Docker. If you are familiar wit
- Select the appropriate version for your operating system
- Type `uname -m` in the terminal to check the command set used by your system
- - Download method 1: Download locally, then transfer the installation package to the linux server
+ - Download method 1: Download locally, then transfer the installation package to the Linux server
- - Download method 2: Directly use linux command line to download
+ - Download method 2: Directly use Linux command line to download
```shell
# First install wget
@@ -277,12 +277,12 @@ Linux users can choose to run either Anaconda or Docker. If you are familiar wit
- When you are done, press `esc` to exit edit mode, then type `:wq!` and enter to save and exit
- - Verify that the conda command is recognized.
+ - Verify that the Conda command is recognized.
- Enter `source ~/.bash_profile` in the terminal to update the environment variables
- - Enter `conda info --envs` in the terminal again, if it shows that there is a base environment, then conda has been added to the environment variables
+ - Enter `conda info --envs` in the terminal again, if it shows that there is a base environment, then Conda has been added to the environment variables
-- Create a new conda environment
+- Create a new Conda environment
```shell
# Enter the following command at the command line to create an environment called paddle_env
@@ -296,7 +296,7 @@ Linux users can choose to run either Anaconda or Docker. If you are familiar wit
-- To activate the conda environment you just created, enter the following command at the command line.
+- To activate the Conda environment you just created, enter the following command at the command line.
```shell
# Activate the paddle_env environment
@@ -335,13 +335,13 @@ sudo docker container exec -it ppocr /bin/bash
## 2. Install PaddlePaddle 2.0
-- If you have cuda9 or cuda10 installed on your machine, please run the following command to install
+- If you have CUDA 9 or CUDA 10 installed on your machine, please run the following command to install
```bash
python3 -m pip install paddlepaddle-gpu -i https://mirror.baidu.com/pypi/simple
```
-- If you only have cpu on your machine, please run the following command to install
+- If you have no available GPU on your machine, please run the following command to install the CPU version
```bash
python3 -m pip install paddlepaddle -i https://mirror.baidu.com/pypi/simple
diff --git a/doc/doc_en/inference_en.md b/doc/doc_en/inference_en.md
index 019ac4d0ac15aceed89286048d2c4d88a259e501..a8a96e30f020b98b52bb465140b3463cd88beebb 100755
--- a/doc/doc_en/inference_en.md
+++ b/doc/doc_en/inference_en.md
@@ -139,7 +139,7 @@ tar xf ch_ppocr_mobile_v2.0_det_infer.tar
python3 tools/infer/predict_det.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./inference/det_db/"
```
-The visual text detection results are saved to the ./inference_results folder by default, and the name of the result file is prefixed with'det_res'. Examples of results are as follows:
+The visual text detection results are saved to the ./inference_results folder by default, and the name of the result file is prefixed with 'det_res'. Examples of results are as follows:
![](../imgs_results/det_res_00018069.jpg)
@@ -244,7 +244,7 @@ The visualized text detection results are saved to the `./inference_results` fol
## 3. Text Recognition Model Inference
-The following will introduce the lightweight Chinese recognition model inference, other CTC-based and Attention-based text recognition models inference. For Chinese text recognition, it is recommended to choose the recognition model based on CTC loss. In practice, it is also found that the result of the model based on Attention loss is not as good as the one based on CTC loss. In addition, if the characters dictionary is modified during training, make sure that you use the same characters set during inferencing. Please check below for details.
+The following will introduce the lightweight Chinese recognition model inference, other CTC-based and Attention-based text recognition models inference. For Chinese text recognition, it is recommended to choose the recognition model based on CTC loss. In practice, it is also found that the result of the model based on Attention loss is not as good as the one based on CTC loss. In addition, if the characters dictionary is modified during training, make sure that you use the same characters set during inference. Please check below for details.
diff --git a/doc/doc_en/inference_ppocr_en.md b/doc/doc_en/inference_ppocr_en.md
index 62a672885c86119ae56dc93ef76c2bb746084a05..21f4c64d6dc99054a3480a66cd710b5d09461ca1 100755
--- a/doc/doc_en/inference_ppocr_en.md
+++ b/doc/doc_en/inference_ppocr_en.md
@@ -7,7 +7,7 @@ This article introduces the use of the Python inference engine for the PP-OCR mo
- [Text Detection Model Inference](#DETECTION_MODEL_INFERENCE)
- [Text Recognition Model Inference](#RECOGNITION_MODEL_INFERENCE)
- [1. Lightweight Chinese Recognition Model Inference](#LIGHTWEIGHT_RECOGNITION)
- - [2. Multilingaul Model Inference](#MULTILINGUAL_MODEL_INFERENCE)
+ - [2. Multilingual Model Inference](#MULTILINGUAL_MODEL_INFERENCE)
- [Angle Classification Model Inference](#ANGLE_CLASS_MODEL_INFERENCE)
- [Text Detection Angle Classification and Recognition Inference Concatenation](#CONCATENATION)
@@ -25,7 +25,7 @@ tar xf ch_PP-OCRv2_det_infer.tar
python3 tools/infer/predict_det.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv2_det_infer.tar/"
```
-The visual text detection results are saved to the ./inference_results folder by default, and the name of the result file is prefixed with'det_res'. Examples of results are as follows:
+The visual text detection results are saved to the ./inference_results folder by default, and the name of the result file is prefixed with 'det_res'. Examples of results are as follows:
![](../imgs_results/det_res_00018069.jpg)
@@ -75,7 +75,7 @@ Predicts of ./doc/imgs_words_en/word_10.png:('PAIN', 0.9897658)
-### 2. Multilingaul Model Inference
+### 2. Multilingual Model Inference
If you need to predict [other language models](./models_list_en.md#Multilingual), when using inference model prediction, you need to specify the dictionary path used by `--rec_char_dict_path`. At the same time, in order to get the correct visualization results,
You need to specify the visual font path through `--vis_font_path`. There are small language fonts provided by default under the `doc/fonts` path, such as Korean recognition:
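As a hedged Python counterpart to the CLI flags above: with the whl package, choosing `lang` selects the matching dictionary automatically, and a language-appropriate font is still needed for visualization. The sample image path is an assumption; `doc/fonts/korean.ttf` is one of the fonts referenced above. Each result item is assumed to be `[box, (text, score)]`.

```python
# Sketch: Korean recognition plus visualization with a Korean-capable font.
from paddleocr import PaddleOCR, draw_ocr
from PIL import Image

ocr = PaddleOCR(lang="korean", use_angle_cls=False)
img_path = "doc/imgs/korean_1.jpg"                       # assumed sample image
result = ocr.ocr(img_path)                               # list of [box, (text, score)]

boxes = [item[0] for item in result]
txts = [item[1][0] for item in result]
scores = [item[1][1] for item in result]

image = Image.open(img_path).convert("RGB")
vis = draw_ocr(image, boxes, txts, scores, font_path="doc/fonts/korean.ttf")
Image.fromarray(vis).save("vis_korean.jpg")
```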
diff --git a/doc/doc_en/installation_en.md b/doc/doc_en/installation_en.md
index dec384b2f27f8bb36ee67d8b040b532b30e0b028..bb499f5cd4b70da59445bec4f74ca4e0f4e517b1 100644
--- a/doc/doc_en/installation_en.md
+++ b/doc/doc_en/installation_en.md
@@ -1,17 +1,17 @@
## QUICK INSTALLATION
-After testing, paddleocr can run on glibc 2.23. You can also test other glibc versions or install glic 2.23 for the best compatibility.
+After testing, PaddleOCR can run on glibc 2.23. You can also test other glibc versions or install glibc 2.23 for the best compatibility.
PaddleOCR working environment:
- PaddlePaddle 2.0.0
-- python3.7
+- Python 3.7
- glibc 2.23
-It is recommended to use the docker provided by us to run PaddleOCR, please refer to the use of docker [link](https://www.runoob.com/docker/docker-tutorial.html/).
+It is recommended to use the docker image we provide to run PaddleOCR. Please refer to the docker tutorial [link](https://www.runoob.com/docker/docker-tutorial.html/).
-*If you want to directly run the prediction code on mac or windows, you can start from step 2.*
+*If you want to directly run the prediction code on Mac or Windows, you can start from step 2.*
-**1. (Recommended) Prepare a docker environment. The first time you use this docker image, it will be downloaded automatically. Please be patient.**
+**1. (Recommended) Prepare a docker environment. When you use this docker image for the first time, it will be downloaded automatically. Please be patient.**
```
# Switch to the working directory
cd /home/Projects
@@ -22,7 +22,7 @@ cd /home/Projects
sudo docker run --name ppocr -v $PWD:/paddle --network=host -it paddlepaddle/paddle:latest-dev-cuda10.1-cudnn7-gcc82 /bin/bash
```
-If using CUDA10, please run the following command to create a container.
+With CUDA10, please run the following command to create a container.
It is recommended to set a shared memory greater than or equal to 32G through the --shm-size parameter:
```
sudo nvidia-docker run --name ppocr -v $PWD:/paddle --shm-size=64G --network=host -it paddlepaddle/paddle:latest-dev-cuda10.1-cudnn7-gcc82 /bin/bash
@@ -51,11 +51,11 @@ For more software version requirements, please refer to the instructions in [Ins
# Recommend
git clone https://github.com/PaddlePaddle/PaddleOCR
-# If you cannot pull successfully due to network problems, you can also choose to use the code hosting on the cloud:
+# If you cannot pull successfully due to network problems, you can switch to the mirror hosted on Gitee:
git clone https://gitee.com/paddlepaddle/PaddleOCR
-# Note: The cloud-hosting code may not be able to synchronize the update with this GitHub project in real time. There might be a delay of 3-5 days. Please give priority to the recommended method.
+# Note: The mirror on Gitee may not stay synchronized with the latest updates of the project on GitHub. There might be a delay of 3-5 days. Please use the GitHub repository first.
```
**4. Install third-party libraries**
@@ -66,6 +66,6 @@ pip3 install -r requirements.txt
If you getting this error `OSError: [WinError 126] The specified module could not be found` when you install shapely on windows.
-Please try to download Shapely whl file using [http://www.lfd.uci.edu/~gohlke/pythonlibs/#shapely](http://www.lfd.uci.edu/~gohlke/pythonlibs/#shapely).
+Please try to download the Shapely whl file from [http://www.lfd.uci.edu/~gohlke/pythonlibs/#shapely](http://www.lfd.uci.edu/~gohlke/pythonlibs/#shapely).
Reference: [Solve shapely installation on windows](https://stackoverflow.com/questions/44398265/install-shapely-oserror-winerror-126-the-specified-module-could-not-be-found)
diff --git a/doc/doc_en/models_en.md b/doc/doc_en/models_en.md
index 37c4a174563abc68085a103e11e2ddb3bd954714..d60ca2a3329f0c1b1ad0bfe6f200d0d5e63dcdaa 100644
--- a/doc/doc_en/models_en.md
+++ b/doc/doc_en/models_en.md
@@ -7,13 +7,13 @@ This section contains two parts. Firstly, [PP-OCR Model Download](./models_list_
Let's first understand some basic concepts.
-- [Introduction about OCR](#introduction-about-ocr)
+- [Introduction to OCR](#introduction-to-ocr)
* [Basic Concepts of OCR Detection Model](#basic-concepts-of-ocr-detection-model)
* [Basic Concepts of OCR Recognition Model](#basic-concepts-of-ocr-recognition-model)
* [PP-OCR Model](#pp-ocr-model)
-## 1. Introduction about OCR
+## 1. Introduction to OCR
This section briefly introduces the basic concepts of OCR detection model and recognition model, and introduces PaddleOCR's PP-OCR model.
diff --git a/doc/doc_en/models_list_en.md b/doc/doc_en/models_list_en.md
index e3cf251c3439ba009a1de0ba48f7c0aa10b117c4..b07d5f0a9471a1bdff7a93328d63e874fed60e19 100644
--- a/doc/doc_en/models_list_en.md
+++ b/doc/doc_en/models_list_en.md
@@ -1,6 +1,6 @@
# OCR Model List(V2.1, updated on 2021.9.6)
> **Note**
-> 1. Compared with the model v2.0, the 2.1 version of the detection model has a improvement in accuracy, and the 2.1 version of the recognition model is optimized in accuracy and CPU speed.
+> 1. Compared with the model v2.0, the 2.1 version of the detection model has an improvement in accuracy, and the 2.1 version of the recognition model has optimizations in accuracy and speed on CPU.
> 2. Compared with [models 1.1](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_en/models_list_en.md), which are trained with static graph programming paradigm, models 2.0 are the dynamic graph trained version and achieve close performance.
> 3. All models in this tutorial are all ppocr-series models, for more introduction of algorithms and models based on public dataset, you can refer to [algorithm overview tutorial](./algorithm_overview_en.md).
@@ -18,7 +18,7 @@ The downloadable models provided by PaddleOCR include `inference model`, `traine
|--- | --- | --- |
|inference model|inference.pdmodel、inference.pdiparams|Used for inference based on Paddle inference engine,[detail](./inference_en.md)|
|trained model, pre-trained model|\*.pdparams、\*.pdopt、\*.states |The checkpoints model saved in the training process, which stores the parameters of the model, mostly used for model evaluation and continuous training.|
-|slim model|\*.nb| Model compressed by PaddleSim (a model compression tool using PaddlePaddle), which is suitable for mobile-side deployment scenarios (Paddle-Lite is needed for slim model deployment). |
+|slim model|\*.nb| Model compressed by PaddleSlim (a model compression tool using PaddlePaddle), which is suitable for mobile-side deployment scenarios (Paddle-Lite is needed for slim model deployment). |
Relationship of the above models is as follows.
@@ -50,7 +50,7 @@ Relationship of the above models is as follows.
|ch_ppocr_server_v2.0_rec|General model, supporting Chinese, English and number recognition|[rec_chinese_common_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yml)|94.8M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_train.tar) / [pre-trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_pre.tar) |
-**Note:** The `trained model` is finetuned on the `pre-trained model` with real data and synthsized vertical text data, which achieved better performance in real scene. The `pre-trained model` is directly trained on the full amount of real data and synthsized data, which is more suitable for finetune on your own dataset.
+**Note:** The `trained model` is fine-tuned on the `pre-trained model` with real data and synthesized vertical text data, which achieves better performance in real scenes. The `pre-trained model` is directly trained on the full amount of real data and synthesized data, and is more suitable for fine-tuning on your own dataset.
### 2.2 English Recognition Model
diff --git a/doc/doc_en/multi_languages_en.md b/doc/doc_en/multi_languages_en.md
index 545be5524f2c52c9799d3b013f1aac8baf1a379f..9f09b531d9f6f9912b69804e57cf4e78f0c15531 100644
--- a/doc/doc_en/multi_languages_en.md
+++ b/doc/doc_en/multi_languages_en.md
@@ -28,12 +28,12 @@ The multilingual models cover Latin, Arabic, Traditional Chinese, Korean, Japane
This document will briefly introduce how to use the multilingual model.
- [1 Installation](#Install)
- - [1.1 paddle installation](#paddleinstallation)
- - [1.2 paddleocr package installation](#paddleocr_package_install)
+ - [1.1 Paddle installation](#paddleinstallation)
+ - [1.2 PaddleOCR package installation](#paddleocr_package_install)
- [2 Quick Use](#Quick_Use)
- [2.1 Command line operation](#Command_line_operation)
- - [2.2 python script running](#python_Script_running)
+ - [2.2 Run with Python script](#python_Script_running)
- [3 Custom Training](#Custom_Training)
- [4 Inference and Deployment](#inference)
- [4 Supported languages and abbreviations](#language_abbreviations)
@@ -42,7 +42,7 @@ This document will briefly introduce how to use the multilingual model.
## 1 Installation
-### 1.1 paddle installation
+### 1.1 Paddle installation
```
# cpu
pip install paddlepaddle
@@ -52,7 +52,7 @@ pip install paddlepaddle-gpu
```
-### 1.2 paddleocr package installation
+### 1.2 PaddleOCR package installation
pip install
@@ -79,8 +79,8 @@ paddleocr -h
* Whole image prediction (detection + recognition)
-Paddleocr currently supports 80 languages, which can be switched by modifying the --lang parameter.
-The specific supported [language] (#language_abbreviations) can be viewed in the table.
+PaddleOCR currently supports 80 languages, which can be specified by the --lang parameter.
+The supported languages are listed in the [table](#language_abbreviations).
``` bash
paddleocr --image_dir doc/imgs_en/254.jpg --lang=en
@@ -90,7 +90,7 @@ paddleocr --image_dir doc/imgs_en/254.jpg --lang=en
-The result is a list, each item contains a text box, text and recognition confidence
+The result is a list. Each item contains a text box, the text and the recognition confidence.
```text
[('PHO CAPITAL', 0.95723116), [[66.0, 50.0], [327.0, 44.0], [327.0, 76.0], [67.0, 82.0]]]
[('107 State Street', 0.96311164), [[72.0, 90.0], [451.0, 84.0], [452.0, 116.0], [73.0, 121.0]]]
@@ -110,7 +110,7 @@ paddleocr --image_dir doc/imgs_words_en/word_308.png --det false --lang=en
![](https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.1/doc/imgs_words_en/word_308.png)
-The result is a tuple, which returns the recognition result and recognition confidence
+The result is a 2-tuple, which contains the recognition result and the recognition confidence.
```text
(0.99879867, 'LITTLE')
@@ -122,7 +122,7 @@ The result is a tuple, which returns the recognition result and recognition conf
paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --rec false
```
-The result is a list, each item contains only text boxes
+The result is a list. Each item represents the coordinates of a text box.
```
[[26.0, 457.0], [137.0, 457.0], [137.0, 477.0], [26.0, 477.0]]
@@ -132,9 +132,9 @@ The result is a list, each item contains only text boxes
```
-### 2.2 python script running
+### 2.2 Run with Python script
-ppocr also supports running in python scripts for easy embedding in your own code:
+PPOCR is able to run with Python scripts for easy integration with your own code:
* Whole image prediction (detection + recognition)
@@ -167,12 +167,12 @@ Visualization of results:
![](https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.1/doc/imgs_results/korean.jpg)
-ppocr also supports direction classification. For more usage methods, please refer to: [whl package instructions](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.0/doc/doc_ch/whl.md).
+PPOCR also supports direction classification. For more detailed usage, please refer to: [whl package instructions](whl_en.md).
## 3 Custom training
-ppocr supports using your own data for custom training or finetune, where the recognition model can refer to [French configuration file](../../configs/rec/multi_language/rec_french_lite_train.yml)
+PPOCR supports using your own data for custom training or fine-tuning. For the recognition model, you can refer to the [French configuration file](../../configs/rec/multi_language/rec_french_lite_train.yml).
Modify the training data path, dictionary and other parameters.
For specific data preparation and training process, please refer to: [Text Detection](../doc_en/detection_en.md), [Text Recognition](../doc_en/recognition_en.md), more functions such as predictive deployment,
@@ -183,7 +183,7 @@ For functions such as data annotation, you can read the complete [Document Tutor
## 4 Inference and Deployment
In addition to installing the whl package for quick forecasting,
-ppocr also provides a variety of forecasting deployment methods.
+PPOCR also provides a variety of forecasting deployment methods.
If necessary, you can read related documents:
- [Python Inference](./inference_en.md)
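A rough sketch of consuming the result formats described in section 2 above, assuming each detection+recognition item is `[box, (text, score)]` (the printed order in some older releases may differ); the image path is an example from the repo docs.

```python
from paddleocr import PaddleOCR

ocr = PaddleOCR(lang="en", use_angle_cls=False)

# Whole-image prediction: iterate boxes together with text and confidence
for box, (text, score) in ocr.ocr("doc/imgs_en/254.jpg"):
    print(f"{text} ({score:.2f}) at {box}")

# Detection only: each item is just a 4-point text box
for box in ocr.ocr("doc/imgs_en/254.jpg", rec=False):
    print(box)
```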
diff --git a/doc/doc_en/paddleOCR_overview_en.md b/doc/doc_en/paddleOCR_overview_en.md
index 073c3ec889b2f21e9e40f5f7d1d6dc719e3dcac9..fe64b0bd6c60f4e678ee2e44a303c124bab479ec 100644
--- a/doc/doc_en/paddleOCR_overview_en.md
+++ b/doc/doc_en/paddleOCR_overview_en.md
@@ -2,7 +2,7 @@
## 1. PaddleOCR Overview
-PaddleOCR contains rich text detection, text recognition and end-to-end algorithms. Combining actual testing and industrial experience, PaddleOCR chooses DB and CRNN as the basic detection and recognition models, and proposes a series of models, named PP-OCR, for industrial applications after a series of optimization strategies. The PP-OCR model is aimed at general scenarios and forms a model library according to different languages. Based on the capabilities of PP-OCR, PaddleOCR releases the PP-Structure tool library for document scene tasks, including two major tasks: layout analysis and table recognition. In order to get through the entire process of industrial landing, PaddleOCR provides large-scale data production tools and a variety of prediction deployment tools to help developers quickly turn ideas into reality.
+PaddleOCR contains rich text detection, text recognition and end-to-end algorithms. Drawing on experience from real-world scenarios and industrial practice, PaddleOCR chooses DB and CRNN as the basic detection and recognition models, and proposes a series of models, named PP-OCR, for industrial applications after a series of optimization strategies. The PP-OCR model is aimed at general scenarios and forms a model library for different languages. Based on the capabilities of PP-OCR, PaddleOCR releases the PP-Structure toolkit for document scene tasks, including two major tasks: layout analysis and table recognition. To cover the entire process of industrial deployment, PaddleOCR provides large-scale data production tools and a variety of prediction deployment tools to help developers quickly turn ideas into reality.
@@ -18,11 +18,11 @@ PaddleOCR contains rich text detection, text recognition and end-to-end algorith
# Recommend
git clone https://github.com/PaddlePaddle/PaddleOCR
-# If you cannot pull successfully due to network problems, you can also choose to use the code hosting on the cloud:
+# If you cannot pull successfully due to network problems, you can switch to the mirror hosted on Gitee:
git clone https://gitee.com/paddlepaddle/PaddleOCR
-# Note: The cloud-hosting code may not be able to synchronize the update with this GitHub project in real time. There might be a delay of 3-5 days. Please give priority to the recommended method.
+# Note: The mirror on Gitee may not stay synchronized with the latest updates of the project on GitHub. There might be a delay of 3-5 days. Please use the GitHub repository first.
```
### **2.2 Install third-party libraries**
@@ -34,6 +34,6 @@ pip3 install -r requirements.txt
If you getting this error `OSError: [WinError 126] The specified module could not be found` when you install shapely on windows.
-Please try to download Shapely whl file using [http://www.lfd.uci.edu/~gohlke/pythonlibs/#shapely](http://www.lfd.uci.edu/~gohlke/pythonlibs/#shapely).
+Please try to download the Shapely whl file from [http://www.lfd.uci.edu/~gohlke/pythonlibs/#shapely](http://www.lfd.uci.edu/~gohlke/pythonlibs/#shapely).
-Reference: [Solve shapely installation on windows](https://stackoverflow.com/questions/44398265/install-shapely-oserror-winerror-126-the-specified-module-could-not-be-found)
\ No newline at end of file
+Reference: [Solve shapely installation on windows](https://stackoverflow.com/questions/44398265/install-shapely-oserror-winerror-126-the-specified-module-could-not-be-found)
diff --git a/doc/doc_en/pgnet_en.md b/doc/doc_en/pgnet_en.md
index e176a1260c734974e2dad843faeb3e5532176629..c7cb3221ccfd897e2fd9062a828c2fe0ceb42024 100644
--- a/doc/doc_en/pgnet_en.md
+++ b/doc/doc_en/pgnet_en.md
@@ -6,18 +6,18 @@
## 1. Brief Introduction
-OCR algorithm can be divided into two-stage algorithm and end-to-end algorithm. The two-stage OCR algorithm is generally divided into two parts, text detection and text recognition algorithm. The text detection algorithm gets the detection box of the text line from the image, and then the recognition algorithm identifies the content of the text box. The end-to-end OCR algorithm can complete text detection and recognition in one algorithm. Its basic idea is to design a model with both detection unit and recognition module, share the CNN features of both and train them together. Because one algorithm can complete character recognition, the end-to-end model is smaller and faster.
+OCR algorithms can be divided into two categories: two-stage algorithms and end-to-end algorithms. A two-stage OCR algorithm is generally divided into two parts: a text detection algorithm that locates the box of each text line in the image, and a text recognition algorithm that identifies the content of each box. The end-to-end OCR algorithm combines text detection and recognition in a single algorithm. Its basic idea is to design a model with both a detection unit and a recognition module, share the CNN features of both and train them together. Because a single algorithm completes character recognition, the end-to-end model is smaller and faster.
### Introduction Of PGNet Algorithm
-In recent years, the end-to-end OCR algorithm has been well developed, including MaskTextSpotter series, TextSnake, TextDragon, PGNet series and so on. Among these algorithms, PGNet algorithm has the advantages that other algorithms do not
-- Pgnet loss is designed to guide training, and no character-level annotations is needed
-- NMS and ROI related operations are not needed, It can accelerate the prediction
+In recent years, end-to-end OCR algorithms have developed rapidly, including the MaskTextSpotter series, TextSnake, TextDragon, the PGNet series and so on. Among these algorithms, PGNet has several advantages over the others:
+- PGNet loss is designed to guide training, and no character-level annotations are needed.
+- NMS and ROI-related operations are not needed, which accelerates prediction.
- The reading order prediction module is proposed
- A graph based modification module (GRM) is proposed to further improve the performance of model recognition
- Higher accuracy and faster prediction speed
-For details of PGNet algorithm, please refer to [paper](https://www.aaai.org/AAAI21Papers/AAAI-2885.WangP.pdf) ,The schematic diagram of the algorithm is as follows:
+For details of the PGNet algorithm, please refer to the [paper](https://www.aaai.org/AAAI21Papers/AAAI-2885.WangP.pdf). The schematic diagram of the algorithm is as follows:
![](../pgnet_framework.png)
-After feature extraction, the input image is sent to four branches: TBO module for text edge offset prediction, TCL module for text centerline prediction, TDO module for text direction offset prediction, and TCC module for text character classification graph prediction.
+After feature extraction, the input image is sent to four branches: TBO module for text edge offset prediction, TCL module for text center-line prediction, TDO module for text direction offset prediction, and TCC module for text character classification graph prediction.
The output of TBO and TCL can get text detection results after post-processing, and TCL, TDO and TCC are responsible for text recognition.
The results of detection and recognition are as follows:
@@ -40,7 +40,7 @@ Please refer to [Operation Environment Preparation](./environment_en.md) to conf
## 3. Quick Use
-### inference model download
+### Inference model download
This section takes the trained end-to-end model as an example to quickly use the model prediction. First, download the trained end-to-end inference model [download address](https://paddleocr.bj.bcebos.com/dygraph_v2.0/pgnet/e2e_server_pgnetA_infer.tar)
```
mkdir inference && cd inference
@@ -131,7 +131,7 @@ python3 tools/train.py -c configs/e2e/e2e_r50_vd_pg.yml -o Optimizer.base_lr=0.0
```
#### Load trained model and continue training
-If you expect to load trained model and continue the training again, you can specify the parameter `Global.checkpoints` as the model path to be loaded.
+If you would like to load the trained model and continue training, you can specify the parameter `Global.checkpoints` as the model path to be loaded.
```shell
python3 tools/train.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.checkpoints=./your/trained/model
```
diff --git a/doc/doc_en/training_en.md b/doc/doc_en/training_en.md
index d013f5ac706a2a2b4a5b58ba0a6dff09ab0b4654..1a3165d0ab226d7cbeef356ee750594c759cfe23 100644
--- a/doc/doc_en/training_en.md
+++ b/doc/doc_en/training_en.md
@@ -12,15 +12,15 @@
* [4. FAQ](#3-faq)
-This article will introduce the basic concepts that need to be mastered during model training and the tuning methods during training.
+This article will introduce the basic concepts that are necessary for model training and tuning.
-At the same time, it will briefly introduce the components of the PaddleOCR model training data and how to prepare the data finetune model in the vertical scene.
+At the same time, it will briefly introduce the structure of the training data and how to prepare the data to fine-tune the model in vertical scenes.
## 1. Yml Configuration
-The PaddleOCR model uses configuration files to manage network training and evaluation parameters. In the configuration file, you can set the model, optimizer, loss function, and pre- and post-processing parameters of the model. PaddleOCR reads these parameters from the configuration file, and then builds a complete training process to complete the model training. When optimized, the configuration can be completed by modifying the parameters in the configuration file, which is simple to use and convenient to modify.
+PaddleOCR uses configuration files to control network training and evaluation parameters. In the configuration file, you can set the model, optimizer, loss function, and pre- and post-processing parameters of the model. PaddleOCR reads these parameters from the configuration file, and then builds a complete training process to train the model. Fine-tuning can also be completed by modifying the parameters in the configuration file, which is simple and convenient.
For the complete configuration file description, please refer to [Configuration File](./config_en.md)
@@ -28,13 +28,13 @@ For the complete configuration file description, please refer to [Configuration
## 2. Basic Concepts
-In the process of model training, some hyperparameters need to be manually adjusted to help the model obtain the optimal index at the least loss. Different data volumes may require different hyper-parameters. When you want to finetune your own data or tune the model effect, there are several parameter adjustment strategies for reference:
+During the model training process, some hyper-parameters can be manually specified to obtain the optimal result at the least cost. Different data volumes may require different hyper-parameters. When you want to fine-tune the model based on your own data, there are several parameter adjustment strategies for reference:
### 2.1 Learning Rate
-The learning rate is one of the important hyperparameters for training neural networks. It represents the step length of the gradient moving to the optimal solution of the loss function in each iteration.
-A variety of learning rate update strategies are provided in PaddleOCR, which can be modified through configuration files, for example:
+The learning rate is one of the most important hyper-parameters for training neural networks. It represents the step length of the gradient moving towards the optimal solution of the loss function in each iteration.
+A variety of learning rate update strategies are provided by PaddleOCR, which can be specified in configuration files. For example,
```
Optimizer:
@@ -46,16 +46,15 @@ Optimizer:
warmup_epoch: 5
```
-Piecewise stands for piecewise constant attenuation. Different learning rates are specified in different learning stages,
-and the learning rate is the same in each stage.
+`Piecewise` stands for piecewise constant decay: different learning rates are specified for different learning stages, and the learning rate stays the same within each stage.
-warmup_epoch means that in the first 5 epochs, the learning rate will gradually increase from 0 to base_lr. For all strategies, please refer to the code [learning_rate.py](../../ppocr/optimizer/learning_rate.py).
+`warmup_epoch` means that in the first 5 epochs, the learning rate will be increased gradually from 0 to base_lr. For all strategies, please refer to the code [learning_rate.py](../../ppocr/optimizer/learning_rate.py).
### 2.2 Regularization
-Regularization can effectively avoid algorithm overfitting. PaddleOCR provides L1 and L2 regularization methods.
-L1 and L2 regularization are the most commonly used regularization methods.
+Regularization can effectively avoid algorithm over-fitting. PaddleOCR provides L1 and L2 regularization methods.
+L1 and L2 regularization are the most widely used regularization methods.
L1 regularization adds a regularization term to the objective function to reduce the sum of absolute values of the parameters;
while in L2 regularization, the purpose of adding a regularization term is to reduce the sum of squared parameters.
The configuration method is as follows:
@@ -95,7 +94,7 @@ The current open source models, data sets and magnitudes are as follows:
- Chinese data set, LSVT street view data set crops the image according to the truth value, and performs position calibration, a total of 30w images. In addition, based on the LSVT corpus, 500w of synthesized data.
- Small language data set, using different corpora and fonts, respectively generated 100w synthetic data set, and using ICDAR-MLT as the verification set.
-Among them, the public data sets are all open source, users can search and download by themselves, or refer to [Chinese data set](./datasets.md), synthetic data is not open source, users can use open source synthesis tools to synthesize by themselves. Synthesis tools include [text_renderer](https://github.com/Sanster/text_renderer), [SynthText](https://github.com/ankush-me/SynthText), [TextRecognitionDataGenerator](https://github.com/Belval/TextRecognitionDataGenerator) etc.
+Among them, the public data sets are all open source; users can search for and download them by themselves, or refer to the [Chinese data set](../doc_ch/datasets.md). The synthetic data is not open source; users can generate it themselves with open-source synthesis tools such as [text_renderer](https://github.com/Sanster/text_renderer), [SynthText](https://github.com/ankush-me/SynthText), [TextRecognitionDataGenerator](https://github.com/Belval/TextRecognitionDataGenerator), etc.
@@ -129,17 +128,17 @@ There are several experiences for reference when constructing the data set:
**Q**: How to choose a suitable network input shape when training CRNN recognition?
A: The general height is 32, the longest width is selected, there are two methods:
-
+
(1) Calculate the aspect ratio distribution of training sample images. The selection of the maximum aspect ratio considers 80% of the training samples.
-
+
(2) Count the number of texts in training samples. The selection of the longest number of characters considers the training sample that satisfies 80%. Then the aspect ratio of Chinese characters is approximately considered to be 1, and that of English is 3:1, and the longest width is estimated.
**Q**: During the recognition training, the accuracy of the training set has reached 90, but the accuracy of the verification set has been kept at 70, what should I do?
A: If the accuracy of the training set is 90 and the test set is more than 70, it should be over-fitting. There are two methods to try:
-
+
(1) Add more augmentation methods or increase the [probability] of augmented prob (https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/ppocr/data/imaug/rec_img_aug.py#L341), The default is 0.4.
-
+
(2) Increase the [l2 dcay value] of the system (https://github.com/PaddlePaddle/PaddleOCR/blob/a501603d54ff5513fc4fc760319472e59da25424/configs/rec/ch_ppocr_v1.1/rec_chinese_lite_train_v1.1.yml#L47)
**Q**: When the recognition model is trained, loss can drop normally, but acc is always 0
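For readers who want to see what the `Piecewise` schedule with warmup (section 2.1) and L2 regularization (section 2.2) amount to, below is a rough standalone sketch with the public Paddle API. The numbers are illustrative assumptions; PaddleOCR itself builds these objects from the YAML config via `ppocr/optimizer/learning_rate.py`, not like this.

```python
# Rough sketch of a piecewise-constant learning rate with linear warmup and
# L2 regularization, using the public Paddle API. Values are illustrative.
import paddle

base_lr = 0.001
# Piecewise: 0.001 until step/epoch 700, then 0.0001 until 800, then 0.00001
piecewise = paddle.optimizer.lr.PiecewiseDecay(
    boundaries=[700, 800], values=[base_lr, base_lr * 0.1, base_lr * 0.01])
# warmup_epoch: 5 -> ramp the learning rate from 0 up to base_lr over the
# first 5 schedule steps before the piecewise stages take over
scheduler = paddle.optimizer.lr.LinearWarmup(
    learning_rate=piecewise, warmup_steps=5, start_lr=0.0, end_lr=base_lr)

model = paddle.nn.Linear(10, 2)   # stand-in model
optimizer = paddle.optimizer.Adam(
    learning_rate=scheduler,
    parameters=model.parameters(),
    weight_decay=paddle.regularizer.L2Decay(1e-4))  # L2 regularization

for epoch in range(10):
    # ... the training loop would go here ...
    scheduler.step()              # advance the schedule once per epoch
    print(epoch, scheduler.get_lr())
```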
diff --git a/doc/doc_en/update_en.md b/doc/doc_en/update_en.md
index 6a95b5be279d7a0b8a204cadd46b283b5eb26690..39fd936d1bd4e5f8d8535805f865792820ee1199 100644
--- a/doc/doc_en/update_en.md
+++ b/doc/doc_en/update_en.md
@@ -5,7 +5,7 @@
- 2021.8.3 released PaddleOCR v2.2, add a new structured documents analysis toolkit, i.e., [PP-Structure](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.2/ppstructure/README.md), support layout analysis and table recognition (One-key to export chart images to Excel files).
- 2021.4.8 release end-to-end text recognition algorithm [PGNet](https://www.aaai.org/AAAI21Papers/AAAI-2885.WangP.pdf) which is published in AAAI 2021. Find tutorial [here](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_en/pgnet_en.md);release multi language recognition [models](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_en/multi_languages_en.md), support more than 80 languages recognition; especically, the performance of [English recognition model](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_en/models_list_en.md#English) is Optimized.
-- 2021.1.21 update more than 25+ multilingual recognition models [models list](./doc/doc_en/models_list_en.md), including:English, Chinese, German, French, Japanese,Spanish,Portuguese Russia Arabic and so on. Models for more languages will continue to be updated [Develop Plan](https://github.com/PaddlePaddle/PaddleOCR/issues/1048).
+- 2021.1.21 update more than 25 multilingual recognition models [models list](./models_list_en.md), including: English, Chinese, German, French, Japanese, Spanish, Portuguese, Russian, Arabic and so on. Models for more languages will continue to be updated; see the [Develop Plan](https://github.com/PaddlePaddle/PaddleOCR/issues/1048).
- 2020.12.15 update Data synthesis tool, i.e., [Style-Text](../../StyleText/README.md),easy to synthesize a large number of images which are similar to the target scene image.
- 2020.11.25 Update a new data annotation tool, i.e., [PPOCRLabel](../../PPOCRLabel/README.md), which is helpful to improve the labeling efficiency. Moreover, the labeling results can be used in training of the PP-OCR system directly.
- 2020.9.22 Update the PP-OCR technical article, https://arxiv.org/abs/2009.09941
diff --git a/doc/joinus.PNG b/doc/joinus.PNG
index e03e108c52385dc31fb0daece013c3d80bf08db4..4dd9756b34192ab540970eaab410810815dfa311 100644
Binary files a/doc/joinus.PNG and b/doc/joinus.PNG differ
diff --git "a/notebook/notebook_ch/5.ppocrv2_inference_deployment/PP-OCRv2\351\242\204\346\265\213\351\203\250\347\275\262\345\256\236\346\210\230.ipynb" "b/notebook/notebook_ch/5.ppocrv2_inference_deployment/PP-OCRv2\351\242\204\346\265\213\351\203\250\347\275\262\345\256\236\346\210\230.ipynb"
index 11626518d5a8e1a6b62227cbdf81d50ce2b0eee5..400f93c257356e45b7c0bfeb1cc0e9109b9d85be 100644
--- "a/notebook/notebook_ch/5.ppocrv2_inference_deployment/PP-OCRv2\351\242\204\346\265\213\351\203\250\347\275\262\345\256\236\346\210\230.ipynb"
+++ "b/notebook/notebook_ch/5.ppocrv2_inference_deployment/PP-OCRv2\351\242\204\346\265\213\351\203\250\347\275\262\345\256\236\346\210\230.ipynb"
@@ -2551,7 +2551,7 @@
"\n",
"Paddle Serving是飞桨为方便开发者进行服务化部署而打造的工具,本节主要介绍基于Paddle Serving的PP-OCRv2系统服务化部署过程。\n",
"\n",
- "## 4.1 Padde Serving简介\n",
+ "## 4.1 Paddle Serving简介\n",
"\n",
"Paddle Serving作为飞桨(PaddlePaddle)开源的服务化部署框架,长期目标就是围绕着人工智能落地的最后一公里提供越来越专业、可靠、易用的服务。Paddle Serving目前提供了两套框架C++ Serving和Python Pipeline。Python Pipeline框架倾向于二次开发的便捷性,C++ Serving框架更倾向于追求极致性能。\n",
"\n",
diff --git a/paddleocr.py b/paddleocr.py
index f19d1defee217a8d3dc0653e6b8fd1713cb389fa..f0938c6740606bdb2a96a6f9836602c0fb670650 100644
--- a/paddleocr.py
+++ b/paddleocr.py
@@ -347,8 +347,9 @@ class PaddleOCR(predict_system.TextSystem):
ocr with paddleocr
args:
img: img for ocr, support ndarray, img_path and list or ndarray
- det: use text detection or not, if false, only rec will be exec. default is True
- rec: use text recognition or not, if false, only det will be exec. default is True
+ det: use text detection or not. If false, only rec will be executed. Default is True.
+ rec: use text recognition or not. If false, only det will be executed. Default is True.
+ cls: use angle classifier or not. Default is True. If true, text rotated by 180 degrees can be recognized. If no text is rotated by 180 degrees, use cls=False to get better performance. Text rotated by 90 or 270 degrees can be recognized even if cls=False.
"""
assert isinstance(img, (np.ndarray, list, str))
if isinstance(img, list) and det == True:
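A hedged usage sketch of the `det`/`rec`/`cls` flags documented in the docstring change above; the image paths are assumptions taken from the repo docs and may need adjusting to your checkout.

```python
from paddleocr import PaddleOCR

ocr = PaddleOCR(use_angle_cls=True, lang="en")

# Full pipeline; keep cls=True when some inputs may be upside down (180 degrees)
result = ocr.ocr("doc/imgs_en/img_12.jpg", det=True, rec=True, cls=True)

# Clean scans with no upside-down text: cls=False skips the angle classifier
# and is faster; 90/270-degree rotations are still handled without it
result_fast = ocr.ocr("doc/imgs_en/img_12.jpg", cls=False)

# Recognition only, on an already-cropped text line
line_only = ocr.ocr("doc/imgs_words_en/word_10.png", det=False, cls=False)
```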
diff --git a/ppocr/metrics/cls_metric.py b/ppocr/metrics/cls_metric.py
index 09817200234dc8d8b5d091ebbe33f07f4aad2cf6..6c077518ce205d4ec4d426aaedb8c0af880122ee 100644
--- a/ppocr/metrics/cls_metric.py
+++ b/ppocr/metrics/cls_metric.py
@@ -16,6 +16,7 @@
class ClsMetric(object):
def __init__(self, main_indicator='acc', **kwargs):
self.main_indicator = main_indicator
+ self.eps = 1e-5
self.reset()
def __call__(self, pred_label, *args, **kwargs):
@@ -28,7 +29,7 @@ class ClsMetric(object):
all_num += 1
self.correct_num += correct_num
self.all_num += all_num
- return {'acc': correct_num / all_num, }
+ return {'acc': correct_num / (all_num + self.eps), }
def get_metric(self):
"""
@@ -36,7 +37,7 @@ class ClsMetric(object):
'acc': 0
}
"""
- acc = self.correct_num / self.all_num
+ acc = self.correct_num / (self.all_num + self.eps)
self.reset()
return {'acc': acc}
diff --git a/ppocr/metrics/rec_metric.py b/ppocr/metrics/rec_metric.py
index b0ccd974f24f1c7e0c9a8e1d414373021c4288e6..b047bbcb972cadf227daaeb8797c46095ac0af43 100644
--- a/ppocr/metrics/rec_metric.py
+++ b/ppocr/metrics/rec_metric.py
@@ -20,6 +20,7 @@ class RecMetric(object):
def __init__(self, main_indicator='acc', is_filter=False, **kwargs):
self.main_indicator = main_indicator
self.is_filter = is_filter
+ self.eps = 1e-5
self.reset()
def _normalize_text(self, text):
@@ -47,8 +48,8 @@ class RecMetric(object):
self.all_num += all_num
self.norm_edit_dis += norm_edit_dis
return {
- 'acc': correct_num / all_num,
- 'norm_edit_dis': 1 - norm_edit_dis / (all_num + 1e-3)
+ 'acc': correct_num / (all_num + self.eps),
+ 'norm_edit_dis': 1 - norm_edit_dis / (all_num + self.eps)
}
def get_metric(self):
@@ -58,8 +59,8 @@ class RecMetric(object):
'norm_edit_dis': 0,
}
"""
- acc = 1.0 * self.correct_num / (self.all_num + 1e-3)
- norm_edit_dis = 1 - self.norm_edit_dis / (self.all_num + 1e-3)
+ acc = 1.0 * self.correct_num / (self.all_num + self.eps)
+ norm_edit_dis = 1 - self.norm_edit_dis / (self.all_num + self.eps)
self.reset()
return {'acc': acc, 'norm_edit_dis': norm_edit_dis}
diff --git a/ppocr/metrics/table_metric.py b/ppocr/metrics/table_metric.py
index 80d1c789ecc3979bd4c33620af91ccd28012f7a8..ca4d6474202b4e85cadf86ccb2fe2726c7fa9aeb 100644
--- a/ppocr/metrics/table_metric.py
+++ b/ppocr/metrics/table_metric.py
@@ -12,9 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
+
+
class TableMetric(object):
def __init__(self, main_indicator='acc', **kwargs):
self.main_indicator = main_indicator
+ self.eps = 1e-5
self.reset()
def __call__(self, pred, batch, *args, **kwargs):
@@ -31,9 +34,7 @@ class TableMetric(object):
correct_num += 1
self.correct_num += correct_num
self.all_num += all_num
- return {
- 'acc': correct_num * 1.0 / all_num,
- }
+ return {'acc': correct_num * 1.0 / (all_num + self.eps), }
def get_metric(self):
"""
@@ -41,7 +42,7 @@ class TableMetric(object):
'acc': 0,
}
"""
- acc = 1.0 * self.correct_num / self.all_num
+ acc = 1.0 * self.correct_num / (self.all_num + self.eps)
self.reset()
return {'acc': acc}
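The three metric changes above share one idea: add a small `eps` to the denominator so an empty evaluation batch no longer divides by zero. A self-contained sketch of the guard (not the actual ppocr classes):

```python
class TinyAccMetric:
    """Minimal stand-in showing the epsilon guard; not the real ppocr metric."""

    def __init__(self, eps=1e-5):
        self.eps = eps
        self.correct_num = 0
        self.all_num = 0

    def update(self, preds, labels):
        for p, l in zip(preds, labels):
            self.correct_num += int(p == l)
            self.all_num += 1

    def accuracy(self):
        # eps keeps the division well-defined even if no samples were seen
        return self.correct_num / (self.all_num + self.eps)


m = TinyAccMetric()
print(m.accuracy())             # 0.0 instead of ZeroDivisionError
m.update(["a", "b"], ["a", "c"])
print(round(m.accuracy(), 4))   # roughly 0.5
```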
diff --git a/ppstructure/README.md b/ppstructure/README.md
index abcc5281aa24c56e6a3f22e377de816b0bd66712..1d201a7c6e54f6ed71be6d1872b7f4b226ad35ad 100644
--- a/ppstructure/README.md
+++ b/ppstructure/README.md
@@ -96,7 +96,7 @@ In PP-Structure, the image will be divided into 5 types of areas **text, title,
#### 6.1.1 Layout analysis
-Layout analysis classifies image by region, including the use of Python scripts of layout analysis tools, extraction of designated category detection boxes, performance indicators, and custom training layout analysis models. For details, please refer to [document](layout/README.md).
+Layout analysis classifies images by region, including the use of Python scripts of layout analysis tools, extraction of designated category detection boxes, performance indicators, and custom training layout analysis models. For details, please refer to the [document](layout/README_en.md).
#### 6.1.2 Table recognition
diff --git a/ppstructure/docs/kie_en.md b/ppstructure/docs/kie_en.md
new file mode 100644
index 0000000000000000000000000000000000000000..a424968a9b5a33132afe52a4850cfe541919ae1c
--- /dev/null
+++ b/ppstructure/docs/kie_en.md
@@ -0,0 +1,77 @@
+
+
+# Key Information Extraction (KIE)
+
+This section provides a tutorial example on how to quickly use, train, and evaluate a key information extraction (KIE) model, [SDMGR](https://arxiv.org/abs/2103.14470), in PaddleOCR.
+
+[SDMGR (Spatial Dual-Modality Graph Reasoning)](https://arxiv.org/abs/2103.14470) is a KIE algorithm that classifies each detected text region into predefined categories, such as order ID, invoice number and amount.
+
+
+* [1. Quick Use](#1-quick-use)
+* [2. Model Training](#2-model-training)
+* [3. Model Evaluation](#3-model-evaluation)
+
+
+
+## 1. Quick Use
+
+The [Wildreceipt dataset](https://paperswithcode.com/dataset/wildreceipt) is used for this tutorial. It contains 1765 photos with 25 classes and 50000 text boxes, and can be downloaded with wget:
+
+```shell
+wget https://paddleocr.bj.bcebos.com/dygraph_v2.1/kie/wildreceipt.tar && tar xf wildreceipt.tar
+```
+
+Download the pretrained model and predict the result:
+
+```shell
+cd PaddleOCR/
+wget https://paddleocr.bj.bcebos.com/dygraph_v2.1/kie/kie_vgg16.tar && tar xf kie_vgg16.tar
+python3.7 tools/infer_kie.py -c configs/kie/kie_unet_sdmgr.yml -o Global.checkpoints=kie_vgg16/best_accuracy Global.infer_img=../wildreceipt/1.txt
+```
+
+The prediction result is saved as `./output/sdmgr_kie/predicts_kie.txt`, and the visualization results are saved in the folder `./output/sdmgr_kie/kie_results/`.
+
+The visualization results are shown in the figure below:
+
+
+
+
+
+
+## 2. Model Training
+
+Create a soft link to the dataset in the folder `PaddleOCR/train_data`:
+```shell
+cd PaddleOCR/ && mkdir train_data && cd train_data
+
+ln -s ../../wildreceipt ./
+```
+
+The configuration file used for training is `configs/kie/kie_unet_sdmgr.yml`. The default training data path in the configuration file is `train_data/wildreceipt`. After preparing the data, you can execute the model training with the following command:
+```shell
+python3.7 tools/train.py -c configs/kie/kie_unet_sdmgr.yml -o Global.save_model_dir=./output/kie/
+```
+
+
+## 3. Model Evaluation
+
+After training, you can execute the model evaluation with the following command:
+
+```shell
+python3.7 tools/eval.py -c configs/kie/kie_unet_sdmgr.yml -o Global.checkpoints=./output/kie/best_accuracy
+```
+
+**Reference:**
+
+
+
+```bibtex
+@misc{sun2021spatial,
+ title={Spatial Dual-Modality Graph Reasoning for Key Information Extraction},
+ author={Hongbin Sun and Zhanghui Kuang and Xiaoyu Yue and Chenhao Lin and Wayne Zhang},
+ year={2021},
+ eprint={2103.14470},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
diff --git a/ppstructure/docs/quickstart.md b/ppstructure/docs/quickstart.md
index 446c577ec39cf24dd4b8699558c633a1308fa444..668775c6da2b06d973f69a9ce81a37396460cbdf 100644
--- a/ppstructure/docs/quickstart.md
+++ b/ppstructure/docs/quickstart.md
@@ -39,7 +39,7 @@ paddleocr --image_dir=../doc/table/1.png --type=structure
* VQA
-coming soon
+请参考:[文档视觉问答](../vqa/README.md)。
@@ -74,7 +74,7 @@ im_show.save('result.jpg')
* VQA
-comming soon
+请参考:[文档视觉问答](../vqa/README.md)。
@@ -101,7 +101,7 @@ dict 里各个字段说明如下
* VQA
-comming soon
+请参考:[文档视觉问答](../vqa/README.md)。
@@ -116,9 +116,9 @@ comming soon
| model_name_or_path | VQA SER模型地址 | None |
| max_seq_length | VQA SER模型最大支持token长度 | 512 |
| label_map_path | VQA SER 标签文件地址 | ./vqa/labels/labels_ser.txt |
-| mode | pipeline预测模式,structure: 版面分析+表格识别; vqa: ser文档信息抽取 | structure |
+| mode | pipeline预测模式,structure: 版面分析+表格识别; VQA: SER文档信息抽取 | structure |
-大部分参数和paddleocr whl包保持一致,见 [whl包文档](../doc/doc_ch/whl.md)
+大部分参数和PaddleOCR whl包保持一致,见 [whl包文档](../../doc/doc_ch/whl.md)
运行完成后,每张图片会在`output`字段指定的目录下有一个同名目录,图片里的每个表格会存储为一个excel,图片区域会被裁剪之后保存下来,excel文件和图片名名为表格在图片里的坐标。
@@ -133,16 +133,16 @@ cd ppstructure
# 下载模型
mkdir inference && cd inference
-# 下载超轻量级中文OCR模型的检测模型并解压
-wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar && tar xf ch_ppocr_mobile_v2.0_det_infer.tar
-# 下载超轻量级中文OCR模型的识别模型并解压
-wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar && tar xf ch_ppocr_mobile_v2.0_rec_infer.tar
-# 下载超轻量级英文表格英寸模型并解压
+# 下载PP-OCRv2文本检测模型并解压
+wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_slim_quant_infer.tar && tar xf ch_PP-OCRv2_det_slim_quant_infer.tar
+# 下载PP-OCRv2文本识别模型并解压
+wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_slim_quant_infer.tar && tar xf ch_PP-OCRv2_rec_slim_quant_infer.tar
+# 下载超轻量级英文表格预测模型并解压
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar && tar xf en_ppocr_mobile_v2.0_table_structure_infer.tar
cd ..
-python3 predict_system.py --det_model_dir=inference/ch_ppocr_mobile_v2.0_det_infer \
- --rec_model_dir=inference/ch_ppocr_mobile_v2.0_rec_infer \
+python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv2_det_slim_quant_infer \
+ --rec_model_dir=inference/ch_PP-OCRv2_rec_slim_quant_infer \
--table_model_dir=inference/en_ppocr_mobile_v2.0_table_structure_infer \
--image_dir=../doc/table/1.png \
--rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt \
diff --git a/ppstructure/table/README.md b/ppstructure/table/README.md
index 30a11a20e5de90500d1408f671ba914f336a0b43..94fa76055b93cefab0ac507a6007ec148aa12945 100644
--- a/ppstructure/table/README.md
+++ b/ppstructure/table/README.md
@@ -41,7 +41,7 @@ wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_tab
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar && tar xf en_ppocr_mobile_v2.0_table_structure_infer.tar
cd ..
# run
-python3 table/predict_table.py --det_model_dir=inference/en_ppocr_mobile_v2.0_table_det_infer --rec_model_dir=inference/en_ppocr_mobile_v2.0_table_rec_infer --table_model_dir=inference/en_ppocr_mobile_v2.0_table_structure_infer --image_dir=../doc/table/table.jpg --rec_char_dict_path=../ppocr/utils/dict/table_dict.txt --table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt --rec_char_dict_path=../ppocr/utils/dict/en_dict.txt --det_limit_side_len=736 --det_limit_type=min --output ../output/table
+python3 table/predict_table.py --det_model_dir=inference/en_ppocr_mobile_v2.0_table_det_infer --rec_model_dir=inference/en_ppocr_mobile_v2.0_table_rec_infer --table_model_dir=inference/en_ppocr_mobile_v2.0_table_structure_infer --image_dir=../doc/table/table.jpg --rec_char_dict_path=../ppocr/utils/dict/table_dict.txt --table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt --det_limit_side_len=736 --det_limit_type=min --output ../output/table
```
Note: The above model is trained on the PubLayNet dataset and only supports English scanning scenarios. If you need to identify other scenarios, you need to train the model yourself and replace the three fields `det_model_dir`, `rec_model_dir`, `table_model_dir`.
diff --git a/ppstructure/table/README_ch.md b/ppstructure/table/README_ch.md
index 33276b36e4973e83d7efa673b90013cf5727dfe2..ef0f1ae5c4554e69e4cbeb0fcd783e6d98f96a41 100644
--- a/ppstructure/table/README_ch.md
+++ b/ppstructure/table/README_ch.md
@@ -56,7 +56,7 @@ wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_tab
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar && tar xf en_ppocr_mobile_v2.0_table_structure_infer.tar
cd ..
# 执行预测
-python3 table/predict_table.py --det_model_dir=inference/en_ppocr_mobile_v2.0_table_det_infer --rec_model_dir=inference/en_ppocr_mobile_v2.0_table_rec_infer --table_model_dir=inference/en_ppocr_mobile_v2.0_table_structure_infer --image_dir=../doc/table/table.jpg --rec_char_dict_path=../ppocr/utils/dict/table_dict.txt --table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt --rec_char_dict_path=../ppocr/utils/dict/en_dict.txt --det_limit_side_len=736 --det_limit_type=min --output ../output/table
+python3 table/predict_table.py --det_model_dir=inference/en_ppocr_mobile_v2.0_table_det_infer --rec_model_dir=inference/en_ppocr_mobile_v2.0_table_rec_infer --table_model_dir=inference/en_ppocr_mobile_v2.0_table_structure_infer --image_dir=../doc/table/table.jpg --rec_char_dict_path=../ppocr/utils/dict/table_dict.txt --table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt --det_limit_side_len=736 --det_limit_type=min --output ../output/table
```
运行完成后,每张图片的excel表格会保存到output字段指定的目录下
diff --git a/test_tipc/readme.md b/test_tipc/readme.md
index 8b2489f3445ddfa87c1e587d6da81992fdb90e64..7b7548cd7296760d4caec0ed741c47137d86ece1 100644
--- a/test_tipc/readme.md
+++ b/test_tipc/readme.md
@@ -68,14 +68,14 @@ test_tipc/
├── model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt # 测试Linux上c++预测的配置文件
├── model_linux_gpu_normal_normal_infer_python_jetson.txt # 测试Jetson上python预测的配置文件
├── train_linux_gpu_fleet_amp_infer_python_linux_gpu_cpu.txt # 测试Linux上多机多卡、混合精度训练和python预测的配置文件
- ├── ...
+ ├── ...
├── ch_ppocr_server_v2.0_det # ch_ppocr_server_v2.0_det模型的测试配置文件目录
- ├── ...
+ ├── ...
├── ch_ppocr_mobile_v2.0_rec # ch_ppocr_mobile_v2.0_rec模型的测试配置文件目录
- ├── ...
+ ├── ...
├── ch_ppocr_server_v2.0_det # ch_ppocr_server_v2.0_det模型的测试配置文件目录
- ├── ...
- ├── ...
+ ├── ...
+ ├── ...
├── results/ # 预先保存的预测结果,用于和实际预测结果进行精读比对
├── python_ppocr_det_mobile_results_fp32.txt # 预存的mobile版ppocr检测模型python预测fp32精度的结果
├── python_ppocr_det_mobile_results_fp16.txt # 预存的mobile版ppocr检测模型python预测fp16精度的结果
@@ -119,7 +119,7 @@ bash test_tipc/test_train_inference_python.sh configs/[model_name]/[params_file_
bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt 'lite_train_lite_infer'
# 运行测试
bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt 'lite_train_lite_infer'
-```
+```
关于本示例命令的更多信息可查看[基础训练预测使用文档](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/test_tipc/docs/test_train_inference_python.md#22-%E5%8A%9F%E8%83%BD%E6%B5%8B%E8%AF%95)。
### 配置文件命名规范
@@ -136,9 +136,9 @@ bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobil
## 4. 开始测试
-各功能测试中涉及混合精度、裁剪、量化等训练相关,及mkldnn、Tensorrt等多种预测相关参数配置,请点击下方相应链接了解更多细节和使用教程:
-- [test_train_inference_python 使用](docs/test_train_inference_python.md) :测试基于Python的模型训练、评估、推理等基本功能,包括裁剪、量化、蒸馏。
+各功能测试中涉及混合精度、裁剪、量化等训练相关,及mkldnn、Tensorrt等多种预测相关参数配置,请点击下方相应链接了解更多细节和使用教程:
+- [test_train_inference_python 使用](docs/test_train_inference_python.md) :测试基于Python的模型训练、评估、推理等基本功能,包括裁剪、量化、蒸馏。
- [test_inference_cpp 使用](docs/test_inference_cpp.md):测试基于C++的模型推理。
- [test_serving 使用](docs/test_serving.md):测试基于Paddle Serving的服务化部署功能。
-- [test_lite_arm_cpu_cpp 使用](docs/test_lite_arm_cpu_cpp.md):测试基于Paddle-Lite的ARM CPU端c++预测部署功能。
+- [test_lite_arm_cpp 使用](docs/test_lite_arm_cpp.md):测试基于Paddle-Lite的ARM CPU端c++预测部署功能。
- [test_paddle2onnx 使用](docs/test_paddle2onnx.md):测试Paddle2ONNX的模型转化功能,并验证正确性。
diff --git a/test_tipc/test_train_inference_python.sh b/test_tipc/test_train_inference_python.sh
index 9bde89d78e0ee78c7b650306047b036488a3eab9..4ad83c66977540d73bdc9bedb8b93bf465e8b6fc 100644
--- a/test_tipc/test_train_inference_python.sh
+++ b/test_tipc/test_train_inference_python.sh
@@ -284,7 +284,6 @@ else
set_amp_config=" "
fi
for trainer in ${trainer_list[*]}; do
- eval ${env}
flag_quant=False
if [ ${trainer} = ${pact_key} ]; then
run_train=${pact_trainer}
@@ -344,6 +343,7 @@ else
# run eval
if [ ${eval_py} != "null" ]; then
+ eval ${env}
set_eval_params1=$(func_set_params "${eval_key1}" "${eval_value1}")
eval_cmd="${python} ${eval_py} ${set_eval_pretrain} ${set_use_gpu} ${set_eval_params1}"
eval $eval_cmd
diff --git a/tools/infer/predict_system.py b/tools/infer/predict_system.py
index 8d674809a5fe22e458fcb0c68419a7313e71d5f6..16789b81cd0364af91f15a4a90ddd614a3f87611 100755
--- a/tools/infer/predict_system.py
+++ b/tools/infer/predict_system.py
@@ -92,11 +92,11 @@ class TextSystem(object):
self.draw_crop_rec_res(self.args.crop_res_save_dir, img_crop_list,
rec_res)
filter_boxes, filter_rec_res = [], []
- for box, rec_reuslt in zip(dt_boxes, rec_res):
- text, score = rec_reuslt
+ for box, rec_result in zip(dt_boxes, rec_res):
+ text, score = rec_result
if score >= self.drop_score:
filter_boxes.append(box)
- filter_rec_res.append(rec_reuslt)
+ filter_rec_res.append(rec_result)
return filter_boxes, filter_rec_res
diff --git a/tools/infer_cls.py b/tools/infer_cls.py
index 7522e43907b50b84cc52930ff4eeb8e537cb2c73..ab6a49120b6e22621b462b680a161d70ee965e78 100755
--- a/tools/infer_cls.py
+++ b/tools/infer_cls.py
@@ -73,8 +73,8 @@ def main():
images = paddle.to_tensor(images)
preds = model(images)
post_result = post_process_class(preds)
- for rec_reuslt in post_result:
- logger.info('\t result: {}'.format(rec_reuslt))
+ for rec_result in post_result:
+ logger.info('\t result: {}'.format(rec_result))
logger.info("success!")
diff --git a/tools/program.py b/tools/program.py
index 743ace090cc608523fc76c4e90864b60a0934e86..10299940d61dd0c7b6df770e7441d3c6551954a9 100755
--- a/tools/program.py
+++ b/tools/program.py
@@ -511,7 +511,7 @@ def preprocess(is_train=False):
config['Global']['distributed'] = dist.get_world_size() != 1
- if config['Global']['use_visualdl']:
+ if config['Global']['use_visualdl'] and dist.get_rank() == 0:
from visualdl import LogWriter
save_model_dir = config['Global']['save_model_dir']
vdl_writer_path = '{}/vdl/'.format(save_model_dir)
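The `tools/program.py` change above restricts the VisualDL writer to the rank-0 process so that distributed workers do not each create a log. A hedged sketch of that pattern (the directory name is a placeholder, and this is not how PaddleOCR structures the code internally):

```python
import os
import paddle.distributed as dist


def build_vdl_writer(save_model_dir, use_visualdl=True):
    """Return a VisualDL LogWriter on rank 0 only, None on other workers."""
    if not use_visualdl or dist.get_rank() != 0:
        return None
    from visualdl import LogWriter
    vdl_dir = os.path.join(save_model_dir, "vdl")
    os.makedirs(vdl_dir, exist_ok=True)
    return LogWriter(logdir=vdl_dir)


writer = build_vdl_writer("./output/rec_demo")
if writer is not None:
    writer.add_scalar(tag="TRAIN/loss", step=0, value=1.23)
```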