提交 92326ef8 编写于 作者: W WenmuZhou

Merge branch 'develop' of https://github.com/PaddlePaddle/PaddleOCR into master

# ex: set ts=8 noet:
all: qt5 test
test: testpy3
testpy2:
python -m unittest discover tests
testpy3:
python3 -m unittest discover tests
qt4: qt4py2
qt5: qt5py3
qt4py2:
pyrcc4 -py2 -o libs/resources.py resources.qrc
qt4py3:
pyrcc4 -py3 -o libs/resources.py resources.qrc
qt5py3:
pyrcc5 -o libs/resources.py resources.qrc
clean:
rm -rf ~/.labelImgSettings.pkl *.pyc dist labelImg.egg-info __pycache__ build
pip_upload:
python3 setup.py upload
long_description:
restview --long-description
.PHONY: all
......@@ -61,7 +61,7 @@ from libs.zoomWidget import ZoomWidget
from libs.autoDialog import AutoDialog
from libs.labelDialog import LabelDialog
from libs.colorDialog import ColorDialog
from libs.labelFile import LabelFile, LabelFileError, LabelFileFormat
from libs.labelFile import LabelFile, LabelFileError
from libs.toolBar import ToolBar
from libs.ustr import ustr
from libs.hashableQListWidgetItem import HashableQListWidgetItem
......@@ -91,7 +91,7 @@ class WindowMixin(object):
class MainWindow(QMainWindow, WindowMixin):
FIT_WINDOW, FIT_WIDTH, MANUAL_ZOOM = list(range(3))
def __init__(self, defaultFilename=None, defaultPrefdefClassFile=None, defaultSaveDir=None, language="zh-CN"):
def __init__(self, lang="ch", defaultFilename=None, defaultPrefdefClassFile=None, defaultSaveDir=None):
super(MainWindow, self).__init__()
self.setWindowTitle(__appname__)
......@@ -99,15 +99,15 @@ class MainWindow(QMainWindow, WindowMixin):
self.settings = Settings()
self.settings.load()
settings = self.settings
self.lang = lang
# Load string bundle for i18n
if language not in ['zh-CN', 'en']:
language = 'zh-CN'
self.stringBundle = StringBundle.getBundle(localeStr=language) # 'en'
if lang not in ['ch', 'en']:
lang = 'en'
self.stringBundle = StringBundle.getBundle(localeStr='zh-CN' if lang=='ch' else 'en') # 'en'
getStr = lambda strId: self.stringBundle.getString(strId)
self.defaultSaveDir = defaultSaveDir
self.ocr = PaddleOCR(use_pdserving=False, use_angle_cls=True, det=True, cls=True, use_gpu=True, lang="ch")
self.ocr = PaddleOCR(use_pdserving=False, use_angle_cls=True, det=True, cls=True, use_gpu=False, lang=lang)
if os.path.exists('./data/paddle.png'):
result = self.ocr.ocr('./data/paddle.png', cls=True, det=True)
......@@ -162,7 +162,7 @@ class MainWindow(QMainWindow, WindowMixin):
self.AutoRecognition.setToolButtonStyle(Qt.ToolButtonTextBesideIcon)
self.AutoRecognition.setIcon(newIcon('Auto'))
# self.AutoRecognition.setIconSize(QSize(100,20))
self.AutoRecognition.setFixedSize(QSize(80,30))
# self.AutoRecognition.setFixedSize(QSize(80,30))
# self.AutoRecognition.setStyleSheet('text-align:center;')#border:none;font-size : 12pt;
autoRecLayout = QHBoxLayout()
autoRecLayout.setContentsMargins(0, 0, 0, 0)
......@@ -189,18 +189,18 @@ class MainWindow(QMainWindow, WindowMixin):
self.editButton = QToolButton()
self.reRecogButton = QToolButton()
self.reRecogButton.setIcon(newIcon('reRec', 30))
self.reRecogButton.setFixedSize(QSize(80,30))
# self.reRecogButton.setFixedSize(QSize(80,30))
self.reRecogButton.setToolButtonStyle(Qt.ToolButtonTextBesideIcon)
self.newButton = QToolButton()
self.newButton.setToolButtonStyle(Qt.ToolButtonTextBesideIcon)
self.newButton.setFixedSize(QSize(80, 30))
# self.newButton.setFixedSize(QSize(80, 30))
self.SaveButton = QToolButton()
self.SaveButton.setToolButtonStyle(Qt.ToolButtonTextBesideIcon)
self.SaveButton.setFixedSize(QSize(60, 30))
# self.SaveButton.setFixedSize(QSize(60, 30))
self.DelButton = QToolButton()
self.DelButton.setToolButtonStyle(Qt.ToolButtonTextBesideIcon)
self.DelButton.setFixedSize(QSize(80, 30))
# self.DelButton.setFixedSize(QSize(80, 30))
lefttoptoolbox = QHBoxLayout()
......@@ -423,10 +423,10 @@ class MainWindow(QMainWindow, WindowMixin):
'Ctrl+D', 'copy', getStr('dupBoxDetail'),
enabled=False)
hideAll = action('&Hide\nRectBox', partial(self.togglePolygons, False),
hideAll = action(getStr('hideBox'), partial(self.togglePolygons, False),
'Ctrl+H', 'hide', getStr('hideAllBoxDetail'),
enabled=False)
showAll = action('&Show\nRectBox', partial(self.togglePolygons, True),
showAll = action(getStr('showBox'), partial(self.togglePolygons, True),
'Ctrl+A', 'hide', getStr('showAllBoxDetail'),
enabled=False)
......@@ -593,7 +593,7 @@ class MainWindow(QMainWindow, WindowMixin):
zoomIn, zoomOut, zoomOrg, None,
fitWindow, fitWidth))
addActions(self.menus.autolabel, (saveRec, None, help)) # alcm,
addActions(self.menus.autolabel, (alcm, saveRec, None, help)) #
self.menus.file.aboutToShow.connect(self.updateFileMenu)
......@@ -787,7 +787,7 @@ class MainWindow(QMainWindow, WindowMixin):
QMessageBox.information(self, u'Information', msg)
def showStepsDialog(self):
msg = steps()
msg = stepsInfo(self.lang)
QMessageBox.information(self, u'Information', msg)
def createShape(self):
......@@ -1917,7 +1917,50 @@ class MainWindow(QMainWindow, WindowMixin):
def autolcm(self):
print('autolabelchoosemodel')
vbox = QVBoxLayout()
hbox = QHBoxLayout()
self.panel = QLabel()
self.panel.setText(self.stringBundle.getString('choseModelLg'))
self.panel.setAlignment(Qt.AlignLeft)
self.comboBox = QComboBox()
self.comboBox.setObjectName("comboBox")
self.comboBox.addItems(['Chinese & English', 'English', 'French', 'German', 'Korean', 'Japanese'])
# self.comboBox_lg = QComboBox()
# self.comboBox_lg.setObjectName("comboBox_language")
vbox.addWidget(self.panel)
vbox.addWidget(self.comboBox)
self.dialog = QDialog()
self.dialog.resize(300, 100)
self.okBtn = QPushButton(self.stringBundle.getString('ok'))
self.cancelBtn = QPushButton(self.stringBundle.getString('cancel'))
self.okBtn.clicked.connect(self.modelChoose)
self.cancelBtn.clicked.connect(self.cancel)
self.dialog.setWindowTitle(self.stringBundle.getString('choseModelLg'))
hbox.addWidget(self.okBtn)
hbox.addWidget(self.cancelBtn)
vbox.addWidget(self.panel)
vbox.addLayout(hbox)
self.dialog.setLayout(vbox)
self.dialog.setWindowModality(Qt.ApplicationModal)
self.dialog.exec_()
if self.filePath:
self.AutoRecognition.setEnabled(True)
def modelChoose(self):
print(self.comboBox.currentText())
lg_idx = {'Chinese & English': 'ch', 'English': 'en', 'French': 'french', 'German': 'german',
'Korean': 'korean', 'Japanese': 'japan'}
del self.ocr
self.ocr = PaddleOCR(use_pdserving=False, use_angle_cls=True, det=True, cls=True, use_gpu=False,
lang=lg_idx[self.comboBox.currentText()])
self.dialog.close()
def cancel(self):
self.dialog.close()
def loadFilestate(self, saveDir):
self.fileStatepath = saveDir + '/fileState.txt'
......@@ -2020,18 +2063,15 @@ def get_main_app(argv=[]):
app.setWindowIcon(newIcon("app"))
# Tzutalin 201705+: Accept extra agruments to change predefined class file
argparser = argparse.ArgumentParser()
argparser.add_argument("image_dir", nargs="?")
argparser.add_argument("language", default='zh-CN',nargs="?")
argparser.add_argument("predefined_classes_file",
argparser.add_argument("--lang", default='ch', nargs="?")
argparser.add_argument("--predefined_classes_file",
default=os.path.join(os.path.dirname(__file__), "data", "predefined_classes.txt"),
nargs="?")
argparser.add_argument("save_dir", nargs="?")
args = argparser.parse_args(argv[1:])
# Usage : labelImg.py image predefClassFile saveDir
win = MainWindow(args.image_dir,
args.predefined_classes_file,
args.save_dir,
args.language)
win = MainWindow(lang=args.lang,
defaultPrefdefClassFile=args.predefined_classes_file,
)
win.show()
return app, win
......
......@@ -24,11 +24,9 @@ python PPOCRLabel.py
#### Ubuntu Linux
```
sudo apt-get install pyqt5-dev-tools
sudo apt-get install trash-cli
pip3 install pyqt5
pip3 install trash-cli
cd ./PPOCRLabel # 将目录切换到PPOCRLabel文件夹下
sudo pip3 install -r requirements/requirements-linux-python3.txt
make qt5py3
python3 PPOCRLabel.py
```
......@@ -38,7 +36,6 @@ pip3 install pyqt5
pip3 uninstall opencv-python # 由于mac版本的opencv与pyqt有冲突,需先手动卸载opencv
pip3 install opencv-contrib-python-headless # 安装headless版本的open-cv
cd ./PPOCRLabel # 将目录切换到PPOCRLabel文件夹下
make qt5py3
python3 PPOCRLabel.py
```
......@@ -75,6 +72,20 @@ python3 PPOCRLabel.py
| rec_gt.txt | 识别标签。可直接用于PPOCR识别模型训练。需用户手动点击菜单栏“PaddleOCR” - "保存识别结果"后产生。 |
| crop_img | 识别数据。按照检测框切割后的图片。与rec_gt.txt同时产生。 |
## 说明
### 内置模型
- 默认模型:PPOCRLabel默认使用PaddleOCR中的中英文超轻量OCR模型,支持中英文与数字识别,多种语言检测。
- 模型语言切换:用户可通过菜单栏中 "PaddleOCR" - "选择模型" 切换内置模型语言,目前支持的语言包括法文、德文、韩文、日文。具体模型下载链接可参考[PaddleOCR模型列表](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/models_list.md).
- 自定义模型:用户可根据[自定义模型代码使用](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/whl.md#%E8%87%AA%E5%AE%9A%E4%B9%89%E6%A8%A1%E5%9E%8B),通过修改PPOCRLabel.py中针对[PaddleOCR类的实例化](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/PPOCRLabel/PPOCRLabel.py#L110)替换成自己训练的模型
### 错误提示
- 如果同时使用whl包安装了paddleocr,其优先级大于通过paddleocr.py调用PaddleOCR类,whl包未更新时会导致程序异常。
- PPOCRLabel**不支持对中文文件名**的图片进行自动标注。
- 如果您在打开软件过程中出现**objc[XXXXX]**开头的错误,证明您的opencv版本太高,建议安装4.2版本:
```
pip install opencv-python==4.2.0.32
```
### 参考资料
1.[Tzutalin. LabelImg. Git code (2015)](https://github.com/tzutalin/labelImg)
# PPOCRLabel
PPOCRLabel is a semi-automatic graphic annotation tool suitable for OCR field. It is written in python3 and pyqt5. Support rectangular frame labeling and four-point labeling mode. Annotations can be directly used for the training of PPOCR detection and recognition models.
<img src="./data/gif/steps.gif" width="100%"/>
## Installation
### 1. Install PaddleOCR
Refer to [PaddleOCR installation document](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/installation.md) to prepare PaddleOCR
### 2. Install PPOCRLabel
#### Windows + Anaconda
Download and install [Anaconda](https://www.anaconda.com/download/#download) (Python 3+)
```
conda install pyqt=5
cd ./PPOCRLabel # Change the directory to the PPOCRLabel folder
pyrcc5 -o libs/resources.py resources.qrc
python PPOCRLabel.py --lang en
```
#### Ubuntu Linux
```
pip3 install pyqt5
pip3 install trash-cli
cd ./PPOCRLabel # Change the directory to the PPOCRLabel folder
python3 PPOCRLabel.py --lang en
```
#### macOS
```
pip3 install pyqt5
pip3 uninstall opencv-python # Uninstall opencv manually as it conflicts with pyqt
pip3 install opencv-contrib-python-headless # Install the headless version of opencv
cd ./PPOCRLabel # Change the directory to the PPOCRLabel folder
python3 PPOCRLabel.py --lang en
```
## Usage
### Steps
1. Build and launch using the instructions above.
2. Click 'Open Dir' in Menu/File to select the folder of the picture.<sup>[1]</sup>
3. Click 'Auto recognition', use PPOCR model to automatically annotate images which marked with 'X' <sup>[2]</sup>before the file name.
4. Create Box:
4.1 Click 'Create RectBox' or press 'W' in English keyboard mode to draw a new rectangle detection box. Click and release left mouse to select a region to annotate the text area.
4.2 Press 'P' to enter four-point labeling mode which enables you to create any four-point shape by clicking four points with the left mouse button in succession and DOUBLE CLICK the left mouse as the signal of labeling completion.
5. After the marking frame is drawn, the user clicks "OK", and the detection frame will be pre-assigned a "TEMPORARY" label.
6. Click 're-Recognition', model will rewrite ALL recognition results in ALL detection box<sup>[3]</sup>.
7. Double click the result in 'recognition result' list to manually change inaccurate recognition results.
8. Click "Save", the image status will switch to "√",then the program automatically jump to the next.
9. Click "Delete Image" and the image will be deleted to the recycle bin.
10. Labeling result: After closing the application or switching the file path, the manually saved label will be stored in *Label.txt* under the opened picture folder.
Click "PaddleOCR"-"Save Recognition Results" in the menu bar, the recognition training data of such pictures will be saved in the *crop_img* folder, and the recognition label will be saved in *rec_gt.txt*<sup>[4]</sup>.
### Note
[1] PPOCRLabel uses the opened folder as the project. After opening the image folder, the picture will not be displayed in the dialog. Instead, the pictures under the folder will be directly imported into the program after clicking "Open Dir".
[2] The image status indicates whether the user has saved the image manually. If it has not been saved manually it is "X", otherwise it is "√", PPOCRLabel will not relabel pictures with a status of "√".
[3] After clicking "Re-recognize", the model will overwrite ALL recognition results in the picture.
Therefore, if the recognition result has been manually changed before, it may change after re-recognition.
[4] The files produced by PPOCRLabel include the following, please do not manually change the contents, otherwise it will cause the program to be abnormal.
| File name | Description |
| :-----------: | :----------------------------------------------------------: |
| Label.txt | The detection label file can be directly used for PPOCR detection model training. After the user saves 10 label results, the file will be automatically saved. It will also be written when the user closes the application or changes the file folder. |
| fileState.txt | The picture status file save the image in the current folder that has been manually confirmed by the user. |
| Cache.cach | Cache files to save the results of model recognition. |
| rec_gt.txt | The recognition label file, which can be directly used for PPOCR identification model training, is generated after the user clicks on the menu bar "PaddleOCR"-"Save recognition result". |
| crop_img | The recognition data, generated at the same time with *rec_gt.txt* |
### Built-in Model
- Default model: PPOCRLabel uses the Chinese and English ultra-lightweight OCR model in PaddleOCR by default, supports Chinese, English and number recognition, and multiple language detection.
- Model language switching: Changing the built-in model language is supportable by clicking "PaddleOCR"-"Choose OCR Model" in the menu bar. Currently supported languages​include French, German, Korean, and Japanese.
For specific model download links, please refer to [PaddleOCR Model List](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_en/models_list_en.md#multilingual-recognition-modelupdating)
- Custom model: The model trained by users can be replaced by modifying PPOCRLabel.py in [PaddleOCR class instantiation](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/PPOCRLabel/PPOCRLabel.py#L110) referring [Custom Model Code](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_en/whl_en.md#use-custom-model)
## Related
1.[Tzutalin. LabelImg. Git code (2015)](https://github.com/tzutalin/labelImg)
*.spec
build
dist
pyinstaller
python-2.*
pywin32*
virtual-wine
venv_wine
PyQt4-*
lxml-*
windows_v*
linux_v*
### Deploy to PyPI
```
cd [ROOT]
sh build-tools/build-for-pypi.sh
```
### Build for Ubuntu
```
cd build-tools
sh run-in-container.sh
sh envsetup.sh
sh build-ubuntu-binary.sh
```
### Build for Windows
```
cd build-tools
sh run-in-container.sh
sh envsetup.sh
sh build-windows-binary.sh
```
### Build for macOS High Sierra
```
cd build-tools
./build-for-macos.sh
```
Note: If there are some problems, try to
```
sudo rm -rf virtual-wne venv_wine
```
#!/bin/sh
brew install python@2
pip install --upgrade virtualenv
# clone labelimg source
rm -rf /tmp/labelImgSetup
mkdir /tmp/labelImgSetup
cd /tmp/labelImgSetup
curl https://codeload.github.com/tzutalin/labelImg/zip/master --output labelImg.zip
unzip labelImg.zip
rm labelImg.zip
# setup python3 space
virtualenv --system-site-packages -p python3 /tmp/labelImgSetup/labelImg-py3
source /tmp/labelImgSetup/labelImg-py3/bin/activate
cd labelImg-master
# build labelImg app
pip install py2app
pip install PyQt5 lxml
make qt5py3
rm -rf build dist
python setup.py py2app -A
mv "/tmp/labelImgSetup/labelImg-master/dist/labelImg.app" /Applications
# deactivate python3
deactivate
cd ../
rm -rf /tmp/labelImgSetup
echo 'DONE'
#!/bin/sh
# Packaging and Release
docker run --workdir=$(pwd)/ --volume="/home/$USER:/home/$USER" tzutalin/py2qt4 /bin/sh -c 'make qt4py2; make test;sudo python setup.py sdist;sudo python setup.py install'
while true; do
read -p "Do you wish to deploy this to PyPI(twine upload dist/* or pip install dist/*)?" yn
case $yn in
[Yy]* ) docker run -it --rm --workdir=$(pwd)/ --volume="/home/$USER:/home/$USER" tzutalin/py2qt4; break;;
[Nn]* ) exit;;
* ) echo "Please answer yes or no.";;
esac
done
# python setup.py register
# python setup.py sdist upload
# Net pypi: twine upload dist/*
# Test before upladoing: pip install dist/labelImg.tar.gz
#!/bin/bash
### Ubuntu use pyinstall v3.0
THIS_SCRIPT_PATH=`readlink -f $0`
THIS_SCRIPT_DIR=`dirname ${THIS_SCRIPT_PATH}`
cd pyinstaller
git checkout v3.2
cd ${THIS_SCRIPT_DIR}
rm -r build
rm -r dist
rm labelImg.spec
python pyinstaller/pyinstaller.py --hidden-import=xml \
--hidden-import=xml.etree \
--hidden-import=xml.etree.ElementTree \
--hidden-import=lxml.etree \
-D -F -n labelImg -c "../labelImg.py" -p ../libs -p ../
FOLDER=$(git describe --abbrev=0 --tags)
FOLDER="linux_"$FOLDER
rm -rf "$FOLDER"
mkdir "$FOLDER"
cp dist/labelImg $FOLDER
cp -rf ../data $FOLDER/data
zip "$FOLDER.zip" -r $FOLDER
#!/bin/bash
### Window requires pyinstall v2.1
wine msiexec -i python-2.7.8.msi
wine pywin32-218.win32-py2.7.exe
wine PyQt4-4.11.4-gpl-Py2.7-Qt4.8.7-x32.exe
wine lxml-3.7.3.win32-py2.7.exe
THIS_SCRIPT_PATH=`readlink -f $0`
THIS_SCRIPT_DIR=`dirname ${THIS_SCRIPT_PATH}`
cd pyinstaller
git checkout v2.1
cd ${THIS_SCRIPT_DIR}
echo ${THIS_SCRIPT_DIR}
#. venv_wine/bin/activate
rm -r build
rm -r dist
rm labelImg.spec
wine c:/Python27/python.exe pyinstaller/pyinstaller.py --hidden-import=xml \
--hidden-import=xml.etree \
--hidden-import=xml.etree.ElementTree \
--hidden-import=lxml.etree \
-D -F -n labelImg -c "../labelImg.py" -p ../libs -p ../
FOLDER=$(git describe --abbrev=0 --tags)
FOLDER="windows_"$FOLDER
rm -rf "$FOLDER"
mkdir "$FOLDER"
cp dist/labelImg.exe $FOLDER
cp -rf ../data $FOLDER/data
zip "$FOLDER.zip" -r $FOLDER
#!/bin/sh
THIS_SCRIPT_PATH=`readlink -f $0`
THIS_SCRIPT_DIR=`dirname ${THIS_SCRIPT_PATH}`
#OS Ubuntu 14.04
### Common packages for linux/windows
if [ ! -e "pyinstaller" ]; then
git clone https://github.com/pyinstaller/pyinstaller
cd pyinstaller
git checkout v2.1 -b v2.1
cd ${THIS_SCRIPT_DIR}
fi
echo "Going to clone and download packages for building windows"
#Pacakges
#> pyinstaller (2.1)
#> wine (1.6.2)
#> virtual-wine (0.1)
#> python-2.7.8.msi
#> pywin32-218.win32-py2.7.exe
## tool to install on Ubuntu
#$ sudo apt-get install wine
### Clone a repo to create virtual wine env
if [ ! -e "virtual-wine" ]; then
git clone https://github.com/htgoebel/virtual-wine.git
fi
apt-get install scons
### Create virtual env
rm -rf venv_wine
./virtual-wine/vwine-setup venv_wine
#### Active virutal env
. venv_wine/bin/activate
### Use wine to install packages to virtual env
if [ ! -e "python-2.7.8.msi" ]; then
wget "https://www.python.org/ftp/python/2.7.8/python-2.7.8.msi"
fi
if [ ! -e "pywin32-218.win32-py2.7.exe" ]; then
wget "http://nchc.dl.sourceforge.net/project/pywin32/pywin32/Build%20218/pywin32-218.win32-py2.7.exe"
fi
if [ ! -e "PyQt4-4.11.4-gpl-Py2.7-Qt4.8.7-x32.exe" ]; then
wget "http://nchc.dl.sourceforge.net/project/pyqt/PyQt4/PyQt-4.11.4/PyQt4-4.11.4-gpl-Py2.7-Qt4.8.7-x32.exe"
fi
if [ ! -e "lxml-3.7.3.win32-py2.7.exe" ]; then
wget "https://pypi.python.org/packages/a3/f6/a28c5cf63873f6c55a3eb7857b736379229b85ba918261d2e88cf886905e/lxml-3.7.3.win32-py2.7.exe#md5=a0f746355876aca4ca5371cb0f1d13ce"
fi
#!/bin/sh
docker run -it \
--user $(id -u) \
-e DISPLAY=unix$DISPLAY \
--workdir=$(pwd) \
--volume="/home/$USER:/home/$USER" \
--volume="/etc/group:/etc/group:ro" \
--volume="/etc/passwd:/etc/passwd:ro" \
--volume="/etc/shadow:/etc/shadow:ro" \
--volume="/etc/sudoers.d:/etc/sudoers.d:ro" \
-v /tmp/.X11-unix:/tmp/.X11-unix \
tzutalin/py2qt4
因为 它太大了无法显示 source diff 。你可以改为 查看blob
......@@ -150,18 +150,33 @@ def get_rotate_crop_image(img, points):
except Exception as e:
print(e)
def steps():
msg = "1. 安装与运行:使用上述命令安装与运行程序。\n" \
"2. 打开文件夹:在菜单栏点击 “文件” - 打开目录 选择待标记图片的文件夹.\n"\
"3. 自动标注:点击 ”自动标注“,使用PPOCR超轻量模型对图片文件名前图片状态为 “X” 的图片进行自动标注。\n" \
"4. 手动标注:点击 “矩形标注”(推荐直接在英文模式下点击键盘中的 “W”),用户可对当前图片中模型未检出的部分进行手动" \
"绘制标记框。点击键盘P,则使用四点标注模式(或点击“编辑” - “四点标注”),用户依次点击4个点后,双击左键表示标注完成。\n" \
"5. 标记框绘制完成后,用户点击 “确认”,检测框会先被预分配一个 “待识别” 标签。\n" \
"6. 重新识别:将图片中的所有检测画绘制/调整完成后,点击 “重新识别”,PPOCR模型会对当前图片中的**所有检测框**重新识别。\n" \
"7. 内容更改:双击识别结果,对不准确的识别结果进行手动更改。\n" \
"8. 保存:点击 “保存”,图片状态切换为 “√”,跳转至下一张。\n" \
"9. 删除:点击 “删除图像”,图片将会被删除至回收站。\n" \
"10. 标注结果:关闭应用程序或切换文件路径后,手动保存过的标签将会被存放在所打开图片文件夹下的" \
"*Label.txt*中。在菜单栏点击 “PaddleOCR” - 保存识别结果后,会将此类图片的识别训练数据保存在*crop_img*文件夹下," \
"识别标签保存在*rec_gt.txt*中。\n"
def stepsInfo(lang='en'):
if lang == 'ch':
msg = "1. 安装与运行:使用上述命令安装与运行程序。\n" \
"2. 打开文件夹:在菜单栏点击 “文件” - 打开目录 选择待标记图片的文件夹.\n"\
"3. 自动标注:点击 ”自动标注“,使用PPOCR超轻量模型对图片文件名前图片状态为 “X” 的图片进行自动标注。\n" \
"4. 手动标注:点击 “矩形标注”(推荐直接在英文模式下点击键盘中的 “W”),用户可对当前图片中模型未检出的部分进行手动" \
"绘制标记框。点击键盘P,则使用四点标注模式(或点击“编辑” - “四点标注”),用户依次点击4个点后,双击左键表示标注完成。\n" \
"5. 标记框绘制完成后,用户点击 “确认”,检测框会先被预分配一个 “待识别” 标签。\n" \
"6. 重新识别:将图片中的所有检测画绘制/调整完成后,点击 “重新识别”,PPOCR模型会对当前图片中的**所有检测框**重新识别。\n" \
"7. 内容更改:双击识别结果,对不准确的识别结果进行手动更改。\n" \
"8. 保存:点击 “保存”,图片状态切换为 “√”,跳转至下一张。\n" \
"9. 删除:点击 “删除图像”,图片将会被删除至回收站。\n" \
"10. 标注结果:关闭应用程序或切换文件路径后,手动保存过的标签将会被存放在所打开图片文件夹下的" \
"*Label.txt*中。在菜单栏点击 “PaddleOCR” - 保存识别结果后,会将此类图片的识别训练数据保存在*crop_img*文件夹下," \
"识别标签保存在*rec_gt.txt*中。\n"
else:
msg = "1. Build and launch using the instructions above.\n" \
"2. Click 'Open Dir' in Menu/File to select the folder of the picture.\n"\
"3. Click 'Auto recognition', use PPOCR model to automatically annotate images which marked with 'X' before the file name."\
"4. Create Box:\n"\
"4.1 Click 'Create RectBox' or press 'W' in English keyboard mode to draw a new rectangle detection box. Click and release left mouse to select a region to annotate the text area.\n"\
"4.2 Press 'P' to enter four-point labeling mode which enables you to create any four-point shape by clicking four points with the left mouse button in succession and DOUBLE CLICK the left mouse as the signal of labeling completion.\n"\
"5. After the marking frame is drawn, the user clicks 'OK', and the detection frame will be pre-assigned a TEMPORARY label.\n"\
"6. Click re-Recognition, model will rewrite ALL recognition results in ALL detection box.\n"\
"7. Double click the result in 'recognition result' list to manually change inaccurate recognition results.\n"\
"8. Click 'Save', the image status will switch to '√',then the program automatically jump to the next.\n"\
"9. Click 'Delete Image' and the image will be deleted to the recycle bin.\n"\
"10. Labeling result: After closing the application or switching the file path, the manually saved label will be stored in *Label.txt* under the opened picture folder.\n"\
" Click PaddleOCR-Save Recognition Results in the menu bar, the recognition training data of such pictures will be saved in the *crop_img* folder, and the recognition label will be saved in *rec_gt.txt*.\n"
return msg
\ No newline at end of file
......@@ -87,4 +87,10 @@ creatPolygon=四点标注
drawSquares=正方形标注
saveRec=保存识别结果
tempLabel=待识别
steps=操作步骤
\ No newline at end of file
steps=操作步骤
choseModelLg=选择模型语言
cancel=取消
ok=确认
autolabeling=自动标注中
hideBox=隐藏所有标注
showBox=显示所有标注
\ No newline at end of file
......@@ -76,10 +76,10 @@ ImageResize=Image Resize
IR=Image Resize
autoRecognition=Auto Recognition
reRecognition=Re-recognition
mfile=file
medit=eidt
mview=view
mhelp=help
mfile=File
medit=Eidt
mview=View
mhelp=Help
iconList=Icon List
detectionBoxposition=Detection box position
recognitionResult=Recognition result
......@@ -87,4 +87,10 @@ creatPolygon=Create Quadrilateral
drawSquares=Draw Squares
saveRec=Save Recognition Result
tempLabel=TEMPORARY
setps=Steps
\ No newline at end of file
steps=Steps
choseModelLg=Choose Model Language
cancel=Cancel
ok=OK
autolabeling=Automatic Labeling
hideBox=Hide All Box
showBox=Show All Box
\ No newline at end of file
......@@ -4,6 +4,7 @@ English | [简体中文](README_ch.md)
PaddleOCR aims to create multilingual, awesome, leading, and practical OCR tools that help users train better models and apply them into practice.
**Recent updates**
- 2020.11.25 Update a new data annotation tool, i.e., [PPOCRLabel](./PPOCRLabel/README_en.md), which is helpful to improve the labeling efficiency. Moreover, the labeling results can be used in training of the PP-OCR system directly.
- 2020.9.22 Update the PP-OCR technical article, https://arxiv.org/abs/2009.09941
- 2020.9.19 Update the ultra lightweight compressed ppocr_mobile_slim series models, the overall model size is 3.5M (see [PP-OCR Pipeline](#PP-OCR-Pipeline)), suitable for mobile deployment. [Model Downloads](#Supported-Chinese-model-list)
- 2020.9.17 Update the ultra lightweight ppocr_mobile series and general ppocr_server series Chinese and English ocr models, which are comparable to commercial effects. [Model Downloads](#Supported-Chinese-model-list)
......@@ -77,30 +78,32 @@ For a new language request, please refer to [Guideline for new language_requests
- [Installation](./doc/doc_en/installation_en.md)
- [Quick Start](./doc/doc_en/quickstart_en.md)
- [Code Structure](./doc/doc_en/tree_en.md)
- Algorithm introduction
- Algorithm Introduction
- [Text Detection Algorithm](./doc/doc_en/algorithm_overview_en.md)
- [Text Recognition Algorithm](./doc/doc_en/algorithm_overview_en.md)
- [PP-OCR Pipeline](#PP-OCR-Pipeline)
- Model training/evaluation
- Model Training/Evaluation
- [Text Detection](./doc/doc_en/detection_en.md)
- [Text Recognition](./doc/doc_en/recognition_en.md)
- [Direction Classification](./doc/doc_en/angle_class_en.md)
- [Yml Configuration](./doc/doc_en/config_en.md)
- Inference and Deployment
- [Quick inference based on pip](./doc/doc_en/whl_en.md)
- [Quick Inference Based on PIP](./doc/doc_en/whl_en.md)
- [Python Inference](./doc/doc_en/inference_en.md)
- [C++ Inference](./deploy/cpp_infer/readme_en.md)
- [Serving](./deploy/hubserving/readme_en.md)
- [Mobile](./deploy/lite/readme_en.md)
- [Model Quantization](./deploy/slim/quantization/README_en.md)
- [Model Compression](./deploy/slim/prune/README_en.md)
- [Benchmark](./doc/doc_en/benchmark_en.md)
- [Benchmark](./doc/doc_en/benchmark_en.md)
- Data Annotation and Synthesis
- [Semi-automatic Annotation Tool](./PPOCRLabel/README_en.md)
- [Data Annotation Tools](./doc/doc_en/data_annotation_en.md)
- [Data Synthesis Tools](./doc/doc_en/data_synthesis_en.md)
- Datasets
- [General OCR Datasets(Chinese/English)](./doc/doc_en/datasets_en.md)
- [HandWritten_OCR_Datasets(Chinese)](./doc/doc_en/handwritten_datasets_en.md)
- [Various OCR Datasets(multilingual)](./doc/doc_en/vertical_and_multilingual_datasets_en.md)
- [Data Annotation Tools](./doc/doc_en/data_annotation_en.md)
- [Data Synthesis Tools](./doc/doc_en/data_synthesis_en.md)
- [Visualization](#Visualization)
- [New language requests](#language_requests)
- [FAQ](./doc/doc_en/FAQ_en.md)
......@@ -177,3 +180,5 @@ We welcome all the contributions to PaddleOCR and appreciate for your feedback v
- Thanks [tangmq](https://gitee.com/tangmq) for contributing Dockerized deployment services to PaddleOCR and supporting the rapid release of callable Restful API services.
- Thanks [lijinhan](https://github.com/lijinhan) for contributing a new way, i.e., java SpringBoot, to achieve the request for the Hubserving deployment.
- Thanks [Mejans](https://github.com/Mejans) for contributing the Occitan corpus and character set.
- Thanks [LKKlein](https://github.com/LKKlein) for contributing a new deploying package with the Golang program language.
- Thanks [Evezerest](https://github.com/Evezerest), [ninetailskim](https://github.com/ninetailskim), [edencfc](https://github.com/edencfc), [BeyondYourself](https://github.com/BeyondYourself) and [1084667371](https://github.com/1084667371) for contributing a new data annotation tool, i.e., PPOCRLabel。
......@@ -4,8 +4,8 @@
PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力使用者训练出更好的模型,并应用落地。
**近期更新**
- 2020.11.30 [FAQ](./doc/doc_ch/FAQ.md)新增5个高频问题,共计119个常见问题及解答,并且计划以后每周一都会更新,欢迎大家持续关注。
- 2020.11.25 更新半自动标注工具[PPOCRLabel](./PPOCRLabel/README.md),辅助开发者高效完成标注任务,输出格式与PP-OCR训练任务完美衔接。
- 2020.11.16 [FAQ](./doc/doc_ch/FAQ.md)新增5个高频问题,共计109个常见问题及解答,并且计划以后每周一都会更新,欢迎大家持续关注。
- 2020.9.22 更新PP-OCR技术文章,https://arxiv.org/abs/2009.09941
- 2020.9.19 更新超轻量压缩ppocr_mobile_slim系列模型,整体模型3.5M(详见[PP-OCR Pipeline](#PP-OCR)),适合在移动端部署使用。[模型下载](#模型下载)
- 2020.9.17 更新超轻量ppocr_mobile系列和通用ppocr_server系列中英文ocr模型,媲美商业效果。[模型下载](#模型下载)
......@@ -100,8 +100,8 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力
- [效果展示](#效果展示)
- FAQ
- [【精选】OCR精选10个问题](./doc/doc_ch/FAQ.md)
- [【理论篇】OCR通用27个问题](./doc/doc_ch/FAQ.md)
- [【实战篇】PaddleOCR实战72个问题](./doc/doc_ch/FAQ.md)
- [【理论篇】OCR通用29个问题](./doc/doc_ch/FAQ.md)
- [【实战篇】PaddleOCR实战80个问题](./doc/doc_ch/FAQ.md)
- [技术交流群](#欢迎加入PaddleOCR技术交流群)
- [参考文献](./doc/doc_ch/reference.md)
- [许可证书](#许可证书)
......
......@@ -9,47 +9,42 @@
## PaddleOCR常见问题汇总(持续更新)
* [近期更新(2020.11.16](#近期更新)
* [近期更新(2020.11.30](#近期更新)
* [【精选】OCR精选10个问题](#OCR精选10个问题)
* [【理论篇】OCR通用27个问题](#OCR通用问题)
* [【理论篇】OCR通用29个问题](#OCR通用问题)
* [基础知识7题](#基础知识)
* [数据集5题](#数据集)
* [模型训练调优7题](#模型训练调优)
* [预测部署8题](#预测部署)
* [【实战篇】PaddleOCR实战72个问题](#PaddleOCR实战问题)
* [数据集7题](#数据集2)
* [模型训练调优7题](#模型训练调优2)
* [预测部署8题](#预测部署2)
* [【实战篇】PaddleOCR实战80个问题](#PaddleOCR实战问题)
* [使用咨询20题](#使用咨询)
* [数据集14题](#数据集)
* [模型训练调优21题](#模型训练调优)
* [预测部署21题](#预测部署)
* [数据集17题](#数据集3)
* [模型训练调优21题](#模型训练调优3)
* [预测部署22题](#预测部署3)
<a name="近期更新"></a>
## 近期更新(2020.11.23
## 近期更新(2020.11.30
#### Q3.2.11:有哪些标注工具可以标注OCR数据集
#### Q3.2.15: 文本标注工具PPOCRLabel有什么特色
**A**:您可以参考:https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_en/data_annotation_en.md。
我们计划推出高效标注OCR数据的标注工具,请您持续关注PaddleOCR的近期更新。
**A**: PPOCRLabel是一个半自动文本标注工具,它使用基于PPOCR的中英文OCR模型,预先预测文本检测和识别结果,然后用户对上述结果进行校验和修正就行,大大提高用户的标注效率。同时导出的标注结果直接适配PPOCR训练所需要的数据格式,
#### Q3.2.12:一些特殊场景的数据识别效果差,但是数据量很少,不够用来finetune怎么办
#### Q3.2.16: 文本标注工具PPOCRLabel,可以更换模型吗
**A**:您可以合成一些接近使用场景的数据用于训练。
我们计划推出基于特定场景的文本数据合成工具,请您持续关注PaddleOCR的近期更新。
**A**: PPOCRLabel中OCR部署方式采用的基于pip安装whl包快速推理,可以参考相关文档更换模型路径,进行特定任务的标注适配。基于pip安装whl包快速推理的文档如下,https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/whl.md。
#### Q3.2.13:特殊字符(例如一些标点符号)识别效果不好怎么办
#### Q3.2.17: 文本标注工具PPOCRLabel支持的运行环境有哪些
**A**:首先请您确认要识别的特殊字符是否在字典中。
如果字符在已经字典中但效果依然不好,可能是由于识别数据较少导致的,您可以增加相应数据finetune模型。
**A**: PPOCRLabel可运行于Linux、Windows、MacOS等多种系统。操作步骤可以参考文档,https://github.com/PaddlePaddle/PaddleOCR/blob/develop/PPOCRLabel/README.md
#### Q3.2.14:PaddleOCR可以识别灰度图吗
#### Q2.2.6: 当训练数据量少时,如何获取更多的数据
**A**:PaddleOCR的模型均为三通道输入。如果您想使用灰度图作为输入,建议直接用3通道的模式读入灰度图,
或者将单通道图像转换为三通道图像再识别。例如,opencv的cvtColor函数就可以将灰度图转换为RGB三通道模式。
**A**: 当训练数据量少时,可以尝试以下三种方式获取更多的数据:(1)人工采集更多的训练数据,最直接也是最有效的方式。(2)基于PIL和opencv基本图像处理或者变换。例如PIL中ImageFont, Image, ImageDraw三个模块将文字写到背景中,opencv的旋转仿射变换,高斯滤波等。(3)利用数据生成算法合成数据,例如pix2pix等算法。
#### Q3.1.20:PaddleOCR与百度的其他OCR产品有什么区别
#### Q2.2.7: 论文《Editing Text in the Wild》中文本合成方法SRNet有什么特点
**A**:PaddleOCR主要聚焦通用ocr,如果有垂类需求,您可以用PaddleOCR+垂类数据自己训练;
如果缺少带标注的数据,或者不想投入研发成本,建议直接调用开放的API,开放的API覆盖了目前比较常见的一些垂类。
**A**: SRNet是借鉴GAN中图像到图像转换、风格迁移的想法合成文本数据。不同于通用GAN的方法只选择一个分支,SRNet将文本合成任务分解为三个简单的子模块,提升合成数据的效果。这三个子模块为不带背景的文本风格迁移模块、背景抽取模块和融合模块。PaddleOCR计划将在2020年12月中旬开源基于SRNet的实用模型。
<a name="OCR精选10个问题"></a>
......@@ -141,6 +136,8 @@
<a name="OCR通用问题"></a>
## 【理论篇】OCR通用问题
<a name="基础知识"></a>
### 基础知识
#### Q2.1.1:CRNN能否识别两行的文字?还是说必须一行?
......@@ -169,6 +166,7 @@
**A**:处理字符的时候,把多字符的当作一个字就行,字典中每行是一个字。
<a name="数据集2"></a>
### 数据集
#### Q2.2.1:支持空格的模型,标注数据的时候是不是要标注空格?中间几个空格都要标注出来么?
......@@ -191,7 +189,15 @@
**A**:使用基于分割的方法,如DB,检测密集文本行时,最好收集一批数据进行训练,并且在训练时,并将生成二值图像的shrink_ratio参数调小一些。
#### Q2.2.6: 当训练数据量少时,如何获取更多的数据?
**A**: 当训练数据量少时,可以尝试以下三种方式获取更多的数据:(1)人工采集更多的训练数据,最直接也是最有效的方式。(2)基于PIL和opencv基本图像处理或者变换。例如PIL中ImageFont, Image, ImageDraw三个模块将文字写到背景中,opencv的旋转仿射变换,高斯滤波等。(3)利用数据生成算法合成数据,例如pix2pix等算法。
#### Q2.2.7: 论文《Editing Text in the Wild》中文本合成方法SRNet有什么特点?
**A**: SRNet是借鉴GAN中图像到图像转换、风格迁移的想法合成文本数据。不同于通用GAN的方法只选择一个分支,SRNet将文本合成任务分解为三个简单的子模块,提升合成数据的效果。这三个子模块为不带背景的文本风格迁移模块、背景抽取模块和融合模块。PaddleOCR计划将在2020年12月中旬开源基于SRNet的实用模型。
<a name="模型训练调优2"></a>
### 模型训练调优
#### Q2.3.1:如何更换文本检测/识别的backbone?
......@@ -233,6 +239,7 @@
(2)调大系统的[l2 dcay值](https://github.com/PaddlePaddle/PaddleOCR/blob/a501603d54ff5513fc4fc760319472e59da25424/configs/rec/ch_ppocr_v1.1/rec_chinese_lite_train_v1.1.yml#L47)
<a name="预测部署2"></a>
### 预测部署
#### Q2.4.1:请问对于图片中的密集文字,有什么好的处理办法吗?
......@@ -280,6 +287,7 @@
<a name="PaddleOCR实战问题"></a>
## 【实战篇】PaddleOCR实战问题
<a name="使用咨询"></a>
### 使用咨询
#### Q3.1.1:OSError: [WinError 126] 找不到指定的模块。mac pro python 3.4 shapely import 问题
......@@ -377,7 +385,7 @@
**A**:PaddleOCR主要聚焦通用ocr,如果有垂类需求,您可以用PaddleOCR+垂类数据自己训练;
如果缺少带标注的数据,或者不想投入研发成本,建议直接调用开放的API,开放的API覆盖了目前比较常见的一些垂类。
<a name="数据集3"></a>
### 数据集
#### Q3.2.1:如何制作PaddleOCR支持的数据格式
......@@ -456,7 +464,19 @@
**A**:PaddleOCR的模型均为三通道输入。如果您想使用灰度图作为输入,建议直接用3通道的模式读入灰度图,
或者将单通道图像转换为三通道图像再识别。例如,opencv的cvtColor函数就可以将灰度图转换为RGB三通道模式。
#### Q3.2.15: 文本标注工具PPOCRLabel有什么特色?
**A**: PPOCRLabel是一个半自动文本标注工具,它使用基于PPOCR的中英文OCR模型,预先预测文本检测和识别结果,然后用户对上述结果进行校验和修正就行,大大提高用户的标注效率。同时导出的标注结果直接适配PPOCR训练所需要的数据格式,
#### Q3.2.16: 文本标注工具PPOCRLabel,可以更换模型吗?
**A**: PPOCRLabel中OCR部署方式采用的基于pip安装whl包快速推理,可以参考相关文档更换模型路径,进行特定任务的标注适配。基于pip安装whl包快速推理的文档如下,https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/whl.md。
#### Q3.2.17: 文本标注工具PPOCRLabel支持的运行环境有哪些?
**A**: PPOCRLabel可运行于Linux、Windows、MacOS等多种系统。操作步骤可以参考文档,https://github.com/PaddlePaddle/PaddleOCR/blob/develop/PPOCRLabel/README.md
<a name="模型训练调优3"></a>
### 模型训练调优
#### Q3.3.1:文本长度超过25,应该怎么处理?
......@@ -574,7 +594,7 @@ return paddle.reader.multiprocess_reader(readers, False, queue_size=320)
(3)在训练的时候,文本长度超过25的训练图像都会被丢弃,因此需要看下真正参与训练的图像有多少,太少的话也容易过拟合。
<a name="预测部署3"></a>
### 预测部署
#### Q3.4.1:如何pip安装opt模型转换工具?
......
......@@ -4,6 +4,8 @@
### 安装whl包
首先需要参照[安装文档](installation.md)安装paddlepaddle,然后开始安装paddleocr package
pip安装
```bash
pip install paddleocr
......@@ -166,7 +168,7 @@ paddleocr -h
* 检测+分类+识别全流程
```bash
paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --use_angle_cls true --cls true
paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --use_angle_cls true
```
结果是一个list,每个item包含了文本框,文字和识别置信度
```bash
......@@ -190,7 +192,7 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg
* 分类+识别
```bash
paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --use_angle_cls true --cls true --det false
paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --use_angle_cls true --det false
```
结果是一个list,每个item只包含识别结果和识别置信度
......@@ -222,7 +224,7 @@ paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --det false
* 单独执行分类
```bash
paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --use_angle_cls true --cls true --det false --rec false
paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --use_angle_cls true --det false --rec false
```
结果是一个list,每个item只包含分类结果和分类置信度
......@@ -258,7 +260,7 @@ im_show.save('result.jpg')
### 通过命令行使用
```bash
paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_dir} --rec_model_dir {your_rec_model_dir} --rec_char_dict_path {your_rec_char_dict_path} --cls_model_dir {your_cls_model_dir} --use_angle_cls true --cls true
paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_dir} --rec_model_dir {your_rec_model_dir} --rec_char_dict_path {your_rec_char_dict_path} --cls_model_dir {your_cls_model_dir} --use_angle_cls true
```
## 参数说明
......@@ -295,4 +297,4 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_
| lang | 模型语言类型,目前支持 中文(ch)和英文(en) | ch |
| det | 前向时使用启动检测 | TRUE |
| rec | 前向时是否启动识别 | TRUE |
| cls | 前向时是否启动分类 | FALSE |
| cls | 前向时是否启动分类, 此参数仅存在于`代码使用`模式 | FALSE |
......@@ -2,6 +2,9 @@
## Get started quickly
### install package
First, you need to refer to [installation document](installation_en.md) to install paddlepaddle, and then start to install paddleocr package
install by pypi
```bash
pip install paddleocr
......@@ -172,7 +175,7 @@ paddleocr -h
* detection classification and recognition
```bash
paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg --use_angle_cls true -cls true --lang en
paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg --use_angle_cls true --lang en
```
Output will be a list, each item contains bounding box, text and recognition confidence
......@@ -198,7 +201,7 @@ Output will be a list, each item contains bounding box, text and recognition con
* classification and recognition
```bash
paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --use_angle_cls true -cls true --det false --lang en
paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --use_angle_cls true --det false --lang en
```
Output will be a list, each item contains text and recognition confidence
......@@ -221,7 +224,7 @@ Output will be a list, each item only contains bounding box
* only recognition
```bash
paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --det false --cls false --lang en
paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --det false --lang en
```
Output will be a list, each item contains text and recognition confidence
......@@ -231,7 +234,7 @@ Output will be a list, each item contains text and recognition confidence
* only classification
```bash
paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --use_angle_cls true -cls true --det false --rec false
paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --use_angle_cls true --det false --rec false
```
Output will be a list, each item contains classification result and confidence
......@@ -268,7 +271,7 @@ im_show.save('result.jpg')
### Use by command line
```bash
paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_dir} --rec_model_dir {your_rec_model_dir} --rec_char_dict_path {your_rec_char_dict_path} --cls_model_dir {your_cls_model_dir} --use_angle_cls true --cls true
paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_dir} --rec_model_dir {your_rec_model_dir} --rec_char_dict_path {your_rec_char_dict_path} --cls_model_dir {your_cls_model_dir} --use_angle_cls true
```
## Parameter Description
......@@ -305,4 +308,4 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_
| lang | The support language, now only Chinese(ch)、English(en)、French(french)、German(german)、Korean(korean)、Japanese(japan) are supported | ch |
| det | Enable detction when `ppocr.ocr` func exec | TRUE |
| rec | Enable recognition when `ppocr.ocr` func exec | TRUE |
| cls | Enable classification when `ppocr.ocr` func exec | FALSE |
| cls | Enable classification when `ppocr.ocr` func exec,this parameter only exists in `code usage` mode | FALSE |
doc/joinus.PNG

225.2 KB | W: | H:

doc/joinus.PNG

415.1 KB | W: | H:

doc/joinus.PNG
doc/joinus.PNG
doc/joinus.PNG
doc/joinus.PNG
  • 2-up
  • Swipe
  • Onion skin
......@@ -160,6 +160,7 @@ class RecModel(object):
"We set img_shape to be the same , it may affect the inference effect"
)
image_shape = deepcopy(self.image_shape)
image_shape.insert(0, -1)
image = fluid.data(name='image', shape=image_shape, dtype='float32')
image.stop_gradient = False
if self.loss_type == "srn":
......
# PaddleOCR-GO
本服务是[PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR)的golang部署版本。
## 1. 环境准备
### 运行环境
- go: 1.14
- OpenCV: 4.3.0
- PaddlePaddle: 1.8.4
- 编译环境:cmake 3.15.4 | gcc 4.8.5
- 基于Centos 7.4运行环境编译,Windows请自行解决`OpenCV``PaddlePaddle`的编译问题
*另外,以下编译以`.bashrc`个人环境变量配置文件,如果使用`zsh`,请自行更换为`.zshrc`*
### 1.1 安装golang
从官网下载[golang](https://golang.org/dl/),建议选择1.13版本以上进行安装。下载完成后,直接解压你需要的安装目录,并配置相关环境变量,此处以1.14版本为例。
```shell
# 下载golang
wget https://golang.org/dl/go1.14.10.linux-amd64.tar.gz
# 解压到 /usr/local 目录下
tar -xzvf go1.14.10.linux-amd64.tar.gz -C /usr/local
# 配置GOROOT,即go的安装目录
echo "export GOROOT=/usr/local/go" >> ~/.bashrc
echo "export PATH=$PATH:$GOROOT/bin" >> ~/.bashrc
# 配置GOPATH,即go相关package的安装目录,可自定义一个目录
echo "export GOPATH=$HOME/golang" >> ~/.bashrc
echo "export PATH=$PATH:$GOPATH/bin" >> ~/.bashrc
# 配置GOPROXY,即go mod包管理器的下载代理,同时打开mod模式
echo "export GO111MODULE=on" >> ~/.bashrc
echo "export GOPROXY=https://mirrors.aliyun.com/goproxy/" >> ~/.bashrc
source ~/.bashrc
```
### 1.2 编译OpenCV库
go语言中,OpenCV的使用主要以[gocv](https://github.com/hybridgroup/gocv)包为主,gocv使用cgo调用OpenCV提供接口,因此还是需要编译OpenCV库。
**踩坑指南之一:[gocv官方实现](https://github.com/hybridgroup/gocv)中,部分接口并没有与原版C++的OpenCV的API保持一致,导致图片处理结果会出现一定的数值偏差。为处理这种偏差,[该仓库](https://github.com/LKKlein/gocv)fork了一份gocv官方源码,并对部分这些不一致的API进行了修正,保证结果与其他语言的一致性。**
对于OpenCV的编译,gocv官方提供了[Makefile](https://github.com/LKKlein/gocv/blob/lk/Makefile),可以一键进行安装,具体安装步骤详见[官方指南](https://github.com/LKKlein/gocv/blob/lk/README_ORIGIN.md#ubuntulinux)
这里提供逐步安装的方式,方便排查错误。
- 下载并解压OpenCV-4.3.0和OpenCV-Contrib-4.3.0
```shell
# 创建opencv安装目录
mkdir -p ~/opencv
# 下载OpenCV
cd ~/opencv
curl -sL https://github.com/opencv/opencv/archive/4.3.0.zip > opencv.zip
unzip -q opencv.zip
rm -rf opencv.zip
# 下载OpenCV-Contrib
curl -sL https://github.com/opencv/opencv_contrib/archive/4.3.0.zip > opencv-contrib.zip
unzip -q opencv-contrib.zip
rm -rf opencv-contrib.zip
```
- 安装相关依赖
```shell
sudo yum -y install pkgconfig cmake git gtk2-devel libpng-devel libjpeg-devel libtiff-devel tbb tbb-devel libdc1394-devel
```
- 编译安装
```shell
mkdir -p ~/.local/opencv-4.3.0
cd ~/opencv/opencv-4.3.0
mkdir build
cd build
cmake -D WITH_IPP=OFF \
-D WITH_OPENGL=OFF \
-D WITH_QT=OFF \
-D BUILD_EXAMPLES=OFF \
-D BUILD_TESTS=OFF \
-D BUILD_PERF_TESTS=OFF \
-D BUILD_opencv_java=OFF \
-D BUILD_opencv_python=OFF \
-D BUILD_opencv_python2=OFF \
-D BUILD_opencv_python3=OFF \
-D OPENCV_GENERATE_PKGCONFIG=ON \
-D CMAKE_INSTALL_PREFIX=$HOME/.local/opencv-4.3.0 \
-D OPENCV_ENABLE_NONFREE=ON \
-D OPENCV_EXTRA_MODULES_PATH=$HOME/opencv/opencv_contrib-4.3.0/modules ..
make -j8
make install
sudo ldconfig
```
make进行编译时,可能出现因`xfeatures2d`的两个模块下载失败导致的编译失败,这里只需要手动下载这部分文件到`$HOME/opencv/opencv_contrib-4.3.0/modules/xfeatures2d/src`目录下,然后重新执行`make -j8`即可。这部分文件地址可参考[这里](https://github.com/opencv/opencv_contrib/issues/1301#issuecomment-447181426)给出的链接。
- 配置环境变量
```shell
echo "export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:$HOME/.local/opencv-4.3.0/lib64/pkgconfig" >> ~/.bashrc
echo "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HOME/.local/opencv-4.3.0/lib64" >> ~/.bashrc
source ~/.bashrc
```
- 验证安装
```shell
# 安装gocv包,先mod init
go mod init opencv
go get -u github.com/LKKlein/gocv
# 验证安装结果
cd $GOPATH/pkg/mod/github.com/!l!k!klein/gocv@v0.28.0
go run ./cmd/version/main.go
# 输出
# gocv version: 0.28.0
# opencv lib version: 4.3.0
```
### 1.3 编译PaddlePaddle的C语言API
go语言只能通过cgo调用C语言API,而不能直接与C++进行交互,因此需要编译PaddlePaddle的C语言API。当然,也可以自己写C语言调用C++的代码和头文件,这样就可以直接使用PaddlePaddle提供的已编译的C++推理库,无需自己手动编译,详见[该仓库](https://github.com/LKKlein/paddleocr-go/tree/dev_cxx)
- 获取PaddlePaddle源代码
```shell
cd ~
git clone --recurse-submodules https://github.com/paddlepaddle/paddle
# 切换到v1.8.4版本
cd paddle
git checkout v1.8.4
# 目前版本无论单卡还是多卡都需要先安装nccl
git clone https://github.com/NVIDIA/nccl.git
make -j8
make install
```
- 编译Paddle源代码
**踩坑指南之二:PaddlePaddle的C语言API实现有一个bug,即获取输入输出变量名时只能获取到第一个模型的变量名,后续模型都无法获取输入输出变量名,进而无法获取到模型输出,详情见[issue](https://github.com/PaddlePaddle/Paddle/issues/28309)。因此,编译前需要手动将`paddle/fluid/inference/capi/pd_predictor.cc`文件中`210行`与`215行`的`static`删除。**
在处理完该bug之后,才能进行后续编译。相关编译参数见[官方文档](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html#id12),注意部分参数需要相关依赖,请确保依赖完整再启用。
```shell
# 创建c++推理库文件夹
mkdir -p ~/paddle_inference
export PADDLE_INFER=`$HOME/paddle_inference`
# 执行编译
export PADDLE_ROOT=`pwd`
mkdir build
cd build
cmake -DFLUID_INFERENCE_INSTALL_DIR=$PADDLE_INFER \
-DWITH_CONTRIB=OFF \
-DCMAKE_BUILD_TYPE=Release \
-DWITH_PYTHON=OFF \
-DWITH_MKL=ON \
-DWITH_GPU=ON \
-DON_INFER=ON \
--WITH_MKLDNN=ON \
--WITH_XBYAK=ON \
--WITH_DSO=OFF ..
make
make inference_lib_dist
```
编译完成后,可以在`build/fluid_inference_c_install_dir`目录下,看到以下生成的文件
```
build/fluid_inference_c_install_dir
├── paddle
├── third_party
└── version.txt
```
其中`paddle`就是Paddle库的C语言预测API,`version.txt`中包含当前预测库的版本信息。最后,将C推理库配置到环境变量。
```shell
echo "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PADDLE_ROOT/build/fluid_inference_c_install_dir/paddle/lib" >> ~/.bashrc
echo "export LIBRARY_PATH=$LIBRARY_PATH:$PADDLE_ROOT/build/fluid_inference_c_install_dir/paddle/lib" >> ~/.bashrc
souce ~/.bashrc
```
## 2. paddleocr-go预测库
### 2.1 安装paddleocr-go
确保C推理库已配置到环境变量,然后直接执行安装命令
```shell
go get -u github.com/PaddlePaddle/PaddleOCR/thirdparty/paddleocr-go
```
### 2.2 相关使用API
在go中使用import引入包
```go
import github.com/PaddlePaddle/PaddleOCR/thirdparty/paddleocr-go/ocr
```
- 预测结果结构体
```go
type OCRText struct {
BBox [][]int `json:"bbox"`
Text string `json:"text"`
Score float64 `json:"score"`
}
```
一张图的OCR结果包含多个`OCRText`结果,每个结果包含预测框、预测文本、预测文本得分。
- OCR预测类
```go
func NewOCRSystem(confFile string, a map[string]interface{}) *OCRSystem
```
`OCRSystem`是主要对外提供API的结构;
`confFile`是yaml配置文件的路径,可在配置文件中修改相关预测参数,也可以传空字符串,这时会全部使用默认配置;
`a`是可以在代码中直接定义的配置参数,优先级高于配置文件,会覆盖配置文件和默认配置的参数。
- 单张图预测API
```go 
func (ocr *OCRSystem) PredictOneImage(img gocv.Mat) []OCRText
```
- 图片文件夹预测API
```go
func (ocr *OCRSystem) PredictDirImages(dirname string) map[string][]OCRText
```
`dirname`图片文件夹的目录,默认会预测改目录下所有`jpg``png`图片,并返回每张图的预测结果。
- OCR Server
```go
func (ocr *OCRSystem) StartServer(port string)
```
开启OCR预测Server,开启后,使用`post`请求上传需要识别的图片至`http://$ip:$port/ocr`即可直接获取该图片上所有文本的识别结果。其中,`$ip`是开启服务的主机`ip``127.0.0.1`的本地ip, `$port`是传入的端口参数。
## 3. 预测demo
### 3.1 生成预测demo
以下两种方式均可生成预测demo文件,任选其一即可
- 通过下载`paddleocr-go`代码并编译
```shell
git clone https://github.com/PaddlePaddle/PaddleOCR
cd PaddleOCR/thirdparty/paddleocr-go
# 确保C动态库路径已在环境变量中,执行以下命令生成ppocr-go文件
go build ppocr-go.go
```
- 通过go package自动安装
```shell
# 执行后会自动在$GOPATH/bin下生成ppocr-go文件,如果配置了PATH=$PATH:$GOPATH/bin,以下预测命令可以去掉`./`,直接执行ppocr-go
go get -u github.com/PaddlePaddle/PaddleOCR/thirdparty/paddleocr-go
```
### 3.2 修改预测配置
当前给定的配置文件`config/conf.yaml`中,包含了默认的OCR预测配置参数,可根据个人需要更改相关参数。
比如,将`use_gpu`改为`false`,使用CPU执行预测;将`det_model_dir`, `rec_model_dir`, `cls_model_dir`都更改为自己的本地模型路径,也或者是更改字典`rec_char_dict_path`的路径,这四个路径如果配置http链接,会自动下载到本地目录。另外,配置参数包含了预测引擎、检测模型、检测阈值、方向分类模型、识别模型及阈值的相关参数,具体参数的意义可参见[PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/whl.md#%E5%8F%82%E6%95%B0%E8%AF%B4%E6%98%8E)
### 3.3 执行预测demo
预测demo提供了三种预测方式,分别是单张图预测、文件夹批量预测、OCR Server预测。三者命令行优先级依次降低。
#### 3.3.1 单张图预测
```shell
./ppocr-go --config config/conf.yaml --image images/test.jpg
```
执行完成,会输出以下内容:
<img src="./images/result/single_img_result.jpg" style="zoom:80%;" />
#### 3.3.2 文件夹批量预测
```shell
./ppocr-go --config config/conf.yaml --image_dir ./images
```
执行完成,会输出以下内容:
<img src="./images/result/img_dir_result.jpg" style="zoom:80%;" />
#### 3.3.3 开启OCR Server
```shell
./ppocr-go --use_servering --port=18600
```
开启服务后,可以在其他客户端中通过`post`请求进行ocr预测。此处以`Python`客户端为例,如下所示
```python
import requests
files = {'image': open('images/test.jpg','rb')}
url = "http://127.0.0.1:18600/ocr"
r = requests.post(url, files=files)
print(r.text)
```
执行完成可以得到以下结果
![](./images/result/python_client_result.jpg)
最后,在Python中将上述结果可视化可以得到以下结果
![](./images/result/python_vis_result.jpg)
# params for prediction engine
use_gpu: true
ir_optim: true
enable_mkldnn: false
# use_zero_copy_run: true
use_tensorrt: false
num_cpu_threads: 6
gpu_id: 0
gpu_mem: 2000
# params for text detector
det_algorithm: "DB"
det_model_dir: "https://paddleocr.bj.bcebos.com/20-09-22/mobile/det/ch_ppocr_mobile_v1.1_det_infer.tar"
det_max_side_len: 960
# DB parmas
det_db_thresh: 0.3
det_db_box_thresh: 0.5
det_db_unclip_ratio: 2.0
# EAST parmas
det_east_score_thresh: 0.8
det_east_cover_thresh: 0.1
det_east_nms_thresh: 0.2
# params for text recognizer
rec_algorithm: "CRNN"
rec_model_dir: "https://paddleocr.bj.bcebos.com/20-09-22/mobile/rec/ch_ppocr_mobile_v1.1_rec_infer.tar"
rec_image_shape: [3, 32, 320]
rec_char_type: "ch"
rec_batch_num: 30
max_text_length: 25
rec_char_dict_path: "https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/develop/ppocr/utils/ppocr_keys_v1.txt"
use_space_char: true
# params for text classifier
use_angle_cls: false
cls_model_dir: "https://paddleocr.bj.bcebos.com/20-09-22/cls/ch_ppocr_mobile_v1.1_cls_infer.tar"
cls_image_shape: [3, 48, 192]
label_list: ["0", "180"]
cls_batch_num: 30
cls_thresh: 0.9
lang: ch
det: true
rec: true
cls: false
\ No newline at end of file
module github.com/PaddlePaddle/PaddleOCR/thirdparty/paddleocr-go
go 1.14
require (
github.com/LKKlein/gocv v0.28.0
github.com/ctessum/go.clipper v0.0.0-20200522184404-9c744fa3e86c
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776
)
github.com/LKKlein/gocv v0.28.0 h1:1MMvs9uYf+QGPi86it2pUmN8RRoyMnPLUefKB/Jf1Q0=
github.com/LKKlein/gocv v0.28.0/go.mod h1:MP408EL7eakRU3vzjsozzfELSX7HDDGdMpWANV1IOHY=
github.com/PaddlePaddle/PaddleOCR v1.1.0 h1:zmPevInTs5P+ctSokI9sWQLTThmJBUCo/JCLbB5xbps=
github.com/ctessum/go.clipper v0.0.0-20200522184404-9c744fa3e86c h1:VXCsVlam0R2Yl7VET2GxZBPdOa7gFRexyhfWb9v9QtM=
github.com/ctessum/go.clipper v0.0.0-20200522184404-9c744fa3e86c/go.mod h1:KRMo3PCsooJP3LmCwKI76dkd7f3ki3zwYLHR7Iwbi5k=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776 h1:tQIYjPdBoyREyB9XMu+nnTclpTYkz2zFM+lzLJFO4gQ=
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
package ocr
import (
"bytes"
"encoding/json"
"errors"
"image"
"image/color"
"io"
"log"
"math"
"net/http"
"path"
"path/filepath"
"sort"
"strings"
"github.com/LKKlein/gocv"
"github.com/PaddlePaddle/PaddleOCR/thirdparty/paddleocr-go/paddle"
)
type PaddleModel struct {
predictor *paddle.Predictor
input *paddle.ZeroCopyTensor
outputs []*paddle.ZeroCopyTensor
useGPU bool
deviceID int
initGPUMem int
numThreads int
useMKLDNN bool
useTensorRT bool
useIROptim bool
}
func NewPaddleModel(args map[string]interface{}) *PaddleModel {
return &PaddleModel{
useGPU: getBool(args, "use_gpu", false),
deviceID: getInt(args, "gpu_id", 0),
initGPUMem: getInt(args, "gpu_mem", 1000),
numThreads: getInt(args, "num_cpu_threads", 6),
useMKLDNN: getBool(args, "enable_mkldnn", false),
useTensorRT: getBool(args, "use_tensorrt", false),
useIROptim: getBool(args, "ir_optim", true),
}
}
func (model *PaddleModel) LoadModel(modelDir string) {
config := paddle.NewAnalysisConfig()
config.DisableGlogInfo()
config.SetModel(modelDir+"/model", modelDir+"/params")
if model.useGPU {
config.EnableUseGpu(model.initGPUMem, model.deviceID)
} else {
config.DisableGpu()
config.SetCpuMathLibraryNumThreads(model.numThreads)
if model.useMKLDNN {
config.EnableMkldnn()
}
}
// config.EnableMemoryOptim()
if model.useIROptim {
config.SwitchIrOptim(true)
}
// false for zero copy tensor
config.SwitchUseFeedFetchOps(false)
config.SwitchSpecifyInputNames(true)
model.predictor = paddle.NewPredictor(config)
model.input = model.predictor.GetInputTensors()[0]
model.outputs = model.predictor.GetOutputTensors()
}
type OCRText struct {
BBox [][]int `json:"bbox"`
Text string `json:"text"`
Score float64 `json:"score"`
}
type TextPredictSystem struct {
detector *DBDetector
cls *TextClassifier
rec *TextRecognizer
}
func NewTextPredictSystem(args map[string]interface{}) *TextPredictSystem {
sys := &TextPredictSystem{
detector: NewDBDetector(getString(args, "det_model_dir", ""), args),
rec: NewTextRecognizer(getString(args, "rec_model_dir", ""), args),
}
if getBool(args, "use_angle_cls", false) {
sys.cls = NewTextClassifier(getString(args, "cls_model_dir", ""), args)
}
return sys
}
func (sys *TextPredictSystem) sortBoxes(boxes [][][]int) [][][]int {
sort.Slice(boxes, func(i, j int) bool {
if boxes[i][0][1] < boxes[j][0][1] {
return true
}
if boxes[i][0][1] > boxes[j][0][1] {
return false
}
return boxes[i][0][0] < boxes[j][0][0]
})
for i := 0; i < len(boxes)-1; i++ {
if math.Abs(float64(boxes[i+1][0][1]-boxes[i][0][1])) < 10 && boxes[i+1][0][0] < boxes[i][0][0] {
boxes[i], boxes[i+1] = boxes[i+1], boxes[i]
}
}
return boxes
}
func (sys *TextPredictSystem) getRotateCropImage(img gocv.Mat, box [][]int) gocv.Mat {
cropW := int(math.Sqrt(math.Pow(float64(box[0][0]-box[1][0]), 2) + math.Pow(float64(box[0][1]-box[1][1]), 2)))
cropH := int(math.Sqrt(math.Pow(float64(box[0][0]-box[3][0]), 2) + math.Pow(float64(box[0][1]-box[3][1]), 2)))
ptsstd := make([]image.Point, 4)
ptsstd[0] = image.Pt(0, 0)
ptsstd[1] = image.Pt(cropW, 0)
ptsstd[2] = image.Pt(cropW, cropH)
ptsstd[3] = image.Pt(0, cropH)
points := make([]image.Point, 4)
points[0] = image.Pt(box[0][0], box[0][1])
points[1] = image.Pt(box[1][0], box[1][1])
points[2] = image.Pt(box[2][0], box[2][1])
points[3] = image.Pt(box[3][0], box[3][1])
M := gocv.GetPerspectiveTransform(points, ptsstd)
defer M.Close()
dstimg := gocv.NewMat()
gocv.WarpPerspectiveWithParams(img, &dstimg, M, image.Pt(cropW, cropH),
gocv.InterpolationCubic, gocv.BorderReplicate, color.RGBA{0, 0, 0, 0})
if float64(dstimg.Rows()) >= float64(dstimg.Cols())*1.5 {
srcCopy := gocv.NewMat()
gocv.Transpose(dstimg, &srcCopy)
defer dstimg.Close()
gocv.Flip(srcCopy, &srcCopy, 0)
return srcCopy
}
return dstimg
}
func (sys *TextPredictSystem) Run(img gocv.Mat) []OCRText {
srcimg := gocv.NewMat()
defer srcimg.Close()
img.CopyTo(&srcimg)
boxes := sys.detector.Run(img)
if len(boxes) == 0 {
return nil
}
boxes = sys.sortBoxes(boxes)
cropimages := make([]gocv.Mat, len(boxes))
for i := 0; i < len(boxes); i++ {
tmpbox := make([][]int, len(boxes[i]))
for j := 0; j < len(tmpbox); j++ {
tmpbox[j] = make([]int, len(boxes[i][j]))
copy(tmpbox[j], boxes[i][j])
}
cropimg := sys.getRotateCropImage(srcimg, tmpbox)
cropimages[i] = cropimg
}
if sys.cls != nil {
cropimages = sys.cls.Run(cropimages)
}
recResult := sys.rec.Run(cropimages, boxes)
return recResult
}
type OCRSystem struct {
args map[string]interface{}
tps *TextPredictSystem
}
func NewOCRSystem(confFile string, a map[string]interface{}) *OCRSystem {
args, err := ReadYaml(confFile)
if err != nil {
log.Printf("Read config file %v failed! Please check. err: %v\n", confFile, err)
log.Println("Program will use default config.")
args = defaultArgs
}
for k, v := range a {
args[k] = v
}
return &OCRSystem{
args: args,
tps: NewTextPredictSystem(args),
}
}
func (ocr *OCRSystem) StartServer(port string) {
http.HandleFunc("/ocr", ocr.predictHandler)
log.Println("OCR Server has been started on port :", port)
err := http.ListenAndServe(":"+port, nil)
if err != nil {
log.Panicf("http error! error: %v\n", err)
}
}
func (ocr *OCRSystem) predictHandler(w http.ResponseWriter, r *http.Request) {
if r.Method != "POST" {
w.Write([]byte(errors.New("post method only").Error()))
return
}
r.ParseMultipartForm(32 << 20)
var buf bytes.Buffer
file, header, err := r.FormFile("image")
if err != nil {
w.Write([]byte(err.Error()))
return
}
defer file.Close()
ext := strings.ToLower(path.Ext(header.Filename))
if ext != ".jpg" && ext != ".png" {
w.Write([]byte(errors.New("only support image endswith jpg/png").Error()))
return
}
io.Copy(&buf, file)
img, err2 := gocv.IMDecode(buf.Bytes(), gocv.IMReadColor)
defer img.Close()
if err2 != nil {
w.Write([]byte(err2.Error()))
return
}
result := ocr.PredictOneImage(img)
if output, err3 := json.Marshal(result); err3 != nil {
w.Write([]byte(err3.Error()))
} else {
w.Write(output)
}
}
func (ocr *OCRSystem) PredictOneImage(img gocv.Mat) []OCRText {
return ocr.tps.Run(img)
}
func (ocr *OCRSystem) PredictDirImages(dirname string) map[string][]OCRText {
if dirname == "" {
return nil
}
imgs, _ := filepath.Glob(dirname + "/*.jpg")
tmpimgs, _ := filepath.Glob(dirname + "/*.png")
imgs = append(imgs, tmpimgs...)
results := make(map[string][]OCRText, len(imgs))
for i := 0; i < len(imgs); i++ {
imgname := imgs[i]
img := ReadImage(imgname)
defer img.Close()
res := ocr.PredictOneImage(img)
results[imgname] = res
}
return results
}
package ocr
var (
defaultArgs = map[string]interface{}{
"use_gpu": true,
"ir_optim": true,
"enable_mkldnn": false,
"use_tensorrt": false,
"num_cpu_threads": 6,
"gpu_id": 0,
"gpu_mem": 2000,
"det_algorithm": "DB",
"det_model_dir": "https://paddleocr.bj.bcebos.com/20-09-22/mobile/det/ch_ppocr_mobile_v1.1_det_infer.tar",
"det_max_side_len": 960,
"det_db_thresh": 0.3,
"det_db_box_thresh": 0.5,
"det_db_unclip_ratio": 2.0,
"det_east_score_thresh": 0.8,
"det_east_cover_thresh": 0.1,
"det_east_nms_thresh": 0.2,
"rec_algorithm": "CRNN",
"rec_model_dir": "https://paddleocr.bj.bcebos.com/20-09-22/mobile/rec/ch_ppocr_mobile_v1.1_rec_infer.tar",
"rec_image_shape": []interface{}{3, 32, 320},
"rec_char_type": "ch",
"rec_batch_num": 30,
"max_text_length": 25,
"rec_char_dict_path": "https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/develop/ppocr/utils/ppocr_keys_v1.txt",
"use_space_char": true,
"use_angle_cls": false,
"cls_model_dir": "https://paddleocr.bj.bcebos.com/20-09-22/cls/ch_ppocr_mobile_v1.1_cls_infer.tar",
"cls_image_shape": []interface{}{3, 48, 192},
"label_list": []interface{}{"0", "180"},
"cls_batch_num": 30,
"cls_thresh": 0.9,
"lang": "ch",
"det": true,
"rec": true,
"cls": false,
}
)
package ocr
import (
"log"
"os"
"time"
"github.com/LKKlein/gocv"
)
type TextClassifier struct {
*PaddleModel
batchNum int
thresh float64
shape []int
labels []string
}
type ClsResult struct {
Score float32
Label int64
}
func NewTextClassifier(modelDir string, args map[string]interface{}) *TextClassifier {
shapes := []int{3, 48, 192}
if v, ok := args["cls_image_shape"]; ok {
for i, s := range v.([]interface{}) {
shapes[i] = s.(int)
}
}
cls := &TextClassifier{
PaddleModel: NewPaddleModel(args),
batchNum: getInt(args, "cls_batch_num", 30),
thresh: getFloat64(args, "cls_thresh", 0.9),
shape: shapes,
}
if checkModelExists(modelDir) {
home, _ := os.UserHomeDir()
modelDir, _ = downloadModel(home+"/.paddleocr/cls", modelDir)
} else {
log.Panicf("cls model path: %v not exist! Please check!", modelDir)
}
cls.LoadModel(modelDir)
return cls
}
func (cls *TextClassifier) Run(imgs []gocv.Mat) []gocv.Mat {
batch := cls.batchNum
var clsTime int64 = 0
clsout := make([]ClsResult, len(imgs))
srcimgs := make([]gocv.Mat, len(imgs))
c, h, w := cls.shape[0], cls.shape[1], cls.shape[2]
for i := 0; i < len(imgs); i += batch {
j := i + batch
if len(imgs) < j {
j = len(imgs)
}
normImgs := make([]float32, (j-i)*c*h*w)
for k := i; k < j; k++ {
tmp := gocv.NewMat()
imgs[k].CopyTo(&tmp)
srcimgs[k] = tmp
img := clsResize(imgs[k], cls.shape)
data := normPermute(img, []float32{0.5, 0.5, 0.5}, []float32{0.5, 0.5, 0.5}, 255.0)
copy(normImgs[(k-i)*c*h*w:], data)
}
st := time.Now()
cls.input.SetValue(normImgs)
cls.input.Reshape([]int32{int32(j - i), int32(c), int32(h), int32(w)})
cls.predictor.SetZeroCopyInput(cls.input)
cls.predictor.ZeroCopyRun()
cls.predictor.GetZeroCopyOutput(cls.outputs[0])
cls.predictor.GetZeroCopyOutput(cls.outputs[1])
var probout [][]float32
var labelout []int64
if len(cls.outputs[0].Shape()) == 2 {
probout = cls.outputs[0].Value().([][]float32)
} else {
labelout = cls.outputs[0].Value().([]int64)
}
if len(cls.outputs[1].Shape()) == 2 {
probout = cls.outputs[1].Value().([][]float32)
} else {
labelout = cls.outputs[1].Value().([]int64)
}
clsTime += int64(time.Since(st).Milliseconds())
for no, label := range labelout {
score := probout[no][label]
clsout[i+no] = ClsResult{
Score: score,
Label: label,
}
if label%2 == 1 && float64(score) > cls.thresh {
gocv.Rotate(srcimgs[i+no], &srcimgs[i+no], gocv.Rotate180Clockwise)
}
}
}
log.Println("cls num: ", len(clsout), ", cls time elapse: ", clsTime, "ms")
return srcimgs
}
package ocr
import (
"log"
"os"
"time"
"github.com/LKKlein/gocv"
)
type DBDetector struct {
*PaddleModel
preProcess DetPreProcess
postProcess DetPostProcess
}
func NewDBDetector(modelDir string, args map[string]interface{}) *DBDetector {
maxSideLen := getInt(args, "det_max_side_len", 960)
thresh := getFloat64(args, "det_db_thresh", 0.3)
boxThresh := getFloat64(args, "det_db_box_thresh", 0.5)
unClipRatio := getFloat64(args, "det_db_unclip_ratio", 2.0)
detector := &DBDetector{
PaddleModel: NewPaddleModel(args),
preProcess: NewDBProcess(make([]int, 0), maxSideLen),
postProcess: NewDBPostProcess(thresh, boxThresh, unClipRatio),
}
if checkModelExists(modelDir) {
home, _ := os.UserHomeDir()
modelDir, _ = downloadModel(home+"/.paddleocr/det", modelDir)
} else {
log.Panicf("det model path: %v not exist! Please check!", modelDir)
}
detector.LoadModel(modelDir)
return detector
}
func (det *DBDetector) Run(img gocv.Mat) [][][]int {
oriH := img.Rows()
oriW := img.Cols()
data, resizeH, resizeW := det.preProcess.Run(img)
st := time.Now()
det.input.SetValue(data)
det.input.Reshape([]int32{1, 3, int32(resizeH), int32(resizeW)})
det.predictor.SetZeroCopyInput(det.input)
det.predictor.ZeroCopyRun()
det.predictor.GetZeroCopyOutput(det.outputs[0])
ratioH, ratioW := float64(resizeH)/float64(oriH), float64(resizeW)/float64(oriW)
boxes := det.postProcess.Run(det.outputs[0], oriH, oriW, ratioH, ratioW)
log.Println("det_box num: ", len(boxes), ", time elapse: ", time.Since(st))
return boxes
}
package ocr
import (
"log"
"os"
"time"
"github.com/LKKlein/gocv"
)
type TextRecognizer struct {
*PaddleModel
batchNum int
textLen int
shape []int
charType string
labels []string
}
func NewTextRecognizer(modelDir string, args map[string]interface{}) *TextRecognizer {
shapes := []int{3, 32, 320}
if v, ok := args["rec_image_shape"]; ok {
for i, s := range v.([]interface{}) {
shapes[i] = s.(int)
}
}
home, _ := os.UserHomeDir()
labelpath := getString(args, "rec_char_dict_path", home+"/.paddleocr/rec/ppocr_keys_v1.txt")
labels := readLines2StringSlice(labelpath)
if getBool(args, "use_space_char", true) {
labels = append(labels, " ")
}
rec := &TextRecognizer{
PaddleModel: NewPaddleModel(args),
batchNum: getInt(args, "rec_batch_num", 30),
textLen: getInt(args, "max_text_length", 25),
charType: getString(args, "rec_char_type", "ch"),
shape: shapes,
labels: labels,
}
if checkModelExists(modelDir) {
modelDir, _ = downloadModel(home+"/.paddleocr/rec/ch", modelDir)
} else {
log.Panicf("rec model path: %v not exist! Please check!", modelDir)
}
rec.LoadModel(modelDir)
return rec
}
func (rec *TextRecognizer) Run(imgs []gocv.Mat, bboxes [][][]int) []OCRText {
recResult := make([]OCRText, 0, len(imgs))
batch := rec.batchNum
var recTime int64 = 0
c, h, w := rec.shape[0], rec.shape[1], rec.shape[2]
for i := 0; i < len(imgs); i += batch {
j := i + batch
if len(imgs) < j {
j = len(imgs)
}
maxwhratio := 0.0
for k := i; k < j; k++ {
h, w := imgs[k].Rows(), imgs[k].Cols()
ratio := float64(w) / float64(h)
if ratio > maxwhratio {
maxwhratio = ratio
}
}
if rec.charType == "ch" {
w = int(32 * maxwhratio)
}
normimgs := make([]float32, (j-i)*c*h*w)
for k := i; k < j; k++ {
data := crnnPreprocess(imgs[k], rec.shape, []float32{0.5, 0.5, 0.5},
[]float32{0.5, 0.5, 0.5}, 255.0, maxwhratio, rec.charType)
defer imgs[k].Close()
copy(normimgs[(k-i)*c*h*w:], data)
}
st := time.Now()
rec.input.SetValue(normimgs)
rec.input.Reshape([]int32{int32(j - i), int32(c), int32(h), int32(w)})
rec.predictor.SetZeroCopyInput(rec.input)
rec.predictor.ZeroCopyRun()
rec.predictor.GetZeroCopyOutput(rec.outputs[0])
rec.predictor.GetZeroCopyOutput(rec.outputs[1])
recIdxBatch := rec.outputs[0].Value().([][]int64)
recIdxLod := rec.outputs[0].Lod()
predictBatch := rec.outputs[1].Value().([][]float32)
predictLod := rec.outputs[1].Lod()
recTime += int64(time.Since(st).Milliseconds())
for rno := 0; rno < len(recIdxLod)-1; rno++ {
predIdx := make([]int, 0, 2)
for beg := recIdxLod[rno]; beg < recIdxLod[rno+1]; beg++ {
predIdx = append(predIdx, int(recIdxBatch[beg][0]))
}
if len(predIdx) == 0 {
continue
}
words := ""
for n := 0; n < len(predIdx); n++ {
words += rec.labels[predIdx[n]]
}
score := 0.0
count := 0
blankPosition := int(rec.outputs[1].Shape()[1])
for beg := predictLod[rno]; beg < predictLod[rno+1]; beg++ {
argMaxID, maxVal := argmax(predictBatch[beg])
if blankPosition-1-argMaxID > 0 {
score += float64(maxVal)
count++
}
}
score = score / float64(count)
recResult = append(recResult, OCRText{
BBox: bboxes[i+rno],
Text: words,
Score: score,
})
}
}
log.Println("rec num: ", len(recResult), ", rec time elapse: ", recTime, "ms")
return recResult
}
package ocr
import (
"image"
"image/color"
"math"
"sort"
"github.com/LKKlein/gocv"
"github.com/PaddlePaddle/PaddleOCR/thirdparty/paddleocr-go/paddle"
clipper "github.com/ctessum/go.clipper"
)
type xFloatSortBy [][]float32
func (a xFloatSortBy) Len() int { return len(a) }
func (a xFloatSortBy) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a xFloatSortBy) Less(i, j int) bool { return a[i][0] < a[j][0] }
type xIntSortBy [][]int
func (a xIntSortBy) Len() int { return len(a) }
func (a xIntSortBy) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a xIntSortBy) Less(i, j int) bool { return a[i][0] < a[j][0] }
type DetPostProcess interface {
Run(output *paddle.ZeroCopyTensor, oriH, oriW int, ratioH, ratioW float64) [][][]int
}
type DBPostProcess struct {
thresh float64
boxThresh float64
maxCandidates int
unClipRatio float64
minSize int
}
func NewDBPostProcess(thresh, boxThresh, unClipRatio float64) *DBPostProcess {
return &DBPostProcess{
thresh: thresh,
boxThresh: boxThresh,
unClipRatio: unClipRatio,
maxCandidates: 1000,
minSize: 3,
}
}
func (d *DBPostProcess) getMinBoxes(rect gocv.RotatedRect) [][]float32 {
points := gocv.NewMat()
gocv.BoxPoints(rect, &points)
defer points.Close()
array := d.mat2slice(points)
sort.Sort(xFloatSortBy(array))
point1, point2, point3, point4 := array[0], array[1], array[2], array[3]
if array[3][1] <= array[2][1] {
point2, point3 = array[3], array[2]
} else {
point2, point3 = array[2], array[3]
}
if array[1][1] <= array[0][1] {
point1, point4 = array[1], array[0]
} else {
point1, point4 = array[0], array[1]
}
array = [][]float32{point1, point2, point3, point4}
return array
}
func (d *DBPostProcess) mat2slice(mat gocv.Mat) [][]float32 {
array := make([][]float32, mat.Rows())
for i := 0; i < mat.Rows(); i++ {
tmp := make([]float32, mat.Cols())
for j := 0; j < mat.Cols(); j++ {
tmp[j] = mat.GetFloatAt(i, j)
}
array[i] = tmp
}
return array
}
func (d *DBPostProcess) boxScoreFast(array [][]float32, pred gocv.Mat) float64 {
height, width := pred.Rows(), pred.Cols()
boxX := []float32{array[0][0], array[1][0], array[2][0], array[3][0]}
boxY := []float32{array[0][1], array[1][1], array[2][1], array[3][1]}
xmin := clip(int(math.Floor(float64(minf(boxX)))), 0, width-1)
xmax := clip(int(math.Ceil(float64(maxf(boxX)))), 0, width-1)
ymin := clip(int(math.Floor(float64(minf(boxY)))), 0, height-1)
ymax := clip(int(math.Ceil(float64(maxf(boxY)))), 0, height-1)
mask := gocv.NewMatWithSize(ymax-ymin+1, xmax-xmin+1, gocv.MatTypeCV8UC1)
defer mask.Close()
ppt := make([][]image.Point, 1)
ppt[0] = make([]image.Point, 4)
ppt[0][0] = image.Point{int(array[0][0]) - xmin, int(array[0][1]) - ymin}
ppt[0][1] = image.Point{int(array[1][0]) - xmin, int(array[1][1]) - ymin}
ppt[0][2] = image.Point{int(array[2][0]) - xmin, int(array[2][1]) - ymin}
ppt[0][3] = image.Point{int(array[3][0]) - xmin, int(array[3][1]) - ymin}
gocv.FillPoly(&mask, ppt, color.RGBA{0, 0, 1, 0})
croppedImg := pred.Region(image.Rect(xmin, ymin, xmax+1, ymax+1))
s := croppedImg.MeanWithMask(mask)
return s.Val1
}
func (d *DBPostProcess) unClip(box [][]float32) gocv.RotatedRect {
var area, dist float64
for i := 0; i < 4; i++ {
area += float64(box[i][0]*box[(i+1)%4][1] - box[i][1]*box[(i+1)%4][0])
dist += math.Sqrt(float64(
(box[i][0]-box[(i+1)%4][0])*(box[i][0]-box[(i+1)%4][0]) +
(box[i][1]-box[(i+1)%4][1])*(box[i][1]-box[(i+1)%4][1]),
))
}
area = math.Abs(area / 2.0)
distance := area * d.unClipRatio / dist
offset := clipper.NewClipperOffset()
path := make([]*clipper.IntPoint, 4)
path[0] = &clipper.IntPoint{X: clipper.CInt(box[0][0]), Y: clipper.CInt(box[0][1])}
path[1] = &clipper.IntPoint{X: clipper.CInt(box[1][0]), Y: clipper.CInt(box[1][1])}
path[2] = &clipper.IntPoint{X: clipper.CInt(box[2][0]), Y: clipper.CInt(box[2][1])}
path[3] = &clipper.IntPoint{X: clipper.CInt(box[3][0]), Y: clipper.CInt(box[3][1])}
offset.AddPath(clipper.Path(path), clipper.JtRound, clipper.EtClosedPolygon)
soln := offset.Execute(distance)
points := make([]image.Point, 0, 4)
for i := 0; i < len(soln); i++ {
for j := 0; j < len(soln[i]); j++ {
points = append(points, image.Point{int(soln[i][j].X), int(soln[i][j].Y)})
}
}
var res gocv.RotatedRect
if len(points) <= 0 {
points = make([]image.Point, 4)
points[0] = image.Pt(0, 0)
points[1] = image.Pt(1, 0)
points[2] = image.Pt(1, 1)
points[3] = image.Pt(0, 1)
res = gocv.RotatedRect{
Contour: points,
BoundingRect: image.Rect(0, 0, 1, 1),
Center: gocv.Point2f{X: 0.5, Y: 0.5},
Width: 1,
Height: 1,
Angle: 0,
}
} else {
res = gocv.MinAreaRect(points)
}
return res
}
func (d *DBPostProcess) boxesFromBitmap(pred gocv.Mat, mask gocv.Mat, ratioH float64, ratioW float64) [][][]int {
height, width := mask.Rows(), mask.Cols()
mask.MultiplyUChar(255)
contours := gocv.FindContours(mask, gocv.RetrievalList, gocv.ChainApproxSimple)
numContours := len(contours)
if numContours > d.maxCandidates {
numContours = d.maxCandidates
}
boxes := make([][][]int, 0, numContours)
for i := 0; i < numContours; i++ {
contour := contours[i]
boundingbox := gocv.MinAreaRect(contour)
if boundingbox.Width < float32(d.minSize) || boundingbox.Height < float32(d.minSize) {
continue
}
points := d.getMinBoxes(boundingbox)
score := d.boxScoreFast(points, pred)
if score < d.boxThresh {
continue
}
box := d.unClip(points)
if box.Width < float32(d.minSize+2) || box.Height < float32(d.minSize+2) {
continue
}
cliparray := d.getMinBoxes(box)
dstHeight, dstWidth := pred.Rows(), pred.Cols()
intcliparray := make([][]int, 4)
for i := 0; i < 4; i++ {
p := []int{
int(float64(clip(int(math.Round(
float64(cliparray[i][0]/float32(width)*float32(dstWidth)))), 0, dstWidth)) / ratioW),
int(float64(clip(int(math.Round(
float64(cliparray[i][1]/float32(height)*float32(dstHeight)))), 0, dstHeight)) / ratioH),
}
intcliparray[i] = p
}
boxes = append(boxes, intcliparray)
}
return boxes
}
func (d *DBPostProcess) orderPointsClockwise(box [][]int) [][]int {
sort.Sort(xIntSortBy(box))
leftmost := [][]int{box[0], box[1]}
rightmost := [][]int{box[2], box[3]}
if leftmost[0][1] > leftmost[1][1] {
leftmost[0], leftmost[1] = leftmost[1], leftmost[0]
}
if rightmost[0][1] > rightmost[1][1] {
rightmost[0], rightmost[1] = rightmost[1], rightmost[0]
}
return [][]int{leftmost[0], rightmost[0], rightmost[1], leftmost[1]}
}
func (d *DBPostProcess) filterTagDetRes(boxes [][][]int, oriH, oriW int) [][][]int {
points := make([][][]int, 0, len(boxes))
for i := 0; i < len(boxes); i++ {
boxes[i] = d.orderPointsClockwise(boxes[i])
for j := 0; j < len(boxes[i]); j++ {
boxes[i][j][0] = clip(boxes[i][j][0], 0, oriW-1)
boxes[i][j][1] = clip(boxes[i][j][1], 0, oriH-1)
}
}
for i := 0; i < len(boxes); i++ {
rectW := int(math.Sqrt(math.Pow(float64(boxes[i][0][0]-boxes[i][1][0]), 2.0) +
math.Pow(float64(boxes[i][0][1]-boxes[i][1][1]), 2.0)))
rectH := int(math.Sqrt(math.Pow(float64(boxes[i][0][0]-boxes[i][3][0]), 2.0) +
math.Pow(float64(boxes[i][0][1]-boxes[i][3][1]), 2.0)))
if rectW <= 4 || rectH <= 4 {
continue
}
points = append(points, boxes[i])
}
return points
}
func (d *DBPostProcess) Run(output *paddle.ZeroCopyTensor, oriH, oriW int, ratioH, ratioW float64) [][][]int {
v := output.Value().([][][][]float32)
shape := output.Shape()
height, width := int(shape[2]), int(shape[3])
pred := gocv.NewMatWithSize(height, width, gocv.MatTypeCV32F)
bitmap := gocv.NewMatWithSize(height, width, gocv.MatTypeCV8UC1)
thresh := float32(d.thresh)
for i := 0; i < height; i++ {
for j := 0; j < width; j++ {
pred.SetFloatAt(i, j, v[0][0][i][j])
if v[0][0][i][j] > thresh {
bitmap.SetUCharAt(i, j, 1)
} else {
bitmap.SetUCharAt(i, j, 0)
}
}
}
mask := gocv.NewMat()
kernel := gocv.GetStructuringElement(gocv.MorphRect, image.Point{2, 2})
gocv.Dilate(bitmap, &mask, kernel)
boxes := d.boxesFromBitmap(pred, mask, ratioH, ratioW)
dtboxes := d.filterTagDetRes(boxes, oriH, oriW)
return dtboxes
}
package ocr
import (
"image"
"image/color"
"math"
"github.com/LKKlein/gocv"
)
func resizeByShape(img gocv.Mat, resizeShape []int) (gocv.Mat, int, int) {
resizeH := resizeShape[0]
resizeW := resizeShape[1]
gocv.Resize(img, &img, image.Pt(resizeW, resizeH), 0, 0, gocv.InterpolationLinear)
return img, resizeH, resizeW
}
func resizeByMaxLen(img gocv.Mat, maxLen int) (gocv.Mat, int, int) {
oriH := img.Rows()
oriW := img.Cols()
var resizeH, resizeW int = oriH, oriW
var ratio float64 = 1.0
if resizeH > maxLen || resizeW > maxLen {
if resizeH > resizeW {
ratio = float64(maxLen) / float64(resizeH)
} else {
ratio = float64(maxLen) / float64(resizeW)
}
}
resizeH = int(float64(resizeH) * ratio)
resizeW = int(float64(resizeW) * ratio)
if resizeH%32 == 0 {
resizeH = resizeH
} else if resizeH/32 <= 1 {
resizeH = 32
} else {
resizeH = (resizeH/32 - 1) * 32
}
if resizeW%32 == 0 {
resizeW = resizeW
} else if resizeW/32 <= 1 {
resizeW = 32
} else {
resizeW = (resizeW/32 - 1) * 32
}
if resizeW <= 0 || resizeH <= 0 {
return gocv.NewMat(), 0, 0
}
gocv.Resize(img, &img, image.Pt(resizeW, resizeH), 0, 0, gocv.InterpolationLinear)
return img, resizeH, resizeW
}
func normPermute(img gocv.Mat, mean []float32, std []float32, scaleFactor float32) []float32 {
img.ConvertTo(&img, gocv.MatTypeCV32F)
img.DivideFloat(scaleFactor)
c := gocv.Split(img)
data := make([]float32, img.Rows()*img.Cols()*img.Channels())
for i := 0; i < 3; i++ {
c[i].SubtractFloat(mean[i])
c[i].DivideFloat(std[i])
defer c[i].Close()
x, _ := c[i].DataPtrFloat32()
copy(data[i*img.Rows()*img.Cols():], x)
}
return data
}
type DetPreProcess interface {
Run(gocv.Mat) ([]float32, int, int)
}
type DBPreProcess struct {
resizeType int
imageShape []int
maxSideLen int
mean []float32
std []float32
scaleFactor float32
}
func NewDBProcess(shape []int, sideLen int) *DBPreProcess {
db := &DBPreProcess{
resizeType: 0,
imageShape: shape,
maxSideLen: sideLen,
mean: []float32{0.485, 0.456, 0.406},
std: []float32{0.229, 0.224, 0.225},
scaleFactor: 255.0,
}
if len(shape) > 0 {
db.resizeType = 1
}
if sideLen == 0 {
db.maxSideLen = 2400
}
return db
}
func (d *DBPreProcess) Run(img gocv.Mat) ([]float32, int, int) {
var resizeH, resizeW int
if d.resizeType == 0 {
img, resizeH, resizeW = resizeByMaxLen(img, d.maxSideLen)
} else {
img, resizeH, resizeW = resizeByShape(img, d.imageShape)
}
im := normPermute(img, d.mean, d.std, d.scaleFactor)
return im, resizeH, resizeW
}
func clsResize(img gocv.Mat, resizeShape []int) gocv.Mat {
imgH, imgW := resizeShape[1], resizeShape[2]
h, w := img.Rows(), img.Cols()
ratio := float64(w) / float64(h)
var resizeW int
if math.Ceil(float64(imgH)*ratio) > float64(imgW) {
resizeW = imgW
} else {
resizeW = int(math.Ceil(float64(imgH) * ratio))
}
gocv.Resize(img, &img, image.Pt(resizeW, imgH), 0, 0, gocv.InterpolationLinear)
if resizeW < imgW {
gocv.CopyMakeBorder(img, &img, 0, 0, 0, imgW-resizeW, gocv.BorderConstant, color.RGBA{0, 0, 0, 0})
}
return img
}
func crnnPreprocess(img gocv.Mat, resizeShape []int, mean []float32, std []float32,
scaleFactor float32, whRatio float64, charType string) []float32 {
imgH := resizeShape[1]
imgW := resizeShape[2]
if charType == "ch" {
imgW = int(32 * whRatio)
}
h, w := img.Rows(), img.Cols()
ratio := float64(w) / float64(h)
var resizeW int
if math.Ceil(float64(imgH)*ratio) > float64(imgW) {
resizeW = imgW
} else {
resizeW = int(math.Ceil(float64(imgH) * ratio))
}
gocv.Resize(img, &img, image.Pt(resizeW, imgH), 0, 0, gocv.InterpolationLinear)
img.ConvertTo(&img, gocv.MatTypeCV32F)
img.DivideFloat(scaleFactor)
img.SubtractScalar(gocv.NewScalar(float64(mean[0]), float64(mean[1]), float64(mean[2]), 0))
img.DivideScalar(gocv.NewScalar(float64(std[0]), float64(std[1]), float64(std[2]), 0))
if resizeW < imgW {
gocv.CopyMakeBorder(img, &img, 0, 0, 0, imgW-resizeW, gocv.BorderConstant, color.RGBA{0, 0, 0, 0})
}
c := gocv.Split(img)
data := make([]float32, img.Rows()*img.Cols()*img.Channels())
for i := 0; i < 3; i++ {
defer c[i].Close()
x, _ := c[i].DataPtrFloat32()
copy(data[i*img.Rows()*img.Cols():], x)
}
return data
}
package ocr
import (
"archive/tar"
"io"
"io/ioutil"
"log"
"net/http"
"os"
"path"
"path/filepath"
"strings"
"github.com/LKKlein/gocv"
"gopkg.in/yaml.v3"
)
func getString(args map[string]interface{}, key string, dv string) string {
if f, ok := args[key]; ok {
return f.(string)
}
return dv
}
func getFloat64(args map[string]interface{}, key string, dv float64) float64 {
if f, ok := args[key]; ok {
return f.(float64)
}
return dv
}
func getInt(args map[string]interface{}, key string, dv int) int {
if i, ok := args[key]; ok {
return i.(int)
}
return dv
}
func getBool(args map[string]interface{}, key string, dv bool) bool {
if b, ok := args[key]; ok {
return b.(bool)
}
return dv
}
func ReadImage(image_path string) gocv.Mat {
img := gocv.IMRead(image_path, gocv.IMReadColor)
if img.Empty() {
log.Printf("Could not read image %s\n", image_path)
os.Exit(1)
}
return img
}
func clip(value, min, max int) int {
if value <= min {
return min
} else if value >= max {
return max
}
return value
}
func minf(data []float32) float32 {
v := data[0]
for _, val := range data {
if val < v {
v = val
}
}
return v
}
func maxf(data []float32) float32 {
v := data[0]
for _, val := range data {
if val > v {
v = val
}
}
return v
}
func mini(data []int) int {
v := data[0]
for _, val := range data {
if val < v {
v = val
}
}
return v
}
func maxi(data []int) int {
v := data[0]
for _, val := range data {
if val > v {
v = val
}
}
return v
}
func argmax(arr []float32) (int, float32) {
max_value, index := arr[0], 0
for i, item := range arr {
if item > max_value {
max_value = item
index = i
}
}
return index, max_value
}
func checkModelExists(modelPath string) bool {
if isPathExist(modelPath+"/model") && isPathExist(modelPath+"/params") {
return true
}
if strings.HasPrefix(modelPath, "http://") ||
strings.HasPrefix(modelPath, "ftp://") || strings.HasPrefix(modelPath, "https://") {
return true
}
return false
}
func downloadFile(filepath, url string) error {
resp, err := http.Get(url)
if err != nil {
return err
}
defer resp.Body.Close()
out, err := os.Create(filepath)
if err != nil {
return err
}
defer out.Close()
_, err = io.Copy(out, resp.Body)
log.Println("[download_file] from:", url, " to:", filepath)
return err
}
func isPathExist(path string) bool {
if _, err := os.Stat(path); err == nil {
return true
} else if os.IsNotExist(err) {
return false
}
return false
}
func downloadModel(modelDir, modelPath string) (string, error) {
if modelPath != "" && (strings.HasPrefix(modelPath, "http://") ||
strings.HasPrefix(modelPath, "ftp://") || strings.HasPrefix(modelPath, "https://")) {
if checkModelExists(modelDir) {
return modelDir, nil
}
_, suffix := path.Split(modelPath)
outPath := filepath.Join(modelDir, suffix)
outDir := filepath.Dir(outPath)
if !isPathExist(outDir) {
os.MkdirAll(outDir, os.ModePerm)
}
if !isPathExist(outPath) {
err := downloadFile(outPath, modelPath)
if err != nil {
return "", err
}
}
if strings.HasSuffix(outPath, ".tar") && !checkModelExists(modelDir) {
unTar(modelDir, outPath)
os.Remove(outPath)
return modelDir, nil
}
return modelDir, nil
}
return modelPath, nil
}
func unTar(dst, src string) (err error) {
fr, err := os.Open(src)
if err != nil {
return err
}
defer fr.Close()
tr := tar.NewReader(fr)
for {
hdr, err := tr.Next()
switch {
case err == io.EOF:
return nil
case err != nil:
return err
case hdr == nil:
continue
}
var dstFileDir string
if strings.Contains(hdr.Name, "model") {
dstFileDir = filepath.Join(dst, "model")
} else if strings.Contains(hdr.Name, "params") {
dstFileDir = filepath.Join(dst, "params")
}
switch hdr.Typeflag {
case tar.TypeDir:
continue
case tar.TypeReg:
file, err := os.OpenFile(dstFileDir, os.O_CREATE|os.O_RDWR, os.FileMode(hdr.Mode))
if err != nil {
return err
}
_, err2 := io.Copy(file, tr)
if err2 != nil {
return err2
}
file.Close()
}
}
return nil
}
func readLines2StringSlice(filepath string) []string {
if strings.HasPrefix(filepath, "http://") || strings.HasPrefix(filepath, "https://") {
home, _ := os.UserHomeDir()
dir := home + "/.paddleocr/rec/"
_, suffix := path.Split(filepath)
f := dir + suffix
if !isPathExist(f) {
err := downloadFile(f, filepath)
if err != nil {
log.Println("download ppocr key file error! You can specify your local dict path by conf.yaml.")
return nil
}
}
filepath = f
}
content, err := ioutil.ReadFile(filepath)
if err != nil {
log.Println("read ppocr key file error!")
return nil
}
lines := strings.Split(string(content), "\n")
return lines
}
func ReadYaml(yamlPath string) (map[string]interface{}, error) {
data, err := ioutil.ReadFile(yamlPath)
if err != nil {
return nil, err
}
var body interface{}
if err := yaml.Unmarshal(data, &body); err != nil {
return nil, err
}
body = convertYaml2Map(body)
return body.(map[string]interface{}), nil
}
func convertYaml2Map(i interface{}) interface{} {
switch x := i.(type) {
case map[interface{}]interface{}:
m2 := map[string]interface{}{}
for k, v := range x {
m2[k.(string)] = convertYaml2Map(v)
}
return m2
case []interface{}:
for i, v := range x {
x[i] = convertYaml2Map(v)
}
}
return i
}
package paddle
// #cgo CFLAGS: -I../paddle_c/include
// #cgo LDFLAGS: -lpaddle_fluid_c
// #include <stdbool.h>
import "C"
import "fmt"
func ConvertCBooleanToGo(b C.bool) bool {
var c_false C.bool
if b != c_false {
return true
}
return false
}
func numel(shape []int32) int32 {
n := int32(1)
for _, d := range shape {
n *= d
}
return n
}
func bug(format string, args ...interface{}) error {
return fmt.Errorf("Bug %v", fmt.Sprintf(format, args...))
}
package paddle
// #include <stdbool.h>
// #include <stdlib.h>
// #include <paddle_c_api.h>
import "C"
import (
"runtime"
"unsafe"
)
type Precision C.Precision
const (
Precision_FLOAT32 Precision = C.kFloat32
Precision_INT8 Precision = C.kInt8
Precision_HALF Precision = C.kHalf
)
type AnalysisConfig struct {
c *C.PD_AnalysisConfig
}
func NewAnalysisConfig() *AnalysisConfig {
c_config := C.PD_NewAnalysisConfig()
config := &AnalysisConfig{c: c_config}
runtime.SetFinalizer(config, (*AnalysisConfig).finalize)
return config
}
func (config *AnalysisConfig) finalize() {
C.PD_DeleteAnalysisConfig(config.c)
}
func (config *AnalysisConfig) SetModel(model, params string) {
c_model := C.CString(model)
defer C.free(unsafe.Pointer(c_model))
var c_params *C.char
if params == "" {
c_params = nil
} else {
c_params = C.CString(params)
defer C.free(unsafe.Pointer(c_params))
}
C.PD_SetModel(config.c, c_model, c_params)
}
func (config *AnalysisConfig) ModelDir() string {
return C.GoString(C.PD_ModelDir(config.c))
}
func (config *AnalysisConfig) ProgFile() string {
return C.GoString(C.PD_ProgFile(config.c))
}
func (config *AnalysisConfig) ParamsFile() string {
return C.GoString(C.PD_ParamsFile(config.c))
}
func (config *AnalysisConfig) EnableUseGpu(memory_pool_init_size_mb int, device_id int) {
C.PD_EnableUseGpu(config.c, C.int(memory_pool_init_size_mb), C.int(device_id))
}
func (config *AnalysisConfig) DisableGpu() {
C.PD_DisableGpu(config.c)
}
func (config *AnalysisConfig) UseGpu() bool {
return ConvertCBooleanToGo(C.PD_UseGpu(config.c))
}
func (config *AnalysisConfig) GpuDeviceId() int {
return int(C.PD_GpuDeviceId(config.c))
}
func (config *AnalysisConfig) MemoryPoolInitSizeMb() int {
return int(C.PD_MemoryPoolInitSizeMb(config.c))
}
func (config *AnalysisConfig) EnableCudnn() {
C.PD_EnableCUDNN(config.c)
}
func (config *AnalysisConfig) CudnnEnabled() bool {
return ConvertCBooleanToGo(C.PD_CudnnEnabled(config.c))
}
func (config *AnalysisConfig) SwitchIrOptim(x bool) {
C.PD_SwitchIrOptim(config.c, C.bool(x))
}
func (config *AnalysisConfig) IrOptim() bool {
return ConvertCBooleanToGo(C.PD_IrOptim(config.c))
}
func (config *AnalysisConfig) SwitchUseFeedFetchOps(x bool) {
C.PD_SwitchUseFeedFetchOps(config.c, C.bool(x))
}
func (config *AnalysisConfig) UseFeedFetchOpsEnabled() bool {
return ConvertCBooleanToGo(C.PD_UseFeedFetchOpsEnabled(config.c))
}
func (config *AnalysisConfig) SwitchSpecifyInputNames(x bool) {
C.PD_SwitchSpecifyInputNames(config.c, C.bool(x))
}
func (config *AnalysisConfig) SpecifyInputName() bool {
return ConvertCBooleanToGo(C.PD_SpecifyInputName(config.c))
}
func (config *AnalysisConfig) EnableTensorRtEngine(workspace_size int, max_batch_size int, min_subgraph_size int, precision Precision, use_static bool, use_calib_mode bool) {
C.PD_EnableTensorRtEngine(config.c, C.int(workspace_size), C.int(max_batch_size), C.int(min_subgraph_size), C.Precision(precision), C.bool(use_static), C.bool(use_calib_mode))
}
func (config *AnalysisConfig) TensorrtEngineEnabled() bool {
return ConvertCBooleanToGo(C.PD_TensorrtEngineEnabled(config.c))
}
func (config *AnalysisConfig) SwitchIrDebug(x bool) {
C.PD_SwitchIrDebug(config.c, C.bool(x))
}
func (config *AnalysisConfig) EnableMkldnn() {
C.PD_EnableMKLDNN(config.c)
}
func (config *AnalysisConfig) SetCpuMathLibraryNumThreads(n int) {
C.PD_SetCpuMathLibraryNumThreads(config.c, C.int(n))
}
func (config *AnalysisConfig) CpuMathLibraryNumThreads() int {
return int(C.PD_CpuMathLibraryNumThreads(config.c))
}
func (config *AnalysisConfig) EnableMkldnnQuantizer() {
C.PD_EnableMkldnnQuantizer(config.c)
}
func (config *AnalysisConfig) MkldnnQuantizerEnabled() bool {
return ConvertCBooleanToGo(C.PD_MkldnnQuantizerEnabled(config.c))
}
// SetModelBuffer
// ModelFromMemory
func (config *AnalysisConfig) EnableMemoryOptim() {
C.PD_EnableMemoryOptim(config.c)
}
func (config *AnalysisConfig) MemoryOptimEnabled() bool {
return ConvertCBooleanToGo(C.PD_MemoryOptimEnabled(config.c))
}
func (config *AnalysisConfig) EnableProfile() {
C.PD_EnableProfile(config.c)
}
func (config *AnalysisConfig) ProfileEnabled() bool {
return ConvertCBooleanToGo(C.PD_ProfileEnabled(config.c))
}
func (config *AnalysisConfig) DisableGlogInfo() {
C.PD_DisableGlogInfo(config.c)
}
func (config *AnalysisConfig) DeletePass(pass string) {
c_pass := C.CString(pass)
defer C.free(unsafe.Pointer(c_pass))
C.PD_DeletePass(config.c, c_pass)
}
func (config *AnalysisConfig) SetInValid() {
C.PD_SetInValid(config.c)
}
func (config *AnalysisConfig) IsValid() bool {
return ConvertCBooleanToGo(C.PD_IsValid(config.c))
}
package paddle
// #include <stdbool.h>
// #include "paddle_c_api.h"
import "C"
import (
"reflect"
"runtime"
"unsafe"
)
type Predictor struct {
c *C.PD_Predictor
}
func NewPredictor(config *AnalysisConfig) *Predictor {
c_predictor := C.PD_NewPredictor((*config).c)
predictor := &Predictor{c: c_predictor}
runtime.SetFinalizer(predictor, (*Predictor).finalize)
return predictor
}
func (predictor *Predictor) finalize() {
C.PD_DeletePredictor(predictor.c)
}
func DeletePredictor(predictor *Predictor) {
C.PD_DeletePredictor(predictor.c)
}
func (predictor *Predictor) GetInputNum() int {
return int(C.PD_GetInputNum(predictor.c))
}
func (predictor *Predictor) GetOutputNum() int {
return int(C.PD_GetOutputNum(predictor.c))
}
func (predictor *Predictor) GetInputName(n int) string {
return C.GoString(C.PD_GetInputName(predictor.c, C.int(n)))
}
func (predictor *Predictor) GetOutputName(n int) string {
return C.GoString(C.PD_GetOutputName(predictor.c, C.int(n)))
}
func (predictor *Predictor) GetInputTensors() [](*ZeroCopyTensor) {
var result [](*ZeroCopyTensor)
for i := 0; i < predictor.GetInputNum(); i++ {
tensor := NewZeroCopyTensor()
tensor.c.name = C.PD_GetInputName(predictor.c, C.int(i))
result = append(result, tensor)
}
return result
}
func (predictor *Predictor) GetOutputTensors() [](*ZeroCopyTensor) {
var result [](*ZeroCopyTensor)
for i := 0; i < predictor.GetOutputNum(); i++ {
tensor := NewZeroCopyTensor()
tensor.c.name = C.PD_GetOutputName(predictor.c, C.int(i))
result = append(result, tensor)
}
return result
}
func (predictor *Predictor) GetInputNames() []string {
names := make([]string, predictor.GetInputNum())
for i := 0; i < len(names); i++ {
names[i] = predictor.GetInputName(i)
}
return names
}
func (predictor *Predictor) GetOutputNames() []string {
names := make([]string, predictor.GetOutputNum())
for i := 0; i < len(names); i++ {
names[i] = predictor.GetOutputName(i)
}
return names
}
func (predictor *Predictor) SetZeroCopyInput(tensor *ZeroCopyTensor) {
C.PD_SetZeroCopyInput(predictor.c, tensor.c)
}
func (predictor *Predictor) GetZeroCopyOutput(tensor *ZeroCopyTensor) {
C.PD_GetZeroCopyOutput(predictor.c, tensor.c)
tensor.name = C.GoString(tensor.c.name)
var shape []int32
shape_hdr := (*reflect.SliceHeader)(unsafe.Pointer(&shape))
shape_hdr.Data = uintptr(unsafe.Pointer(tensor.c.shape.data))
shape_hdr.Len = int(tensor.c.shape.length / C.sizeof_int)
shape_hdr.Cap = int(tensor.c.shape.length / C.sizeof_int)
tensor.Reshape(shape)
}
func (predictor *Predictor) ZeroCopyRun() {
C.PD_ZeroCopyRun(predictor.c)
}
package paddle
// #include <stdbool.h>
// #include <stdlib.h>
// #include <string.h>
// #include <paddle_c_api.h>
import "C"
import (
"reflect"
"runtime"
"unsafe"
)
type PaddleDType C.PD_DataType
const (
FLOAT32 PaddleDType = C.PD_FLOAT32
INT32 PaddleDType = C.PD_INT32
INT64 PaddleDType = C.PD_INT64
UINT8 PaddleDType = C.PD_UINT8
UNKDTYPE PaddleDType = C.PD_UNKDTYPE
)
var types = []struct {
gotype reflect.Type
dtype PaddleDType
}{
{reflect.TypeOf(float32(0)), FLOAT32},
{reflect.TypeOf(int32(0)), INT32},
{reflect.TypeOf(int64(0)), INT64},
{reflect.TypeOf(uint8(0)), UINT8},
}
func typeOfDataType(dtype PaddleDType) reflect.Type {
var ret reflect.Type
for _, t := range types {
if t.dtype == dtype {
ret = t.gotype
}
}
return ret
}
func sizeofDataType(dtype PaddleDType) int32 {
switch dtype {
case UINT8:
return int32(C.sizeof_uchar)
case INT32:
return int32(C.sizeof_int)
case INT64:
return int32(C.sizeof_longlong)
case FLOAT32:
return int32(C.sizeof_float)
}
return -1
}
func shapeAndTypeOf(val reflect.Value) (shape []int32, dt PaddleDType) {
gotype := val.Type()
for gotype.Kind() == reflect.Array || gotype.Kind() == reflect.Slice {
shape = append(shape, int32(val.Len()))
if val.Len() > 0 {
val = val.Index(0)
}
gotype = gotype.Elem()
}
for _, t := range types {
if gotype.Kind() == t.gotype.Kind() {
return shape, PaddleDType(t.dtype)
}
}
return shape, dt
}
type ZeroCopyTensor struct {
c *C.PD_ZeroCopyTensor
name string
shape []int32
}
func NewZeroCopyTensor() *ZeroCopyTensor {
c_tensor := C.PD_NewZeroCopyTensor()
tensor := &ZeroCopyTensor{c: c_tensor}
runtime.SetFinalizer(tensor, (*ZeroCopyTensor).finalize)
return tensor
}
func (tensor *ZeroCopyTensor) finalize() {
C.PD_DeleteZeroCopyTensor(tensor.c)
}
func (tensor *ZeroCopyTensor) Shape() []int32 {
return tensor.shape
}
func (tensor *ZeroCopyTensor) Name() string {
return C.GoString(tensor.c.name)
}
func (tensor *ZeroCopyTensor) Rename(name string) {
tensor.name = name
tensor.c.name = (*C.char)(unsafe.Pointer(tensor.c.name))
}
func (tensor *ZeroCopyTensor) Reshape(shape []int32) {
tensor.shape = make([]int32, len(shape))
copy(tensor.shape, shape)
length := C.sizeof_int * C.size_t(len(shape))
if tensor.c.shape.capacity < C.size_t(length) {
if tensor.c.shape.capacity != C.size_t(0) {
C.free(tensor.c.shape.data)
}
tensor.c.shape.data = C.malloc(length)
tensor.c.shape.capacity = length
}
tensor.c.shape.length = length
C.memcpy(tensor.c.shape.data, unsafe.Pointer(&shape[0]), length)
}
func (tensor *ZeroCopyTensor) DataType() PaddleDType {
return PaddleDType(tensor.c.dtype)
}
func (tensor *ZeroCopyTensor) SetValue(value interface{}) {
val := reflect.ValueOf(value)
shape, dtype := shapeAndTypeOf(val)
num := numel(shape)
length := C.size_t(sizeofDataType(dtype) * num)
if tensor.c.data.capacity < length {
if tensor.c.data.capacity != C.size_t(0) {
C.free(tensor.c.data.data)
}
tensor.c.data.data = C.malloc(length)
tensor.c.data.capacity = length
}
tensor.c.data.length = length
switch dtype {
case PaddleDType(UINT8):
data := val.Interface().([]uint8)
C.memcpy(tensor.c.data.data, unsafe.Pointer(&data[0]), length)
case PaddleDType(INT32):
data := val.Interface().([]int32)
C.memcpy(tensor.c.data.data, unsafe.Pointer(&data[0]), length)
case PaddleDType(INT64):
data := val.Interface().([]int64)
C.memcpy(tensor.c.data.data, unsafe.Pointer(&data[0]), length)
case PaddleDType(FLOAT32):
data := val.Interface().([]float32)
C.memcpy(tensor.c.data.data, unsafe.Pointer(&data[0]), length)
}
tensor.c.dtype = C.PD_DataType(dtype)
}
func (tensor *ZeroCopyTensor) tensorData() []byte {
cbytes := tensor.c.data.data
length := tensor.c.data.length
var slice []byte
if unsafe.Sizeof(unsafe.Pointer(nil)) == 8 {
slice = (*[1<<50 - 1]byte)(unsafe.Pointer(cbytes))[:length:length]
} else {
slice = (*[1 << 30]byte)(unsafe.Pointer(cbytes))[:length:length]
}
return slice
}
func (tensor *ZeroCopyTensor) Value() interface{} {
t := typeOfDataType(PaddleDType(tensor.c.dtype))
data := tensor.tensorData()
return decodeTensor(data, tensor.Shape(), t).Interface()
}
// It isn't safe to use reflect.SliceHeader as it uses a uintptr for Data and
// this is not inspected by the garbage collector
type sliceHeader struct {
Data unsafe.Pointer
Len int
Cap int
}
func decodeTensor(raw []byte, shape []int32, t reflect.Type) reflect.Value {
// Create a 1-dimensional slice of the base large enough for the data and
// copy the data in.
n := int(numel(shape))
l := n * int(t.Size())
typ := reflect.SliceOf(t)
slice := reflect.MakeSlice(typ, n, n)
baseBytes := *(*[]byte)(unsafe.Pointer(&sliceHeader{
Data: unsafe.Pointer(slice.Pointer()),
Len: l,
Cap: l,
}))
copy(baseBytes, raw)
if len(shape) == 0 {
// for n
return slice.Index(0)
}
if len(shape) == 1 {
// for {}
return slice
}
// for {{} {}} {{} {}} {{} {}}
if n == 0 {
n = int(numel(shape[:len(shape)-1]))
}
for i := len(shape) - 2; i >= 0; i-- {
underlyingSize := typ.Elem().Size()
typ = reflect.SliceOf(typ)
subsliceLen := int(shape[i+1])
if subsliceLen != 0 {
n = n / subsliceLen
}
data := unsafe.Pointer(slice.Pointer())
nextSlice := reflect.MakeSlice(typ, n, n)
for j := 0; j < n; j++ {
// This is equivalent to nSlice[j] = slice[j*subsliceLen: (j+1)*subsliceLen]
setSliceInSlice(nextSlice, j, sliceHeader{
Data: unsafe.Pointer(uintptr(data) + (uintptr(j*subsliceLen) * underlyingSize)),
Len: subsliceLen,
Cap: subsliceLen,
})
}
slice = nextSlice
}
return slice
}
// setSliceInSlice sets slice[index] = content.
func setSliceInSlice(slice reflect.Value, index int, content sliceHeader) {
const sliceSize = unsafe.Sizeof(sliceHeader{})
// We must cast slice.Pointer to uninptr & back again to avoid GC issues.
// See https://github.com/google/go-cmp/issues/167#issuecomment-546093202
*(*sliceHeader)(unsafe.Pointer(uintptr(unsafe.Pointer(slice.Pointer())) + (uintptr(index) * sliceSize))) = content
}
func (tensor *ZeroCopyTensor) Lod() []uint {
var val []uint
valHdr := (*reflect.SliceHeader)(unsafe.Pointer(&val))
valHdr.Data = uintptr(unsafe.Pointer(tensor.c.lod.data))
valHdr.Len = int(tensor.c.lod.length / C.sizeof_size_t)
valHdr.Cap = int(tensor.c.lod.length / C.sizeof_size_t)
return val
}
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#if defined(_WIN32)
#ifdef PADDLE_ON_INFERENCE
#define PADDLE_CAPI_EXPORT __declspec(dllexport)
#else
#define PADDLE_CAPI_EXPORT __declspec(dllimport)
#endif // PADDLE_ON_INFERENCE
#else
#define PADDLE_CAPI_EXPORT __attribute__((visibility("default")))
#endif // _WIN32
#ifdef __cplusplus
extern "C" {
#endif
enum PD_DataType { PD_FLOAT32, PD_INT32, PD_INT64, PD_UINT8, PD_UNKDTYPE };
typedef enum PD_DataType PD_DataType;
typedef struct PD_PaddleBuf PD_PaddleBuf;
typedef struct PD_AnalysisConfig PD_AnalysisConfig;
typedef struct PD_Predictor PD_Predictor;
typedef struct PD_Buffer {
void* data;
size_t length;
size_t capacity;
} PD_Buffer;
typedef struct PD_ZeroCopyTensor {
PD_Buffer data;
PD_Buffer shape;
PD_Buffer lod;
PD_DataType dtype;
char* name;
} PD_ZeroCopyTensor;
PADDLE_CAPI_EXPORT extern PD_ZeroCopyTensor* PD_NewZeroCopyTensor();
PADDLE_CAPI_EXPORT extern void PD_DeleteZeroCopyTensor(PD_ZeroCopyTensor*);
PADDLE_CAPI_EXPORT extern void PD_InitZeroCopyTensor(PD_ZeroCopyTensor*);
PADDLE_CAPI_EXPORT extern void PD_DestroyZeroCopyTensor(PD_ZeroCopyTensor*);
PADDLE_CAPI_EXPORT extern void PD_DeleteZeroCopyTensor(PD_ZeroCopyTensor*);
typedef struct PD_ZeroCopyData {
char* name;
void* data;
PD_DataType dtype;
int* shape;
int shape_size;
} PD_ZeroCopyData;
typedef struct InTensorShape {
char* name;
int* tensor_shape;
int shape_size;
} InTensorShape;
PADDLE_CAPI_EXPORT extern PD_PaddleBuf* PD_NewPaddleBuf();
PADDLE_CAPI_EXPORT extern void PD_DeletePaddleBuf(PD_PaddleBuf* buf);
PADDLE_CAPI_EXPORT extern void PD_PaddleBufResize(PD_PaddleBuf* buf,
size_t length);
PADDLE_CAPI_EXPORT extern void PD_PaddleBufReset(PD_PaddleBuf* buf, void* data,
size_t length);
PADDLE_CAPI_EXPORT extern bool PD_PaddleBufEmpty(PD_PaddleBuf* buf);
PADDLE_CAPI_EXPORT extern void* PD_PaddleBufData(PD_PaddleBuf* buf);
PADDLE_CAPI_EXPORT extern size_t PD_PaddleBufLength(PD_PaddleBuf* buf);
// PaddleTensor
typedef struct PD_Tensor PD_Tensor;
PADDLE_CAPI_EXPORT extern PD_Tensor* PD_NewPaddleTensor();
PADDLE_CAPI_EXPORT extern void PD_DeletePaddleTensor(PD_Tensor* tensor);
PADDLE_CAPI_EXPORT extern void PD_SetPaddleTensorName(PD_Tensor* tensor,
char* name);
PADDLE_CAPI_EXPORT extern void PD_SetPaddleTensorDType(PD_Tensor* tensor,
PD_DataType dtype);
PADDLE_CAPI_EXPORT extern void PD_SetPaddleTensorData(PD_Tensor* tensor,
PD_PaddleBuf* buf);
PADDLE_CAPI_EXPORT extern void PD_SetPaddleTensorShape(PD_Tensor* tensor,
int* shape, int size);
PADDLE_CAPI_EXPORT extern const char* PD_GetPaddleTensorName(
const PD_Tensor* tensor);
PADDLE_CAPI_EXPORT extern PD_DataType PD_GetPaddleTensorDType(
const PD_Tensor* tensor);
PADDLE_CAPI_EXPORT extern PD_PaddleBuf* PD_GetPaddleTensorData(
const PD_Tensor* tensor);
PADDLE_CAPI_EXPORT extern const int* PD_GetPaddleTensorShape(
const PD_Tensor* tensor, int* size);
// AnalysisPredictor
PADDLE_CAPI_EXPORT extern bool PD_PredictorRun(const PD_AnalysisConfig* config,
PD_Tensor* inputs, int in_size,
PD_Tensor** output_data,
int* out_size, int batch_size);
PADDLE_CAPI_EXPORT extern bool PD_PredictorZeroCopyRun(
const PD_AnalysisConfig* config, PD_ZeroCopyData* inputs, int in_size,
PD_ZeroCopyData** output, int* out_size);
// AnalysisConfig
enum Precision { kFloat32 = 0, kInt8, kHalf };
typedef enum Precision Precision;
PADDLE_CAPI_EXPORT extern PD_AnalysisConfig* PD_NewAnalysisConfig();
PADDLE_CAPI_EXPORT extern void PD_DeleteAnalysisConfig(
PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern void PD_SetModel(PD_AnalysisConfig* config,
const char* model_dir,
const char* params_path);
PADDLE_CAPI_EXPORT
extern void PD_SetProgFile(PD_AnalysisConfig* config, const char* x);
PADDLE_CAPI_EXPORT extern void PD_SetParamsFile(PD_AnalysisConfig* config,
const char* x);
PADDLE_CAPI_EXPORT extern void PD_SetOptimCacheDir(PD_AnalysisConfig* config,
const char* opt_cache_dir);
PADDLE_CAPI_EXPORT extern const char* PD_ModelDir(
const PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern const char* PD_ProgFile(
const PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern const char* PD_ParamsFile(
const PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern void PD_EnableUseGpu(PD_AnalysisConfig* config,
int memory_pool_init_size_mb,
int device_id);
PADDLE_CAPI_EXPORT extern void PD_DisableGpu(PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern bool PD_UseGpu(const PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern int PD_GpuDeviceId(const PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern int PD_MemoryPoolInitSizeMb(
const PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern float PD_FractionOfGpuMemoryForPool(
const PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern void PD_EnableCUDNN(PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern bool PD_CudnnEnabled(const PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern void PD_SwitchIrOptim(PD_AnalysisConfig* config,
bool x);
PADDLE_CAPI_EXPORT extern bool PD_IrOptim(const PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern void PD_SwitchUseFeedFetchOps(
PD_AnalysisConfig* config, bool x);
PADDLE_CAPI_EXPORT extern bool PD_UseFeedFetchOpsEnabled(
const PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern void PD_SwitchSpecifyInputNames(
PD_AnalysisConfig* config, bool x);
PADDLE_CAPI_EXPORT extern bool PD_SpecifyInputName(
const PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern void PD_EnableTensorRtEngine(
PD_AnalysisConfig* config, int workspace_size, int max_batch_size,
int min_subgraph_size, Precision precision, bool use_static,
bool use_calib_mode);
PADDLE_CAPI_EXPORT extern bool PD_TensorrtEngineEnabled(
const PD_AnalysisConfig* config);
typedef struct PD_MaxInputShape {
char* name;
int* shape;
int shape_size;
} PD_MaxInputShape;
PADDLE_CAPI_EXPORT extern void PD_SwitchIrDebug(PD_AnalysisConfig* config,
bool x);
PADDLE_CAPI_EXPORT extern void PD_EnableMKLDNN(PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern void PD_SetMkldnnCacheCapacity(
PD_AnalysisConfig* config, int capacity);
PADDLE_CAPI_EXPORT extern bool PD_MkldnnEnabled(
const PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern void PD_SetCpuMathLibraryNumThreads(
PD_AnalysisConfig* config, int cpu_math_library_num_threads);
PADDLE_CAPI_EXPORT extern int PD_CpuMathLibraryNumThreads(
const PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern void PD_EnableMkldnnQuantizer(
PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern bool PD_MkldnnQuantizerEnabled(
const PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern void PD_SetModelBuffer(PD_AnalysisConfig* config,
const char* prog_buffer,
size_t prog_buffer_size,
const char* params_buffer,
size_t params_buffer_size);
PADDLE_CAPI_EXPORT extern bool PD_ModelFromMemory(
const PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern void PD_EnableMemoryOptim(PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern bool PD_MemoryOptimEnabled(
const PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern void PD_EnableProfile(PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern bool PD_ProfileEnabled(
const PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern void PD_SetInValid(PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern bool PD_IsValid(const PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern void PD_DisableGlogInfo(PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern void PD_DeletePass(PD_AnalysisConfig* config,
char* pass_name);
PADDLE_CAPI_EXPORT extern PD_Predictor* PD_NewPredictor(
const PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern void PD_DeletePredictor(PD_Predictor* predictor);
PADDLE_CAPI_EXPORT extern int PD_GetInputNum(const PD_Predictor*);
PADDLE_CAPI_EXPORT extern int PD_GetOutputNum(const PD_Predictor*);
PADDLE_CAPI_EXPORT extern const char* PD_GetInputName(const PD_Predictor*, int);
PADDLE_CAPI_EXPORT extern const char* PD_GetOutputName(const PD_Predictor*,
int);
PADDLE_CAPI_EXPORT extern void PD_SetZeroCopyInput(
PD_Predictor* predictor, const PD_ZeroCopyTensor* tensor);
PADDLE_CAPI_EXPORT extern void PD_GetZeroCopyOutput(PD_Predictor* predictor,
PD_ZeroCopyTensor* tensor);
PADDLE_CAPI_EXPORT extern void PD_ZeroCopyRun(PD_Predictor* predictor);
#ifdef __cplusplus
} // extern "C"
#endif
package main
import (
"flag"
"log"
"github.com/PaddlePaddle/PaddleOCR/thirdparty/paddleocr-go/ocr"
)
var (
confFile string
image string
imageDir string
useServering bool
port string
)
func init() {
flag.StringVar(&confFile, "config", "config/conf.yaml", "config from ocr system. If not given, will use default config.")
flag.StringVar(&image, "image", "", "image to predict. if not given, will use image_dir")
flag.StringVar(&imageDir, "image_dir", "", "imgs in dir to be predicted. if not given, will check servering")
flag.BoolVar(&useServering, "use_servering", false, "whether to use ocr server. [default: false]")
flag.StringVar(&port, "port", "18600", "which port to serve ocr server. [default: 18600].")
}
func main() {
flag.Parse()
sys := ocr.NewOCRSystem(confFile, nil)
if image != "" {
img := ocr.ReadImage(image)
defer img.Close()
results := sys.PredictOneImage(img)
for _, res := range results {
log.Println(res)
}
return
}
if imageDir != "" {
results := sys.PredictDirImages(imageDir)
for k, vs := range results {
log.Printf("======== image: %v =======\n", k)
for _, res := range vs {
log.Println(res)
}
}
}
if useServering {
sys.StartServer(port)
}
}
......@@ -15,6 +15,7 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from ppocr.utils.utility import enable_static_mode
import os
import sys
......@@ -43,7 +44,6 @@ logger = initial_logger()
from ppocr.utils.save_load import init_model
def main():
startup_prog, eval_program, place, config, _ = program.preprocess()
......@@ -72,4 +72,5 @@ def main():
if __name__ == '__main__':
enable_static_mode()
main()
......@@ -191,10 +191,13 @@ if __name__ == "__main__":
if count > 0:
total_time += elapse
count += 1
print("Predict time of %s:" % image_file, elapse)
logger.info("The predicted time of img: {} is {}:".format(image_file,
elapse))
src_im = utility.draw_text_det_res(dt_boxes, image_file)
img_name_pure = os.path.split(image_file)[-1] # image_file.split("/")[-1]
img_path = os.path.join(draw_img_save, "det_res_%s" % img_name_pure)
cv2.imwrite(img_path, src_im)
img_name_pure = image_file.split("/")[-1]
cv2.imwrite(
os.path.join(draw_img_save, "det_res_%s" % img_name_pure), src_im)
logger.info("The visualized img saved in {}".format(
os.path.join(draw_img_save, "det_res_%s" % img_name_pure)))
if count > 1:
print("Avg Time:", total_time / (count - 1))
logger.info("Avg Time:", total_time / (count - 1))
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册