Commit bc7d441e, authored by WenmuZhou

Merge remote-tracking branch 'upstream/release/2.1' into 2.1_require

@@ -147,6 +147,7 @@ class MainWindow(QMainWindow, WindowMixin):
         self.itemsToShapesbox = {}
         self.shapesToItemsbox = {}
         self.prevLabelText = getStr('tempLabel')
+        self.noLabelText = getStr('nullLabel')
         self.model = 'paddle'
         self.PPreader = None
         self.autoSaveNum = 5

@@ -1020,7 +1021,7 @@ class MainWindow(QMainWindow, WindowMixin):
             item.setText(str([(int(p.x()), int(p.y())) for p in shape.points]))
         self.updateComboBox()

-    def updateComboBox(self):  # TODO: seems unused
+    def updateComboBox(self):
         # Get the unique labels and add them to the Combobox.
         itemsTextList = [str(self.labelList.item(i).text()) for i in range(self.labelList.count())]

@@ -1040,7 +1041,7 @@ class MainWindow(QMainWindow, WindowMixin):
         return dict(label=s.label,  # str
                     line_color=s.line_color.getRgb(),
                     fill_color=s.fill_color.getRgb(),
-                    points=[(p.x(), p.y()) for p in s.points],  # QPointF
+                    points=[(int(p.x()), int(p.y())) for p in s.points],  # QPointF
                     # add chris
                     difficult=s.difficult)  # bool

@@ -1069,7 +1070,7 @@ class MainWindow(QMainWindow, WindowMixin):
             # print('Image:{0} -> Annotation:{1}'.format(self.filePath, annotationFilePath))
             return True
         except:
-            self.errorMessage(u'Error saving label data')
+            self.errorMessage(u'Error saving label data', u'Error saving label data')
             return False

     def copySelectedShape(self):

@@ -1802,10 +1803,14 @@ class MainWindow(QMainWindow, WindowMixin):
                     result.insert(0, box)
                     print('result in reRec is ', result)
                     self.result_dic.append(result)
-                    if result[1][0] == shape.label:
-                        print('label no change')
-                    else:
-                        rec_flag += 1
+                else:
+                    print('Can not recognise the box')
+                    self.result_dic.append([box, (self.noLabelText, 0)])
+                if self.noLabelText == shape.label or result[1][0] == shape.label:
+                    print('label no change')
+                else:
+                    rec_flag += 1

         if len(self.result_dic) > 0 and rec_flag > 0:
             self.saveFile(mode='Auto')

@@ -1836,9 +1841,14 @@ class MainWindow(QMainWindow, WindowMixin):
                     print('label no change')
                 else:
                     shape.label = result[1][0]
+            else:
+                print('Can not recognise the box')
+                if self.noLabelText == shape.label:
+                    print('label no change')
+                else:
+                    shape.label = self.noLabelText
             self.singleLabel(shape)
             self.setDirty()
-            print(box)

     def autolcm(self):
         vbox = QVBoxLayout()
......
@@ -45,7 +45,7 @@ class Canvas(QWidget):
     CREATE, EDIT = list(range(2))
     _fill_drawing = False  # draw shadows

-    epsilon = 11.0
+    epsilon = 5.0

     def __init__(self, *args, **kwargs):
         super(Canvas, self).__init__(*args, **kwargs)
......
This diff is collapsed.
@@ -87,6 +87,7 @@ creatPolygon=四点标注
 drawSquares=正方形标注
 saveRec=保存识别结果
 tempLabel=待识别
+nullLabel=无法识别
 steps=操作步骤
 choseModelLg=选择模型语言
 cancel=取消
......
@@ -77,7 +77,7 @@ IR=Image Resize
 autoRecognition=Auto Recognition
 reRecognition=Re-recognition
 mfile=File
-medit=Eidt
+medit=Edit
 mview=View
 mhelp=Help
 iconList=Icon List

@@ -87,6 +87,7 @@ creatPolygon=Create Quadrilateral
 drawSquares=Draw Squares
 saveRec=Save Recognition Result
 tempLabel=TEMPORARY
+nullLabel=NULL
 steps=Steps
 choseModelLg=Choose Model Language
 cancel=Cancel
......
@@ -21,7 +21,7 @@ PaddleOCR supports both dynamic graph and static graph programming paradigm
 - Ultra lightweight ppocr_mobile series models: detection (3.0M) + direction classifier (1.4M) + recognition (5.0M) = 9.4M
 - General ppocr_server series models: detection (47.1M) + direction classifier (1.4M) + recognition (94.9M) = 143.4M
 - Support Chinese, English, and digit recognition, vertical text recognition, and long text recognition
-- Support more than 80 kinds of multi-language recognition models: [For details](./doc/doc_ch/multi_languages.md)
+- Support more than 80 kinds of multi-language recognition models: [For details](./doc/doc_en/multi_languages_en.md)
 - Rich toolkits related to the OCR areas
 - Semi-automatic data annotation tool, i.e., PPOCRLabel: support fast and efficient data annotation
 - Data synthesis tool, i.e., Style-Text: easy to synthesize a large number of images which are similar to the target scene image

@@ -97,7 +97,7 @@ For a new language request, please refer to [Guideline for new language_requests]
 - [Quick Inference Based on PIP](./doc/doc_en/whl_en.md)
 - [Python Inference](./doc/doc_en/inference_en.md)
 - [C++ Inference](./deploy/cpp_infer/readme_en.md)
-- [Serving](./deploy/pdserving/README.md)
+- [Serving](./deploy/hubserving/readme_en.md)
 - [Mobile](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/deploy/lite/readme_en.md)
 - [Benchmark](./doc/doc_en/benchmark_en.md)
 - Data Annotation and Synthesis
......
@@ -89,7 +89,7 @@ PaddleOCR supports both dynamic graph and static graph programming paradigms
 - [Quick inference via the pip-installed whl package](./doc/doc_ch/whl.md)
 - [Python inference engine](./doc/doc_ch/inference.md)
 - [C++ inference engine](./deploy/cpp_infer/readme.md)
-- [Serving deployment](./deploy/pdserving/README_CN.md)
+- [Serving deployment](./deploy/hubserving/readme_en.md)
 - [Mobile deployment](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/deploy/lite/readme.md)
 - [Benchmark](./doc/doc_ch/benchmark.md)
 - Datasets
......
@@ -62,20 +62,21 @@ PostProcess:
   mode: fast # fast or slow two ways
 Metric:
   name: E2EMetric
-  gt_mat_dir:  # the dir of gt_mat
+  gt_mat_dir: ./train_data/total_text/gt  # the dir of gt_mat
   character_dict_path: ppocr/utils/ic15_dict.txt
   main_indicator: f_score_e2e

 Train:
   dataset:
     name: PGDataSet
-    label_file_list: [.././train_data/total_text/train/]
+    data_dir: ./train_data/total_text/train
+    label_file_list: [./train_data/total_text/train/]
     ratio_list: [1.0]
-    data_format: icdar # two data format: icdar/textnet
     transforms:
       - DecodeImage: # load image
           img_mode: BGR
           channel_first: False
+      - E2ELabelEncode:
       - PGProcessTrain:
           batch_size: 14 # same as loader: batch_size_per_card
           min_crop_size: 24

@@ -92,13 +93,12 @@ Train:
 Eval:
   dataset:
     name: PGDataSet
-    data_dir: ./train_data/
+    data_dir: ./train_data/total_text/test
     label_file_list: [./train_data/total_text/test/]
     transforms:
       - DecodeImage: # load image
           img_mode: RGB
           channel_first: False
-      - E2ELabelEncode:
       - E2EResizeForTest:
           max_side_len: 768
       - NormalizeImage:

@@ -108,7 +108,7 @@ Eval:
           order: 'hwc'
       - ToCHWImage:
       - KeepKeys:
-          keep_keys: [ 'image', 'shape', 'polys', 'strs', 'tags', 'img_id']
+          keep_keys: [ 'image', 'shape', 'img_id']
   loader:
     shuffle: False
     drop_last: False
......
@@ -120,7 +120,7 @@
 #### Q1.1.10: In PaddleOCR, what are the ways to speed up model inference on CPU? What does TensorRT-based GPU acceleration require of the input?

-**A**: (1) The CPU can be accelerated with MKL-DNN. For Python inference, set enable_mkldnn to true ([reference code](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/tools/infer/utility.py#L84)); for C++ inference, set use_mkldnn 1 in the config file ([reference code](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/deploy/cpp_infer/tools/config.txt#L6))
+**A**: (1) The CPU can be accelerated with MKL-DNN. For Python inference, set enable_mkldnn to true ([reference code](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/tools/infer/utility.py#L99)); for C++ inference, set use_mkldnn 1 in the config file ([reference code](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/deploy/cpp_infer/tools/config.txt#L6))
 (2) For GPU, note issues such as variable-length input; variable-length input is only supported from TRT 6 onwards
......
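A minimal sketch of the Python-side switch this FAQ answer describes, using the paddleocr whl package on CPU; the image path is a placeholder, and `enable_mkldnn` mirrors the flag defined in tools/infer/utility.py:

```python
# Minimal sketch: CPU inference with MKL-DNN enabled via the paddleocr whl package.
# The image path is a placeholder; enable_mkldnn mirrors the utility.py flag.
from paddleocr import PaddleOCR

ocr = PaddleOCR(use_gpu=False, enable_mkldnn=True)  # CPU + MKL-DNN acceleration
result = ocr.ocr('doc/imgs/11.jpg')
for line in result:
    print(line)
```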
@@ -11,7 +11,7 @@ PaddleOCR aims to create a rich, leading, and practical OCR toolkit
 The English model supports detection and recognition of uppercase and lowercase letters and common punctuation, with optimized recognition of the space character:

 <div align="center">
-<img src="../imgs_results/multi_lang/en_1.jpg" width="400" height="600">
+<img src="../imgs_results/multi_lang/img_12.jpg" width="900" height="300">
 </div>

 The multilingual models cover Latin scripts, Arabic scripts, Traditional Chinese, Korean, Japanese, and more:

@@ -19,6 +19,8 @@ PaddleOCR aims to create a rich, leading, and practical OCR toolkit
 <div align="center">
 <img src="../imgs_results/multi_lang/japan_2.jpg" width="600" height="300">
 <img src="../imgs_results/multi_lang/french_0.jpg" width="300" height="300">
+<img src="../imgs_results/multi_lang/korean_0.jpg" width="500" height="300">
+<img src="../imgs_results/multi_lang/arabic_0.jpg" width="300" height="300">
 </div>

@@ -30,14 +32,9 @@ PaddleOCR aims to create a rich, leading, and practical OCR toolkit
 - [2 Quick start](#快速使用)
     - [2.1 Command-line usage](#命令行运行)
-        - [2.1.1 Whole-image prediction](#bash_检测+识别)
-        - [2.1.2 Recognition-only prediction](#bash_识别)
-        - [2.1.3 Detection-only prediction](#bash_检测)
     - [2.2 Running a Python script](#python_脚本运行)
-        - [2.2.1 Whole-image prediction](#python_检测+识别)
-        - [2.2.2 Recognition-only prediction](#python_识别)
-        - [2.2.3 Detection-only prediction](#python_检测)
 - [3 Custom training](#自定义训练)
+- [4 Inference and deployment](#预测部署)
 - [4 Supported languages and abbreviations](#语种缩写)

 <a name="安装"></a>

@@ -108,8 +105,6 @@ paddleocr --image_dir doc/imgs/japan_2.jpg --lang=japan
 paddleocr --image_dir doc/imgs_words/japan/1.jpg --det false --lang=japan
 ```

-![](https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.0/doc/imgs_words/japan/1.jpg)
-
 The result is a tuple containing the recognition result and its confidence

 ```text

@@ -145,6 +140,9 @@ from paddleocr import PaddleOCR, draw_ocr
 ocr = PaddleOCR(lang="korean")  # the model files are downloaded automatically on first run
 img_path = 'doc/imgs/korean_1.jpg '
 result = ocr.ocr(img_path)
+# recognition and detection can be run separately via parameters
+# result = ocr.ocr(img_path, det=False)  # recognition only
+# result = ocr.ocr(img_path, rec=False)  # detection only
 # print the detection boxes and recognition results
 for line in result:
     print(line)

@@ -166,59 +164,7 @@ im_show.save('result.jpg')
 <img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.1/doc/imgs_results/korean.jpg" width="800">
 </div>

-* Recognition-only prediction
-
-```
-from paddleocr import PaddleOCR
-ocr = PaddleOCR(lang="german")
-img_path = 'PaddleOCR/doc/imgs_words/german/1.jpg'
-result = ocr.ocr(img_path, det=False, cls=True)
-for line in result:
-    print(line)
-```
-
-![](../imgs_words/german/1.jpg)
-
-The result is a tuple containing only the recognition result and its confidence
-
-```
-('leider auch jetzt', 0.97538936)
-```
-
-* Detection-only prediction
-
-```python
-from paddleocr import PaddleOCR, draw_ocr
-ocr = PaddleOCR()  # need to run only once to download and load model into memory
-img_path = 'PaddleOCR/doc/imgs_en/img_12.jpg'
-result = ocr.ocr(img_path, rec=False)
-for line in result:
-    print(line)
-
-# show results
-from PIL import Image
-image = Image.open(img_path).convert('RGB')
-im_show = draw_ocr(image, result, txts=None, scores=None, font_path='/path/to/PaddleOCR/doc/fonts/simfang.ttf')
-im_show = Image.fromarray(im_show)
-im_show.save('result.jpg')
-```
-
-The result is a list; each item contains only a text box
-
-```bash
-[[26.0, 457.0], [137.0, 457.0], [137.0, 477.0], [26.0, 477.0]]
-[[25.0, 425.0], [372.0, 425.0], [372.0, 448.0], [25.0, 448.0]]
-[[128.0, 397.0], [273.0, 397.0], [273.0, 414.0], [128.0, 414.0]]
-......
-```
-
-Visualization of the results:
-<div align="center">
-<img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.1/doc/imgs_results/whl/12_det.jpg" width="800">
-</div>
-
 ppocr also supports direction classification; for more usage, see the [whl package instructions](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.0/doc/doc_ch/whl.md)

 <a name="自定义训练"></a>
 ## 3 Custom training

@@ -229,84 +175,58 @@ ppocr supports custom training or fine-tuning with your own data
 For data preparation and the training process, see [Text detection](../doc_ch/detection.md) and [Text recognition](../doc_ch/recognition.md); for more features such as inference deployment
 and data annotation, read the complete [documentation tutorial](../../README_ch.md)

+<a name="预测部署"></a>
+## 4 Inference and deployment
+
+Besides quick prediction through the installed whl package, ppocr provides several other inference deployment options; read the relevant docs as needed:
+- [Python inference engine](./doc/doc_ch/inference.md)
+- [C++ inference engine](./deploy/cpp_infer/readme.md)
+- [Serving deployment](./deploy/hubserving/readme.md)
+- [Mobile deployment](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/deploy/lite/readme.md)
+- [Benchmark](./doc/doc_ch/benchmark.md)
+
 <a name="语种缩写"></a>
-## 4 Supported languages and abbreviations
+## 5 Supported languages and abbreviations
-| 语种 | 描述 | 缩写 |
-| --- | --- | --- |
-|中文|chinese and english|ch|
-|英文|english|en|
-|法文|french|fr|
-|德文|german|german|
-|日文|japan|japan|
-|韩文|korean|korean|
-|中文繁体|chinese traditional |chinese_cht|
-|意大利文| Italian |it|
-|西班牙文|Spanish |es|
-|葡萄牙文| Portuguese|pt|
-|俄罗斯文|Russia|ru|
-|阿拉伯文|Arabic|ar|
-|印地文|Hindi|hi|
-|维吾尔|Uyghur|ug|
-|波斯文|Persian|fa|
-|乌尔都文|Urdu|ur|
-|塞尔维亚文(latin)| Serbian(latin) |rs_latin|
-|欧西坦文|Occitan |oc|
-|马拉地文|Marathi|mr|
-|尼泊尔文|Nepali|ne|
-|塞尔维亚文(cyrillic)|Serbian(cyrillic)|rs_cyrillic|
-|保加利亚文|Bulgarian |bg|
-|乌克兰文|Ukranian|uk|
-|白俄罗斯文|Belarusian|be|
-|泰卢固文|Telugu |te|
-|泰米尔文|Tamil |ta|
-|南非荷兰文 |Afrikaans |af|
-|阿塞拜疆文 |Azerbaijani |az|
-|波斯尼亚文|Bosnian|bs|
-|捷克文|Czech|cs|
-|威尔士文 |Welsh |cy|
-|丹麦文 |Danish|da|
-|爱沙尼亚文 |Estonian |et|
-|爱尔兰文 |Irish |ga|
-|克罗地亚文|Croatian |hr|
-|匈牙利文|Hungarian |hu|
-|印尼文|Indonesian|id|
-|冰岛文 |Icelandic|is|
-|库尔德文 |Kurdish|ku|
-|立陶宛文|Lithuanian |lt|
-|拉脱维亚文 |Latvian |lv|
-|毛利文|Maori|mi|
-|马来文 |Malay|ms|
-|马耳他文 |Maltese |mt|
-|荷兰文 |Dutch |nl|
-|挪威文 |Norwegian |no|
-|波兰文|Polish |pl|
-|罗马尼亚文|Romanian |ro|
-|斯洛伐克文|Slovak |sk|
-|斯洛文尼亚文|Slovenian |sl|
-|阿尔巴尼亚文|Albanian |sq|
-|瑞典文|Swedish |sv|
-|西瓦希里文|Swahili |sw|
-|塔加洛文|Tagalog |tl|
-|土耳其文|Turkish |tr|
-|乌兹别克文|Uzbek |uz|
-|越南文|Vietnamese |vi|
-|蒙古文|Mongolian |mn|
-|阿巴扎文|Abaza |abq|
-|阿迪赫文|Adyghe |ady|
-|卡巴丹文|Kabardian |kbd|
-|阿瓦尔文|Avar |ava|
-|达尔瓦文|Dargwa |dar|
-|因古什文|Ingush |inh|
-|拉克文|Lak |lbe|
-|莱兹甘文|Lezghian |lez|
-|塔巴萨兰文 |Tabassaran |tab|
-|比尔哈文|Bihari |bh|
-|迈蒂利文|Maithili |mai|
-|昂加文|Angika |ang|
-|孟加拉文|Bhojpuri |bho|
-|摩揭陀文 |Magahi |mah|
-|那格浦尔文|Nagpur |sck|
-|尼瓦尔文|Newari |new|
-|保加利亚文 |Goan Konkani|gom|
-|沙特阿拉伯文|Saudi Arabia|sa|
+| 语种 | 描述 | 缩写 | | 语种 | 描述 | 缩写 |
+| --- | --- | --- | ---|--- | --- | --- |
+|中文|chinese and english|ch| |保加利亚文|Bulgarian |bg|
+|英文|english|en| |乌克兰文|Ukranian|uk|
+|法文|french|fr| |白俄罗斯文|Belarusian|be|
+|德文|german|german| |泰卢固文|Telugu |te|
+|日文|japan|japan| |阿巴扎文|Abaza |abq|
+|韩文|korean|korean| |泰米尔文|Tamil |ta|
+|中文繁体|chinese traditional |ch_tra| |南非荷兰文 |Afrikaans |af|
+|意大利文| Italian |it| |阿塞拜疆文 |Azerbaijani |az|
+|西班牙文|Spanish |es| |波斯尼亚文|Bosnian|bs|
+|葡萄牙文| Portuguese|pt| |捷克文|Czech|cs|
+|俄罗斯文|Russia|ru| |威尔士文 |Welsh |cy|
+|阿拉伯文|Arabic|ar| |丹麦文 |Danish|da|
+|印地文|Hindi|hi| |爱沙尼亚文 |Estonian |et|
+|维吾尔|Uyghur|ug| |爱尔兰文 |Irish |ga|
+|波斯文|Persian|fa| |克罗地亚文|Croatian |hr|
+|乌尔都文|Urdu|ur| |匈牙利文|Hungarian |hu|
+|塞尔维亚文(latin)| Serbian(latin) |rs_latin| |印尼文|Indonesian|id|
+|欧西坦文|Occitan |oc| |冰岛文 |Icelandic|is|
+|马拉地文|Marathi|mr| |库尔德文 |Kurdish|ku|
+|尼泊尔文|Nepali|ne| |立陶宛文|Lithuanian |lt|
+|塞尔维亚文(cyrillic)|Serbian(cyrillic)|rs_cyrillic| |拉脱维亚文 |Latvian |lv|
+|毛利文|Maori|mi| |达尔瓦文|Dargwa |dar|
+|马来文 |Malay|ms| |因古什文|Ingush |inh|
+|马耳他文 |Maltese |mt| |拉克文|Lak |lbe|
+|荷兰文 |Dutch |nl| |莱兹甘文|Lezghian |lez|
+|挪威文 |Norwegian |no| |塔巴萨兰文 |Tabassaran |tab|
+|波兰文|Polish |pl| |比尔哈文|Bihari |bh|
+|罗马尼亚文|Romanian |ro| |迈蒂利文|Maithili |mai|
+|斯洛伐克文|Slovak |sk| |昂加文|Angika |ang|
+|斯洛文尼亚文|Slovenian |sl| |孟加拉文|Bhojpuri |bho|
+|阿尔巴尼亚文|Albanian |sq| |摩揭陀文 |Magahi |mah|
+|瑞典文|Swedish |sv| |那格浦尔文|Nagpur |sck|
+|西瓦希里文|Swahili |sw| |尼瓦尔文|Newari |new|
+|塔加洛文|Tagalog |tl| |保加利亚文 |Goan Konkani|gom|
+|土耳其文|Turkish |tr| |沙特阿拉伯文|Saudi Arabia|sa|
+|乌兹别克文|Uzbek |uz| |阿瓦尔文|Avar |ava|
+|越南文|Vietnamese |vi| |阿瓦尔文|Avar |ava|
+|蒙古文|Mongolian |mn| |阿迪赫文|Adyghe |ady|
@@ -30,6 +30,7 @@ For details of the PGNet algorithm, see the [paper](https://www.aaai.org/AAAI21Papers/AAAI-2885.Wang
 Test set: Total-Text

 Test environment: NVIDIA Tesla V100-SXM2-16GB

 |PGNetA|det_precision|det_recall|det_f_score|e2e_precision|e2e_recall|e2e_f_score|FPS|Download|
 | --- | --- | --- | --- | --- | --- | --- | --- | --- |
 |Paper|85.30|86.80|86.1|-|-|61.7|38.20 (size=640)|-|

@@ -93,10 +94,12 @@ The total_text.txt annotation file format is as follows; the file name and the annotation are separated by "\t":
 " Image file name             Image annotation information encoded by json.dumps"
 rgb/gt_0.png    [{"transcription": "EST", "points": [[1004.0,689.0],[1019.0,698.0],[1034.0,708.0],[1049.0,718.0],[1064.0,728.0],[1079.0,738.0],[1095.0,748.0],[1094.0,774.0],[1079.0,765.0],[1065.0,756.0],[1050.0,747.0],[1036.0,738.0],[1021.0,729.0],[1007.0,721.0]]}, {...}]
 ```

 Before json.dumps encoding, the annotation is a list of dictionaries; `points` holds the coordinates (x, y) of the four vertices of the text box, arranged clockwise starting from the top-left vertex.
 `transcription` is the text of the current text box. **When its content is "###", the text box is invalid and is skipped during training.**
 To train on other datasets, build the annotation file in the same format.

+*PGNet accepts annotations with an arbitrary number of points, but the labeling must be uniform (symmetric top to bottom, evenly spaced left to right). In our experiments, fourteen-point annotation trains better than four-point annotation; two-stage training, first on four-point and then on fourteen-point labels, is worth trying.*
+
 ### Start training

 PGNet training has two steps. Step 1: train on synthetic data to obtain a pretrained model (accuracy is still low at this stage). Step 2: load the pretrained model and train on the Total-Text dataset. To speed this up, we directly provide the pretrained model from step 1.
......
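A minimal sketch of reading one line in the annotation format described above; the helper name and the shortened sample line are invented for illustration:

```python
import json

# Minimal sketch: parse one "<image file name>\t<json.dumps-encoded annotation>" line.
# The helper name and the shortened sample are invented for illustration.
def parse_label_line(line):
    file_name, label = line.rstrip('\n').split('\t')
    boxes, texts, ignore_tags = [], [], []
    for item in json.loads(label):
        boxes.append(item['points'])          # (x, y) vertices, clockwise from top-left
        texts.append(item['transcription'])   # text content of the box
        ignore_tags.append(item['transcription'] == '###')  # invalid box, skipped in training
    return file_name, boxes, texts, ignore_tags

sample = 'rgb/gt_0.png\t[{"transcription": "EST", "points": [[1004.0, 689.0], [1019.0, 698.0]]}]'
print(parse_label_line(sample))
```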
@@ -102,14 +102,14 @@ python3 generate_multi_language_configs.py -l it \
 | german_mobile_v2.0_rec |Lightweight model for German recognition|[rec_german_lite_train.yml](../../configs/rec/multi_language/rec_german_lite_train.yml)|2.65M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_train.tar) |
 | korean_mobile_v2.0_rec |Lightweight model for Korean recognition|[rec_korean_lite_train.yml](../../configs/rec/multi_language/rec_korean_lite_train.yml)|3.9M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_train.tar) |
 | japan_mobile_v2.0_rec |Lightweight model for Japanese recognition|[rec_japan_lite_train.yml](../../configs/rec/multi_language/rec_japan_lite_train.yml)|4.23M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_train.tar) |
-| chinese_cht_mobile_v2.0_rec |Lightweight model for chinese cht recognition|rec_chinese_cht_lite_train.yml|5.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_train.tar) |
+| chinese_cht_mobile_v2.0_rec |Lightweight model for chinese cht recognition|rec_chinese_cht_lite_train.yml|5.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_train.tar) |
 | te_mobile_v2.0_rec |Lightweight model for Telugu recognition|rec_te_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_train.tar) |
 | ka_mobile_v2.0_rec |Lightweight model for Kannada recognition|rec_ka_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_train.tar) |
 | ta_mobile_v2.0_rec |Lightweight model for Tamil recognition|rec_ta_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_train.tar) |
-| latin_mobile_v2.0_rec | Lightweight model for latin recognition | [rec_latin_lite_train.yml](../../configs/rec/multi_language/rec_latin_lite_train.yml) |2.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/latin_ppocr_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/latin_ppocr_mobile_v2.0_rec_train.tar) |
+| latin_mobile_v2.0_rec | Lightweight model for latin recognition | [rec_latin_lite_train.yml](../../configs/rec/multi_language/rec_latin_lite_train.yml) |2.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/latin_ppocr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/latin_ppocr_mobile_v2.0_rec_train.tar) |
-| arabic_mobile_v2.0_rec | Lightweight model for arabic recognition | [rec_arabic_lite_train.yml](../../configs/rec/multi_language/rec_arabic_lite_train.yml) |2.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/arabic_ppocr_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/arabic_ppocr_mobile_v2.0_rec_train.tar) |
+| arabic_mobile_v2.0_rec | Lightweight model for arabic recognition | [rec_arabic_lite_train.yml](../../configs/rec/multi_language/rec_arabic_lite_train.yml) |2.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/arabic_ppocr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/arabic_ppocr_mobile_v2.0_rec_train.tar) |
-| cyrillic_mobile_v2.0_rec | Lightweight model for cyrillic recognition | [rec_cyrillic_lite_train.yml](../../configs/rec/multi_language/rec_cyrillic_lite_train.yml) |2.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_train.tar) |
+| cyrillic_mobile_v2.0_rec | Lightweight model for cyrillic recognition | [rec_cyrillic_lite_train.yml](../../configs/rec/multi_language/rec_cyrillic_lite_train.yml) |2.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_train.tar) |
-| devanagari_mobile_v2.0_rec | Lightweight model for devanagari recognition | [rec_devanagari_lite_train.yml](../../configs/rec/multi_language/rec_devanagari_lite_train.yml) |2.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_train.tar) |
+| devanagari_mobile_v2.0_rec | Lightweight model for devanagari recognition | [rec_devanagari_lite_train.yml](../../configs/rec/multi_language/rec_devanagari_lite_train.yml) |2.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_train.tar) |

 For more supported languages, please refer to: [Multi-language model](./multi_languages_en.md)
......
@@ -13,7 +13,7 @@ Among them, the English model supports the detection and recognition of uppercase
 letters and common punctuation, and the recognition of space characters is optimized:

 <div align="center">
-<img src="../imgs_results/multi_lang/en_1.jpg" width="400" height="600">
+<img src="../imgs_results/multi_lang/img_12.jpg" width="900" height="300">
 </div>

 The multilingual models cover Latin, Arabic, Traditional Chinese, Korean, Japanese, etc.:

@@ -21,6 +21,8 @@ The multilingual models cover Latin, Arabic, Traditional Chinese, Korean, Japanese, etc.:
 <div align="center">
 <img src="../imgs_results/multi_lang/japan_2.jpg" width="600" height="300">
 <img src="../imgs_results/multi_lang/french_0.jpg" width="300" height="300">
+<img src="../imgs_results/multi_lang/korean_0.jpg" width="500" height="300">
+<img src="../imgs_results/multi_lang/arabic_0.jpg" width="300" height="300">
 </div>

 This document will briefly introduce how to use the multilingual model.

@@ -31,14 +33,9 @@ This document will briefly introduce how to use the multilingual model.
 - [2 Quick Use](#Quick_Use)
     - [2.1 Command line operation](#Command_line_operation)
-        - [2.1.1 Prediction of the whole image](#bash_detection+recognition)
-        - [2.1.2 Recognition](#bash_Recognition)
-        - [2.1.3 Detection](#bash_detection)
     - [2.2 python script running](#python_Script_running)
-        - [2.2.1 Whole image prediction](#python_detection+recognition)
-        - [2.2.2 Recognition](#python_Recognition)
-        - [2.2.3 Detection](#python_detection)
 - [3 Custom Training](#Custom_Training)
+- [4 Inference and Deployment](#inference)
 - [4 Supported languages and abbreviations](#language_abbreviations)

 <a name="Install"></a>
@@ -143,6 +140,9 @@ from paddleocr import PaddleOCR, draw_ocr
 ocr = PaddleOCR(lang="korean")  # The model file will be downloaded automatically when executed for the first time
 img_path = 'doc/imgs/korean_1.jpg'
 result = ocr.ocr(img_path)
+# Recognition and detection can be performed separately through parameter control
+# result = ocr.ocr(img_path, det=False)  # Only perform recognition
+# result = ocr.ocr(img_path, rec=False)  # Only perform detection
 # Print detection frame and recognition result
 for line in result:
     print(line)
@@ -162,54 +162,6 @@ Visualization of results:
 ![](https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.1/doc/imgs_results/korean.jpg)

-* Recognition
-
-```
-from paddleocr import PaddleOCR
-ocr = PaddleOCR(lang="german")
-img_path = 'PaddleOCR/doc/imgs_words/german/1.jpg'
-result = ocr.ocr(img_path, det=False, cls=True)
-for line in result:
-    print(line)
-```
-
-![](https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.1/doc/imgs_words/german/1.jpg)
-
-The result is a tuple, which only contains the recognition result and recognition confidence
-
-```
-('leider auch jetzt', 0.97538936)
-```
-
-* Detection
-
-```python
-from paddleocr import PaddleOCR, draw_ocr
-ocr = PaddleOCR()  # need to run only once to download and load model into memory
-img_path = 'PaddleOCR/doc/imgs_en/img_12.jpg'
-result = ocr.ocr(img_path, rec=False)
-for line in result:
-    print(line)
-
-# show result
-from PIL import Image
-image = Image.open(img_path).convert('RGB')
-im_show = draw_ocr(image, result, txts=None, scores=None, font_path='/path/to/PaddleOCR/doc/fonts/simfang.ttf')
-im_show = Image.fromarray(im_show)
-im_show.save('result.jpg')
-```
-
-The result is a list, each item contains only text boxes
-
-```bash
-[[26.0, 457.0], [137.0, 457.0], [137.0, 477.0], [26.0, 477.0]]
-[[25.0, 425.0], [372.0, 425.0], [372.0, 448.0], [25.0, 448.0]]
-[[128.0, 397.0], [273.0, 397.0], [273.0, 414.0], [128.0, 414.0]]
-......
-```
-
-Visualization of results:
-
-![](https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.1/doc/imgs_results/whl/12_det.jpg)
-
 ppocr also supports direction classification. For more usage methods, please refer to: [whl package instructions](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.0/doc/doc_ch/whl.md).

 <a name="Custom_training"></a>
@@ -221,84 +173,61 @@ Modify the training data path, dictionary and other parameters.
 For specific data preparation and training process, please refer to: [Text Detection](../doc_en/detection_en.md), [Text Recognition](../doc_en/recognition_en.md); for more functions such as predictive deployment
 and data annotation, you can read the complete [Document Tutorial](../../README.md).

-<a name="language_abbreviation"></a>
-## 4 Support languages and abbreviations
-
-| Language | Abbreviation |
-| --- | --- |
-|chinese and english|ch|
-|english|en|
-|french|fr|
-|german|german|
-|japan|japan|
-|korean|korean|
-|chinese traditional |chinese_cht|
-| Italian |it|
-|Spanish |es|
-| Portuguese|pt|
-|Russia|ru|
-|Arabic|ar|
-|Hindi|hi|
-|Uyghur|ug|
-|Persian|fa|
-|Urdu|ur|
-| Serbian(latin) |rs_latin|
-|Occitan |oc|
-|Marathi|mr|
-|Nepali|ne|
-|Serbian(cyrillic)|rs_cyrillic|
-|Bulgarian |bg|
-|Ukranian|uk|
-|Belarusian|be|
-|Telugu |te|
-|Tamil |ta|
-|Afrikaans |af|
-|Azerbaijani |az|
-|Bosnian|bs|
-|Czech|cs|
-|Welsh |cy|
-|Danish|da|
-|Estonian |et|
-|Irish |ga|
-|Croatian |hr|
-|Hungarian |hu|
-|Indonesian|id|
-|Icelandic|is|
-|Kurdish|ku|
-|Lithuanian |lt|
-|Latvian |lv|
-|Maori|mi|
-|Malay|ms|
-|Maltese |mt|
-|Dutch |nl|
-|Norwegian |no|
-|Polish |pl|
-|Romanian |ro|
-|Slovak |sk|
-|Slovenian |sl|
-|Albanian |sq|
-|Swedish |sv|
-|Swahili |sw|
-|Tagalog |tl|
-|Turkish |tr|
-|Uzbek |uz|
-|Vietnamese |vi|
-|Mongolian |mn|
-|Abaza |abq|
-|Adyghe |ady|
-|Kabardian |kbd|
-|Avar |ava|
-|Dargwa |dar|
-|Ingush |inh|
-|Lak |lbe|
-|Lezghian |lez|
-|Tabassaran |tab|
-|Bihari |bh|
-|Maithili |mai|
-|Angika |ang|
-|Bhojpuri |bho|
-|Magahi |mah|
-|Nagpur |sck|
-|Newari |new|
-|Goan Konkani|gom|
-|Saudi Arabia|sa|
+<a name="inference"></a>
+## 4 Inference and Deployment
+
+In addition to installing the whl package for quick forecasting,
+ppocr also provides a variety of forecasting deployment methods.
+If necessary, you can read related documents:
+
+- [Python Inference](./doc/doc_en/inference_en.md)
+- [C++ Inference](./deploy/cpp_infer/readme_en.md)
+- [Serving](./deploy/hubserving/readme_en.md)
+- [Mobile](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/deploy/lite/readme_en.md)
+- [Benchmark](./doc/doc_en/benchmark_en.md)
+
+<a name="language_abbreviations"></a>
+## 5 Support languages and abbreviations
+
+| Language | Abbreviation | | Language | Abbreviation |
+| --- | --- | --- | --- | --- |
+|chinese and english|ch| |Arabic|ar|
+|english|en| |Hindi|hi|
+|french|fr| |Uyghur|ug|
+|german|german| |Persian|fa|
+|japan|japan| |Urdu|ur|
+|korean|korean| | Serbian(latin) |rs_latin|
+|chinese traditional |ch_tra| |Occitan |oc|
+| Italian |it| |Marathi|mr|
+|Spanish |es| |Nepali|ne|
+| Portuguese|pt| |Serbian(cyrillic)|rs_cyrillic|
+|Russia|ru| |Bulgarian |bg|
+|Ukranian|uk| |Estonian |et|
+|Belarusian|be| |Irish |ga|
+|Telugu |te| |Croatian |hr|
+|Saudi Arabia|sa| |Hungarian |hu|
+|Tamil |ta| |Indonesian|id|
+|Afrikaans |af| |Icelandic|is|
+|Azerbaijani |az| |Kurdish|ku|
+|Bosnian|bs| |Lithuanian |lt|
+|Czech|cs| |Latvian |lv|
+|Welsh |cy| |Maori|mi|
+|Danish|da| |Malay|ms|
+|Maltese |mt| |Adyghe |ady|
+|Dutch |nl| |Kabardian |kbd|
+|Norwegian |no| |Avar |ava|
+|Polish |pl| |Dargwa |dar|
+|Romanian |ro| |Ingush |inh|
+|Slovak |sk| |Lak |lbe|
+|Slovenian |sl| |Lezghian |lez|
+|Albanian |sq| |Tabassaran |tab|
+|Swedish |sv| |Bihari |bh|
+|Swahili |sw| |Maithili |mai|
+|Tagalog |tl| |Angika |ang|
+|Turkish |tr| |Bhojpuri |bho|
+|Uzbek |uz| |Magahi |mah|
+|Vietnamese |vi| |Nagpur |sck|
+|Mongolian |mn| |Newari |new|
+|Abaza |abq| |Goan Konkani|gom|
@@ -93,12 +93,14 @@ rgb/gt_0.png [{"transcription": "EST", "points": [[1004.0,689.0],[1019.0,698.
 ```
 The image annotation after **json.dumps()** encoding is a list containing multiple dictionaries.

-The `points` in the dictionary represent the coordinates (x, y) of the four points of the text box, arranged clockwise from the point at the upper left corner.
+The `points` in the dictionary represent the coordinates (x, y) of the fourteen points of the text box, arranged clockwise from the point at the upper left corner.
 `transcription` represents the text of the current text box. **When its content is "###" it means that the text box is invalid and will be skipped during training.**
 If you want to train PaddleOCR on other datasets, please build the annotation file according to the above format.

+*PGNet supports data input with any number of points, but the labeling needs to be uniform (symmetric top to bottom, evenly spaced left to right). In our experiments, fourteen-point labeling trains better than four-point labeling; you can try two-stage training on four-point and then fourteen-point labels.*
+
 ### Start Training
......
@@ -187,29 +187,31 @@ class CTCLabelEncode(BaseRecLabelEncode):
         return dict_character


-class E2ELabelEncode(BaseRecLabelEncode):
-    def __init__(self,
-                 max_text_length,
-                 character_dict_path=None,
-                 character_type='EN',
-                 use_space_char=False,
-                 **kwargs):
-        super(E2ELabelEncode,
-              self).__init__(max_text_length, character_dict_path,
-                             character_type, use_space_char)
-        self.pad_num = len(self.dict)  # the length to pad
+class E2ELabelEncode(object):
+    def __init__(self, **kwargs):
+        pass

     def __call__(self, data):
-        texts = data['strs']
-        temp_texts = []
-        for text in texts:
-            text = text.lower()
-            text = self.encode(text)
-            if text is None:
-                return None
-            text = text + [self.pad_num] * (self.max_text_len - len(text))
-            temp_texts.append(text)
-        data['strs'] = np.array(temp_texts)
+        import json
+        label = data['label']
+        label = json.loads(label)
+        nBox = len(label)
+        boxes, txts, txt_tags = [], [], []
+        for bno in range(0, nBox):
+            box = label[bno]['points']
+            txt = label[bno]['transcription']
+            boxes.append(box)
+            txts.append(txt)
+            if txt in ['*', '###']:
+                txt_tags.append(True)
+            else:
+                txt_tags.append(False)
+        boxes = np.array(boxes, dtype=np.float32)
+        txt_tags = np.array(txt_tags, dtype=np.bool)
+        data['polys'] = boxes
+        data['texts'] = txts
+        data['ignore_tags'] = txt_tags
         return data
......
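A usage sketch for the rewritten encoder, assuming the E2ELabelEncode class from the hunk above is in scope; the sample label is invented, and note that `np.bool` in the hunk requires an older NumPy (< 1.24):

```python
import json
import numpy as np

# Usage sketch: feed E2ELabelEncode a sample dict shaped like PGDataSet output.
# Assumes the E2ELabelEncode class from the hunk above is in scope; the label is invented.
encode = E2ELabelEncode()
data = {'label': json.dumps([
    {'transcription': 'EST', 'points': [[10, 10], [50, 10], [50, 30], [10, 30]]},
    {'transcription': '###', 'points': [[60, 10], [90, 10], [90, 30], [60, 30]]},
])}
out = encode(data)
print(out['texts'])        # ['EST', '###']
print(out['ignore_tags'])  # [False  True] -- "###" marks an invalid box
print(out['polys'].shape)  # (2, 4, 2)
```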
@@ -88,7 +88,7 @@ class PGProcessTrain(object):
         return min_area_quad

-    def check_and_validate_polys(self, polys, tags, xxx_todo_changeme):
+    def check_and_validate_polys(self, polys, tags, im_size):
         """
         check so that the text poly is in the same direction,
         and also filter some invalid polygons

@@ -96,7 +96,7 @@ class PGProcessTrain(object):
         :param tags:
         :return:
         """
-        (h, w) = xxx_todo_changeme
+        (h, w) = im_size
         if polys.shape[0] == 0:
             return polys, np.array([]), np.array([])
         polys[:, :, 0] = np.clip(polys[:, :, 0], 0, w - 1)

@@ -750,8 +750,8 @@ class PGProcessTrain(object):
         input_size = 512
         im = data['image']
         text_polys = data['polys']
-        text_tags = data['tags']
-        text_strs = data['strs']
+        text_tags = data['ignore_tags']
+        text_strs = data['texts']
         h, w, _ = im.shape
         text_polys, text_tags, hv_tags = self.check_and_validate_polys(
             text_polys, text_tags, (h, w))
......
@@ -29,20 +29,20 @@ class PGDataSet(Dataset):
         dataset_config = config[mode]['dataset']
         loader_config = config[mode]['loader']

+        self.delimiter = dataset_config.get('delimiter', '\t')
         label_file_list = dataset_config.pop('label_file_list')
         data_source_num = len(label_file_list)
         ratio_list = dataset_config.get("ratio_list", [1.0])
         if isinstance(ratio_list, (float, int)):
             ratio_list = [float(ratio_list)] * int(data_source_num)
-        self.data_format = dataset_config.get('data_format', 'icdar')
         assert len(
             ratio_list
         ) == data_source_num, "The length of ratio_list should be the same as the file_list."
+        self.data_dir = dataset_config['data_dir']
         self.do_shuffle = loader_config['shuffle']

         logger.info("Initialize indexs of datasets:%s" % label_file_list)
-        self.data_lines = self.get_image_info_list(label_file_list, ratio_list,
-                                                   self.data_format)
+        self.data_lines = self.get_image_info_list(label_file_list, ratio_list)
         self.data_idx_order_list = list(range(len(self.data_lines)))
         if mode.lower() == "train":
             self.shuffle_data_random()

@@ -55,108 +55,40 @@ class PGDataSet(Dataset):
         random.shuffle(self.data_lines)
         return

-    def extract_polys(self, poly_txt_path):
-        """
-        Read text_polys, txt_tags, txts from give txt file.
-        """
-        text_polys, txt_tags, txts = [], [], []
-        with open(poly_txt_path) as f:
-            for line in f.readlines():
-                poly_str, txt = line.strip().split('\t')
-                poly = list(map(float, poly_str.split(',')))
-                text_polys.append(
-                    np.array(
-                        poly, dtype=np.float32).reshape(-1, 2))
-                txts.append(txt)
-                txt_tags.append(txt == '###')
-
-        return np.array(list(map(np.array, text_polys))), \
-            np.array(txt_tags, dtype=np.bool), txts
-
-    def extract_info_textnet(self, im_fn, img_dir=''):
-        """
-        Extract information from line in textnet format.
-        """
-        info_list = im_fn.split('\t')
-        img_path = ''
-        for ext in [
-                'jpg', 'bmp', 'png', 'jpeg', 'rgb', 'tif', 'tiff', 'gif', 'JPG'
-        ]:
-            if os.path.exists(os.path.join(img_dir, info_list[0] + "." + ext)):
-                img_path = os.path.join(img_dir, info_list[0] + "." + ext)
-                break
-        if img_path == '':
-            print('Image {0} NOT found in {1}, and it will be ignored.'.format(
-                info_list[0], img_dir))
-
-        nBox = (len(info_list) - 1) // 9
-        wordBBs, txts, txt_tags = [], [], []
-        for n in range(0, nBox):
-            wordBB = list(map(float, info_list[n * 9 + 1:(n + 1) * 9]))
-            txt = info_list[(n + 1) * 9]
-            wordBBs.append([[wordBB[0], wordBB[1]], [wordBB[2], wordBB[3]],
-                            [wordBB[4], wordBB[5]], [wordBB[6], wordBB[7]]])
-            txts.append(txt)
-            if txt == '###':
-                txt_tags.append(True)
-            else:
-                txt_tags.append(False)
-        return img_path, np.array(wordBBs, dtype=np.float32), txt_tags, txts
-
-    def get_image_info_list(self, file_list, ratio_list, data_format='textnet'):
+    def get_image_info_list(self, file_list, ratio_list):
         if isinstance(file_list, str):
             file_list = [file_list]
         data_lines = []
-        for idx, data_source in enumerate(file_list):
-            image_files = []
-            if data_format == 'icdar':
-                image_files = [(data_source, x) for x in
-                               os.listdir(os.path.join(data_source, 'rgb'))
-                               if x.split('.')[-1] in [
-                                   'jpg', 'bmp', 'png', 'jpeg', 'rgb', 'tif',
-                                   'tiff', 'gif', 'JPG'
-                               ]]
-            elif data_format == 'textnet':
-                with open(data_source) as f:
-                    image_files = [(data_source, x.strip())
-                                   for x in f.readlines()]
-            else:
-                print("Unrecognized data format...")
-                exit(-1)
-            random.seed(self.seed)
-            image_files = random.sample(
-                image_files, round(len(image_files) * ratio_list[idx]))
-            data_lines.extend(image_files)
+        for idx, file in enumerate(file_list):
+            with open(file, "rb") as f:
+                lines = f.readlines()
+                if self.mode == "train" or ratio_list[idx] < 1.0:
+                    random.seed(self.seed)
+                    lines = random.sample(lines,
+                                          round(len(lines) * ratio_list[idx]))
+                data_lines.extend(lines)
         return data_lines

     def __getitem__(self, idx):
         file_idx = self.data_idx_order_list[idx]
-        data_path, data_line = self.data_lines[file_idx]
+        data_line = self.data_lines[file_idx]
         try:
-            if self.data_format == 'icdar':
-                im_path = os.path.join(data_path, 'rgb', data_line)
-                poly_path = os.path.join(data_path, 'poly',
-                                         data_line.split('.')[0] + '.txt')
-                text_polys, text_tags, text_strs = self.extract_polys(poly_path)
-            else:
-                image_dir = os.path.join(os.path.dirname(data_path), 'image')
-                im_path, text_polys, text_tags, text_strs = self.extract_info_textnet(
-                    data_line, image_dir)
-            img_id = int(data_line.split(".")[0][3:])
-            data = {
-                'img_path': im_path,
-                'polys': text_polys,
-                'tags': text_tags,
-                'strs': text_strs,
-                'img_id': img_id
-            }
+            data_line = data_line.decode('utf-8')
+            substr = data_line.strip("\n").split(self.delimiter)
+            file_name = substr[0]
+            label = substr[1]
+            img_path = os.path.join(self.data_dir, file_name)
+            if self.mode.lower() == 'eval':
+                img_id = int(data_line.split(".")[0][7:])
+            else:
+                img_id = 0
+            data = {'img_path': img_path, 'label': label, 'img_id': img_id}
+            if not os.path.exists(img_path):
+                raise Exception("{} does not exist!".format(img_path))
             with open(data['img_path'], 'rb') as f:
                 img = f.read()
                 data['image'] = img
             outs = transform(data, self.ops)
         except Exception as e:
             self.logger.error(
                 "When parsing line {}, error happened with msg: {}".format(
......
@@ -102,13 +102,11 @@ class PGLoss(nn.Layer):
         f_tcl_char_ld = paddle.transpose(f_tcl_char_mask, (1, 0, 2))
         N, B, _ = f_tcl_char_ld.shape
         input_lengths = paddle.to_tensor([N] * B, dtype='int64')
-        cost = paddle.nn.functional.ctc_loss(
-            log_probs=f_tcl_char_ld,
-            labels=tcl_label,
-            input_lengths=input_lengths,
-            label_lengths=label_t,
-            blank=self.pad_num,
-            reduction='none')
+        loss_out = paddle.fluid.layers.warpctc(f_tcl_char_ld, tcl_label,
+                                               self.pad_num, True,
+                                               input_lengths, label_t)
+        cost = paddle.fluid.layers.squeeze(loss_out, [-1])
         cost = cost.mean()
         return cost
......
@@ -35,11 +35,11 @@ class E2EMetric(object):
         self.reset()

     def __call__(self, preds, batch, **kwargs):
-        img_id = batch[5][0]
+        img_id = batch[2][0]
         e2e_info_list = [{
             'points': det_polyon,
-            'text': pred_str
-        } for det_polyon, pred_str in zip(preds['points'], preds['strs'])]
+            'texts': pred_str
+        } for det_polyon, pred_str in zip(preds['points'], preds['texts'])]

         result = get_socre(self.gt_mat_dir, img_id, e2e_info_list)
         self.results.append(result)
......
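The new batch index lines up with the Eval `keep_keys: ['image', 'shape', 'img_id']` list from the config change above; a sketch of the assumed correspondence, with dummy arrays standing in for a real collated batch:

```python
import numpy as np

# Sketch (assumption): the eval dataloader collates fields in keep_keys order,
# so keep_keys: ['image', 'shape', 'img_id'] yields batch[0]=image, batch[1]=shape,
# batch[2]=img_id -- which is why E2EMetric now reads batch[2][0].
batch = [np.zeros((1, 3, 768, 768)),         # image  (dummy)
         np.array([[768, 768, 1.0, 1.0]]),   # shape  (dummy)
         np.array([7])]                      # img_id (dummy)
img_id = batch[2][0]
print(img_id)  # 7
```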
@@ -26,7 +26,7 @@ def get_socre(gt_dir, img_id, pred_dict):
     n = len(pred_dict)
     for i in range(n):
         points = pred_dict[i]['points']
-        text = pred_dict[i]['text']
+        text = pred_dict[i]['texts']
         point = ",".join(map(str, points.reshape(-1, )))
         det.append([point, text])
     return det
......
@@ -342,6 +342,7 @@ def generate_pivot_list_curved(p_score,
     center_pos_yxs = []
     end_points_yxs = []
     instance_center_pos_yxs = []
+    pred_strs = []
     if instance_count > 0:
         for instance_id in range(1, instance_count):
             pos_list = []

@@ -367,12 +368,13 @@ def generate_pivot_list_curved(p_score,
         if is_backbone:
             keep_yxs_list_with_id = add_id(keep_yxs_list, image_id=image_id)
             instance_center_pos_yxs.append(keep_yxs_list_with_id)
+            pred_strs.append(decoded_str)
         else:
             end_points_yxs.extend((keep_yxs_list[0], keep_yxs_list[-1]))
             center_pos_yxs.extend(keep_yxs_list)

     if is_backbone:
-        return instance_center_pos_yxs
+        return pred_strs, instance_center_pos_yxs
     else:
         return center_pos_yxs, end_points_yxs
......
@@ -64,7 +64,7 @@ class PGNet_PostProcess(object):
             src_w, src_h, self.valid_set)
         data = {
             'points': poly_list,
-            'strs': keep_str_list,
+            'texts': keep_str_list,
         }
         return data

@@ -85,32 +85,13 @@ class PGNet_PostProcess(object):
         p_char = p_char[0]
         src_h, src_w, ratio_h, ratio_w = self.shape_list[0]
         is_curved = self.valid_set == "totaltext"
-        instance_yxs_list = generate_pivot_list_slow(
+        char_seq_idx_set, instance_yxs_list = generate_pivot_list_slow(
             p_score,
             p_char,
             p_direction,
             score_thresh=self.score_thresh,
             is_backbone=True,
             is_curved=is_curved)
-        p_char = paddle.to_tensor(np.expand_dims(p_char, axis=0))
-        char_seq_idx_set = []
-        for i in range(len(instance_yxs_list)):
-            gather_info_lod = paddle.to_tensor(instance_yxs_list[i])
-            f_char_map = paddle.transpose(p_char, [0, 2, 3, 1])
-            feature_seq = paddle.gather_nd(f_char_map, gather_info_lod)
-            feature_seq = np.expand_dims(feature_seq.numpy(), axis=0)
-            feature_len = [len(feature_seq[0])]
-            featyre_seq = paddle.to_tensor(feature_seq)
-            feature_len = np.array([feature_len]).astype(np.int64)
-            length = paddle.to_tensor(feature_len)
-            seq_pred = paddle.fluid.layers.ctc_greedy_decoder(
-                input=featyre_seq, blank=36, input_length=length)
-            seq_pred_str = seq_pred[0].numpy().tolist()[0]
-            seq_len = seq_pred[1].numpy()[0][0]
-            temp_t = []
-            for c in seq_pred_str[:seq_len]:
-                temp_t.append(c)
-            char_seq_idx_set.append(temp_t)
         seq_strs = []
         for char_idx_set in char_seq_idx_set:
             pr_str = ''.join([self.Lexicon_Table[pos] for pos in char_idx_set])

@@ -176,6 +157,6 @@ class PGNet_PostProcess(object):
             exit(-1)
         data = {
             'points': poly_list,
-            'strs': keep_str_list,
+            'texts': keep_str_list,
         }
         return data
@@ -122,7 +122,7 @@ class TextE2E(object):
         else:
             raise NotImplementedError
         post_result = self.postprocess_op(preds, shape_list)
-        points, strs = post_result['points'], post_result['strs']
+        points, strs = post_result['points'], post_result['texts']
         dt_boxes = self.filter_tag_det_res_only_clip(points, ori_im.shape)
         elapse = time.time() - starttime
         return dt_boxes, strs, elapse
......
@@ -103,7 +103,7 @@ def main():
         images = paddle.to_tensor(images)
         preds = model(images)
         post_result = post_process_class(preds, shape_list)
-        points, strs = post_result['points'], post_result['strs']
+        points, strs = post_result['points'], post_result['texts']
         # write result
         dt_boxes_json = []
         for poly, str in zip(points, strs):
......