提交 6fb61526 编写于 作者: qq_25193841's avatar qq_25193841

Merge remote-tracking branch 'upstream/dygraph' into dy1

......@@ -8,7 +8,7 @@ PaddleOCR同时支持动态图与静态图两种编程范式
- 静态图版本:develop分支
**近期更新**
- 2021.1.11 [FAQ](./doc/doc_ch/FAQ.md)新增5个高频问题,总数147个,每周一都会更新,欢迎大家持续关注。
- 2021.1.18 [FAQ](./doc/doc_ch/FAQ.md)新增5个高频问题,总数152个,每周一都会更新,欢迎大家持续关注。
- 2020.12.15 更新数据合成工具[Style-Text](./StyleText/README_ch.md),可以批量合成大量与目标场景类似的图像,在多个场景验证,效果明显提升。
- 2020.11.25 更新半自动标注工具[PPOCRLabel](./PPOCRLabel/README_ch.md),辅助开发者高效完成标注任务,输出格式与PP-OCR训练任务完美衔接。
- 2020.9.22 更新PP-OCR技术文章,https://arxiv.org/abs/2009.09941
......@@ -101,8 +101,8 @@ PaddleOCR同时支持动态图与静态图两种编程范式
- [效果展示](#效果展示)
- FAQ
- [【精选】OCR精选10个问题](./doc/doc_ch/FAQ.md)
- [【理论篇】OCR通用31个问题](./doc/doc_ch/FAQ.md)
- [【实战篇】PaddleOCR实战106个问题](./doc/doc_ch/FAQ.md)
- [【理论篇】OCR通用32个问题](./doc/doc_ch/FAQ.md)
- [【实战篇】PaddleOCR实战110个问题](./doc/doc_ch/FAQ.md)
- [技术交流群](#欢迎加入PaddleOCR技术交流群)
- [参考文献](./doc/doc_ch/reference.md)
- [许可证书](#许可证书)
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import yaml
from argparse import ArgumentParser, RawDescriptionHelpFormatter
import os.path
import logging
logging.basicConfig(level=logging.INFO)
support_list = {
'it':'italian', 'xi':'spanish', 'pu':'portuguese', 'ru':'russian', 'ar':'arabic',
'ta':'tamil', 'ug':'uyghur', 'fa':'persian', 'ur':'urdu', 'rs':'serbian latin',
'oc':'occitan', 'rsc':'serbian cyrillic', 'bg':'bulgarian', 'uk':'ukranian', 'be':'belarusian',
'te':'telugu', 'ka':'kannada', 'chinese_cht':'chinese tradition','hi':'hindi','mr':'marathi',
'ne':'nepali',
}
assert(
os.path.isfile("./rec_multi_language_lite_train.yml")
),"Loss basic configuration file rec_multi_language_lite_train.yml.\
You can download it from \
https://github.com/PaddlePaddle/PaddleOCR/tree/dygraph/configs/rec/multi_language/"
global_config = yaml.load(open("./rec_multi_language_lite_train.yml", 'rb'), Loader=yaml.Loader)
project_path = os.path.abspath(os.path.join(os.getcwd(), "../../../"))
class ArgsParser(ArgumentParser):
def __init__(self):
super(ArgsParser, self).__init__(
formatter_class=RawDescriptionHelpFormatter)
self.add_argument(
"-o", "--opt", nargs='+', help="set configuration options")
self.add_argument(
"-l", "--language", nargs='+', help="set language type, support {}".format(support_list))
self.add_argument(
"--train",type=str,help="you can use this command to change the train dataset default path")
self.add_argument(
"--val",type=str,help="you can use this command to change the eval dataset default path")
self.add_argument(
"--dict",type=str,help="you can use this command to change the dictionary default path")
self.add_argument(
"--data_dir",type=str,help="you can use this command to change the dataset default root path")
def parse_args(self, argv=None):
args = super(ArgsParser, self).parse_args(argv)
args.opt = self._parse_opt(args.opt)
args.language = self._set_language(args.language)
return args
def _parse_opt(self, opts):
config = {}
if not opts:
return config
for s in opts:
s = s.strip()
k, v = s.split('=')
config[k] = yaml.load(v, Loader=yaml.Loader)
return config
def _set_language(self, type):
assert(type),"please use -l or --language to choose language type"
assert(
type[0] in support_list.keys()
),"the sub_keys(-l or --language) can only be one of support list: \n{},\nbut get: {}, " \
"please check your running command".format(support_list, type)
global_config['Global']['character_dict_path'] = 'ppocr/utils/dict/{}_dict.txt'.format(type[0])
global_config['Global']['save_model_dir'] = './output/rec_{}_lite'.format(type[0])
global_config['Train']['dataset']['label_file_list'] = ["train_data/{}_train.txt".format(type[0])]
global_config['Eval']['dataset']['label_file_list'] = ["train_data/{}_val.txt".format(type[0])]
global_config['Global']['character_type'] = type[0]
assert(
os.path.isfile(os.path.join(project_path,global_config['Global']['character_dict_path']))
),"Loss default dictionary file {}_dict.txt.You can download it from \
https://github.com/PaddlePaddle/PaddleOCR/tree/dygraph/ppocr/utils/dict/".format(type[0])
return type[0]
def merge_config(config):
"""
Merge config into global config.
Args:
config (dict): Config to be merged.
Returns: global config
"""
for key, value in config.items():
if "." not in key:
if isinstance(value, dict) and key in global_config:
global_config[key].update(value)
else:
global_config[key] = value
else:
sub_keys = key.split('.')
assert (
sub_keys[0] in global_config
), "the sub_keys can only be one of global_config: {}, but get: {}, please check your running command".format(
global_config.keys(), sub_keys[0])
cur = global_config[sub_keys[0]]
for idx, sub_key in enumerate(sub_keys[1:]):
if idx == len(sub_keys) - 2:
cur[sub_key] = value
else:
cur = cur[sub_key]
def loss_file(path):
assert(
os.path.exists(path)
),"There is no such file:{},Please do not forget to put in the specified file".format(path)
if __name__ == '__main__':
FLAGS = ArgsParser().parse_args()
merge_config(FLAGS.opt)
save_file_path = 'rec_{}_lite_train.yml'.format(FLAGS.language)
if os.path.isfile(save_file_path):
os.remove(save_file_path)
if FLAGS.train:
global_config['Train']['dataset']['label_file_list'] = [FLAGS.train]
train_label_path = os.path.join(project_path,FLAGS.train)
loss_file(train_label_path)
if FLAGS.val:
global_config['Eval']['dataset']['label_file_list'] = [FLAGS.val]
eval_label_path = os.path.join(project_path,FLAGS.val)
loss_file(Eval_label_path)
if FLAGS.dict:
global_config['Global']['character_dict_path'] = FLAGS.dict
dict_path = os.path.join(project_path,FLAGS.dict)
loss_file(dict_path)
if FLAGS.data_dir:
global_config['Eval']['dataset']['data_dir'] = FLAGS.data_dir
global_config['Train']['dataset']['data_dir'] = FLAGS.data_dir
data_dir = os.path.join(project_path,FLAGS.data_dir)
loss_file(data_dir)
with open(save_file_path, 'w') as f:
yaml.dump(dict(global_config), f, default_flow_style=False, sort_keys=False)
logging.info("Project path is :{}".format(project_path))
logging.info("Train list path set to :{}".format(global_config['Train']['dataset']['label_file_list'][0]))
logging.info("Eval list path set to :{}".format(global_config['Eval']['dataset']['label_file_list'][0]))
logging.info("Dataset root path set to :{}".format(global_config['Eval']['dataset']['data_dir']))
logging.info("Dict path set to :{}".format(global_config['Global']['character_dict_path']))
logging.info("Config file set to :configs/rec/multi_language/{}".format(save_file_path))
Global:
use_gpu: True
epoch_num: 500
log_smooth_window: 20
print_batch_step: 10
save_model_dir: ./output/rec_multi_language_lite
save_epoch_step: 3
# evaluation is run every 5000 iterations after the 4000th iteration
eval_batch_step: [0, 2000]
# if pretrained_model is saved in static mode, load_static_weights must set to True
cal_metric_during_train: True
pretrained_model:
checkpoints:
save_inference_dir:
use_visualdl: False
infer_img:
# for data or label process
character_dict_path:
# Set the language of training, if set, select the default dictionary file
character_type:
max_text_length: 25
infer_mode: False
use_space_char: True
Optimizer:
name: Adam
beta1: 0.9
beta2: 0.999
lr:
name: Cosine
learning_rate: 0.001
regularizer:
name: 'L2'
factor: 0.00001
Architecture:
model_type: rec
algorithm: CRNN
Transform:
Backbone:
name: MobileNetV3
scale: 0.5
model_name: small
small_stride: [1, 2, 2, 2]
Neck:
name: SequenceEncoder
encoder_type: rnn
hidden_size: 48
Head:
name: CTCHead
fc_decay: 0.00001
Loss:
name: CTCLoss
PostProcess:
name: CTCLabelDecode
Metric:
name: RecMetric
main_indicator: acc
Train:
dataset:
name: SimpleDataSet
data_dir: train_data/
label_file_list: ["./train_data/train_list.txt"]
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- RecAug:
- CTCLabelEncode: # Class handling label
- RecResizeImg:
image_shape: [3, 32, 320]
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
shuffle: True
batch_size_per_card: 256
drop_last: True
num_workers: 8
Eval:
dataset:
name: SimpleDataSet
data_dir: train_data/
label_file_list: ["./train_data/val_list.txt"]
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- CTCLabelEncode: # Class handling label
- RecResizeImg:
image_shape: [3, 32, 320]
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
shuffle: False
drop_last: False
batch_size_per_card: 256
num_workers: 8
# Version: 1.0.0
FROM hub.baidubce.com/paddlepaddle/paddle:latest-gpu-cuda10.0-cudnn7-dev
# Version: 2.0.0
FROM registry.baidubce.com/paddlepaddle/paddle:2.0.0rc1
# PaddleOCR base on Python3.7
RUN pip3.7 install --upgrade pip -i https://mirror.baidu.com/pypi/simple
RUN python3.7 -m pip install paddlepaddle==2.0.0rc0 -i https://mirror.baidu.com/pypi/simple
RUN pip3.7 install paddlehub --upgrade -i https://mirror.baidu.com/pypi/simple
RUN git clone https://github.com/PaddlePaddle/PaddleOCR.git /PaddleOCR
......@@ -15,15 +13,15 @@ WORKDIR /PaddleOCR
RUN pip3.7 install -r requirements.txt -i https://mirror.baidu.com/pypi/simple
RUN mkdir -p /PaddleOCR/inference/
# Download orc detect model(light version). if you want to change normal version, you can change ch_ppocr_mobile_v1.1_det_infer to ch_ppocr_server_v1.1_det_infer, also remember change det_model_dir in deploy/hubserving/ocr_system/params.py)
# Download orc detect model(light version). if you want to change normal version, you can change ch_ppocr_mobile_v2.0_det_infer to ch_ppocr_server_v2.0_det_infer, also remember change det_model_dir in deploy/hubserving/ocr_system/params.py)
ADD {link} /PaddleOCR/inference/
RUN tar xf /PaddleOCR/inference/{file} -C /PaddleOCR/inference/
# Download direction classifier(light version). If you want to change normal version, you can change ch_ppocr_mobile_v1.1_cls_infer to ch_ppocr_mobile_v1.1_cls_infer, also remember change cls_model_dir in deploy/hubserving/ocr_system/params.py)
# Download direction classifier(light version). If you want to change normal version, you can change ch_ppocr_mobile_v2.0_cls_infer to ch_ppocr_mobile_v2.0_cls_infer, also remember change cls_model_dir in deploy/hubserving/ocr_system/params.py)
ADD {link} /PaddleOCR/inference/
RUN tar xf /PaddleOCR/inference/{file}.tar -C /PaddleOCR/inference/
# Download orc recognition model(light version). If you want to change normal version, you can change ch_ppocr_mobile_v1.1_rec_infer to ch_ppocr_server_v1.1_rec_infer, also remember change rec_model_dir in deploy/hubserving/ocr_system/params.py)
# Download orc recognition model(light version). If you want to change normal version, you can change ch_ppocr_mobile_v2.0_rec_infer to ch_ppocr_server_v2.0_rec_infer, also remember change rec_model_dir in deploy/hubserving/ocr_system/params.py)
ADD {link} /PaddleOCR/inference/
RUN tar xf /PaddleOCR/inference/{file}.tar -C /PaddleOCR/inference/
......
# Version: 1.0.0
FROM hub.baidubce.com/paddlepaddle/paddle:latest-gpu-cuda10.0-cudnn7-dev
# Version: 2.0.0
FROM egistry.baidubce.com/paddlepaddle/paddle:2.0.0rc1-gpu-cuda10.0-cudnn7
# PaddleOCR base on Python3.7
RUN pip3.7 install --upgrade pip -i https://mirror.baidu.com/pypi/simple
RUN python3.7 -m pip install paddlepaddle-gpu==2.0.0rc0 -i https://mirror.baidu.com/pypi/simple
RUN pip3.7 install paddlehub --upgrade -i https://mirror.baidu.com/pypi/simple
RUN git clone https://github.com/PaddlePaddle/PaddleOCR.git /PaddleOCR
......@@ -15,15 +13,15 @@ WORKDIR /PaddleOCR
RUN pip3.7 install -r requirements.txt -i https://mirror.baidu.com/pypi/simple
RUN mkdir -p /PaddleOCR/inference/
# Download orc detect model(light version). if you want to change normal version, you can change ch_ppocr_mobile_v1.1_det_infer to ch_ppocr_server_v1.1_det_infer, also remember change det_model_dir in deploy/hubserving/ocr_system/params.py)
# Download orc detect model(light version). if you want to change normal version, you can change ch_ppocr_mobile_v2.0_det_infer to ch_ppocr_server_v2.0_det_infer, also remember change det_model_dir in deploy/hubserving/ocr_system/params.py)
ADD {link} /PaddleOCR/inference/
RUN tar xf /PaddleOCR/inference/{file}.tar -C /PaddleOCR/inference/
# Download direction classifier(light version). If you want to change normal version, you can change ch_ppocr_mobile_v1.1_cls_infer to ch_ppocr_mobile_v1.1_cls_infer, also remember change cls_model_dir in deploy/hubserving/ocr_system/params.py)
# Download direction classifier(light version). If you want to change normal version, you can change ch_ppocr_mobile_v2.0_cls_infer to ch_ppocr_mobile_v2.0_cls_infer, also remember change cls_model_dir in deploy/hubserving/ocr_system/params.py)
ADD {link} /PaddleOCR/inference/
RUN tar xf /PaddleOCR/inference/{file} -C /PaddleOCR/inference/
# Download orc recognition model(light version). If you want to change normal version, you can change ch_ppocr_mobile_v1.1_rec_infer to ch_ppocr_server_v1.1_rec_infer, also remember change rec_model_dir in deploy/hubserving/ocr_system/params.py)
# Download orc recognition model(light version). If you want to change normal version, you can change ch_ppocr_mobile_v2.0_rec_infer to ch_ppocr_server_v2.0_rec_infer, also remember change rec_model_dir in deploy/hubserving/ocr_system/params.py)
ADD {link} /PaddleOCR/inference/
RUN tar xf /PaddleOCR/inference/{file}.tar -C /PaddleOCR/inference/
......
......@@ -9,48 +9,42 @@
## PaddleOCR常见问题汇总(持续更新)
* [近期更新(2021.1.11](#近期更新)
* [近期更新(2021.1.18](#近期更新)
* [【精选】OCR精选10个问题](#OCR精选10个问题)
* [【理论篇】OCR通用31个问题](#OCR通用问题)
* [【理论篇】OCR通用32个问题](#OCR通用问题)
* [基础知识7题](#基础知识)
* [数据集7题](#数据集2)
* [模型训练调优17](#模型训练调优2)
* [【实战篇】PaddleOCR实战106个问题](#PaddleOCR实战问题)
* [模型训练调优18](#模型训练调优2)
* [【实战篇】PaddleOCR实战110个问题](#PaddleOCR实战问题)
* [使用咨询36题](#使用咨询)
* [数据集17题](#数据集3)
* [模型训练调优26](#模型训练调优3)
* [预测部署27](#预测部署3)
* [模型训练调优28](#模型训练调优3)
* [预测部署29](#预测部署3)
<a name="近期更新"></a>
## 近期更新(2021.1.11
## 近期更新(2021.1.18
#### Q3.1.32 能否修改StyleText配置文件中的分辨率
#### Q2.3.18: 在PP-OCR系统中,文本检测的骨干网络为什么没有使用SE模块
**A**:StyleText目前的训练数据主要是高度32的图片,建议不要改变高度。未来我们会支持更丰富的分辨率。
**A**:SE模块是MobileNetV3网络一个重要模块,目的是估计特征图每个特征通道重要性,给特征图每个特征分配权重,提高网络的表达能力。但是,对于文本检测,输入网络的分辨率比较大,一般是640\*640,利用SE模块估计特征图每个特征通道重要性比较困难,网络提升能力有限,但是该模块又比较耗时,因此在PP-OCR系统中,文本检测的骨干网络没有使用SE模块。实验也表明,当去掉SE模块,超轻量模型大小可以减小40%,文本检测效果基本不受影响。详细可以参考PP-OCR技术文章,https://arxiv.org/abs/2009.09941.
#### Q3.1.33 StyleText是否可以更换字体文件
#### Q3.3.27: PaddleOCR关于文本识别模型的训练,支持的数据增强方式有哪些
**A**:StyleText项目中的字体文件为标准字体,主要用作模型的输入部分,不能够修改。
StyleText的用途主要是:提取style_image中的字体、背景等style信息,根据语料生成同样style的图片。
**A**:文本识别支持的数据增强方式有随机小幅度裁剪、图像平衡、添加白噪声、颜色漂移、图像反色和Text Image Augmentation(TIA)变换等。可以参考[代码](../../ppocr/data/imaug/rec_img_aug.py)中的warp函数。
#### Q3.1.34 StyleText批量生成图片为什么没有输出
#### Q3.3.28: 关于dygraph分支中,文本识别模型训练,要使用数据增强应该如何设置
**A**:需要检查以下您配置文件中的路径是否都存在。尤其要注意的是[label_file配置](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/StyleText/README_ch.md#%E5%BF%AB%E9%80%9F%E4%B8%8A%E6%89%8B)
如果您使用的style_image输入没有label信息,您依然需要提供一个图片文件列表。
**A**:可以参考[配置文件](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)在Train['dataset']['transforms']添加RecAug字段,使数据增强生效。可以通过添加对aug_prob设置,表示每种数据增强采用的概率。aug_prob默认是0.4.由于tia数据增强特殊性,默认不采用,可以通过添加use_tia设置,使tia数据增强生效。详细设置可以参考[ISSUE 1744](https://github.com/PaddlePaddle/PaddleOCR/issues/1744)
#### Q3.1.35 怎样把OCR输出的结果组成有意义的语句呢
#### Q3.4.28: PP-OCR系统中,文本检测的结果有置信度吗
**A**:OCR输出的结果包含坐标信息和文字内容两部分。如果您不关心文字的顺序,那么可以直接按box的序号连起来。
如果需要将文字按照一定的顺序排列,则需要您设定一些规则,对文字的坐标进行处理,例如按照坐标从上到下,从左到右连接识别结果。
对于一些有规律的垂类场景,可以设定模板,根据位置、内容进行匹配。
例如识别身份证照片,可以先匹配"姓名","性别"等关键字,根据这些关键字的坐标去推测其他信息的位置,再与识别的结果匹配。
**A**:文本检测的结果有置信度,由于推理过程中没有使用,所以没有显示的返回到最终结果中。如果需要文本检测结果的置信度,可以在[文本检测DB的后处理代码](../../ppocr/postprocess/db_postprocess.py)的155行,添加scores信息。这样,在[检测预测代码](../../tools/infer/predict_det.py)的197行,就可以拿到文本检测的scores信息。
#### Q3.1.36 如何识别竹简上的古文?
**A**:对于字符都是普通的汉字字符的情况,只要标注足够的数据,finetune模型就可以了。如果数据量不足,您可以尝试StyleText工具。
而如果使用的字符是特殊的古文字、甲骨文、象形文字等,那么首先需要构建一个古文字的字典,之后再进行训练。
#### Q3.4.29: DB文本检测,特征提取网络金字塔构建的部分代码在哪儿?
**A**:特征提取网络金字塔构建的部分:[代码位置](../../ppocr/modeling/necks/db_fpn.py)。ppocr/modeling文件夹里面是组网相关的代码,其中architectures是文本检测或者文本识别整体流程代码;backbones是骨干网络相关代码;necks是类似与FPN的颈函数代码;heads是提取文本检测或者文本识别预测结果相关的头函数;transforms是类似于TPS特征预处理模块。更多的信息可以参考[代码组织结构](./tree.md)
<a name="OCR精选10个问题"></a>
## 【精选】OCR精选10个问题
......@@ -292,7 +286,9 @@ StyleText的用途主要是:提取style_image中的字体、背景等style信
**A**:StyleText模型生成的数据主要用于OCR识别模型的训练。PaddleOCR目前识别模型的输入为32 x N,因此当前版本模型主要适用高度为32的数据。
建议要合成的数据尺寸设置为32 x N。尺寸相差不多的数据也可以生成,尺寸很大或很小的数据效果确实不佳。
#### Q2.3.18: 在PP-OCR系统中,文本检测的骨干网络为什么没有使用SE模块?
**A**:SE模块是MobileNetV3网络一个重要模块,目的是估计特征图每个特征通道重要性,给特征图每个特征分配权重,提高网络的表达能力。但是,对于文本检测,输入网络的分辨率比较大,一般是640\*640,利用SE模块估计特征图每个特征通道重要性比较困难,网络提升能力有限,但是该模块又比较耗时,因此在PP-OCR系统中,文本检测的骨干网络没有使用SE模块。实验也表明,当去掉SE模块,超轻量模型大小可以减小40%,文本检测效果基本不受影响。详细可以参考PP-OCR技术文章,https://arxiv.org/abs/2009.09941.
<a name="PaddleOCR实战问题"></a>
## 【实战篇】PaddleOCR实战问题
......@@ -602,7 +598,6 @@ det_db_unclip_ratio: 文本框扩张的系数,关系到文本框的大小``
ps -axu | grep train.py | awk '{print $2}' | xargs kill -9
```
#### Q3.3.5:可不可以将pretrain_weights设置为空呢?想从零开始训练一个model
**A**:这个是可以的,在训练通用识别模型的时候,pretrain_weights就设置为空,但是这样可能需要更长的迭代轮数才能达到相同的精度。
......@@ -710,6 +705,14 @@ ps -axu | grep train.py | awk '{print $2}' | xargs kill -9
**A**:cosine_decay表示在训练的过程中,学习率按照cosine的变化趋势逐渐下降至0,在迭代轮数更长的情况下,比常量的学习率变化策略会有更好的收敛效果,因此在实际训练的时候,均采用了cosine_decay,来获得精度更高的模型。
#### Q3.3.27: PaddleOCR关于文本识别模型的训练,支持的数据增强方式有哪些?
**A**:文本识别支持的数据增强方式有随机小幅度裁剪、图像平衡、添加白噪声、颜色漂移、图像反色和Text Image Augmentation(TIA)变换等。可以参考[代码](../../ppocr/data/imaug/rec_img_aug.py)中的warp函数。
#### Q3.3.28: 关于dygraph分支中,文本识别模型训练,要使用数据增强应该如何设置?
**A**:可以参考[配置文件](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)在Train['dataset']['transforms']添加RecAug字段,使数据增强生效。可以通过添加对aug_prob设置,表示每种数据增强采用的概率。aug_prob默认是0.4.由于tia数据增强特殊性,默认不采用,可以通过添加use_tia设置,使tia数据增强生效。详细设置可以参考[ISSUE 1744](https://github.com/PaddlePaddle/PaddleOCR/issues/1744)
<a name="预测部署3"></a>
### 预测部署
......@@ -823,13 +826,13 @@ ps -axu | grep train.py | awk '{print $2}' | xargs kill -9
**A**:使用EAST或SAST模型进行推理预测时,需要在命令中指定参数--det_algorithm="EAST" 或 --det_algorithm="SAST",使用DB时不用指定是因为该参数默认值是"DB":https://github.com/PaddlePaddle/PaddleOCR/blob/e7a708e9fdaf413ed7a14da8e4a7b4ac0b211e42/tools/infer/utility.py#L43
#### Q3.4.25 : PaddleOCR模型Python端预测和C++预测结果不一致?
#### Q3.4.25: PaddleOCR模型Python端预测和C++预测结果不一致?
正常来说,python端预测和C++预测文本是一致的,如果预测结果差异较大,
建议首先排查diff出现在检测模型还是识别模型,或者尝试换其他模型是否有类似的问题。
其次,检查python端和C++端数据处理部分是否存在差异,建议保存环境,更新PaddleOCR代码再试下。
如果更新代码或者更新代码都没能解决,建议在PaddleOCR微信群里或者issue中抛出您的问题。
### Q3.4.26: 目前paddle hub serving 只支持 imgpath,如果我想用imgurl 去哪里改呢?
#### Q3.4.26: 目前paddle hub serving 只支持 imgpath,如果我想用imgurl 去哪里改呢?
**A**:图片是在这里读取的:https://github.com/PaddlePaddle/PaddleOCR/blob/67ef25d593c4eabfaaceb22daade4577f53bed81/deploy/hubserving/ocr_system/module.py#L55,
可以参考下面的写法,将url path转化为np array(https://cloud.tencent.com/developer/article/1467840)
......@@ -839,7 +842,15 @@ img_array = np.array(bytearray(response.read()), dtype=np.uint8)
img = cv.imdecode(img_array, -1)
```
### Q3.4.27: C++ 端侧部署可以只对OCR的检测部署吗?
#### Q3.4.27: C++ 端侧部署可以只对OCR的检测部署吗?
**A**:可以的,识别和检测模块是解耦的。如果想对检测部署,需要自己修改一下main函数,
只保留检测相关就可以:https://github.com/PaddlePaddle/PaddleOCR/blob/de3e2e7cd3b8b65ee02d7a41e570fa5b511a3c1d/deploy/cpp_infer/src/main.cpp#L72
#### Q3.4.28: PP-OCR系统中,文本检测的结果有置信度吗?
**A**:文本检测的结果有置信度,由于推理过程中没有使用,所以没有显示的返回到最终结果中。如果需要文本检测结果的置信度,可以在[文本检测DB的后处理代码](../../ppocr/postprocess/db_postprocess.py)的155行,添加scores信息。这样,在[检测预测代码](../../tools/infer/predict_det.py)的197行,就可以拿到文本检测的scores信息。
#### Q3.4.29: DB文本检测,特征提取网络金字塔构建的部分代码在哪儿?
**A**:特征提取网络金字塔构建的部分:[代码位置](../../ppocr/modeling/necks/db_fpn.py)。ppocr/modeling文件夹里面是组网相关的代码,其中architectures是文本检测或者文本识别整体流程代码;backbones是骨干网络相关代码;necks是类似与FPN的颈函数代码;heads是提取文本检测或者文本识别预测结果相关的头函数;transforms是类似于TPS特征预处理模块。更多的信息可以参考[代码组织结构](./tree.md)
......@@ -95,7 +95,7 @@ class MyBackbone(nn.Layer):
self.conv = nn.xxxx
def forward(self, inputs):
# your necwork forward
# your network forward
y = self.conv(inputs)
return y
```
......
......@@ -14,11 +14,10 @@ PaddleOCR开源的文本检测算法列表:
- [x] SAST([paper](https://arxiv.org/abs/1908.05498))[4]
在ICDAR2015文本检测公开数据集上,算法效果如下:
|模型|骨干网络|precision|recall|Hmean|下载链接|
| --- | --- | --- | --- | --- | --- |
|EAST|ResNet50_vd|88.76%|81.36%|84.90%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_east_v2.0_train.tar)|
|EAST|MobileNetV3|78.24%|79.15%|78.69%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_east_v2.0_train.tar)|
|EAST|ResNet50_vd|85.80%|86.71%|86.25%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_east_v2.0_train.tar)|
|EAST|MobileNetV3|79.42%|80.64%|80.03%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_east_v2.0_train.tar)|
|DB|ResNet50_vd|86.41%|78.72%|82.38%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar)|
|DB|MobileNetV3|77.29%|73.08%|75.12%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar)|
|SAST|ResNet50_vd|91.39%|83.77%|87.42%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_icdar15_v2.0_train.tar)|
......@@ -40,17 +39,19 @@ PaddleOCR文本检测算法的训练和使用请参考文档教程中[模型训
PaddleOCR基于动态图开源的文本识别算法列表:
- [x] CRNN([paper](https://arxiv.org/abs/1507.05717))[7](ppocr推荐)
- [x] Rosetta([paper](https://arxiv.org/abs/1910.05085))[10]
- [ ] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html))[11] coming soon
- [x] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html))[11]
- [ ] RARE([paper](https://arxiv.org/abs/1603.03915v1))[12] coming soon
- [ ] SRN([paper](https://arxiv.org/abs/2003.12294))[5] coming soon
参考[DTRB][3](https://arxiv.org/abs/1904.01906)文字识别训练和评估流程,使用MJSynth和SynthText两个文字识别数据集训练,在IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE数据集上进行评估,算法效果如下:
|模型|骨干网络|Avg Accuracy|模型存储命名|下载链接|
|-|-|-|-|-|
|---|---|---|---|---|
|Rosetta|Resnet34_vd|80.9%|rec_r34_vd_none_none_ctc|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_none_ctc_v2.0_train.tar)|
|Rosetta|MobileNetV3|78.05%|rec_mv3_none_none_ctc|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_none_ctc_v2.0_train.tar)|
|CRNN|Resnet34_vd|82.76%|rec_r34_vd_none_bilstm_ctc|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_bilstm_ctc_v2.0_train.tar)|
|CRNN|MobileNetV3|79.97%|rec_mv3_none_bilstm_ctc|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_bilstm_ctc_v2.0_train.tar)|
|StarNet|Resnet34_vd|84.44%|rec_r34_vd_tps_bilstm_ctc|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_tps_bilstm_ctc_v2.0_train.tar)|
|StarNet|MobileNetV3|81.42%|rec_mv3_tps_bilstm_ctc|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_tps_bilstm_ctc_v2.0_train.tar)|
PaddleOCR文本识别算法的训练和使用请参考文档教程中[模型训练/评估中的文本识别部分](./recognition.md)
......@@ -352,10 +352,10 @@ Predicts of ./doc/imgs_words/ch/word_4.jpg:['0', 0.9999982]
```
# 使用方向分类器
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/2.jpg" --det_model_dir="./inference/det_db/" --cls_model_dir="./inference/cls/" --rec_model_dir="./inference/rec_crnn/" --use_angle_cls=true
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./inference/det_db/" --cls_model_dir="./inference/cls/" --rec_model_dir="./inference/rec_crnn/" --use_angle_cls=true
# 不使用方向分类器
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/2.jpg" --det_model_dir="./inference/det_db/" --rec_model_dir="./inference/rec_crnn/" --use_angle_cls=false
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./inference/det_db/" --rec_model_dir="./inference/rec_crnn/" --use_angle_cls=false
```
......@@ -364,7 +364,7 @@ python3 tools/infer/predict_system.py --image_dir="./doc/imgs/2.jpg" --det_model
执行命令后,识别结果图像如下:
![](../imgs_results/2.jpg)
![](../imgs_results/system_res_00018069.jpg)
<a name="其他模型推理"></a>
### 2. 其他模型推理
......@@ -381,4 +381,4 @@ python3 tools/infer/predict_system.py --image_dir="./doc/imgs_en/img_10.jpg" --d
执行命令后,识别结果图像如下:
(coming soon)
![](../imgs_results/img_10_east_starnet.jpg)
## OCR模型列表(V2.0,2020年12月12日更新)
## OCR模型列表(V2.0,2021年1月20日更新)
**说明** :2.0版模型和[1.1版模型](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/models_list.md)的主要区别在于动态图训练vs.静态图训练,模型性能上无明显差距。
- [一、文本检测模型](#文本检测模型)
......@@ -22,7 +22,7 @@ PaddleOCR提供的可下载模型包括`推理模型`、`训练模型`、`预训
|模型名称|模型简介|配置文件|推理模型大小|下载地址|
| --- | --- | --- | --- | --- |
|ch_ppocr_mobile_slim_v2.0_det|slim裁剪版超轻量模型,支持中英文、多语种文本检测|[ch_det_mv3_db_v2.0.yml](../../configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml)| |推理模型 (coming soon) / slim模型 (coming soon)|
|ch_ppocr_mobile_slim_v2.0_det|slim裁剪版超轻量模型,支持中英文、多语种文本检测|[ch_det_mv3_db_v2.0.yml](../../configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml)| |推理模型 (coming soon) / 训练模型 (coming soon)|
|ch_ppocr_mobile_v2.0_det|原始超轻量模型,支持中英文、多语种文本检测|[ch_det_mv3_db_v2.0.yml](../../configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml)|3M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar)|
|ch_ppocr_server_v2.0_det|通用模型,支持中英文、多语种文本检测,比超轻量模型更大,但效果更好|[ch_det_res18_db_v2.0.yml](../../configs/det/ch_ppocr_v2.0/ch_det_res18_db_v2.0.yml)|47M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar)|
......@@ -35,7 +35,7 @@ PaddleOCR提供的可下载模型包括`推理模型`、`训练模型`、`预训
|模型名称|模型简介|配置文件|推理模型大小|下载地址|
| --- | --- | --- | --- | --- |
|ch_ppocr_mobile_slim_v2.0_rec|slim裁剪量化版超轻量模型,支持中英文、数字识别|[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)| |推理模型 (coming soon) / slim模型 (coming soon) |
|ch_ppocr_mobile_slim_v2.0_rec|slim裁剪量化版超轻量模型,支持中英文、数字识别|[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)| |[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_slim_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_slim_train.tar) |
|ch_ppocr_mobile_v2.0_rec|原始超轻量模型,支持中英文、数字识别|[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)|3.71M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_pre.tar) |
|ch_ppocr_server_v2.0_rec|通用模型,支持中英文、数字识别|[rec_chinese_common_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yml)|94.8M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_train.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_pre.tar) |
......@@ -46,18 +46,76 @@ PaddleOCR提供的可下载模型包括`推理模型`、`训练模型`、`预训
|模型名称|模型简介|配置文件|推理模型大小|下载地址|
| --- | --- | --- | --- | --- |
|en_number_mobile_slim_v2.0_rec|slim裁剪量化版超轻量模型,支持英文、数字识别|[rec_en_number_lite_train.yml](../../configs/rec/multi_language/rec_en_number_lite_train.yml)| | 推理模型 (coming soon) / slim模型 (coming soon) |
|en_number_mobile_slim_v2.0_rec|slim裁剪量化版超轻量模型,支持英文、数字识别|[rec_en_number_lite_train.yml](../../configs/rec/multi_language/rec_en_number_lite_train.yml)| | [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/en_number_mobile_v2.0_rec_slim_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/en_number_mobile_v2.0_rec_slim_train.tar) |
|en_number_mobile_v2.0_rec|原始超轻量模型,支持英文、数字识别|[rec_en_number_lite_train.yml](../../configs/rec/multi_language/rec_en_number_lite_train.yml)|2.56M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_train.tar) |
<a name="多语言识别模型"></a>
#### 3. 多语言识别模型(更多语言持续更新中...)
**说明:** 新增的多语言模型的配置文件通过代码方式生成,您可以通过`--help`参数查看当前PaddleOCR支持生成哪些多语言的配置文件:
```bash
# 该代码需要在指定目录运行
cd {your/path/}PaddleOCR/configs/rec/multi_language/
python3 generate_multi_language_configs.py --help
```
下面以生成意大利语配置文件为例:
##### 1. 生成意大利语配置文件测试现有模型
如果您仅仅想用配置文件测试PaddleOCR提供的多语言模型可以通过下面命令生成默认的配置文件,使用PaddleOCR提供的小语种字典进行预测。
```bash
# 该代码需要在指定目录运行
cd {your/path/}PaddleOCR/configs/rec/multi_language/
# 通过-l或者--language参数设置需要生成的语种的配置文件,该命令会将默认参数写入配置文件
python3 generate_multi_language_configs.py -l it
```
##### 2. 生成意大利语配置文件训练自己的数据
如果您想训练自己的小语种模型,可以准备好训练集文件、验证集文件、字典文件和训练数据路径,这里假设准备的意大利语的训练集、验证集、字典和训练数据路径为:
- 训练集:{your/path/}PaddleOCR/train_data/train_list.txt
- 验证集:{your/path/}PaddleOCR/train_data/val_list.txt
- 使用PaddleOCR提供的默认字典:{your/path/}PaddleOCR/ppocr/utils/dict/it_dict.txt
- 训练数据路径:{your/path/}PaddleOCR/train_data
使用以下命令生成配置文件:
```bash
# 该代码需要在指定目录运行
cd {your/path/}PaddleOCR/configs/rec/multi_language/
# -l或者--language字段是必须的
# --train修改训练集,--val修改验证集,--data_dir修改数据集目录,-o修改对应默认参数
# --dict命令改变字典路径,示例使用默认字典路径则该参数可不填
python3 generate_multi_language_configs.py -l it \
--train train_data/train_list.txt \
--val train_data/val_list.txt \
--data_dir train_data \
-o Global.use_gpu=False
```
|模型名称|模型简介|配置文件|推理模型大小|下载地址|
| --- | --- | --- | --- | --- |
| french_mobile_v2.0_rec |法文识别|[rec_french_lite_train.yml](../../configs/rec/multi_language/rec_french_lite_train.yml)|2.65M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/french_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/french_mobile_v2.0_rec_train.tar) |
| german_mobile_v2.0_rec |德文识别|[rec_german_lite_train.yml](../../configs/rec/multi_language/rec_german_lite_train.yml)|2.65M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_train.tar) |
| korean_mobile_v2.0_rec |韩文识别|[rec_korean_lite_train.yml](../../configs/rec/multi_language/rec_korean_lite_train.yml)|3.9M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_train.tar) |
| japan_mobile_v2.0_rec |日文识别|[rec_japan_lite_train.yml](../../configs/rec/multi_language/rec_japan_lite_train.yml)|4.23M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_train.tar) |
| it_mobile_v2.0_rec |意大利文识别|rec_it_lite_train.yml|2.53M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/it_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/it_mobile_v2.0_rec_train.tar) |
| xi_mobile_v2.0_rec |西班牙文识别|rec_xi_lite_train.yml|2.53M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/xi_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/xi_mobile_v2.0_rec_train.tar) |
| pu_mobile_v2.0_rec |葡萄牙文识别|rec_pu_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/pu_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/pu_mobile_v2.0_rec_train.tar) |
| ru_mobile_v2.0_rec |俄罗斯文识别|rec_ru_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ru_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ru_mobile_v2.0_rec_train.tar) |
| ar_mobile_v2.0_rec |阿拉伯文识别|rec_ar_lite_train.yml|2.53M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ar_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ar_mobile_v2.0_rec_train.tar) |
| hi_mobile_v2.0_rec |印地文识别|rec_hi_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/hi_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/hi_mobile_v2.0_rec_train.tar) |
| chinese_cht_mobile_v2.0_rec |中文繁体识别|rec_chinese_cht_lite_train.yml|5.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_train.tar) |
| ug_mobile_v2.0_rec |维吾尔文识别|rec_ug_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ug_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ug_mobile_v2.0_rec_train.tar) |
| fa_mobile_v2.0_rec |波斯文识别|rec_fa_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/fa_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/fa_mobile_v2.0_rec_train.tar) |
| ur_mobile_v2.0_rec |乌尔都文识别|rec_ur_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ur_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ur_mobile_v2.0_rec_train.tar) |
| rs_mobile_v2.0_rec |塞尔维亚文(latin)识别|rec_rs_lite_train.yml|2.53M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/rs_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/rs_mobile_v2.0_rec_train.tar) |
| oc_mobile_v2.0_rec |欧西坦文识别|rec_oc_lite_train.yml|2.53M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/oc_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/oc_mobile_v2.0_rec_train.tar) |
| mr_mobile_v2.0_rec |马拉地文识别|rec_mr_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/mr_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/mr_mobile_v2.0_rec_train.tar) |
| ne_mobile_v2.0_rec |尼泊尔文识别|rec_ne_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ne_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ne_mobile_v2.0_rec_train.tar) |
| rsc_mobile_v2.0_rec |塞尔维亚文(cyrillic)识别|rec_rsc_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/rsc_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/rsc_mobile_v2.0_rec_train.tar) |
| bg_mobile_v2.0_rec |保加利亚文识别|rec_bg_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/bg_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/bg_mobile_v2.0_rec_train.tar) |
| uk_mobile_v2.0_rec |乌克兰文识别|rec_uk_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/uk_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/uk_mobile_v2.0_rec_train.tar) |
| be_mobile_v2.0_rec |白俄罗斯文识别|rec_be_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/be_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/be_mobile_v2.0_rec_train.tar) |
| te_mobile_v2.0_rec |泰卢固文识别|rec_te_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_train.tar) |
| ka_mobile_v2.0_rec |卡纳达文识别|rec_ka_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_train.tar) |
| ta_mobile_v2.0_rec |泰米尔文识别|rec_ta_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_train.tar) |
<a name="文本方向分类模型"></a>
......@@ -65,5 +123,5 @@ PaddleOCR提供的可下载模型包括`推理模型`、`训练模型`、`预训
|模型名称|模型简介|配置文件|推理模型大小|下载地址|
| --- | --- | --- | --- | --- |
|ch_ppocr_mobile_slim_v2.0_cls|slim量化版模型|[cls_mv3.yml](../../configs/cls/cls_mv3.yml)| |推理模型 (coming soon) / 训练模型 / slim模型 |
|ch_ppocr_mobile_slim_v2.0_cls|slim量化版模型|[cls_mv3.yml](../../configs/cls/cls_mv3.yml)| |[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_slim_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_slim_infer.tar) |
|ch_ppocr_mobile_v2.0_cls|原始模型|[cls_mv3.yml](../../configs/cls/cls_mv3.yml)|1.38M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |
......@@ -96,7 +96,7 @@ class MyBackbone(nn.Layer):
self.conv = nn.xxxx
def forward(self, inputs):
# your necwork forward
# your network forward
y = self.conv(inputs)
return y
```
......
......@@ -19,8 +19,8 @@ On the ICDAR2015 dataset, the text detection result is as follows:
|Model|Backbone|precision|recall|Hmean|Download link|
| --- | --- | --- | --- | --- | --- |
|EAST|ResNet50_vd|88.76%|81.36%|84.90%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_east_v2.0_train.tar)|
|EAST|MobileNetV3|78.24%|79.15%|78.69%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_east_v2.0_train.tar)|
|EAST|ResNet50_vd|85.80%|86.71%|86.25%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_east_v2.0_train.tar)|
|EAST|MobileNetV3|79.42%|80.64%|80.03%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_east_v2.0_train.tar)|
|DB|ResNet50_vd|86.41%|78.72%|82.38%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar)|
|DB|MobileNetV3|77.29%|73.08%|75.12%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar)|
|SAST|ResNet50_vd|91.39%|83.77%|87.42%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_icdar15_v2.0_train.tar)|
......@@ -41,17 +41,19 @@ For the training guide and use of PaddleOCR text detection algorithms, please re
PaddleOCR open-source text recognition algorithms list:
- [x] CRNN([paper](https://arxiv.org/abs/1507.05717))[7]
- [x] Rosetta([paper](https://arxiv.org/abs/1910.05085))[10]
- [ ] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html))[11] coming soon
- [x] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html))[11]
- [ ] RARE([paper](https://arxiv.org/abs/1603.03915v1))[12] coming soon
- [ ] SRN([paper](https://arxiv.org/abs/2003.12294))[5] coming soon
Refer to [DTRB](https://arxiv.org/abs/1904.01906), the training and evaluation result of these above text recognition (using MJSynth and SynthText for training, evaluate on IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE) is as follow:
|Model|Backbone|Avg Accuracy|Module combination|Download link|
|-|-|-|-|-|
|---|---|---|---|---|
|Rosetta|Resnet34_vd|80.9%|rec_r34_vd_none_none_ctc|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_none_ctc_v2.0_train.tar)|
|Rosetta|MobileNetV3|78.05%|rec_mv3_none_none_ctc|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_none_ctc_v2.0_train.tar)|
|CRNN|Resnet34_vd|82.76%|rec_r34_vd_none_bilstm_ctc|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_bilstm_ctc_v2.0_train.tar)|
|CRNN|MobileNetV3|79.97%|rec_mv3_none_bilstm_ctc|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_bilstm_ctc_v2.0_train.tar)|
|StarNet|Resnet34_vd|84.44%|rec_r34_vd_tps_bilstm_ctc|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_tps_bilstm_ctc_v2.0_train.tar)|
|StarNet|MobileNetV3|81.42%|rec_mv3_tps_bilstm_ctc|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_tps_bilstm_ctc_v2.0_train.tar)|
Please refer to the document for training guide and use of PaddleOCR text recognition algorithms [Text recognition model training/evaluation/prediction](./recognition_en.md)
......@@ -366,15 +366,15 @@ When performing prediction, you need to specify the path of a single image or a
```
# use direction classifier
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/2.jpg" --det_model_dir="./inference/det_db/" --cls_model_dir="./inference/cls/" --rec_model_dir="./inference/rec_crnn/" --use_angle_cls=true
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./inference/det_db/" --cls_model_dir="./inference/cls/" --rec_model_dir="./inference/rec_crnn/" --use_angle_cls=true
# not use use direction classifier
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/2.jpg" --det_model_dir="./inference/det_db/" --rec_model_dir="./inference/rec_crnn/"
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./inference/det_db/" --rec_model_dir="./inference/rec_crnn/"
```
After executing the command, the recognition result image is as follows:
![](../imgs_results/2.jpg)
![](../imgs_results/system_res_00018069.jpg)
<a name="OTHER_MODELS"></a>
### 2. OTHER MODELS
......@@ -391,4 +391,4 @@ python3 tools/infer/predict_system.py --image_dir="./doc/imgs_en/img_10.jpg" --d
After executing the command, the recognition result image is as follows:
(coming soon)
![](../imgs_results/img_10_east_starnet.jpg)
## OCR model list(V2.0, updated on 2020.12.12
## OCR model list(V2.0, updated on 2021.1.20
**Note** : Compared with [models 1.1](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_en/models_list_en.md), which are trained with static graph programming paradigm, models 2.0 are the dynamic graph trained version and achieve close performance.
- [1. Text Detection Model](#Detection)
......@@ -33,7 +33,7 @@ The downloadable models provided by PaddleOCR include `inference model`, `traine
|model name|description|config|model size|download|
| --- | --- | --- | --- | --- |
|ch_ppocr_mobile_slim_v2.0_rec|Slim pruned and quantized lightweight model, supporting Chinese, English and number recognition|[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)| |inference model (coming soon) / slim model (coming soon) |
|ch_ppocr_mobile_slim_v2.0_rec|Slim pruned and quantized lightweight model, supporting Chinese, English and number recognition|[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)| | [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_slim_train.tar) |
|ch_ppocr_mobile_v2.0_rec|Original lightweight model, supporting Chinese, English and number recognition|[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)|3.71M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar) / [pre-trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_pre.tar) |
|ch_ppocr_server_v2.0_rec|General model, supporting Chinese, English and number recognition|[rec_chinese_common_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yml)|94.8M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_train.tar) / [pre-trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_pre.tar) |
......@@ -45,24 +45,84 @@ The downloadable models provided by PaddleOCR include `inference model`, `traine
|model name|description|config|model size|download|
| --- | --- | --- | --- | --- |
|en_number_mobile_slim_v2.0_rec|Slim pruned and quantized lightweight model, supporting English and number recognition|[rec_en_number_lite_train.yml](../../configs/rec/multi_language/rec_en_number_lite_train.yml)| |inference model (coming soon ) / slim model (coming soon) |
|en_number_mobile_slim_v2.0_rec|Slim pruned and quantized lightweight model, supporting English and number recognition|[rec_en_number_lite_train.yml](../../configs/rec/multi_language/rec_en_number_lite_train.yml)| | [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/en_number_mobile_v2.0_rec_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/en_number_mobile_v2.0_rec_slim_train.tar) |
|en_number_mobile_v2.0_rec|Original lightweight model, supporting English and number recognition|[rec_en_number_lite_train.yml](../../configs/rec/multi_language/rec_en_number_lite_train.yml)|2.56M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_train.tar) |
<a name="Multilingual"></a>
#### Multilingual Recognition Model(Updating...)
**Note:** The configuration file of the new multi language model is generated by code. You can use the `--help` parameter to check which multi language are supported by current PaddleOCR.
```bash
# The code needs to run in the specified directory
cd {your/path/}PaddleOCR/configs/rec/multi_language/
python3 generate_multi_language_configs.py --help
```
Take the Italian configuration file as an example:
##### 1.Generate Italian configuration file to test the model provided
you can generate the default configuration file through the following command, and use the default language dictionary provided by paddleocr for prediction.
```bash
# The code needs to run in the specified directory
cd {your/path/}PaddleOCR/configs/rec/multi_language/
# Set the required language configuration file through -l or --language parameter
# This command will write the default parameter to the configuration file.
python3 generate_multi_language_configs.py -l it
```
##### 2. Generate Italian configuration file to train your own data
If you want to train your own model, you can prepare the training set file, verification set file, dictionary file and training data path. Here we assume that the Italian training set, verification set, dictionary and training data path are:
- Training set:{your/path/}PaddleOCR/train_data/train_list.txt
- Validation set: {your/path/}PaddleOCR/train_data/val_list.txt
- Use the default dictionary provided by paddleocr:{your/path/}PaddleOCR/ppocr/utils/dict/it_dict.txt
- Training data path:{your/path/}PaddleOCR/train_data
```bash
# The code needs to run in the specified directory
cd {your/path/}PaddleOCR/configs/rec/multi_language/
# The -l or --language parameter is required
# --train modify train_list path
# --val modify eval_list path
# --data_dir modify data dir
# -o modify default parameters
# --dict Change the dictionary path. The example uses the default dictionary path, so that this parameter can be empty.
python3 generate_multi_language_configs.py -l it \
--train {path/to/train_list} \
--val {path/to/val_list} \
--data_dir {path/to/data_dir} \
-o Global.use_gpu=False
```
|model name|description|config|model size|download|
| --- | --- | --- | --- | --- |
| french_mobile_v2.0_rec |Lightweight model for French recognition|[rec_french_lite_train.yml](../../configs/rec/multi_language/rec_french_lite_train.yml)|2.65M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/french_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/french_mobile_v2.0_rec_train.tar) |
| german_mobile_v2.0_rec |Lightweight model for French recognition|[rec_german_lite_train.yml](../../configs/rec/multi_language/rec_german_lite_train.yml)|2.65M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_train.tar) |
| korean_mobile_v2.0_rec |Lightweight model for Korean recognition|[rec_korean_lite_train.yml](../../configs/rec/multi_language/rec_korean_lite_train.yml)|3.9M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_train.tar) |
| japan_mobile_v2.0_rec |Lightweight model for Japanese recognition|[rec_japan_lite_train.yml](../../configs/rec/multi_language/rec_japan_lite_train.yml)|4.23M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_train.tar) |
| it_mobile_v2.0_rec |Lightweight model for Italian recognition|rec_it_lite_train.yml|2.53M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/it_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/it_mobile_v2.0_rec_train.tar) |
| xi_mobile_v2.0_rec |Lightweight model for Spanish recognition|rec_xi_lite_train.yml|2.53M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/xi_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/xi_mobile_v2.0_rec_train.tar) |
| pu_mobile_v2.0_rec |Lightweight model for Portuguese recognition|rec_pu_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/pu_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/pu_mobile_v2.0_rec_train.tar) |
| ru_mobile_v2.0_rec |Lightweight model for Russia recognition|rec_ru_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ru_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ru_mobile_v2.0_rec_train.tar) |
| ar_mobile_v2.0_rec |Lightweight model for Arabic recognition|rec_ar_lite_train.yml|2.53M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ar_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ar_mobile_v2.0_rec_train.tar) |
| hi_mobile_v2.0_rec |Lightweight model for Hindi recognition|rec_hi_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/hi_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/hi_mobile_v2.0_rec_train.tar) |
| chinese_cht_mobile_v2.0_rec |Lightweight model for chinese traditional recognition|rec_chinese_cht_lite_train.yml|5.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_train.tar) |
| ug_mobile_v2.0_rec |Lightweight model for Uyghur recognition|rec_ug_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ug_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ug_mobile_v2.0_rec_train.tar) |
| fa_mobile_v2.0_rec |Lightweight model for Persian recognition|rec_fa_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/fa_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/fa_mobile_v2.0_rec_train.tar) |
| ur_mobile_v2.0_rec |Lightweight model for Urdu recognition|rec_ur_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ur_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ur_mobile_v2.0_rec_train.tar) |
| rs_mobile_v2.0_rec |Lightweight model for Serbian(latin) recognition|rec_rs_lite_train.yml|2.53M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/rs_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/rs_mobile_v2.0_rec_train.tar) |
| oc_mobile_v2.0_rec |Lightweight model for Occitan recognition|rec_oc_lite_train.yml|2.53M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/oc_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/oc_mobile_v2.0_rec_train.tar) |
| mr_mobile_v2.0_rec |Lightweight model for Marathi recognition|rec_mr_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/mr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/mr_mobile_v2.0_rec_train.tar) |
| ne_mobile_v2.0_rec |Lightweight model for Nepali recognition|rec_ne_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ne_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ne_mobile_v2.0_rec_train.tar) |
| rsc_mobile_v2.0_rec |Lightweight model for Serbian(cyrillic) recognition|rec_rsc_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/rsc_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/rsc_mobile_v2.0_rec_train.tar) |
| bg_mobile_v2.0_rec |Lightweight model for Bulgarian recognition|rec_bg_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/bg_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/bg_mobile_v2.0_rec_train.tar) |
| uk_mobile_v2.0_rec |Lightweight model for Ukranian recognition|rec_uk_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/uk_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/uk_mobile_v2.0_rec_train.tar) |
| be_mobile_v2.0_rec |Lightweight model for Belarusian recognition|rec_be_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/be_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/be_mobile_v2.0_rec_train.tar) |
| te_mobile_v2.0_rec |Lightweight model for Telugu recognition|rec_te_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_train.tar) |
| ka_mobile_v2.0_rec |Lightweight model for Kannada recognition|rec_ka_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_train.tar) |
| ta_mobile_v2.0_rec |Lightweight model for Tamil recognition|rec_ta_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_train.tar) |
<a name="Angle"></a>
### 3. Text Angle Classification Model
|model name|description|config|model size|download|
| --- | --- | --- | --- | --- |
|ch_ppocr_mobile_slim_v2.0_cls|Slim quantized model|[cls_mv3.yml](../../configs/cls/cls_mv3.yml)| |inference model (coming soon) / trained model / slim model|
|ch_ppocr_mobile_slim_v2.0_cls|Slim quantized model|[cls_mv3.yml](../../configs/cls/cls_mv3.yml)| | [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_slim_train.tar) |
|ch_ppocr_mobile_v2.0_cls|Original model|[cls_mv3.yml](../../configs/cls/cls_mv3.yml)|1.38M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |
......@@ -290,7 +290,9 @@ class PaddleOCR(predict_system.TextSystem):
image_file = img
img, flag = check_and_read_gif(image_file)
if not flag:
img = cv2.imread(image_file)
with open(image_file, 'rb') as f:
np_arr = np.frombuffer(f.read(), dtype=np.uint8)
img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
if img is None:
logger.error("error in loading image:{}".format(image_file))
return None
......
此差异已折叠。
......@@ -203,9 +203,9 @@ class GridGenerator(nn.Layer):
def build_C_paddle(self):
""" Return coordinates of fiducial points in I_r; C """
F = self.F
ctrl_pts_x = paddle.linspace(-1.0, 1.0, int(F / 2))
ctrl_pts_y_top = -1 * paddle.ones([int(F / 2)])
ctrl_pts_y_bottom = paddle.ones([int(F / 2)])
ctrl_pts_x = paddle.linspace(-1.0, 1.0, int(F / 2), dtype='float64')
ctrl_pts_y_top = -1 * paddle.ones([int(F / 2)], dtype='float64')
ctrl_pts_y_bottom = paddle.ones([int(F / 2)], dtype='float64')
ctrl_pts_top = paddle.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1)
ctrl_pts_bottom = paddle.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1)
C = paddle.concat([ctrl_pts_top, ctrl_pts_bottom], axis=0)
......@@ -213,12 +213,14 @@ class GridGenerator(nn.Layer):
def build_P_paddle(self, I_r_size):
I_r_height, I_r_width = I_r_size
I_r_grid_x = (
paddle.arange(-I_r_width, I_r_width, 2).astype('float32') + 1.0
) / I_r_width # self.I_r_width
I_r_grid_y = (
paddle.arange(-I_r_height, I_r_height, 2).astype('float32') + 1.0
) / I_r_height # self.I_r_height
I_r_grid_x = (paddle.arange(
-I_r_width, I_r_width, 2, dtype='float64') + 1.0
) / paddle.to_tensor(np.array([I_r_width]))
I_r_grid_y = (paddle.arange(
-I_r_height, I_r_height, 2, dtype='float64') + 1.0
) / paddle.to_tensor(np.array([I_r_height]))
# P: self.I_r_width x self.I_r_height x 2
P = paddle.stack(paddle.meshgrid(I_r_grid_x, I_r_grid_y), axis=2)
P = paddle.transpose(P, perm=[1, 0, 2])
......@@ -228,7 +230,7 @@ class GridGenerator(nn.Layer):
def build_inv_delta_C_paddle(self, C):
""" Return inv_delta_C which is needed to calculate T """
F = self.F
hat_C = paddle.zeros((F, F), dtype='float32') # F x F
hat_C = paddle.zeros((F, F), dtype='float64') # F x F
for i in range(0, F):
for j in range(i, F):
if i == j:
......@@ -241,13 +243,21 @@ class GridGenerator(nn.Layer):
delta_C = paddle.concat( # F+3 x F+3
[
paddle.concat(
[paddle.ones((F, 1)), C, hat_C], axis=1), # F x F+3
[paddle.ones(
(F, 1), dtype='float64'), C, hat_C], axis=1), # F x F+3
paddle.concat(
[paddle.zeros((2, 3)), paddle.transpose(
C, perm=[1, 0])],
[
paddle.zeros(
(2, 3), dtype='float64'), paddle.transpose(
C, perm=[1, 0])
],
axis=1), # 2 x F+3
paddle.concat(
[paddle.zeros((1, 3)), paddle.ones((1, F))],
[
paddle.zeros(
(1, 3), dtype='float64'), paddle.ones(
(1, F), dtype='float64')
],
axis=1) # 1 x F+3
],
axis=0)
......@@ -268,7 +278,9 @@ class GridGenerator(nn.Layer):
# rbf: n x F
rbf = paddle.multiply(
paddle.square(rbf_norm), paddle.log(rbf_norm + eps))
P_hat = paddle.concat([paddle.ones((n, 1)), P, rbf], axis=1)
P_hat = paddle.concat(
[paddle.ones(
(n, 1), dtype='float64'), P, rbf], axis=1)
return P_hat # n x F+3
def get_expand_tensor(self, batch_C_prime):
......
......@@ -24,7 +24,9 @@ class BaseRecLabelDecode(object):
character_type='ch',
use_space_char=False):
support_character_type = [
'ch', 'en', 'en_sensitive', 'french', 'german', 'japan', 'korean'
'ch', 'en', 'en_sensitive', 'french', 'german', 'japan', 'korean', 'it',
'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs', 'oc', 'rsc', 'bg',
'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi', 'mr', 'ne'
]
assert character_type in support_character_type, "Only {} are supported now but get {}".format(
support_character_type, character_type)
......@@ -60,7 +62,7 @@ class BaseRecLabelDecode(object):
def add_special_char(self, dict_character):
return dict_character
def decode(self, text_index, text_prob=None, is_remove_duplicate=True):
def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
""" convert text-index into text-label. """
result_list = []
ignored_tokens = self.get_ignored_tokens()
......@@ -107,10 +109,10 @@ class CTCLabelDecode(BaseRecLabelDecode):
preds_idx = preds.argmax(axis=2)
preds_prob = preds.max(axis=2)
text = self.decode(preds_idx, preds_prob)
text = self.decode(preds_idx, preds_prob, is_remove_duplicate=True)
if label is None:
return text
label = self.decode(label, is_remove_duplicate=False)
label = self.decode(label)
return text, label
def add_special_char(self, dict_character):
......
a
r
b
i
c
_
m
g
/
1
0
I
L
S
V
R
C
2
v
l
6
3
9
.
j
p
ا
ل
م
ر
ج
و
ح
ي
ة
5
8
7
أ
ب
ض
4
ك
س
ه
ث
ن
ط
ع
ت
غ
خ
ف
ئ
ز
إ
د
ص
ظ
ذ
ش
ى
ق
ؤ
آ
ء
s
e
n
w
t
u
z
d
A
N
G
h
o
E
T
H
O
B
y
F
U
J
X
W
P
Z
M
k
q
Y
Q
D
f
K
x
'
%
-
#
@
!
&
$
,
:
é
?
+
É
(
b
e
_
i
m
g
/
2
0
I
L
S
V
R
C
1
v
a
l
6
9
4
3
.
j
p
п
а
з
б
у
г
н
ц
ь
8
м
л
і
о
ў
ы
7
5
М
х
с
р
ф
я
е
д
ж
ю
ч
й
к
Д
в
Б
т
І
ш
ё
э
К
Л
Н
А
Ж
Г
В
П
З
Е
О
Р
С
У
Ё
Й
Т
Ч
Э
Ц
Ю
Ш
Ф
Х
Я
Ь
Ы
Ў
s
c
n
w
M
o
t
T
E
A
B
u
h
y
k
r
H
d
Y
O
U
F
f
x
D
G
N
K
P
z
J
X
W
Z
Q
%
-
q
@
'
!
#
&
,
:
$
(
?
é
+
É
!
#
$
%
&
'
(
+
,
-
.
/
0
1
2
3
4
5
6
7
8
9
:
?
@
A
B
C
D
E
F
G
H
I
J
K
L
M
N
O
P
Q
R
S
T
U
V
W
X
Y
Z
_
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
É
é
А
Б
В
Г
Д
Е
Ж
З
И
Й
К
Л
М
Н
О
П
Р
С
Т
У
Ф
Х
Ц
Ч
Ш
Щ
Ъ
Ю
Я
а
б
в
г
д
е
ж
з
и
й
к
л
м
н
о
п
р
с
т
у
ф
х
ц
ч
ш
щ
ъ
ь
ю
я
此差异已折叠。
f
a
_
i
m
g
/
1
3
I
L
S
V
R
C
2
0
v
l
6
8
5
.
j
p
و
د
ر
ك
ن
ش
ه
ا
4
9
ی
ج
ِ
7
غ
ل
س
ز
ّ
ت
ک
گ
ي
م
ب
ف
چ
خ
ق
ژ
آ
ص
پ
َ
ع
ئ
ح
ٔ
ض
ُ
ذ
أ
ى
ط
ظ
ث
ة
ً
ء
ؤ
ْ
ۀ
إ
ٍ
ٌ
ٰ
ٓ
ٱ
s
c
e
n
w
N
E
W
Y
D
O
H
A
d
z
r
T
G
o
t
x
h
b
B
M
Z
u
P
F
y
q
U
K
k
J
Q
'
X
#
?
%
$
,
:
&
!
-
(
É
@
é
+
!
"
#
$
%
&
......@@ -83,45 +85,59 @@ w
x
y
z
¡
¢
£
¤
¥
¦
§
¨
©
ª
«
¬
­
®
¯
°
±
²
³
´
µ
·
¸
¹
º
»
¼
½
¿
Â
Ã
Á
Ä
Å
Ê
Î
Ð
É
Ï
Ô
Ö
Ü
ß
à
á
â
ã
ä
å
æ
ç
è
é
ê
ë
í
ï
ñ
ò
ó
ô
ö
ø
ù
ú
û
ü
ō
Š
Ÿ
ʒ
β
δ
з
©
ª
«
¬
!
#
$
%
&
'
(
+
,
-
.
/
0
1
2
3
4
5
6
7
8
9
:
?
@
A
B
C
D
E
F
G
H
I
J
K
L
M
N
O
P
Q
R
S
T
U
V
W
X
Y
Z
_
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
É
é
ि
i
t
_
m
g
/
5
I
L
S
V
R
C
2
0
1
v
a
l
7
8
9
6
.
j
p
e
r
o
d
s
n
3
4
P
u
c
A
-
,
"
z
h
f
b
q
ì
'
à
O
è
G
ù
é
ò
;
F
E
B
N
H
k
:
U
T
X
D
K
?
[
M
­
x
y
(
)
W
ö
º
w
]
Q
J
+
ü
!
È
á
%
=
»
ñ
Ö
Y
ä
í
Z
«
@
ó
ø
ï
ú
ê
ç
Á
É
Å
ß
{
}
&
`
û
î
#
$
k
a
_
i
m
g
/
1
2
I
L
S
V
R
C
0
v
l
6
4
8
.
j
p
ಿ
7
5
3
9
-
,
s
c
e
n
w
o
u
t
d
E
A
T
B
Z
N
G
O
q
z
r
x
P
K
M
J
U
D
f
F
h
b
W
Y
y
H
X
Q
'
#
&
!
@
$
:
%
é
É
(
?
+
!
#
$
%
&
'
(
+
,
-
.
/
0
1
2
3
4
5
6
7
8
9
:
?
@
A
B
C
D
E
F
G
H
I
J
K
L
M
N
O
P
Q
R
S
T
U
V
W
X
Y
Z
_
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
É
é
ि
!
#
$
%
&
'
(
+
,
-
.
/
0
1
2
3
4
5
6
7
8
9
:
?
@
A
B
C
D
E
F
G
H
I
J
K
L
M
N
O
P
Q
R
S
T
U
V
W
X
Y
Z
_
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
É
é
ि
o
c
_
i
m
g
/
2
0
I
L
S
V
R
C
1
v
a
l
4
3
.
j
p
r
e
è
t
9
7
5
8
n
'
b
s
6
q
u
á
d
ò
à
h
z
f
ï
í
A
ç
x
ó
é
P
O
Ò
ü
k
À
F
-
ú
­
æ
Á
D
E
w
K
T
N
y
U
Z
G
B
J
H
M
W
Y
X
Q
%
$
,
@
&
!
:
(
#
?
+
É
p
u
_
i
m
g
/
8
I
L
S
V
R
C
2
0
1
v
a
l
6
7
4
5
.
j
q
e
s
t
ã
o
x
9
c
n
r
z
ç
õ
3
A
U
d
º
ô
­
,
E
;
ó
á
b
D
?
ú
ê
-
h
P
f
à
N
í
O
M
G
É
é
â
F
:
T
Á
"
Q
)
W
J
B
H
(
ö
%
Ö
«
w
K
y
!
k
]
'
Z
+
Ç
Õ
Y
À
X
µ
»
ª
Í
ü
ä
´
è
ñ
ß
ï
Ú
ë
Ô
Ï
Ó
[
Ì
<
Â
ò
§
³
ø
å
#
$
&
@
r
s
_
i
m
g
/
1
I
L
S
V
R
C
2
0
v
a
l
7
5
8
6
.
j
p
t
d
9
3
e
š
4
k
u
ć
c
n
đ
o
z
č
b
ž
f
Z
T
h
M
F
O
Š
B
H
A
E
Đ
Ž
D
P
G
Č
K
U
N
J
Ć
w
y
W
x
Y
X
q
Q
#
&
$
,
-
%
'
@
!
:
?
(
É
é
+
r
s
c
_
i
m
g
/
5
I
L
S
V
R
C
2
0
1
v
a
l
9
7
8
.
j
p
м
а
с
и
р
ћ
е
ш
3
4
о
г
н
з
в
л
6
т
ж
у
к
п
њ
д
ч
С
ј
ф
ц
љ
х
О
И
А
б
Ш
К
ђ
џ
М
В
З
Д
Р
У
Н
Т
Б
?
П
Х
Ј
Ц
Г
Љ
Л
Ф
e
n
w
E
F
A
N
f
o
b
M
G
t
y
W
k
P
u
H
B
T
z
h
O
Y
d
U
K
D
x
X
J
Z
Q
q
'
-
@
é
#
!
,
%
$
:
&
+
(
É
к
в
а
з
и
у
р
о
н
я
х
п
л
ы
г
е
т
м
д
ж
ш
ь
с
ё
б
й
ч
ю
ц
щ
М
э
ф
А
ъ
С
Ф
Ю
В
К
Т
Н
О
Э
У
И
Г
Л
Р
Д
Б
Ш
П
З
Х
Е
Ж
Я
Ц
Ч
Й
Щ
0
1
2
3
4
5
6
7
8
9
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
A
B
C
D
E
F
G
H
I
J
K
L
M
N
O
P
Q
R
S
T
U
V
W
X
Y
Z
t
a
_
i
m
g
/
3
I
L
S
V
R
C
2
0
1
v
l
9
7
8
.
j
p
ி
6
5
4
s
c
e
n
w
F
T
O
P
K
A
N
G
Y
E
M
H
U
B
o
b
D
d
r
W
u
y
f
X
k
q
h
J
z
Z
Q
x
-
'
$
,
%
@
é
!
#
+
É
&
:
(
?
t
e
_
i
m
g
/
5
I
L
S
V
R
C
2
0
1
v
a
l
3
4
8
9
.
j
p
ి
7
6
'
[
;
-
,
|
?
:
"
(
!
+
)
*
=
&
]
£
$
s
c
n
w
k
J
G
u
d
r
E
o
h
y
b
f
B
M
O
T
N
D
P
A
F
x
W
Y
U
H
K
X
z
Z
Q
q
É
%
#
@
é
u
g
_
i
m
/
1
I
L
S
V
R
C
2
0
v
a
l
8
5
3
6
9
.
j
p
ق
ا
پ
ل
4
7
ئ
ى
ش
ت
ي
ك
د
ف
ر
و
ن
ب
ە
خ
ې
چ
ۇ
ز
س
م
ۋ
گ
ڭ
ۆ
ۈ
ج
غ
ھ
ژ
s
c
e
n
w
P
E
D
U
d
r
b
y
B
o
O
Y
N
T
k
t
h
A
H
F
z
W
K
G
M
f
Z
X
Q
J
x
q
-
!
%
#
?
:
$
,
&
'
É
@
é
(
+
u
k
_
i
m
g
/
1
6
I
L
S
V
R
C
2
0
v
a
l
7
9
.
j
p
в
і
д
п
о
н
с
т
ю
4
5
3
а
и
м
е
р
ч
у
Б
з
л
к
8
А
В
г
є
б
ь
х
ґ
ш
ц
ф
я
щ
ж
Г
Х
У
Т
Е
І
Н
П
З
Л
Ю
С
Д
М
К
Р
Ф
О
Ц
И
Я
Ч
Ш
Ж
Є
Ґ
Ь
s
c
e
n
w
A
P
r
E
t
o
h
d
y
M
G
N
F
B
T
D
U
O
W
Z
f
H
Y
b
K
z
x
Q
X
q
J
$
-
'
#
&
%
?
:
!
,
+
@
(
é
É
u
r
_
i
m
g
/
3
I
L
S
V
R
C
2
0
1
v
a
l
9
7
8
.
j
p
چ
ٹ
پ
ا
ئ
ی
ے
4
6
و
ل
ن
ڈ
ھ
ک
ت
ش
ف
ق
ر
د
5
ب
ج
خ
ہ
س
ز
غ
ڑ
ں
آ
م
ؤ
ط
ص
ح
ع
گ
ث
ض
ذ
ۓ
ِ
ء
ظ
ً
ي
ُ
ۃ
أ
ٰ
ە
ژ
ۂ
ة
ّ
ك
ه
s
c
e
n
w
o
d
t
D
M
T
U
E
b
P
h
y
W
H
A
x
B
O
N
G
Y
Q
F
k
K
q
J
Z
f
z
X
'
@
&
!
,
:
$
-
#
?
%
é
+
(
É
x
i
_
m
g
/
1
0
I
L
S
V
R
C
2
v
a
l
3
6
4
5
.
j
p
Q
u
e
r
o
8
7
n
c
9
t
b
é
q
d
ó
y
F
s
,
O
í
T
f
"
U
M
h
:
P
H
A
E
D
z
N
á
ñ
ú
%
;
è
+
Y
-
B
G
(
)
¿
?
w
¡
!
X
É
K
k
Á
ü
Ú
«
»
J
'
ö
W
Z
º
Ö
­
[
]
Ç
ç
à
ä
û
ò
Í
ê
ô
ø
ª
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册