diff --git a/PPOCRLabel/PPOCRLabel.py b/PPOCRLabel/PPOCRLabel.py index 4d9c52740a5ca5bcdd891bb55ff769f23e7a2499..efe00a33b7917c8f8dd0965fb69f019c37b55e84 100644 --- a/PPOCRLabel/PPOCRLabel.py +++ b/PPOCRLabel/PPOCRLabel.py @@ -1031,7 +1031,7 @@ class MainWindow(QMainWindow, WindowMixin): for box in self.result_dic: trans_dic = {"label": box[1][0], "points": box[0], 'difficult': False} - if trans_dic["label"] is "" and mode == 'Auto': + if trans_dic["label"] == "" and mode == 'Auto': continue shapes.append(trans_dic) @@ -1764,7 +1764,7 @@ class MainWindow(QMainWindow, WindowMixin): QMessageBox.information(self, "Information", msg) return result = self.ocr.ocr(img_crop, cls=True, det=False) - if result[0][0] is not '': + if result[0][0] != '': result.insert(0, box) print('result in reRec is ', result) self.result_dic.append(result) @@ -1795,7 +1795,7 @@ class MainWindow(QMainWindow, WindowMixin): QMessageBox.information(self, "Information", msg) return result = self.ocr.ocr(img_crop, cls=True, det=False) - if result[0][0] is not '': + if result[0][0] != '': result.insert(0, box) print('result in reRec is ', result) if result[1][0] == shape.label: @@ -1991,7 +1991,7 @@ if __name__ == '__main__': resource_file = './libs/resources.py' if not os.path.exists(resource_file): output = os.system('pyrcc5 -o libs/resources.py resources.qrc') - assert output is 0, "operate the cmd have some problems ,please check whether there is a in the lib " \ + assert output == 0, "operate the cmd have some problems ,please check whether there is a in the lib " \ "directory resources.py " import libs.resources sys.exit(main()) diff --git a/README.md b/README.md index 5b6e4bd0b594d71edd3ab4f8da350475c3ac83b8..67d65e98e8d7b978e25d9582cf21d2b222858e69 100644 --- a/README.md +++ b/README.md @@ -5,10 +5,11 @@ PaddleOCR aims to create multilingual, awesome, leading, and practical OCR tools ## Notice PaddleOCR supports both dynamic graph and static graph programming paradigm -- Dynamic graph: 
dygraph branch (default), **supported by paddle 2.0rc1+ ([installation](./doc/doc_en/installation_en.md))** +- Dynamic graph: dygraph branch (default), **supported by paddle 2.0.0 ([installation](./doc/doc_en/installation_en.md))** - Static graph: develop branch **Recent updates** +- 2021.1.21 Update 25+ multilingual recognition models [models list](./doc/doc_en/models_list_en.md), including English, Chinese, German, French, Japanese, Spanish, Portuguese, Russian, Arabic and so on. Models for more languages will continue to be updated [Develop Plan](https://github.com/PaddlePaddle/PaddleOCR/issues/1048). - 2020.12.15 update Data synthesis tool, i.e., [Style-Text](./StyleText/README.md),easy to synthesize a large number of images which are similar to the target scene image. - 2020.11.25 Update a new data annotation tool, i.e., [PPOCRLabel](./PPOCRLabel/README.md), which is helpful to improve the labeling efficiency. Moreover, the labeling results can be used in training of the PP-OCR system directly. 
- 2020.9.22 Update the PP-OCR technical article, https://arxiv.org/abs/2009.09941 diff --git a/README_ch.md b/README_ch.md index e2e96a6bc5a126f8193e069ae37f403e37824891..d627ec450f8260d1b88593ea14d9172d03016b5f 100755 --- a/README_ch.md +++ b/README_ch.md @@ -4,11 +4,13 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力使用者训练出更好的模型,并应用落地。 ## 注意 PaddleOCR同时支持动态图与静态图两种编程范式 -- 动态图版本:dygraph分支(默认),需将paddle版本升级至2.0rc1+([快速安装](./doc/doc_ch/installation.md)) +- 动态图版本:dygraph分支(默认),需将paddle版本升级至2.0.0([快速安装](./doc/doc_ch/installation.md)) - 静态图版本:develop分支 **近期更新** +- 2021.1.26,28,29 PaddleOCR官方研发团队带来技术深入解读三日直播课,1月26日、28日、29日晚上19:30,[直播地址](https://live.bilibili.com/21689802) - 2021.1.25 [FAQ](./doc/doc_ch/FAQ.md)新增5个高频问题,总数157个,每周一都会更新,欢迎大家持续关注。 +- 2021.1.21 更新多语言识别模型,目前支持语种超过27种,[多语言模型下载](./doc/doc_ch/models_list.md),包括中文简体、中文繁体、英文、法文、德文、韩文、日文、意大利文、西班牙文、葡萄牙文、俄罗斯文、阿拉伯文等,后续计划可以参考[多语言研发计划](https://github.com/PaddlePaddle/PaddleOCR/issues/1048) - 2020.12.15 更新数据合成工具[Style-Text](./StyleText/README_ch.md),可以批量合成大量与目标场景类似的图像,在多个场景验证,效果明显提升。 - 2020.11.25 更新半自动标注工具[PPOCRLabel](./PPOCRLabel/README_ch.md),辅助开发者高效完成标注任务,输出格式与PP-OCR训练任务完美衔接。 - 2020.9.22 更新PP-OCR技术文章,https://arxiv.org/abs/2009.09941 diff --git a/StyleText/README.md b/StyleText/README.md index df4fbf3cfed44b26d690f70e2ab0d4b5c0aa1ff0..65a72ac808f5f875e1f42369e7d588027e9508a2 100644 --- a/StyleText/README.md +++ b/StyleText/README.md @@ -72,7 +72,7 @@ fusion_generator: python3 tools/synth_image.py -c configs/config.yml --style_image examples/style_images/2.jpg --text_corpus PaddleOCR --language en ``` -* Note 1: The language options is correspond to the corpus. Currently, the tool only supports English, Simplified Chinese and Korean. +* Note 1: The language options is correspond to the corpus. Currently, the tool only supports English(en), Simplified Chinese(ch) and Korean(ko). * Note 2: Synth-Text is mainly used to generate images for OCR recognition models. So the height of style images should be around 32 pixels. 
Images in other sizes may behave poorly. * Note 3: You can modify `use_gpu` in `configs/config.yml` to determine whether to use GPU for prediction. @@ -120,7 +120,7 @@ In actual application scenarios, it is often necessary to synthesize pictures in * `with_label`:Whether the `label_file` is label file list. * `CorpusGenerator`: * `method`:Method of CorpusGenerator,supports `FileCorpus` and `EnNumCorpus`. If `EnNumCorpus` is used,No other configuration is needed,otherwise you need to set `corpus_file` and `language`. - * `language`:Language of the corpus. + * `language`:Language of the corpus. Currently, the tool only supports English(en), Simplified Chinese(ch) and Korean(ko). * `corpus_file`: Filepath of the corpus. Corpus file should be a text file which will be split by line-endings('\n'). Corpus generator samples one line each time. diff --git a/StyleText/README_ch.md b/StyleText/README_ch.md index fd259ca018efcdcf6bdd1040ee1642424c120ae7..ccd1efaf1afae2c21c746f989e9b86bfed19e74b 100644 --- a/StyleText/README_ch.md +++ b/StyleText/README_ch.md @@ -63,10 +63,10 @@ fusion_generator: ```python python3 tools/synth_image.py -c configs/config.yml --style_image examples/style_images/2.jpg --text_corpus PaddleOCR --language en ``` -* 注1:语言选项和语料相对应,目前该工具只支持英文、简体中文和韩语。 +* 注1:语言选项和语料相对应,目前支持英文(en)、简体中文(ch)和韩语(ko)。 * 注2:Style-Text生成的数据主要应用于OCR识别场景。基于当前PaddleOCR识别模型的设计,我们主要支持高度在32左右的风格图像。 如果输入图像尺寸相差过多,效果可能不佳。 -* 注3:可以通过修改配置文件中的`use_gpu`(true或者false)参数来决定是否使用GPU进行预测。 +* 注3:可以通过修改配置文件`configs/config.yml`中的`use_gpu`(true或者false)参数来决定是否使用GPU进行预测。 例如,输入如下图片和语料"PaddleOCR": @@ -105,7 +105,7 @@ python3 tools/synth_image.py -c configs/config.yml --style_image examples/style_ * `with_label`:标志`label_file`是否为label文件。 * `CorpusGenerator`: * `method`:语料生成方法,目前有`FileCorpus`和`EnNumCorpus`可选。如果使用`EnNumCorpus`,则不需要填写其他配置,否则需要修改`corpus_file`和`language`; - * `language`:语料的语种; + * `language`:语料的语种,目前支持英文(en)、简体中文(ch)和韩语(ko); * `corpus_file`: 语料文件路径。语料文件应使用文本文件。语料生成器首先会将语料按行切分,之后每次随机选取一行。 
语料文件格式示例: diff --git a/configs/rec/multi_language/rec_en_number_lite_train.yml b/configs/rec/multi_language/rec_en_number_lite_train.yml index cee0512114fe9d488004a71cf6f0a0409822a4b5..13eda8481cad8ca308cd0629214b52146c3ebf13 100644 --- a/configs/rec/multi_language/rec_en_number_lite_train.yml +++ b/configs/rec/multi_language/rec_en_number_lite_train.yml @@ -16,7 +16,7 @@ Global: infer_img: # for data or label process character_dict_path: ppocr/utils/dict/en_dict.txt - character_type: ch + character_type: EN max_text_length: 25 infer_mode: False use_space_char: False diff --git a/deploy/slim/quantization/README.md b/deploy/slim/quantization/README.md index ccd4d06b4f16165f968402751b63a8fe58773e0b..4ac3f7c3016c9ef53724ad6f7745507cef3580a8 100644 --- a/deploy/slim/quantization/README.md +++ b/deploy/slim/quantization/README.md @@ -42,7 +42,7 @@ python deploy/slim/quantization/quant.py -c configs/det/det_mv3_db.yml -o Global # 比如下载提供的训练模型 wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar tar -xf ch_ppocr_mobile_v2.0_det_train.tar -python deploy/slim/quantization/quant.py -c configs/det/det_mv3_db.yml -o Global.pretrain_weights=./ch_ppocr_mobile_v2.0_det_train/best_accuracy Global.save_model_dir=./output/quant_model +python deploy/slim/quantization/quant.py -c configs/det/det_mv3_db.yml -o Global.pretrain_weights=./ch_ppocr_mobile_v2.0_det_train/best_accuracy Global.save_inference_dir=./output/quant_inference_model ``` 如果要训练识别模型的量化,修改配置文件和加载的模型参数即可。 diff --git a/deploy/slim/quantization/README_en.md b/deploy/slim/quantization/README_en.md index 7da0b3e7e7d5f72e45dc17864630b9725f6fc8ba..36407a2bb58ee3a36afc211ca7a8f0d786d1714f 100644 --- a/deploy/slim/quantization/README_en.md +++ b/deploy/slim/quantization/README_en.md @@ -58,7 +58,7 @@ python deploy/slim/quantization/quant.py -c configs/det/det_mv3_db.yml -o Global After getting the model after pruning and finetuning we, can export it as inference_model for predictive deployment: 
```bash -python deploy/slim/quantization/export_model.py -c configs/det/det_mv3_db.yml -o Global.checkpoints=output/quant_model/best_accuracy Global.save_model_dir=./output/quant_inference_model +python deploy/slim/quantization/export_model.py -c configs/det/det_mv3_db.yml -o Global.checkpoints=output/quant_model/best_accuracy Global.save_inference_dir=./output/quant_inference_model ``` ### 5. Deploy diff --git a/doc/doc_ch/FAQ.md b/doc/doc_ch/FAQ.md index bb61689bfae403062ba77ca6bb39719ef7d93725..11a9f35d9e8a92c9bef0da68e085739c1b1e821c 100755 --- a/doc/doc_ch/FAQ.md +++ b/doc/doc_ch/FAQ.md @@ -31,7 +31,7 @@ #### Q3.1.38: module 'paddle.distributed' has no attribute ‘get_rank’。 -**A**:Paddle版本问题,请安装2.0版本Paddle:pip install paddlepaddle==2.0.0rc1。 +**A**:Paddle版本问题,请安装2.0版本Paddle:pip install paddlepaddle==2.0.0。 #### Q3.4.30: PaddleOCR是否支持在华为鲲鹏920CPU上部署? @@ -397,13 +397,13 @@ Paddle2ONNX支持转换的[模型列表](https://github.com/PaddlePaddle/Paddle2 **A**:动态图版本正在紧锣密鼓开发中,将于2020年12月16日发布,敬请关注。 #### Q3.1.22:ModuleNotFoundError: No module named 'paddle.nn', -**A**:paddle.nn是Paddle2.0版本特有的功能,请安装大于等于Paddle 2.0.0rc1的版本,安装方式为 +**A**:paddle.nn是Paddle2.0版本特有的功能,请安装大于等于Paddle 2.0.0的版本,安装方式为 ``` -python3 -m pip install paddlepaddle-gpu==2.0.0rc1 -i https://mirror.baidu.com/pypi/simple +python3 -m pip install paddlepaddle-gpu==2.0.0 -i https://mirror.baidu.com/pypi/simple ``` #### Q3.1.23: ImportError: /usr/lib/x86_64_linux-gnu/libstdc++.so.6:version `CXXABI_1.3.11` not found (required by /usr/lib/python3.6/site-package/paddle/fluid/core+avx.so) -**A**:这个问题是glibc版本不足导致的,Paddle2.0rc1版本对gcc版本和glib版本有更高的要求,推荐gcc版本为8.2,glibc版本2.12以上。 +**A**:这个问题是glibc版本不足导致的,Paddle2.0.0版本对gcc版本和glib版本有更高的要求,推荐gcc版本为8.2,glibc版本2.12以上。 如果您的环境不满足这个要求,或者使用的docker镜像为: `hub.baidubce.com/paddlepaddle/paddle:latest-gpu-cuda9.0-cudnn7-dev` `hub.baidubce.com/paddlepaddle/paddle:latest-gpu-cuda9.0-cudnn7-dev`,安装Paddle2.0rc版本可能会出现上述错误,2.0版本推荐使用新的docker镜像 `paddlepaddle/paddle:latest-dev-cuda10.1-cudnn7-gcc82`。 @@ -415,7 
+415,7 @@ python3 -m pip install paddlepaddle-gpu==2.0.0rc1 -i https://mirror.baidu.com/py - develop:基于Paddle静态图开发的分支,推荐使用paddle1.8 或者2.0版本,该分支具备完善的模型训练、预测、推理部署、量化裁剪等功能,领先于release/1.1分支。 - release/1.1:PaddleOCR 发布的第一个稳定版本,基于静态图开发,具备完善的训练、预测、推理部署、量化裁剪等功能。 -- dygraph:基于Paddle动态图开发的分支,目前仍在开发中,未来将作为主要开发分支,运行要求使用Paddle2.0rc1版本,目前仍在开发中。 +- dygraph:基于Paddle动态图开发的分支,目前仍在开发中,未来将作为主要开发分支,运行要求使用Paddle2.0.0版本。 - release/2.0-rc1-0:PaddleOCR发布的第二个稳定版本,基于动态图和paddle2.0版本开发,动态图开发的工程更易于调试,目前支,支持模型训练、预测,暂不支持移动端部署。 如果您已经上手过PaddleOCR,并且希望在各种环境上部署PaddleOCR,目前建议使用静态图分支,develop或者release/1.1分支。如果您是初学者,想快速训练,调试PaddleOCR中的算法,建议尝鲜PaddleOCR dygraph分支。 @@ -432,7 +432,7 @@ python3 -m pip install paddlepaddle-gpu==2.0.0rc1 -i https://mirror.baidu.com/py #### Q3.1.27: 如何可视化acc,loss曲线图,模型网络结构图等? -**A**:在配置文件里有`use_visualdl`的参数,设置为True即可,更多的使用命令可以参考:[VisualDL使用指南](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc1/guides/03_VisualDL/visualdl.html)。 +**A**:在配置文件里有`use_visualdl`的参数,设置为True即可,更多的使用命令可以参考:[VisualDL使用指南](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/guides/03_VisualDL/visualdl.html)。 #### Q3.1.28: 在使用StyleText数据合成工具的时候,报错`ModuleNotFoundError: No module named 'utils.config'`,这是为什么呢? @@ -451,7 +451,7 @@ https://github.com/PaddlePaddle/PaddleOCR/blob/de3e2e7cd3b8b65ee02d7a41e570fa5b5 #### Q3.1.31: 怎么输出网络结构以及每层的参数信息? -**A**:可以使用 `paddle.summary`, 具体参考:https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc1/api/paddle/hapi/model_summary/summary_cn.html#summary。 +**A**:可以使用 `paddle.summary`, 具体参考:https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api/paddle/hapi/model_summary/summary_cn.html。 #### Q3.1.32 能否修改StyleText配置文件中的分辨率? 
@@ -485,7 +485,7 @@ StyleText的用途主要是:提取style_image中的字体、背景等style信 #### Q3.1.38: module 'paddle.distributed' has no attribute ‘get_rank’。 -**A**:Paddle版本问题,请安装2.0版本Paddle:pip install paddlepaddle==2.0.0rc1。 +**A**:Paddle版本问题,请安装2.0版本Paddle:pip install paddlepaddle==2.0.0。 ### 数据集 @@ -877,4 +877,4 @@ img = cv.imdecode(img_array, -1) **A**:我们目前已经通过Paddle2ONNX来支持各模型套件的转换,PaddleOCR基于PaddlePaddle 2.0的版本(dygraph分支)已经支持导出为ONNX,欢迎关注Paddle2ONNX,了解更多项目的进展: Paddle2ONNX项目:https://github.com/PaddlePaddle/Paddle2ONNX -Paddle2ONNX支持转换的[模型列表](https://github.com/PaddlePaddle/Paddle2ONNX/blob/develop/docs/zh/model_zoo.md#%E5%9B%BE%E5%83%8Focr) \ No newline at end of file +Paddle2ONNX支持转换的[模型列表](https://github.com/PaddlePaddle/Paddle2ONNX/blob/develop/docs/zh/model_zoo.md#%E5%9B%BE%E5%83%8Focr) diff --git a/doc/doc_ch/angle_class.md b/doc/doc_ch/angle_class.md index 4d7ff0d7aa839591df6e359d4f7295ab2f0cc445..6e68134a4d1b8d9b8927d67c9724ba88563383a4 100644 --- a/doc/doc_ch/angle_class.md +++ b/doc/doc_ch/angle_class.md @@ -63,7 +63,7 @@ PaddleOCR提供了训练脚本、评估脚本和预测脚本。 *如果您安装的是cpu版本,请将配置文件中的 `use_gpu` 字段修改为false* ``` -# GPU训练 支持单卡,多卡训练,通过 '--gpus' 指定卡号,如果使用的paddle版本小于2.0rc1,请使用'--select_gpus'参数选择要使用的GPU +# GPU训练 支持单卡,多卡训练,通过 '--gpus' 指定卡号。 # 启动训练,下面的命令已经写入train.sh文件中,只需修改文件里的配置文件路径即可 python3 -m paddle.distributed.launch --gpus '0,1,2,3,4,5,6,7' tools/train.py -c configs/cls/cls_mv3.yml ``` diff --git a/doc/doc_ch/detection.md b/doc/doc_ch/detection.md index 8f0f69796a38ac16643e1168c3ecf73b92daa19a..a8dee65a220e3c66d8502181dd2a542cb01a29b5 100644 --- a/doc/doc_ch/detection.md +++ b/doc/doc_ch/detection.md @@ -76,7 +76,7 @@ tar -xf ./pretrain_models/MobileNetV3_large_x0_5_pretrained.tar ./pretrain_model # 单机单卡训练 mv3_db 模型 python3 tools/train.py -c configs/det/det_mv3_db.yml \ -o Global.pretrain_weights=./pretrain_models/MobileNetV3_large_x0_5_pretrained/ -# 单机多卡训练,通过 --gpus 参数设置使用的GPU ID;如果使用的paddle版本小于2.0rc1,请使用'--select_gpus'参数选择要使用的GPU +# 单机多卡训练,通过 --gpus 参数设置使用的GPU ID python3 -m 
paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/det/det_mv3_db.yml \ -o Global.pretrain_weights=./pretrain_models/MobileNetV3_large_x0_5_pretrained/ ``` diff --git a/doc/doc_ch/inference.md b/doc/doc_ch/inference.md index ab5487037e69d40e38dde96fc8006022054f31df..c4601e1526d29e0a8c62030a4b47d2b2cc193d5d 100755 --- a/doc/doc_ch/inference.md +++ b/doc/doc_ch/inference.md @@ -306,10 +306,10 @@ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png ### 4. 多语言模型的推理 如果您需要预测的是其他语言模型,在使用inference模型预测时,需要通过`--rec_char_dict_path`指定使用的字典路径, 同时为了得到正确的可视化结果, -需要通过 `--vis_font_path` 指定可视化的字体路径,`doc/` 路径下有默认提供的小语种字体,例如韩文识别: +需要通过 `--vis_font_path` 指定可视化的字体路径,`doc/fonts/` 路径下有默认提供的小语种字体,例如韩文识别: ``` -python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_type="korean" --rec_char_dict_path="ppocr/utils/dict/korean_dict.txt" --vis_font_path="doc/korean.ttf" +python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_type="korean" --rec_char_dict_path="ppocr/utils/dict/korean_dict.txt" --vis_font_path="doc/fonts/korean.ttf" ``` ![](../imgs_words/korean/1.jpg) diff --git a/doc/doc_ch/installation.md b/doc/doc_ch/installation.md index 36565cd4197a9b8b8404f57b378aa49637cdc58b..fce151eb9fee567477c09eee211633f7377dddb3 100644 --- a/doc/doc_ch/installation.md +++ b/doc/doc_ch/installation.md @@ -2,7 +2,7 @@ 经测试PaddleOCR可在glibc 2.23上运行,您也可以测试其他glibc版本或安装glic 2.23 PaddleOCR 工作环境 -- PaddlePaddle 1.8+ ,推荐使用 PaddlePaddle 2.0rc1 +- PaddlePaddle 2.0.0 - python3.7 - glibc 2.23 - cuDNN 7.6+ (GPU) @@ -35,11 +35,11 @@ sudo docker container exec -it ppocr /bin/bash pip3 install --upgrade pip 如果您的机器安装的是CUDA9或CUDA10,请运行以下命令安装 -python3 -m pip install paddlepaddle-gpu==2.0.0rc1 -i https://mirror.baidu.com/pypi/simple +python3 -m pip install paddlepaddle-gpu==2.0.0 -i https://mirror.baidu.com/pypi/simple 
如果您的机器是CPU,请运行以下命令安装 -python3 -m pip install paddlepaddle==2.0.0rc1 -i https://mirror.baidu.com/pypi/simple +python3 -m pip install paddlepaddle==2.0.0 -i https://mirror.baidu.com/pypi/simple 更多的版本需求,请参照[安装文档](https://www.paddlepaddle.org.cn/install/quick)中的说明进行操作。 ``` diff --git a/doc/doc_ch/recognition.md b/doc/doc_ch/recognition.md index b473f3ac0a5007dee6ac5773e2b989454d4b8983..c5f459bdb88558b1cdea93b9b85eed0e4bb8433b 100644 --- a/doc/doc_ch/recognition.md +++ b/doc/doc_ch/recognition.md @@ -195,8 +195,6 @@ PaddleOCR支持训练和评估交替进行, 可以在 `configs/rec/rec_icdar15_t | :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | | [rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml) | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | | [rec_chinese_common_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yml) | CRNN | ResNet34_vd | None | BiLSTM | ctc | -| rec_chinese_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | -| rec_chinese_common_train.yml | CRNN | ResNet34_vd | None | BiLSTM | ctc | | rec_icdar15_train.yml | CRNN | Mobilenet_v3 large 0.5 | None | BiLSTM | ctc | | rec_mv3_none_bilstm_ctc.yml | CRNN | Mobilenet_v3 large 0.5 | None | BiLSTM | ctc | | rec_mv3_none_none_ctc.yml | Rosetta | Mobilenet_v3 large 0.5 | None | None | ctc | @@ -272,16 +270,109 @@ Eval: - 小语种 -PaddleOCR也提供了多语言的, `configs/rec/multi_languages` 路径下的提供了多语言的配置文件,目前PaddleOCR支持的多语言算法有: +PaddleOCR目前已支持26种(除中文外)语种识别,`configs/rec/multi_languages` 路径下提供了一个多语言的配置文件模版: [rec_multi_language_lite_train.yml](../../configs/rec/multi_language/rec_multi_language_lite_train.yml)。 -| 配置文件 | 算法名称 | backbone | trans | seq | pred | language | -| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | -| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 英语 | -| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 法语 | -| 
rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 德语 | -| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 日语 | -| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 韩语 | +您有两种方式创建所需的配置文件: +1. 通过脚本自动生成 + +[generate_multi_language_configs.py](../../configs/rec/multi_language/generate_multi_language_configs.py) 可以帮助您生成多语言模型的配置文件 + +- 以意大利语为例,如果您的数据是按如下格式准备的: + ``` + |-train_data + |- it_train.txt # 训练集标签 + |- it_val.txt # 验证集标签 + |- data + |- word_001.jpg + |- word_002.jpg + |- word_003.jpg + | ... + ``` + + 可以使用默认参数,生成配置文件: + + ```bash + # 该代码需要在指定目录运行 + cd PaddleOCR/configs/rec/multi_language/ + # 通过-l或者--language参数设置需要生成的语种的配置文件,该命令会将默认参数写入配置文件 + python3 generate_multi_language_configs.py -l it + ``` + +- 如果您的数据放置在其他位置,或希望使用自己的字典,可以通过指定相关参数来生成配置文件: + + ```bash + # -l或者--language字段是必须的 + # --train修改训练集,--val修改验证集,--data_dir修改数据集目录,--dict修改字典路径, -o修改对应默认参数 + cd PaddleOCR/configs/rec/multi_language/ + python3 generate_multi_language_configs.py -l it \ # 语种 + --train {path/of/train_label.txt} \ # 训练标签文件的路径 + --val {path/of/val_label.txt} \ # 验证集标签文件的路径 + --data_dir {train_data/path} \ # 训练数据的根目录 + --dict {path/of/dict} \ # 字典文件路径 + -o Global.use_gpu=False # 是否使用gpu + ... + + ``` + +2. 手动修改配置文件 + + 您也可以手动修改模版中的以下几个字段: + + ``` + Global: + use_gpu: True + epoch_num: 500 + ... + character_type: it # 需要识别的语种 + character_dict_path: {path/of/dict} # 字典文件所在路径 + + Train: + dataset: + name: SimpleDataSet + data_dir: train_data/ # 数据存放根目录 + label_file_list: ["./train_data/train_list.txt"] # 训练集label路径 + ... + + Eval: + dataset: + name: SimpleDataSet + data_dir: train_data/ # 数据存放根目录 + label_file_list: ["./train_data/val_list.txt"] # 验证集label路径 + ... 
+ + ``` + +目前PaddleOCR支持的多语言算法有: + +| 配置文件 | 算法名称 | backbone | trans | seq | pred | language | character_type | +| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | :-----: | +| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 中文繁体 | chinese_cht| +| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 英语(区分大小写) | EN | +| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 法语 | french | +| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 德语 | german | +| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 日语 | japan | +| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 韩语 | korean | +| rec_it_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 意大利语 | it | +| rec_xi_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 西班牙语 | xi | +| rec_pu_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 葡萄牙语 | pu | +| rec_ru_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 俄罗斯语 | ru | +| rec_ar_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 阿拉伯语 | ar | +| rec_hi_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 印地语 | hi | +| rec_ug_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 维吾尔语 | ug | +| rec_fa_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 波斯语 | fa | +| rec_ur_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 乌尔都语 | ur | +| rec_rs_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 塞尔维亚(latin)语 | rs | +| rec_oc_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 欧西坦语 | oc | +| rec_mr_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 马拉地语 | mr | +| rec_ne_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | 
ctc | 尼泊尔语 | ne | +| rec_rsc_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 塞尔维亚(cyrillic)语 | rsc | +| rec_bg_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 保加利亚语 | bg | +| rec_uk_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 乌克兰语 | uk | +| rec_be_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 白俄罗斯语 | be | +| rec_te_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 泰卢固语 | te | +| rec_ka_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 卡纳达语 | ka | +| rec_ta_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 泰米尔语 | ta | 多语言模型训练方式与中文模型一致,训练数据集均为100w的合成数据,少量的字体可以在 [百度网盘](https://pan.baidu.com/s/1bS_u207Rm7YbY33wOECKDA) 上下载,提取码:frgi。 diff --git a/doc/doc_en/angle_class_en.md b/doc/doc_en/angle_class_en.md index 8d9328700f3e638eb4576d132aa32fb93b3ad0c0..d1cc712f312bf8e70c0b399422519217df323129 100644 --- a/doc/doc_en/angle_class_en.md +++ b/doc/doc_en/angle_class_en.md @@ -66,7 +66,7 @@ Start training: ``` # Set PYTHONPATH path export PYTHONPATH=$PYTHONPATH:. -# GPU training Support single card and multi-card training, specify the card number through --gpus. If your paddle version is less than 2.0rc1, please use '--selected_gpus' +# GPU training Support single card and multi-card training, specify the card number through --gpus. 
# Start training, the following command has been written into the train.sh file, just modify the configuration file path in the file python3 -m paddle.distributed.launch --gpus '0,1,2,3,4,5,6,7' tools/train.py -c configs/cls/cls_mv3.yml ``` diff --git a/doc/doc_en/detection_en.md b/doc/doc_en/detection_en.md index 5c4a63e23ed4fe8f6d398bc68dec830bef6c09c9..3ee9092cc6a6f50b19f20df646c9cb1949d5d80f 100644 --- a/doc/doc_en/detection_en.md +++ b/doc/doc_en/detection_en.md @@ -76,7 +76,7 @@ You can also use `-o` to change the training parameters without modifying the ym python3 tools/train.py -c configs/det/det_mv3_db.yml -o Optimizer.base_lr=0.0001 # multi-GPU training -# Set the GPU ID used by the '--gpus' parameter; If your paddle version is less than 2.0rc1, please use '--selected_gpus' +# Set the GPU ID used by the '--gpus' parameter. python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/det/det_mv3_db.yml -o Optimizer.base_lr=0.0001 diff --git a/doc/doc_en/inference_en.md b/doc/doc_en/inference_en.md index 98e3ef6378480022baaf6e82843294dab3fbcaf4..ccbb71847d5946e854b88817a162957af0e6ed00 100755 --- a/doc/doc_en/inference_en.md +++ b/doc/doc_en/inference_en.md @@ -315,10 +315,10 @@ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png ### 4. MULTILINGAUL MODEL INFERENCE If you need to predict other language models, when using inference model prediction, you need to specify the dictionary path used by `--rec_char_dict_path`. At the same time, in order to get the correct visualization results, -You need to specify the visual font path through `--vis_font_path`. There are small language fonts provided by default under the `doc/` path, such as Korean recognition: +You need to specify the visual font path through `--vis_font_path`. 
There are small language fonts provided by default under the `doc/fonts` path, such as Korean recognition: ``` -python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_type="korean" --rec_char_dict_path="ppocr/utils/dict/korean_dict.txt" --vis_font_path="doc/korean.ttf" +python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_type="korean" --rec_char_dict_path="ppocr/utils/dict/korean_dict.txt" --vis_font_path="doc/fonts/korean.ttf" ``` ![](../imgs_words/korean/1.jpg) diff --git a/doc/doc_en/installation_en.md b/doc/doc_en/installation_en.md index 7f1f0e83c94e4d4a18b99d620b4b192c47ffde7c..35c1881d12087e6509a68b504729d9ef20240e9c 100644 --- a/doc/doc_en/installation_en.md +++ b/doc/doc_en/installation_en.md @@ -3,7 +3,7 @@ After testing, paddleocr can run on glibc 2.23. You can also test other glibc versions or install glic 2.23 for the best compatibility. PaddleOCR working environment: -- PaddlePaddle 1.8+, Recommend PaddlePaddle 2.0rc1 +- PaddlePaddle 2.0.0 - python3.7 - glibc 2.23 @@ -38,10 +38,10 @@ sudo docker container exec -it ppocr /bin/bash pip3 install --upgrade pip # If you have cuda9 or cuda10 installed on your machine, please run the following command to install -python3 -m pip install paddlepaddle-gpu==2.0rc1 -i https://mirror.baidu.com/pypi/simple +python3 -m pip install paddlepaddle-gpu==2.0.0 -i https://mirror.baidu.com/pypi/simple # If you only have cpu on your machine, please run the following command to install -python3 -m pip install paddlepaddle==2.0rc1 -i https://mirror.baidu.com/pypi/simple +python3 -m pip install paddlepaddle==2.0.0 -i https://mirror.baidu.com/pypi/simple ``` For more software version requirements, please refer to the instructions in [Installation Document](https://www.paddlepaddle.org.cn/install/quick) for operation. 
diff --git a/doc/doc_en/models_list_en.md b/doc/doc_en/models_list_en.md index 3eb0cd237801aa62d1c741c177be7b73d9c08808..33033f8348fa4fb08d6e8998ff53cd62349c214e 100644 --- a/doc/doc_en/models_list_en.md +++ b/doc/doc_en/models_list_en.md @@ -93,7 +93,7 @@ python3 generate_multi_language_configs.py -l it \ |model name|description|config|model size|download| | --- | --- | --- | --- | --- | | french_mobile_v2.0_rec |Lightweight model for French recognition|[rec_french_lite_train.yml](../../configs/rec/multi_language/rec_french_lite_train.yml)|2.65M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/french_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/french_mobile_v2.0_rec_train.tar) | -| german_mobile_v2.0_rec |Lightweight model for French recognition|[rec_german_lite_train.yml](../../configs/rec/multi_language/rec_german_lite_train.yml)|2.65M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_train.tar) | +| german_mobile_v2.0_rec |Lightweight model for German recognition|[rec_german_lite_train.yml](../../configs/rec/multi_language/rec_german_lite_train.yml)|2.65M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_train.tar) | | korean_mobile_v2.0_rec |Lightweight model for Korean recognition|[rec_korean_lite_train.yml](../../configs/rec/multi_language/rec_korean_lite_train.yml)|3.9M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_train.tar) | | japan_mobile_v2.0_rec |Lightweight model for Japanese 
recognition|[rec_japan_lite_train.yml](../../configs/rec/multi_language/rec_japan_lite_train.yml)|4.23M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_train.tar) | | it_mobile_v2.0_rec |Lightweight model for Italian recognition|rec_it_lite_train.yml|2.53M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/it_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/it_mobile_v2.0_rec_train.tar) | diff --git a/doc/doc_en/recognition_en.md b/doc/doc_en/recognition_en.md index 7723d20b9f982bc2c121965f4cd7996c81aa42d5..22f89cdef080afe0b119d08d1e88f02ede5932c1 100644 --- a/doc/doc_en/recognition_en.md +++ b/doc/doc_en/recognition_en.md @@ -266,15 +266,116 @@ Eval: - Multi-language -PaddleOCR also provides multi-language. The configuration file in `configs/rec/multi_languages` provides multi-language configuration files. Currently, the multi-language algorithms supported by PaddleOCR are: - -| Configuration file | Algorithm name | backbone | trans | seq | pred | language | -| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | -| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | English | -| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | French | -| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | German | -| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Japanese | -| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Korean | +PaddleOCR currently supports 26 (except Chinese) language recognition. 
A multi-language configuration file template is +provided under the path `configs/rec/multi_language`: [rec_multi_language_lite_train.yml](../../configs/rec/multi_language/rec_multi_language_lite_train.yml). + +There are two ways to create the required configuration file: + +1. Automatically generated by script + +[generate_multi_language_configs.py](../../configs/rec/multi_language/generate_multi_language_configs.py) can help you generate configuration files for multi-language models. + +- Take Italian as an example, if your data is prepared in the following format: + ``` + |-train_data + |- it_train.txt # train_set label + |- it_val.txt # val_set label + |- data + |- word_001.jpg + |- word_002.jpg + |- word_003.jpg + | ... + ``` + + You can use the default parameters to generate a configuration file: + + ```bash + # The code needs to be run in the specified directory + cd PaddleOCR/configs/rec/multi_language/ + # Set the configuration file of the language to be generated through the -l or --language parameter. + # This command will write the default parameters into the configuration file + python3 generate_multi_language_configs.py -l it + ``` + +- If your data is placed in another location, or you want to use your own dictionary, you can generate the configuration file by specifying the relevant parameters: + + ```bash + # -l or --language field is required + # --train to modify the training set + # --val to modify the validation set + # --data_dir to modify the data set directory + # --dict to modify the dict path + # -o to modify the corresponding default parameters + cd PaddleOCR/configs/rec/multi_language/ + python3 generate_multi_language_configs.py -l it \ # language + --train {path/of/train_label.txt} \ # path of train_label + --val {path/of/val_label.txt} \ # path of val_label + --data_dir {train_data/path} \ # root directory of training data + --dict {path/of/dict} \ # path of dict + -o Global.use_gpu=False # whether to use gpu + ... + + ``` + +2. 
Manually modify the configuration file + + You can also manually modify the following fields in the template: + + ``` + Global: + use_gpu: True + epoch_num: 500 + ... + character_type: it # language + character_dict_path: {path/of/dict} # path of dict + + Train: + dataset: + name: SimpleDataSet + data_dir: train_data/ # root directory of training data + label_file_list: ["./train_data/train_list.txt"] # train label path + ... + + Eval: + dataset: + name: SimpleDataSet + data_dir: train_data/ # root directory of val data + label_file_list: ["./train_data/val_list.txt"] # val label path + ... + + ``` + +Currently, the multi-language algorithms supported by PaddleOCR are: + +| Configuration file | Algorithm name | backbone | trans | seq | pred | language | character_type | +| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | :-----: | +| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Chinese (Traditional) | chinese_cht | +| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | English(Case sensitive) | EN | +| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | French | french | +| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | German | german | +| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Japanese | japan | +| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Korean | korean | +| rec_it_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Italian | it | +| rec_xi_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Spanish | xi | +| rec_pu_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Portuguese | pu | +| rec_ru_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Russian | ru | +| rec_ar_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Arabic | ar |
+| rec_hi_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Hindi | hi | +| rec_ug_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Uyghur | ug | +| rec_fa_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Persian(Farsi) | fa | +| rec_ur_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Urdu | ur | +| rec_rs_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Serbian(latin) | rs | +| rec_oc_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Occitan | oc | +| rec_mr_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Marathi | mr | +| rec_ne_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Nepali | ne | +| rec_rsc_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Serbian(cyrillic) | rsc | +| rec_bg_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Bulgarian | bg | +| rec_uk_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Ukrainian | uk | +| rec_be_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Belarusian | be | +| rec_te_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Telugu | te | +| rec_ka_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Kannada | ka | +| rec_ta_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Tamil | ta | + The multi-language model training method is the same as the Chinese model. The training data set is 100w synthetic data. A small amount of fonts and test data can be downloaded on [Baidu Netdisk](https://pan.baidu.com/s/1bS_u207Rm7YbY33wOECKDA),Extraction code:frgi.
diff --git a/doc/fonts/arabic.ttf b/doc/fonts/arabic.ttf new file mode 100644 index 0000000000000000000000000000000000000000..064b6041ee32814d852e084f639dae75d044d357 Binary files /dev/null and b/doc/fonts/arabic.ttf differ diff --git a/doc/fonts/chinese_cht.ttf b/doc/fonts/chinese_cht.ttf new file mode 100644 index 0000000000000000000000000000000000000000..3416754fd35aecd6eb0d9acfc730ae10a408bffd Binary files /dev/null and b/doc/fonts/chinese_cht.ttf differ diff --git a/doc/fonts/cyrillic.ttf b/doc/fonts/cyrillic.ttf new file mode 100644 index 0000000000000000000000000000000000000000..be4bf6605808d15ab25c9cbbe1fda2a1d190ac8b Binary files /dev/null and b/doc/fonts/cyrillic.ttf differ diff --git a/doc/french.ttf b/doc/fonts/french.ttf similarity index 100% rename from doc/french.ttf rename to doc/fonts/french.ttf diff --git a/doc/german.ttf b/doc/fonts/german.ttf similarity index 100% rename from doc/german.ttf rename to doc/fonts/german.ttf diff --git a/doc/fonts/hindi.ttf b/doc/fonts/hindi.ttf new file mode 100644 index 0000000000000000000000000000000000000000..8b0c36f5868b935464f30883094b9556c3e41009 Binary files /dev/null and b/doc/fonts/hindi.ttf differ diff --git a/doc/japan.ttc b/doc/fonts/japan.ttc similarity index 100% rename from doc/japan.ttc rename to doc/fonts/japan.ttc diff --git a/doc/fonts/kannada.ttf b/doc/fonts/kannada.ttf new file mode 100644 index 0000000000000000000000000000000000000000..43b60d423ad5ea5f5528c9c9e5d6f013f87fa1d7 Binary files /dev/null and b/doc/fonts/kannada.ttf differ diff --git a/doc/korean.ttf b/doc/fonts/korean.ttf similarity index 100% rename from doc/korean.ttf rename to doc/fonts/korean.ttf diff --git a/doc/fonts/latin.ttf b/doc/fonts/latin.ttf new file mode 100644 index 0000000000000000000000000000000000000000..e392413ac2f82905b3c07073669c3e2058d20235 Binary files /dev/null and b/doc/fonts/latin.ttf differ diff --git a/doc/fonts/marathi.ttf b/doc/fonts/marathi.ttf new file mode 100644 index 
0000000000000000000000000000000000000000..a796d3edc6a4cc140a9360d0fc502a9d99352db0 Binary files /dev/null and b/doc/fonts/marathi.ttf differ diff --git a/doc/fonts/nepali.ttf b/doc/fonts/nepali.ttf new file mode 100644 index 0000000000000000000000000000000000000000..8b0c36f5868b935464f30883094b9556c3e41009 Binary files /dev/null and b/doc/fonts/nepali.ttf differ diff --git a/doc/fonts/persian.ttf b/doc/fonts/persian.ttf new file mode 100644 index 0000000000000000000000000000000000000000..bdb1c8d7402148127b7633c6b4cd1586e23745ab Binary files /dev/null and b/doc/fonts/persian.ttf differ diff --git a/doc/simfang.ttf b/doc/fonts/simfang.ttf similarity index 100% rename from doc/simfang.ttf rename to doc/fonts/simfang.ttf diff --git a/doc/fonts/spanish.ttf b/doc/fonts/spanish.ttf new file mode 100644 index 0000000000000000000000000000000000000000..532353d2778cd2bb37a5baf06f5daeea32729168 Binary files /dev/null and b/doc/fonts/spanish.ttf differ diff --git a/doc/fonts/tamil.ttf b/doc/fonts/tamil.ttf new file mode 100644 index 0000000000000000000000000000000000000000..2e9998e8d8218f1e868f06ba0db3e13b4620eed1 Binary files /dev/null and b/doc/fonts/tamil.ttf differ diff --git a/doc/fonts/telugu.ttf b/doc/fonts/telugu.ttf new file mode 100644 index 0000000000000000000000000000000000000000..12c91e41973a4704f52984e2089fdb2eaf1ed4a5 Binary files /dev/null and b/doc/fonts/telugu.ttf differ diff --git a/doc/fonts/urdu.ttf b/doc/fonts/urdu.ttf new file mode 100644 index 0000000000000000000000000000000000000000..625feee2e9616809c13e17eeb7da1aec58988b65 Binary files /dev/null and b/doc/fonts/urdu.ttf differ diff --git a/doc/fonts/uyghur.ttf b/doc/fonts/uyghur.ttf new file mode 100644 index 0000000000000000000000000000000000000000..625feee2e9616809c13e17eeb7da1aec58988b65 Binary files /dev/null and b/doc/fonts/uyghur.ttf differ diff --git a/doc/joinus.PNG b/doc/joinus.PNG index ee505a0c78b8625b0faadc9a6bc28980440158e5..22258be049c275567953018f26f6d3949297e72e 100644 Binary files 
a/doc/joinus.PNG and b/doc/joinus.PNG differ diff --git a/ppocr/data/__init__.py b/ppocr/data/__init__.py index 231cee196fbcfb9e960b34587ec6044ead51a005..7cb50d7a62aa3f24811e517768e0635ac7b7321a 100644 --- a/ppocr/data/__init__.py +++ b/ppocr/data/__init__.py @@ -65,6 +65,7 @@ def build_dataloader(config, mode, device, logger, seed=None): loader_config = config[mode]['loader'] batch_size = loader_config['batch_size_per_card'] drop_last = loader_config['drop_last'] + shuffle = loader_config['shuffle'] num_workers = loader_config['num_workers'] if 'use_shared_memory' in loader_config.keys(): use_shared_memory = loader_config['use_shared_memory'] @@ -75,14 +76,14 @@ def build_dataloader(config, mode, device, logger, seed=None): batch_sampler = DistributedBatchSampler( dataset=dataset, batch_size=batch_size, - shuffle=False, + shuffle=shuffle, drop_last=drop_last) else: #Distribute data to single card batch_sampler = BatchSampler( dataset=dataset, batch_size=batch_size, - shuffle=False, + shuffle=shuffle, drop_last=drop_last) data_loader = DataLoader( diff --git a/ppocr/data/imaug/label_ops.py b/ppocr/data/imaug/label_ops.py index 986cec3dba2a30bf9698cbd22b6a46c5279432ae..61c0c196b3a48911707dc5210a410145ec93a76d 100644 --- a/ppocr/data/imaug/label_ops.py +++ b/ppocr/data/imaug/label_ops.py @@ -18,6 +18,7 @@ from __future__ import print_function from __future__ import unicode_literals import numpy as np +import string class ClsLabelEncode(object): @@ -92,7 +93,10 @@ class BaseRecLabelEncode(object): character_type='ch', use_space_char=False): support_character_type = [ - 'ch', 'en', 'en_sensitive', 'french', 'german', 'japan', 'korean' + 'ch', 'en', 'EN_symbol', 'french', 'german', 'japan', 'korean', + 'EN', 'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs', + 'oc', 'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi', + 'mr', 'ne' ] assert character_type in support_character_type, "Only {} are supported now but get {}".format( support_character_type, 
character_type) @@ -103,9 +107,14 @@ class BaseRecLabelEncode(object): if character_type == "en": self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" dict_character = list(self.character_str) - elif character_type in ["ch", "french", "german", "japan", "korean"]: + elif character_type == "EN_symbol": + # same with ASTER setting (use 94 char). + self.character_str = string.printable[:-6] + dict_character = list(self.character_str) + elif character_type in support_character_type: self.character_str = "" - assert character_dict_path is not None, "character_dict_path should not be None when character_type is ch" + assert character_dict_path is not None, "character_dict_path should not be None when character_type is {}".format( + character_type) with open(character_dict_path, "rb") as fin: lines = fin.readlines() for line in lines: @@ -114,11 +123,6 @@ class BaseRecLabelEncode(object): if use_space_char: self.character_str += " " dict_character = list(self.character_str) - elif character_type == "en_sensitive": - # same with ASTER setting (use 94 char). 
- import string - self.character_str = string.printable[:-6] - dict_character = list(self.character_str) self.character_type = character_type dict_character = self.add_special_char(dict_character) self.dict = {} diff --git a/ppocr/modeling/backbones/det_mobilenet_v3.py b/ppocr/modeling/backbones/det_mobilenet_v3.py index f97bcfca472310cfd681025595b7bce57d1ccf85..bb451bbec9327e2624ab0d501a7adf4355dc3407 100755 --- a/ppocr/modeling/backbones/det_mobilenet_v3.py +++ b/ppocr/modeling/backbones/det_mobilenet_v3.py @@ -58,15 +58,15 @@ class MobileNetV3(nn.Layer): [5, 72, 40, True, 'relu', 2], [5, 120, 40, True, 'relu', 1], [5, 120, 40, True, 'relu', 1], - [3, 240, 80, False, 'hard_swish', 2], - [3, 200, 80, False, 'hard_swish', 1], - [3, 184, 80, False, 'hard_swish', 1], - [3, 184, 80, False, 'hard_swish', 1], - [3, 480, 112, True, 'hard_swish', 1], - [3, 672, 112, True, 'hard_swish', 1], - [5, 672, 160, True, 'hard_swish', 2], - [5, 960, 160, True, 'hard_swish', 1], - [5, 960, 160, True, 'hard_swish', 1], + [3, 240, 80, False, 'hardswish', 2], + [3, 200, 80, False, 'hardswish', 1], + [3, 184, 80, False, 'hardswish', 1], + [3, 184, 80, False, 'hardswish', 1], + [3, 480, 112, True, 'hardswish', 1], + [3, 672, 112, True, 'hardswish', 1], + [5, 672, 160, True, 'hardswish', 2], + [5, 960, 160, True, 'hardswish', 1], + [5, 960, 160, True, 'hardswish', 1], ] cls_ch_squeeze = 960 elif model_name == "small": @@ -75,14 +75,14 @@ class MobileNetV3(nn.Layer): [3, 16, 16, True, 'relu', 2], [3, 72, 24, False, 'relu', 2], [3, 88, 24, False, 'relu', 1], - [5, 96, 40, True, 'hard_swish', 2], - [5, 240, 40, True, 'hard_swish', 1], - [5, 240, 40, True, 'hard_swish', 1], - [5, 120, 48, True, 'hard_swish', 1], - [5, 144, 48, True, 'hard_swish', 1], - [5, 288, 96, True, 'hard_swish', 2], - [5, 576, 96, True, 'hard_swish', 1], - [5, 576, 96, True, 'hard_swish', 1], + [5, 96, 40, True, 'hardswish', 2], + [5, 240, 40, True, 'hardswish', 1], + [5, 240, 40, True, 'hardswish', 1], + [5, 120, 48, 
True, 'hardswish', 1], + [5, 144, 48, True, 'hardswish', 1], + [5, 288, 96, True, 'hardswish', 2], + [5, 576, 96, True, 'hardswish', 1], + [5, 576, 96, True, 'hardswish', 1], ] cls_ch_squeeze = 576 else: @@ -102,7 +102,7 @@ class MobileNetV3(nn.Layer): padding=1, groups=1, if_act=True, - act='hard_swish', + act='hardswish', name='conv1') self.stages = [] @@ -112,7 +112,8 @@ class MobileNetV3(nn.Layer): inplanes = make_divisible(inplanes * scale) for (k, exp, c, se, nl, s) in cfg: se = se and not self.disable_se - if s == 2 and i > 2: + start_idx = 2 if model_name == 'large' else 0 + if s == 2 and i > start_idx: self.out_channels.append(inplanes) self.stages.append(nn.Sequential(*block_list)) block_list = [] @@ -137,7 +138,7 @@ class MobileNetV3(nn.Layer): padding=0, groups=1, if_act=True, - act='hard_swish', + act='hardswish', name='conv_last')) self.stages.append(nn.Sequential(*block_list)) self.out_channels.append(make_divisible(scale * cls_ch_squeeze)) @@ -191,10 +192,11 @@ class ConvBNLayer(nn.Layer): if self.if_act: if self.act == "relu": x = F.relu(x) - elif self.act == "hard_swish": - x = F.activation.hard_swish(x) + elif self.act == "hardswish": + x = F.hardswish(x) else: - print("The activation function is selected incorrectly.") + print("The activation function({}) is selected incorrectly.". 
+ format(self.act)) exit() return x @@ -281,5 +283,5 @@ class SEModule(nn.Layer): outputs = self.conv1(outputs) outputs = F.relu(outputs) outputs = self.conv2(outputs) - outputs = F.activation.hard_sigmoid(outputs) + outputs = F.hardsigmoid(outputs, slope=0.2, offset=0.5) return inputs * outputs diff --git a/ppocr/modeling/backbones/rec_mobilenet_v3.py b/ppocr/modeling/backbones/rec_mobilenet_v3.py index bdf4a616d2cf03275ba311cff625a78d0140e442..1ff17159680372b00e6943e180e5fb638b39ec58 100644 --- a/ppocr/modeling/backbones/rec_mobilenet_v3.py +++ b/ppocr/modeling/backbones/rec_mobilenet_v3.py @@ -51,15 +51,15 @@ class MobileNetV3(nn.Layer): [5, 72, 40, True, 'relu', (large_stride[2], 1)], [5, 120, 40, True, 'relu', 1], [5, 120, 40, True, 'relu', 1], - [3, 240, 80, False, 'hard_swish', 1], - [3, 200, 80, False, 'hard_swish', 1], - [3, 184, 80, False, 'hard_swish', 1], - [3, 184, 80, False, 'hard_swish', 1], - [3, 480, 112, True, 'hard_swish', 1], - [3, 672, 112, True, 'hard_swish', 1], - [5, 672, 160, True, 'hard_swish', (large_stride[3], 1)], - [5, 960, 160, True, 'hard_swish', 1], - [5, 960, 160, True, 'hard_swish', 1], + [3, 240, 80, False, 'hardswish', 1], + [3, 200, 80, False, 'hardswish', 1], + [3, 184, 80, False, 'hardswish', 1], + [3, 184, 80, False, 'hardswish', 1], + [3, 480, 112, True, 'hardswish', 1], + [3, 672, 112, True, 'hardswish', 1], + [5, 672, 160, True, 'hardswish', (large_stride[3], 1)], + [5, 960, 160, True, 'hardswish', 1], + [5, 960, 160, True, 'hardswish', 1], ] cls_ch_squeeze = 960 elif model_name == "small": @@ -68,14 +68,14 @@ class MobileNetV3(nn.Layer): [3, 16, 16, True, 'relu', (small_stride[0], 1)], [3, 72, 24, False, 'relu', (small_stride[1], 1)], [3, 88, 24, False, 'relu', 1], - [5, 96, 40, True, 'hard_swish', (small_stride[2], 1)], - [5, 240, 40, True, 'hard_swish', 1], - [5, 240, 40, True, 'hard_swish', 1], - [5, 120, 48, True, 'hard_swish', 1], - [5, 144, 48, True, 'hard_swish', 1], - [5, 288, 96, True, 'hard_swish', 
(small_stride[3], 1)], - [5, 576, 96, True, 'hard_swish', 1], - [5, 576, 96, True, 'hard_swish', 1], + [5, 96, 40, True, 'hardswish', (small_stride[2], 1)], + [5, 240, 40, True, 'hardswish', 1], + [5, 240, 40, True, 'hardswish', 1], + [5, 120, 48, True, 'hardswish', 1], + [5, 144, 48, True, 'hardswish', 1], + [5, 288, 96, True, 'hardswish', (small_stride[3], 1)], + [5, 576, 96, True, 'hardswish', 1], + [5, 576, 96, True, 'hardswish', 1], ] cls_ch_squeeze = 576 else: @@ -96,7 +96,7 @@ class MobileNetV3(nn.Layer): padding=1, groups=1, if_act=True, - act='hard_swish', + act='hardswish', name='conv1') i = 0 block_list = [] @@ -124,7 +124,7 @@ class MobileNetV3(nn.Layer): padding=0, groups=1, if_act=True, - act='hard_swish', + act='hardswish', name='conv_last') self.pool = nn.MaxPool2D(kernel_size=2, stride=2, padding=0) diff --git a/ppocr/postprocess/rec_postprocess.py b/ppocr/postprocess/rec_postprocess.py index 656b9f7db8bcdfa00ce75739fd693e544aae322f..76a700e1599b143288814778dcc948126a98151d 100644 --- a/ppocr/postprocess/rec_postprocess.py +++ b/ppocr/postprocess/rec_postprocess.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import numpy as np +import string import paddle from paddle.nn import functional as F @@ -24,9 +25,10 @@ class BaseRecLabelDecode(object): character_type='ch', use_space_char=False): support_character_type = [ - 'ch', 'en', 'en_sensitive', 'french', 'german', 'japan', 'korean', 'it', - 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs', 'oc', 'rsc', 'bg', - 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi', 'mr', 'ne' + 'ch', 'en', 'EN_symbol', 'french', 'german', 'japan', 'korean', + 'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs', 'oc', + 'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi', 'mr', + 'ne', 'EN' ] assert character_type in support_character_type, "Only {} are supported now but get {}".format( support_character_type, character_type) @@ -37,9 +39,14 @@ class BaseRecLabelDecode(object): if character_type == "en": self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" dict_character = list(self.character_str) - elif character_type in ["ch", "french", "german", "japan", "korean"]: + elif character_type == "EN_symbol": + # same with ASTER setting (use 94 char). + self.character_str = string.printable[:-6] + dict_character = list(self.character_str) + elif character_type in support_character_type: self.character_str = "" - assert character_dict_path is not None, "character_dict_path should not be None when character_type is ch" + assert character_dict_path is not None, "character_dict_path should not be None when character_type is {}".format( + character_type) with open(character_dict_path, "rb") as fin: lines = fin.readlines() for line in lines: @@ -48,11 +55,7 @@ class BaseRecLabelDecode(object): if use_space_char: self.character_str += " " dict_character = list(self.character_str) - elif character_type == "en_sensitive": - # same with ASTER setting (use 94 char). 
- import string - self.character_str = string.printable[:-6] - dict_character = list(self.character_str) + else: raise NotImplementedError self.character_type = character_type diff --git a/tools/export_model.py b/tools/export_model.py index 58dc0defa8e481a17b1feea836d9175349831e73..1e9526e03d6b9001249d5891c37bee071c1f36a3 100755 --- a/tools/export_model.py +++ b/tools/export_model.py @@ -75,10 +75,17 @@ def main(): ] ] model = to_static(model, input_spec=other_shape) - else: - infer_shape = [3, 32, 100] if config['Architecture'][ - 'model_type'] != "det" else [3, 640, 640] + infer_shape = [3, -1, -1] + if config['Architecture']['model_type'] == "rec": + infer_shape = [3, 32, -1] # for rec model, H must be 32 + if 'Transform' in config['Architecture'] and config['Architecture'][ + 'Transform'] is not None and config['Architecture'][ + 'Transform']['name'] == 'TPS': + logger.info( + 'When there is tps in the network, variable length input is not supported, and the input size needs to be the same as during training' + ) + infer_shape[-1] = 100 model = to_static( model, input_spec=[ diff --git a/tools/infer/utility.py b/tools/infer/utility.py index 966fa3cc4c8c4e721fa83e440c9c6181937c7e96..4171a29bdd4194813638b72f0aae015da48fbcb1 100755 --- a/tools/infer/utility.py +++ b/tools/infer/utility.py @@ -70,7 +70,7 @@ def parse_args(): default="./ppocr/utils/ppocr_keys_v1.txt") parser.add_argument("--use_space_char", type=str2bool, default=True) parser.add_argument( - "--vis_font_path", type=str, default="./doc/simfang.ttf") + "--vis_font_path", type=str, default="./doc/fonts/simfang.ttf") parser.add_argument("--drop_score", type=float, default=0.5) # params for text classifier diff --git a/tools/program.py b/tools/program.py index 8ab17ee1bf0584ffb9ef10c9721bf9bcdc0148ef..694d64152f05ffd5e9329885149891f75a98ed84 100755 --- a/tools/program.py +++ b/tools/program.py @@ -218,7 +218,7 @@ def train(config, stats['lr'] = lr train_stats.update(stats) - if cal_metric_during_train: # 
onlt rec and cls need + if cal_metric_during_train: # only rec and cls need batch = [item.numpy() for item in batch] post_result = post_process_class(preds, batch[1]) eval_class(post_result, batch) @@ -253,19 +253,19 @@ def train(config, Model_Average.apply() cur_metirc = eval(model, valid_dataloader, post_process_class, eval_class) - cur_metirc_str = 'cur metirc, {}'.format(', '.join( - ['{}: {}'.format(k, v) for k, v in cur_metirc.items()])) - logger.info(cur_metirc_str) + cur_metric_str = 'cur metric, {}'.format(', '.join( + ['{}: {}'.format(k, v) for k, v in cur_metric.items()])) + logger.info(cur_metric_str) # logger metric if vdl_writer is not None: - for k, v in cur_metirc.items(): + for k, v in cur_metric.items(): if isinstance(v, (float, int)): vdl_writer.add_scalar('EVAL/{}'.format(k), - cur_metirc[k], global_step) - if cur_metirc[main_indicator] >= best_model_dict[ + cur_metric[k], global_step) + if cur_metric[main_indicator] >= best_model_dict[ main_indicator]: - best_model_dict.update(cur_metirc) + best_model_dict.update(cur_metric) best_model_dict['best_epoch'] = epoch save_model( model, @@ -276,7 +276,7 @@ def train(config, prefix='best_accuracy', best_model_dict=best_model_dict, epoch=epoch) - best_str = 'best metirc, {}'.format(', '.join([ + best_str = 'best metric, {}'.format(', '.join([ '{}: {}'.format(k, v) for k, v in best_model_dict.items() ])) logger.info(best_str) @@ -308,7 +308,7 @@ def train(config, prefix='iter_epoch_{}'.format(epoch), best_model_dict=best_model_dict, epoch=epoch) - best_str = 'best metirc, {}'.format(', '.join( + best_str = 'best metric, {}'.format(', '.join( ['{}: {}'.format(k, v) for k, v in best_model_dict.items()])) logger.info(best_str) if dist.get_rank() == 0 and vdl_writer is not None: @@ -338,13 +338,13 @@ def eval(model, valid_dataloader, post_process_class, eval_class): eval_class(post_result, batch) pbar.update(1) total_frame += len(images) - # Get final metirc,eg. 
acc or hmean - metirc = eval_class.get_metric() + # Get final metric,eg. acc or hmean + metric = eval_class.get_metric() pbar.close() model.train() - metirc['fps'] = total_frame / total_time - return metirc + metric['fps'] = total_frame / total_time + return metric def preprocess(is_train=False): diff --git a/train.sh b/train.sh index c511c51600cc2d939f0bc8c7f52a3f3c6ce52d58..8fe861a3d79d38929fc4a4f4464187f77d27ff2f 100644 --- a/train.sh +++ b/train.sh @@ -1,5 +1,2 @@ -# for paddle.__version__ >= 2.0rc1 +# recommended paddle.__version__ == 2.0.0 python3 -m paddle.distributed.launch --gpus '0,1,2,3,4,5,6,7' tools/train.py -c configs/rec/rec_mv3_none_bilstm_ctc.yml - -# for paddle.__version__ < 2.0rc1 -# python3 -m paddle.distributed.launch --selected_gpus '0,1,2,3,4,5,6,7' tools/train.py -c configs/rec/rec_mv3_none_bilstm_ctc.yml