diff --git a/README_ch.md b/README_ch.md index 0430fe759f62155ad97d73db06445bbfe551c181..619e5a5c25917b407f035fe8c306de7f7d9541fb 100755 --- a/README_ch.md +++ b/README_ch.md @@ -8,7 +8,6 @@ PaddleOCR同时支持动态图与静态图两种编程范式 - 静态图版本:develop分支 **近期更新** -- 【预告】 PaddleOCR研发团队对最新发版内容技术深入解读,4月13日晚上19:00,[直播地址](https://live.bilibili.com/21689802) - 2021.4.8 release 2.1版本,新增AAAI 2021论文[端到端识别算法PGNet](./doc/doc_ch/pgnet.md)开源,[多语言模型](./doc/doc_ch/multi_languages.md)支持种类增加到80+。 - 2021.2.1 [FAQ](./doc/doc_ch/FAQ.md)新增5个高频问题,总数162个,每周一都会更新,欢迎大家持续关注。 - 2021.1.21 更新多语言识别模型,目前支持语种超过27种,包括中文简体、中文繁体、英文、法文、德文、韩文、日文、意大利文、西班牙文、葡萄牙文、俄罗斯文、阿拉伯文等,后续计划可以参考[多语言研发计划](https://github.com/PaddlePaddle/PaddleOCR/issues/1048) @@ -80,7 +79,7 @@ PaddleOCR同时支持动态图与静态图两种编程范式 - 算法介绍 - [文本检测](./doc/doc_ch/algorithm_overview.md) - [文本识别](./doc/doc_ch/algorithm_overview.md) - - [PP-OCR Pipline](#PP-OCR) + - [PP-OCR Pipeline](#PP-OCR) - [端到端PGNet算法](./doc/doc_ch/pgnet.md) - 模型训练/评估 - [文本检测](./doc/doc_ch/detection.md) @@ -115,7 +114,7 @@ PaddleOCR同时支持动态图与静态图两种编程范式 -## PP-OCR Pipline +## PP-OCR Pipeline
diff --git a/deploy/cpp_infer/include/config.h b/deploy/cpp_infer/include/config.h index dbfbc2df141042f1065b380010e1ea3ff3ccedab..cd02a997e304850ebc04ce2288f4e497dbb4be4a 100644 --- a/deploy/cpp_infer/include/config.h +++ b/deploy/cpp_infer/include/config.h @@ -49,6 +49,8 @@ public: this->det_db_unclip_ratio = stod(config_map_["det_db_unclip_ratio"]); + this->use_polygon_score = bool(stoi(config_map_["use_polygon_score"])); + this->det_model_dir.assign(config_map_["det_model_dir"]); this->rec_model_dir.assign(config_map_["rec_model_dir"]); @@ -86,6 +88,8 @@ public: double det_db_unclip_ratio = 2.0; + bool use_polygon_score = false; + std::string det_model_dir; std::string rec_model_dir; diff --git a/deploy/cpp_infer/include/ocr_det.h b/deploy/cpp_infer/include/ocr_det.h index bab9c95fa4a3f1cb160ccbf9ca4587fa4c2ba16a..18318c9c4e37136db62c1338db1b58f82859f037 100644 --- a/deploy/cpp_infer/include/ocr_det.h +++ b/deploy/cpp_infer/include/ocr_det.h @@ -44,7 +44,8 @@ public: const bool &use_mkldnn, const int &max_side_len, const double &det_db_thresh, const double &det_db_box_thresh, - const double &det_db_unclip_ratio, const bool &visualize, + const double &det_db_unclip_ratio, + const bool &use_polygon_score, const bool &visualize, const bool &use_tensorrt, const bool &use_fp16) { this->use_gpu_ = use_gpu; this->gpu_id_ = gpu_id; @@ -57,6 +58,7 @@ public: this->det_db_thresh_ = det_db_thresh; this->det_db_box_thresh_ = det_db_box_thresh; this->det_db_unclip_ratio_ = det_db_unclip_ratio; + this->use_polygon_score_ = use_polygon_score; this->visualize_ = visualize; this->use_tensorrt_ = use_tensorrt; @@ -85,6 +87,7 @@ private: double det_db_thresh_ = 0.3; double det_db_box_thresh_ = 0.5; double det_db_unclip_ratio_ = 2.0; + bool use_polygon_score_ = false; bool visualize_ = true; bool use_tensorrt_ = false; diff --git a/deploy/cpp_infer/include/postprocess_op.h b/deploy/cpp_infer/include/postprocess_op.h index 
a600ea6d106706af2fbadb249c862fc764714f9e..b384b79b3041bfcb96f042c6450d3c6e54f00498 100644 --- a/deploy/cpp_infer/include/postprocess_op.h +++ b/deploy/cpp_infer/include/postprocess_op.h @@ -55,7 +55,8 @@ public: std::vector>> BoxesFromBitmap(const cv::Mat pred, const cv::Mat bitmap, - const float &box_thresh, const float &det_db_unclip_ratio); + const float &box_thresh, const float &det_db_unclip_ratio, + const bool &use_polygon_score); std::vector>> FilterTagDetRes(std::vector>> boxes, diff --git a/deploy/cpp_infer/readme.md b/deploy/cpp_infer/readme.md index b62b1a4cf10b5abd345528ebf48e6a8de4387469..ee5a9ed4b9aa16b76836dc01096ae132fead56dd 100644 --- a/deploy/cpp_infer/readme.md +++ b/deploy/cpp_infer/readme.md @@ -183,7 +183,7 @@ cmake .. \ make -j ``` -`OPENCV_DIR`为opencv编译安装的地址;`LIB_DIR`为下载(`paddle_inference`文件夹)或者编译生成的Paddle预测库地址(`build/paddle_inference_install_dir`文件夹);`CUDA_LIB_DIR`为cuda库文件地址,在docker中;为`/usr/local/cuda/lib64`;`CUDNN_LIB_DIR`为cudnn库文件地址,在docker中为`/usr/lib/x86_64-linux-gnu/`。 +`OPENCV_DIR`为opencv编译安装的地址;`LIB_DIR`为下载(`paddle_inference`文件夹)或者编译生成的Paddle预测库地址(`build/paddle_inference_install_dir`文件夹);`CUDA_LIB_DIR`为cuda库文件地址,在docker中为`/usr/local/cuda/lib64`;`CUDNN_LIB_DIR`为cudnn库文件地址,在docker中为`/usr/lib/x86_64-linux-gnu/`。 * 编译完成之后,会在`build`文件夹下生成一个名为`ocr_system`的可执行文件。 @@ -211,6 +211,7 @@ max_side_len 960 # 输入图像长宽大于960时,等比例缩放图像,使 det_db_thresh 0.3 # 用于过滤DB预测的二值化图像,设置为0.-0.3对结果影响不明显 det_db_box_thresh 0.5 # DB后处理过滤box的阈值,如果检测存在漏框情况,可酌情减小 det_db_unclip_ratio 1.6 # 表示文本框的紧致程度,越小则文本框更靠近文本 +use_polygon_score 1 # 是否使用多边形框计算bbox score,0表示使用矩形框计算。矩形框计算速度更快,多边形框对弯曲文本区域计算更准确。 det_model_dir ./inference/det_db # 检测模型inference model地址 # cls config diff --git a/deploy/cpp_infer/readme_en.md b/deploy/cpp_infer/readme_en.md index cfda7ca05d49b9ba2534aa6bfc797425bb0dc6c5..913ba1f91668d682c7c3fa614f8997293d52db89 100644 --- a/deploy/cpp_infer/readme_en.md +++ b/deploy/cpp_infer/readme_en.md @@ -217,6 +217,7 @@ max_side_len 960 # Limit the maximum image height and 
width to 960 det_db_thresh 0.3 # Used to filter the binarized image of DB prediction, setting 0.-0.3 has no obvious effect on the result det_db_box_thresh 0.5 # DDB post-processing filter box threshold, if there is a missing box detected, it can be reduced as appropriate det_db_unclip_ratio 1.6 # Indicates the compactness of the text box, the smaller the value, the closer the text box to the text +use_polygon_score 1 # Whether to use polygon box to calculate bbox score, 0 means to use rectangle box to calculate. Use rectangular box to calculate faster, and polygonal box more accurate for curved text area. det_model_dir ./inference/det_db # Address of detection inference model # cls config diff --git a/deploy/cpp_infer/src/main.cpp b/deploy/cpp_infer/src/main.cpp index 5c9042d4e80c569cfd4a57a84ad5c594e69dc350..588c8374ab341163835aea2ba6c7132640c74c64 100644 --- a/deploy/cpp_infer/src/main.cpp +++ b/deploy/cpp_infer/src/main.cpp @@ -59,7 +59,8 @@ int main(int argc, char **argv) { config.gpu_mem, config.cpu_math_library_num_threads, config.use_mkldnn, config.max_side_len, config.det_db_thresh, config.det_db_box_thresh, config.det_db_unclip_ratio, - config.visualize, config.use_tensorrt, config.use_fp16); + config.use_polygon_score, config.visualize, + config.use_tensorrt, config.use_fp16); Classifier *cls = nullptr; if (config.use_angle_cls == true) { diff --git a/deploy/cpp_infer/src/ocr_det.cpp b/deploy/cpp_infer/src/ocr_det.cpp index 489940f062fa9f8093282d20441704dd5cb8b382..9bfee6138577288156496d9b533b4da906ae7268 100644 --- a/deploy/cpp_infer/src/ocr_det.cpp +++ b/deploy/cpp_infer/src/ocr_det.cpp @@ -109,9 +109,9 @@ void DBDetector::Run(cv::Mat &img, cv::Mat dilation_map; cv::Mat dila_ele = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(2, 2)); cv::dilate(bit_map, dilation_map, dila_ele); - boxes = post_processor_.BoxesFromBitmap(pred_map, dilation_map, - this->det_db_box_thresh_, - this->det_db_unclip_ratio_); + boxes = post_processor_.BoxesFromBitmap( + 
pred_map, dilation_map, this->det_db_box_thresh_, + this->det_db_unclip_ratio_, this->use_polygon_score_); boxes = post_processor_.FilterTagDetRes(boxes, ratio_h, ratio_w, srcimg); diff --git a/deploy/cpp_infer/src/postprocess_op.cpp b/deploy/cpp_infer/src/postprocess_op.cpp index 1b71c210497778fcb70ffe8630e29245ad00136d..e7db70f3bff81390728c6b373b89cf06c74e4eca 100644 --- a/deploy/cpp_infer/src/postprocess_op.cpp +++ b/deploy/cpp_infer/src/postprocess_op.cpp @@ -160,35 +160,49 @@ std::vector> PostProcessor::GetMiniBoxes(cv::RotatedRect box, } float PostProcessor::PolygonScoreAcc(std::vector contour, - cv::Mat pred){ + cv::Mat pred) { int width = pred.cols; int height = pred.rows; std::vector box_x; std::vector box_y; - for(int i=0; i> box_array, return score; } -std::vector>> -PostProcessor::BoxesFromBitmap(const cv::Mat pred, const cv::Mat bitmap, - const float &box_thresh, - const float &det_db_unclip_ratio) { +std::vector>> PostProcessor::BoxesFromBitmap( + const cv::Mat pred, const cv::Mat bitmap, const float &box_thresh, + const float &det_db_unclip_ratio, const bool &use_polygon_score) { const int min_size = 3; const int max_candidates = 1000; @@ -267,9 +280,12 @@ PostProcessor::BoxesFromBitmap(const cv::Mat pred, const cv::Mat bitmap, } float score; - score = BoxScoreFast(array, pred); - /* compute using polygon*/ - // score = PolygonScoreAcc(contours[_i], pred); + if (use_polygon_score) + /* compute using polygon*/ + score = PolygonScoreAcc(contours[_i], pred); + else + score = BoxScoreFast(array, pred); + if (score < box_thresh) continue; diff --git a/deploy/cpp_infer/tools/config.txt b/deploy/cpp_infer/tools/config.txt index 24e4ef0de7d844ba4bd6c11f2cba08766c0e5ddf..0e5f8472ab62f5fc646738bc2974736a0564b343 100644 --- a/deploy/cpp_infer/tools/config.txt +++ b/deploy/cpp_infer/tools/config.txt @@ -10,6 +10,7 @@ max_side_len 960 det_db_thresh 0.3 det_db_box_thresh 0.5 det_db_unclip_ratio 1.6 +use_polygon_score 1 det_model_dir 
./inference/ch_ppocr_mobile_v2.0_det_infer/ # cls config diff --git a/doc/doc_ch/multi_languages.md b/doc/doc_ch/multi_languages.md index 741602e3c26725304c8a5e8300969fbea6ece4d0..306eba36e463cb4aef20a1d8ff895ecfcc77d0ef 100644 --- a/doc/doc_ch/multi_languages.md +++ b/doc/doc_ch/multi_languages.md @@ -11,7 +11,7 @@ PaddleOCR 旨在打造一套丰富、领先、且实用的OCR工具库,不仅 其中英文模型支持,大小写字母和常见标点的检测识别,并优化了空格字符的识别:
- +
小语种模型覆盖了拉丁语系、阿拉伯语系、中文繁体、韩语、日语等等: @@ -19,6 +19,8 @@ PaddleOCR 旨在打造一套丰富、领先、且实用的OCR工具库,不仅
+ +
@@ -30,14 +32,9 @@ PaddleOCR 旨在打造一套丰富、领先、且实用的OCR工具库,不仅 - [2 快速使用](#快速使用) - [2.1 命令行运行](#命令行运行) - - [2.1.1 整图预测](#bash_检测+识别) - - [2.1.2 识别预测](#bash_识别) - - [2.1.3 检测预测](#bash_检测) - [2.2 python 脚本运行](#python_脚本运行) - - [2.2.1 整图预测](#python_检测+识别) - - [2.2.2 识别预测](#python_识别) - - [2.2.3 检测预测](#python_检测) - [3 自定义训练](#自定义训练) +- [4 预测部署](#预测部署) - [4 支持语种及缩写](#语种缩写) @@ -50,7 +47,7 @@ PaddleOCR 旨在打造一套丰富、领先、且实用的OCR工具库,不仅 pip install paddlepaddle # gpu -pip instll paddlepaddle-gpu +pip install paddlepaddle-gpu ``` @@ -108,8 +105,6 @@ paddleocr --image_dir doc/imgs/japan_2.jpg --lang=japan paddleocr --image_dir doc/imgs_words/japan/1.jpg --det false --lang=japan ``` -![](https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.0/doc/imgs_words/japan/1.jpg) - 结果是一个tuple,返回识别结果和识别置信度 ```text @@ -145,6 +140,9 @@ from paddleocr import PaddleOCR, draw_ocr ocr = PaddleOCR(lang="korean") # 首次执行会自动下载模型文件 img_path = 'doc/imgs/korean_1.jpg ' result = ocr.ocr(img_path) +# 可通过参数控制单独执行识别、检测 +# result = ocr.ocr(img_path, det=False) 只执行识别 +# result = ocr.ocr(img_path, rec=False) 只执行检测 # 打印检测框和识别结果 for line in result: print(line) @@ -166,59 +164,7 @@ im_show.save('result.jpg') -* 识别预测 - -``` -from paddleocr import PaddleOCR -ocr = PaddleOCR(lang="german") -img_path = 'PaddleOCR/doc/imgs_words/german/1.jpg' -result = ocr.ocr(img_path, det=False, cls=True) -for line in result: - print(line) -``` - - -![](../imgs_words/german/1.jpg) - -结果是一个tuple,只包含识别结果和识别置信度 - -``` -('leider auch jetzt', 0.97538936) -``` - -* 检测预测 - -```python -from paddleocr import PaddleOCR, draw_ocr -ocr = PaddleOCR() # need to run only once to download and load model into memory -img_path = 'PaddleOCR/doc/imgs_en/img_12.jpg' -result = ocr.ocr(img_path, rec=False) -for line in result: - print(line) - -# 显示结果 -from PIL import Image - -image = Image.open(img_path).convert('RGB') -im_show = draw_ocr(image, result, txts=None, scores=None, font_path='/path/to/PaddleOCR/doc/fonts/simfang.ttf') -im_show = 
Image.fromarray(im_show) -im_show.save('result.jpg') -``` -结果是一个list,每个item只包含文本框 -```bash -[[26.0, 457.0], [137.0, 457.0], [137.0, 477.0], [26.0, 477.0]] -[[25.0, 425.0], [372.0, 425.0], [372.0, 448.0], [25.0, 448.0]] -[[128.0, 397.0], [273.0, 397.0], [273.0, 414.0], [128.0, 414.0]] -...... -``` - -结果可视化 : - -
- -
- -ppocr 还支持方向分类, 更多使用方式请参考:[whl包使用说明](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.0/doc/doc_ch/whl.md)。 +ppocr 还支持方向分类, 更多使用方式请参考:[whl包使用说明](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.0/doc/doc_ch/whl.md) ## 3 自定义训练 @@ -229,84 +175,58 @@ ppocr 支持使用自己的数据进行自定义训练或finetune, 其中识别 具体数据准备、训练过程可参考:[文本检测](../doc_ch/detection.md)、[文本识别](../doc_ch/recognition.md),更多功能如预测部署、 数据标注等功能可以阅读完整的[文档教程](../../README_ch.md)。 + +## 4 预测部署 + +除了安装whl包进行快速预测,ppocr 也提供了多种预测部署方式,如有需求可阅读相关文档: +- [基于Python脚本预测引擎推理](./inference.md) +- [基于C++预测引擎推理](../../deploy/cpp_infer/readme.md) +- [服务化部署](../../deploy/hubserving/readme.md) +- [端侧部署](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/deploy/lite/readme.md) +- [Benchmark](./benchmark.md) + + + -## 4 支持语种及缩写 - -| 语种 | 描述 | 缩写 | -| --- | --- | --- | -|中文|chinese and english|ch| -|英文|english|en| -|法文|french|fr| -|德文|german|german| -|日文|japan|japan| -|韩文|korean|korean| -|中文繁体|chinese traditional |chinese_cht| -|意大利文| Italian |it| -|西班牙文|Spanish |es| -|葡萄牙文| Portuguese|pt| -|俄罗斯文|Russia|ru| -|阿拉伯文|Arabic|ar| -|印地文|Hindi|hi| -|维吾尔|Uyghur|ug| -|波斯文|Persian|fa| -|乌尔都文|Urdu|ur| -|塞尔维亚文(latin)| Serbian(latin) |rs_latin| -|欧西坦文|Occitan |oc| -|马拉地文|Marathi|mr| -|尼泊尔文|Nepali|ne| -|塞尔维亚文(cyrillic)|Serbian(cyrillic)|rs_cyrillic| -|保加利亚文|Bulgarian |bg| -|乌克兰文|Ukranian|uk| -|白俄罗斯文|Belarusian|be| -|泰卢固文|Telugu |te| -|泰米尔文|Tamil |ta| -|南非荷兰文 |Afrikaans |af| -|阿塞拜疆文 |Azerbaijani |az| -|波斯尼亚文|Bosnian|bs| -|捷克文|Czech|cs| -|威尔士文 |Welsh |cy| -|丹麦文 |Danish|da| -|爱沙尼亚文 |Estonian |et| -|爱尔兰文 |Irish |ga| -|克罗地亚文|Croatian |hr| -|匈牙利文|Hungarian |hu| -|印尼文|Indonesian|id| -|冰岛文 |Icelandic|is| -|库尔德文 |Kurdish|ku| -|立陶宛文|Lithuanian |lt| -|拉脱维亚文 |Latvian |lv| -|毛利文|Maori|mi| -|马来文 |Malay|ms| -|马耳他文 |Maltese |mt| -|荷兰文 |Dutch |nl| -|挪威文 |Norwegian |no| -|波兰文|Polish |pl| -| 罗马尼亚文|Romanian |ro| -| 斯洛伐克文|Slovak |sk| -| 斯洛文尼亚文|Slovenian |sl| -| 阿尔巴尼亚文|Albanian |sq| -| 瑞典文|Swedish |sv| -| 西瓦希里文|Swahili |sw| -| 塔加洛文|Tagalog |tl| -| 土耳其文|Turkish |tr| 
-| 乌兹别克文|Uzbek |uz| -| 越南文|Vietnamese |vi| -| 蒙古文|Mongolian |mn| -| 阿巴扎文|Abaza |abq| -| 阿迪赫文|Adyghe |ady| -| 卡巴丹文|Kabardian |kbd| -| 阿瓦尔文|Avar |ava| -| 达尔瓦文|Dargwa |dar| -| 因古什文|Ingush |inh| -| 拉克文|Lak |lbe| -| 莱兹甘文|Lezghian |lez| -|塔巴萨兰文 |Tabassaran |tab| -| 比尔哈文|Bihari |bh| -| 迈蒂利文|Maithili |mai| -| 昂加文|Angika |ang| -| 孟加拉文|Bhojpuri |bho| -| 摩揭陀文 |Magahi |mah| -| 那格浦尔文|Nagpur |sck| -| 尼瓦尔文|Newari |new| -| 保加利亚文 |Goan Konkani|gom| -| 沙特阿拉伯文|Saudi Arabia|sa| +## 5 支持语种及缩写 + +| 语种 | 描述 | 缩写 | | 语种 | 描述 | 缩写 | +| --- | --- | --- | ---|--- | --- | --- | +|中文|chinese and english|ch| |保加利亚文|Bulgarian |bg| +|英文|english|en| |乌克兰文|Ukranian|uk| +|法文|french|fr| |白俄罗斯文|Belarusian|be| +|德文|german|german| |泰卢固文|Telugu |te| +|日文|japan|japan| | |阿巴扎文|Abaza |abq| +|韩文|korean|korean| |泰米尔文|Tamil |ta| +|中文繁体|chinese traditional |ch_tra| |南非荷兰文 |Afrikaans |af| +|意大利文| Italian |it| |阿塞拜疆文 |Azerbaijani |az| +|西班牙文|Spanish |es| |波斯尼亚文|Bosnian|bs| +|葡萄牙文| Portuguese|pt| |捷克文|Czech|cs| +|俄罗斯文|Russia|ru| |威尔士文 |Welsh |cy| +|阿拉伯文|Arabic|ar| |丹麦文 |Danish|da| +|印地文|Hindi|hi| |爱沙尼亚文 |Estonian |et| +|维吾尔|Uyghur|ug| |爱尔兰文 |Irish |ga| +|波斯文|Persian|fa| |克罗地亚文|Croatian |hr| +|乌尔都文|Urdu|ur| |匈牙利文|Hungarian |hu| +|塞尔维亚文(latin)| Serbian(latin) |rs_latin| |印尼文|Indonesian|id| +|欧西坦文|Occitan |oc| |冰岛文 |Icelandic|is| +|马拉地文|Marathi|mr| |库尔德文 |Kurdish|ku| +|尼泊尔文|Nepali|ne| |立陶宛文|Lithuanian |lt| +|塞尔维亚文(cyrillic)|Serbian(cyrillic)|rs_cyrillic| |拉脱维亚文 |Latvian |lv| +|毛利文|Maori|mi| | 达尔瓦文|Dargwa |dar| +|马来文 |Malay|ms| | 因古什文|Ingush |inh| +|马耳他文 |Maltese |mt| | 拉克文|Lak |lbe| +|荷兰文 |Dutch |nl| | 莱兹甘文|Lezghian |lez| +|挪威文 |Norwegian |no| |塔巴萨兰文 |Tabassaran |tab| +|波兰文|Polish |pl| | 比尔哈文|Bihari |bh| +| 罗马尼亚文|Romanian |ro| | 迈蒂利文|Maithili |mai| +| 斯洛伐克文|Slovak |sk| | 昂加文|Angika |ang| +| 斯洛文尼亚文|Slovenian |sl| | 孟加拉文|Bhojpuri |bho| +| 阿尔巴尼亚文|Albanian |sq| | 摩揭陀文 |Magahi |mah| +| 瑞典文|Swedish |sv| | 那格浦尔文|Nagpur |sck| +| 西瓦希里文|Swahili |sw| | 尼瓦尔文|Newari |new| +| 塔加洛文|Tagalog |tl| | 保加利亚文 |Goan Konkani|gom| +| 
土耳其文|Turkish |tr| | 沙特阿拉伯文|Saudi Arabia|sa| +| 乌兹别克文|Uzbek |uz| | 阿瓦尔文|Avar |ava| +| 越南文|Vietnamese |vi| | 卡巴丹文|Kabardian |kbd| +| 蒙古文|Mongolian |mn| | 阿迪赫文|Adyghe |ady| diff --git a/doc/doc_en/multi_languages_en.md b/doc/doc_en/multi_languages_en.md index f801db5067e70e174491f41bc6ac5f9764364a0f..e58b782ca18d55dbd954382fd0df6f53910e2e52 100644 --- a/doc/doc_en/multi_languages_en.md +++ b/doc/doc_en/multi_languages_en.md @@ -13,7 +13,7 @@ Among them, the English model supports the detection and recognition of uppercas letters and common punctuation, and the recognition of space characters is optimized:
- +
The multilingual models cover Latin, Arabic, Traditional Chinese, Korean, Japanese, etc.: @@ -21,6 +21,8 @@ The multilingual models cover Latin, Arabic, Traditional Chinese, Korean, Japane
+ +
This document will briefly introduce how to use the multilingual model. @@ -31,14 +33,9 @@ This document will briefly introduce how to use the multilingual model. - [2 Quick Use](#Quick_Use) - [2.1 Command line operation](#Command_line_operation) - - [2.1.1 Prediction of the whole image](#bash_detection+recognition) - - [2.1.2 Recognition](#bash_Recognition) - - [2.1.3 Detection](#bash_detection) - [2.2 python script running](#python_Script_running) - - [2.2.1 Whole image prediction](#python_detection+recognition) - - [2.2.2 Recognition](#python_Recognition) - - [2.2.3 Detection](#python_detection) - [3 Custom Training](#Custom_Training) +- [4 Inference and Deployment](#inference) - [4 Supported languages and abbreviations](#language_abbreviations) @@ -51,7 +48,7 @@ This document will briefly introduce how to use the multilingual model. pip install paddlepaddle # gpu -pip instll paddlepaddle-gpu +pip install paddlepaddle-gpu ``` @@ -89,7 +86,7 @@ The specific supported [language] (#language_abbreviations) can be viewed in the paddleocr --image_dir doc/imgs/japan_2.jpg --lang=japan ``` -![](https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.1/doc/imgs/japan_2.jpg) +![](https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.0/doc/imgs/japan_2.jpg) The result is a list, each item contains a text box, text and recognition confidence ```text @@ -106,7 +103,7 @@ The result is a list, each item contains a text box, text and recognition confid paddleocr --image_dir doc/imgs_words/japan/1.jpg --det false --lang=japan ``` -![](https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.1/doc/imgs_words/japan/1.jpg) +![](https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.0/doc/imgs_words/japan/1.jpg) The result is a tuple, which returns the recognition result and recognition confidence @@ -143,6 +140,9 @@ from paddleocr import PaddleOCR, draw_ocr ocr = PaddleOCR(lang="korean") # The model file will be downloaded automatically 
when executed for the first time img_path ='doc/imgs/korean_1.jpg' result = ocr.ocr(img_path) +# Recognition and detection can be performed separately through parameter control +# result = ocr.ocr(img_path, det=False) Only perform recognition +# result = ocr.ocr(img_path, rec=False) Only perform detection # Print detection frame and recognition result for line in result: print(line) @@ -162,54 +162,6 @@ Visualization of results: ![](https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.1/doc/imgs_results/korean.jpg) -* Recognition - -``` -from paddleocr import PaddleOCR -ocr = PaddleOCR(lang="german") -img_path ='PaddleOCR/doc/imgs_words/german/1.jpg' -result = ocr.ocr(img_path, det=False, cls=True) -for line in result: - print(line) -``` - -![](https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.1/doc/imgs_words/german/1.jpg) - -The result is a tuple, which only contains the recognition result and recognition confidence - -``` -('leider auch jetzt', 0.97538936) -``` - -* Detection - -```python -from paddleocr import PaddleOCR, draw_ocr -ocr = PaddleOCR() # need to run only once to download and load model into memory -img_path ='PaddleOCR/doc/imgs_en/img_12.jpg' -result = ocr.ocr(img_path, rec=False) -for line in result: - print(line) - -# show result -from PIL import Image - -image = Image.open(img_path).convert('RGB') -im_show = draw_ocr(image, result, txts=None, scores=None, font_path='/path/to/PaddleOCR/doc/fonts/simfang.ttf') -im_show = Image.fromarray(im_show) -im_show.save('result.jpg') -``` -The result is a list, each item contains only text boxes -```bash -[[26.0, 457.0], [137.0, 457.0], [137.0, 477.0], [26.0, 477.0]] -[[25.0, 425.0], [372.0, 425.0], [372.0, 448.0], [25.0, 448.0]] -[[128.0, 397.0], [273.0, 397.0], [273.0, 414.0], [128.0, 414.0]] -...... 
-``` - -Visualization of results: -![](https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.1/doc/imgs_results/whl/12_det.jpg) - ppocr also supports direction classification. For more usage methods, please refer to: [whl package instructions](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.0/doc/doc_ch/whl.md). @@ -221,84 +173,61 @@ Modify the training data path, dictionary and other parameters. For specific data preparation and training process, please refer to: [Text Detection](../doc_en/detection_en.md), [Text Recognition](../doc_en/recognition_en.md), more functions such as predictive deployment, For functions such as data annotation, you can read the complete [Document Tutorial](../../README.md). - -## 4 Support languages and abbreviations - -| Language | Abbreviation | -| --- | --- | -|chinese and english|ch| -|english|en| -|french|fr| -|german|german| -|japan|japan| -|korean|korean| -|chinese traditional |chinese_cht| -| Italian |it| -|Spanish |es| -| Portuguese|pt| -|Russia|ru| -|Arabic|ar| -|Hindi|hi| -|Uyghur|ug| -|Persian|fa| -|Urdu|ur| -| Serbian(latin) |rs_latin| -|Occitan |oc| -|Marathi|mr| -|Nepali|ne| -|Serbian(cyrillic)|rs_cyrillic| -|Bulgarian |bg| -|Ukranian|uk| -|Belarusian|be| -|Telugu |te| -|Tamil |ta| -|Afrikaans |af| -|Azerbaijani |az| -|Bosnian|bs| -|Czech|cs| -|Welsh |cy| -|Danish|da| -|Estonian |et| -|Irish |ga| -|Croatian |hr| -|Hungarian |hu| -|Indonesian|id| -|Icelandic|is| -|Kurdish|ku| -|Lithuanian |lt| - |Latvian |lv| -|Maori|mi| -|Malay|ms| -|Maltese |mt| -|Dutch |nl| -|Norwegian |no| -|Polish |pl| -|Romanian |ro| -|Slovak |sk| -|Slovenian |sl| -|Albanian |sq| -|Swedish |sv| -|Swahili |sw| -|Tagalog |tl| -|Turkish |tr| -|Uzbek |uz| -|Vietnamese |vi| -|Mongolian |mn| -|Abaza |abq| -|Adyghe |ady| -|Kabardian |kbd| -|Avar |ava| -|Dargwa |dar| -|Ingush |inh| -|Lak |lbe| -|Lezghian |lez| -|Tabassaran |tab| -|Bihari |bh| -|Maithili |mai| -|Angika |ang| -|Bhojpuri |bho| -|Magahi |mah| -|Nagpur |sck| -|Newari |new| 
-|Goan Konkani|gom| -|Saudi Arabia|sa| + + +## 4 Inference and Deployment + +In addition to installing the whl package for quick forecasting, +ppocr also provides a variety of forecasting deployment methods. +If necessary, you can read related documents: + +- [Python Inference](./inference_en.md) +- [C++ Inference](../../deploy/cpp_infer/readme_en.md) +- [Serving](../../deploy/hubserving/readme_en.md) +- [Mobile](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/deploy/lite/readme_en.md) +- [Benchmark](./benchmark_en.md) + + + +## 5 Support languages and abbreviations + +| Language | Abbreviation | | Language | Abbreviation | +| --- | --- | --- | --- | --- | +|chinese and english|ch| |Arabic|ar| +|english|en| |Hindi|hi| +|french|fr| |Uyghur|ug| +|german|german| |Persian|fa| +|japan|japan| |Urdu|ur| +|korean|korean| | Serbian(latin) |rs_latin| +|chinese traditional |ch_tra| |Occitan |oc| +| Italian |it| |Marathi|mr| +|Spanish |es| |Nepali|ne| +| Portuguese|pt| |Serbian(cyrillic)|rs_cyrillic| +|Russia|ru||Bulgarian |bg| +|Ukranian|uk| |Estonian |et| +|Belarusian|be| |Irish |ga| +|Telugu |te| |Croatian |hr| +|Saudi Arabia|sa| |Hungarian |hu| +|Tamil |ta| |Indonesian|id| +|Afrikaans |af| |Icelandic|is| +|Azerbaijani |az||Kurdish|ku| +|Bosnian|bs| |Lithuanian |lt| +|Czech|cs| |Latvian |lv| +|Welsh |cy| |Maori|mi| +|Danish|da| |Malay|ms| +|Maltese |mt| |Adyghe |ady| +|Dutch |nl| |Kabardian |kbd| +|Norwegian |no| |Avar |ava| +|Polish |pl| |Dargwa |dar| +|Romanian |ro| |Ingush |inh| +|Slovak |sk| |Lak |lbe| +|Slovenian |sl| |Lezghian |lez| +|Albanian |sq| |Tabassaran |tab| +|Swedish |sv| |Bihari |bh| +|Swahili |sw| |Maithili |mai| +|Tagalog |tl| |Angika |ang| +|Turkish |tr| |Bhojpuri |bho| +|Uzbek |uz| |Magahi |mah| +|Vietnamese |vi| |Nagpur |sck| +|Mongolian |mn| |Newari |new| +|Abaza |abq| |Goan Konkani|gom| diff --git a/doc/imgs_results/multi_lang/arabic_0.jpg b/doc/imgs_results/multi_lang/arabic_0.jpg new file mode 100644 index 
0000000000000000000000000000000000000000..9941b906427b8f08c076ecc47a328780bd857598 Binary files /dev/null and b/doc/imgs_results/multi_lang/arabic_0.jpg differ diff --git a/doc/imgs_results/multi_lang/img_12.jpg b/doc/imgs_results/multi_lang/img_12.jpg new file mode 100644 index 0000000000000000000000000000000000000000..822d562eda747389157b8e49927a1841a193c9e7 Binary files /dev/null and b/doc/imgs_results/multi_lang/img_12.jpg differ diff --git a/doc/imgs_results/multi_lang/korean_0.jpg b/doc/imgs_results/multi_lang/korean_0.jpg new file mode 100644 index 0000000000000000000000000000000000000000..3fe6305aa03edc5d6fe1bc10a140b55be619df72 Binary files /dev/null and b/doc/imgs_results/multi_lang/korean_0.jpg differ diff --git a/ppocr/modeling/backbones/rec_resnet_vd.py b/ppocr/modeling/backbones/rec_resnet_vd.py index 6837ea0fb2da3347fd8e115f859224e2a61fd578..0187deb96f111a2c2b545c7be42dba48c7352e17 100644 --- a/ppocr/modeling/backbones/rec_resnet_vd.py +++ b/ppocr/modeling/backbones/rec_resnet_vd.py @@ -249,7 +249,7 @@ class ResNet(nn.Layer): name=conv_name)) shortcut = True self.block_list.append(bottleneck_block) - self.out_channels = num_filters[block] + self.out_channels = num_filters[block] * 4 else: for block in range(len(depth)): shortcut = False diff --git a/ppocr/postprocess/rec_postprocess.py b/ppocr/postprocess/rec_postprocess.py index e6bc07aa98ab2991a77811d7ea0f723ed26f75e2..d353391c9af2b85bd01ba659f541fa1791461f68 100644 --- a/ppocr/postprocess/rec_postprocess.py +++ b/ppocr/postprocess/rec_postprocess.py @@ -218,6 +218,7 @@ class SRNLabelDecode(BaseRecLabelDecode): **kwargs): super(SRNLabelDecode, self).__init__(character_dict_path, character_type, use_space_char) + self.max_text_length = kwargs.get('max_text_length', 25) def __call__(self, preds, label=None, *args, **kwargs): pred = preds['predict'] @@ -229,9 +230,9 @@ class SRNLabelDecode(BaseRecLabelDecode): preds_idx = np.argmax(pred, axis=1) preds_prob = np.max(pred, axis=1) - preds_idx = 
np.reshape(preds_idx, [-1, 25]) + preds_idx = np.reshape(preds_idx, [-1, self.max_text_length]) - preds_prob = np.reshape(preds_prob, [-1, 25]) + preds_prob = np.reshape(preds_prob, [-1, self.max_text_length]) text = self.decode(preds_idx, preds_prob) diff --git a/ppocr/utils/save_load.py b/ppocr/utils/save_load.py index 02814d6208aba7ddfa6eac338229502b18b535da..e69b330f0344321d88e7d175ae093cd9e51296aa 100644 --- a/ppocr/utils/save_load.py +++ b/ppocr/utils/save_load.py @@ -121,7 +121,7 @@ def init_model(config, model, logger, optimizer=None, lr_scheduler=None): return best_model_dict -def save_model(net, +def save_model(model, optimizer, model_path, logger, @@ -133,7 +133,7 @@ def save_model(net, """ _mkdir_if_not_exist(model_path, logger) model_prefix = os.path.join(model_path, prefix) - paddle.save(net.state_dict(), model_prefix + '.pdparams') + paddle.save(model.state_dict(), model_prefix + '.pdparams') paddle.save(optimizer.state_dict(), model_prefix + '.pdopt') # save metric and config diff --git a/tools/export_model.py b/tools/export_model.py index f587b2bb363e01ab4c0b2429fc95f243085649d1..bdff89f755d465742f1c2a810f8ae76153a558c6 100755 --- a/tools/export_model.py +++ b/tools/export_model.py @@ -53,17 +53,19 @@ def main(): save_path = '{}/inference'.format(config['Global']['save_inference_dir']) if config['Architecture']['algorithm'] == "SRN": + max_text_length = config['Architecture']['Head']['max_text_length'] other_shape = [ paddle.static.InputSpec( shape=[None, 1, 64, 256], dtype='float32'), [ paddle.static.InputSpec( shape=[None, 256, 1], dtype="int64"), paddle.static.InputSpec( - shape=[None, 25, 1], - dtype="int64"), paddle.static.InputSpec( - shape=[None, 8, 25, 25], dtype="int64"), + shape=[None, max_text_length, 1], dtype="int64"), paddle.static.InputSpec( - shape=[None, 8, 25, 25], dtype="int64") + shape=[None, 8, max_text_length, max_text_length], + dtype="int64"), paddle.static.InputSpec( + shape=[None, 8, max_text_length, max_text_length], + 
dtype="int64") ] ] model = to_static(model, input_spec=other_shape) diff --git a/tools/program.py b/tools/program.py index d4c3583829f5946c73fde06d0838d9b4d9376858..7e54a2f8c2f1db8881aa476a309c8a8c563fcae5 100755 --- a/tools/program.py +++ b/tools/program.py @@ -18,6 +18,7 @@ from __future__ import print_function import os import sys +import platform import yaml import time import shutil @@ -159,6 +160,8 @@ def train(config, eval_batch_step = config['Global']['eval_batch_step'] global_step = 0 + if 'global_step' in pre_best_model_dict: + global_step = pre_best_model_dict['global_step'] start_eval_step = 0 if type(eval_batch_step) == list and len(eval_batch_step) >= 2: start_eval_step = eval_batch_step[0] @@ -196,8 +199,12 @@ def train(config, train_reader_cost = 0.0 batch_sum = 0 batch_start = time.time() - for idx, batch in enumerate(train_dataloader()): + max_iter = len(train_dataloader) - 1 if platform.system( + ) == "Windows" else len(train_dataloader) + for idx, batch in enumerate(train_dataloader): train_reader_cost += time.time() - batch_start + if idx >= max_iter: + break lr = optimizer.get_lr() images = batch[0] if use_srn: @@ -285,7 +292,8 @@ def train(config, is_best=True, prefix='best_accuracy', best_model_dict=best_model_dict, - epoch=epoch) + epoch=epoch, + global_step=global_step) best_str = 'best metric, {}'.format(', '.join([ '{}: {}'.format(k, v) for k, v in best_model_dict.items() ])) @@ -307,7 +315,8 @@ def train(config, is_best=False, prefix='latest', best_model_dict=best_model_dict, - epoch=epoch) + epoch=epoch, + global_step=global_step) if dist.get_rank() == 0 and epoch > 0 and epoch % save_epoch_step == 0: save_model( model, @@ -317,7 +326,8 @@ def train(config, is_best=False, prefix='iter_epoch_{}'.format(epoch), best_model_dict=best_model_dict, - epoch=epoch) + epoch=epoch, + global_step=global_step) best_str = 'best metric, {}'.format(', '.join( ['{}: {}'.format(k, v) for k, v in best_model_dict.items()])) logger.info(best_str) @@ -333,8 
+343,10 @@ def eval(model, valid_dataloader, post_process_class, eval_class, total_frame = 0.0 total_time = 0.0 pbar = tqdm(total=len(valid_dataloader), desc='eval model:') + max_iter = len(valid_dataloader) - 1 if platform.system( + ) == "Windows" else len(valid_dataloader) for idx, batch in enumerate(valid_dataloader): - if idx >= len(valid_dataloader): + if idx >= max_iter: break images = batch[0] start = time.time()