diff --git a/deploy/cpp_infer/readme.md b/deploy/cpp_infer/readme.md index c62fe32bc310eb3f91a6c55c3ecf25cfa53c0c61..418879063be54c02048a9473f0ca8847a7589020 100644 --- a/deploy/cpp_infer/readme.md +++ b/deploy/cpp_infer/readme.md @@ -220,6 +220,7 @@ Specifically, --det=true \ --rec=true \ --cls=true \ + --rec_img_h=48\ ``` ##### 2. det+rec: @@ -231,6 +232,7 @@ Specifically, --det=true \ --rec=true \ --cls=false \ + --rec_img_h=48\ ``` ##### 3. det @@ -250,6 +252,7 @@ Specifically, --det=false \ --rec=true \ --cls=true \ + --rec_img_h=48\ ``` ##### 5. rec @@ -260,6 +263,7 @@ Specifically, --det=false \ --rec=true \ --cls=false \ + --rec_img_h=48\ ``` ##### 6. cls @@ -335,10 +339,10 @@ The detection results will be shown on the screen, which is as follows. ```bash predict img: ../../doc/imgs/12.jpg ../../doc/imgs/12.jpg -0 det boxes: [[79,553],[399,541],[400,573],[80,585]] rec text: 打浦路252935号 rec score: 0.933757 -1 det boxes: [[31,509],[510,488],[511,529],[33,549]] rec text: 绿洲仕格维花园公寓 rec score: 0.951745 -2 det boxes: [[181,456],[395,448],[396,480],[182,488]] rec text: 打浦路15号 rec score: 0.91956 -3 det boxes: [[43,413],[480,391],[481,428],[45,450]] rec text: 上海斯格威铂尔多大酒店 rec score: 0.915914 +0 det boxes: [[74,553],[427,542],[428,571],[75,582]] rec text: 打浦路252935号 rec score: 0.947724 +1 det boxes: [[23,507],[513,488],[515,529],[24,548]] rec text: 绿洲仕格维花园公寓 rec score: 0.993728 +2 det boxes: [[187,456],[399,448],[400,480],[188,488]] rec text: 打浦路15号 rec score: 0.964994 +3 det boxes: [[42,413],[483,391],[484,428],[43,450]] rec text: 上海斯格威铂尔大酒店 rec score: 0.980086 The detection visualized image saved in ./output//12.jpg ``` diff --git a/deploy/cpp_infer/readme_ch.md b/deploy/cpp_infer/readme_ch.md index 2a81e15a97cca45d525efe8739255acd12f8117f..cf14a6761300764a3200a7c04607d0b5212e99c7 100644 --- a/deploy/cpp_infer/readme_ch.md +++ b/deploy/cpp_infer/readme_ch.md @@ -229,6 +229,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir --det=true \ --rec=true \ --cls=true \ + --rec_img_h=48\ 
``` ##### 2. 检测+识别: @@ -240,6 +241,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir --det=true \ --rec=true \ --cls=false \ + --rec_img_h=48\ ``` ##### 3. 检测: @@ -259,6 +261,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir --det=false \ --rec=true \ --cls=true \ + --rec_img_h=48\ ``` ##### 5. 识别: @@ -269,6 +272,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir --det=false \ --rec=true \ --cls=false \ + --rec_img_h=48\ ``` ##### 6. 分类: @@ -343,10 +347,10 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir ```bash predict img: ../../doc/imgs/12.jpg ../../doc/imgs/12.jpg -0 det boxes: [[79,553],[399,541],[400,573],[80,585]] rec text: 打浦路252935号 rec score: 0.933757 -1 det boxes: [[31,509],[510,488],[511,529],[33,549]] rec text: 绿洲仕格维花园公寓 rec score: 0.951745 -2 det boxes: [[181,456],[395,448],[396,480],[182,488]] rec text: 打浦路15号 rec score: 0.91956 -3 det boxes: [[43,413],[480,391],[481,428],[45,450]] rec text: 上海斯格威铂尔多大酒店 rec score: 0.915914 +0 det boxes: [[74,553],[427,542],[428,571],[75,582]] rec text: 打浦路252935号 rec score: 0.947724 +1 det boxes: [[23,507],[513,488],[515,529],[24,548]] rec text: 绿洲仕格维花园公寓 rec score: 0.993728 +2 det boxes: [[187,456],[399,448],[400,480],[188,488]] rec text: 打浦路15号 rec score: 0.964994 +3 det boxes: [[42,413],[483,391],[484,428],[43,450]] rec text: 上海斯格威铂尔大酒店 rec score: 0.980086 The detection visualized image saved in ./output//12.jpg ``` diff --git a/doc/doc_ch/inference_ppocr.md b/doc/doc_ch/inference_ppocr.md index 5fb3811eb40addd506dfa37d257c00a0c2a44258..23e9f3b6df2357f165d60600de7c7cae0662bcdc 100644 --- a/doc/doc_ch/inference_ppocr.md +++ b/doc/doc_ch/inference_ppocr.md @@ -19,9 +19,9 @@ ``` # 下载超轻量中文检测模型: -wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar -tar xf ch_PP-OCRv2_det_infer.tar -python3 tools/infer/predict_det.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv2_det_infer/" +wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar +tar xf ch_PP-OCRv3_det_infer.tar +python3 
tools/infer/predict_det.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" ``` @@ -40,13 +40,13 @@ python3 tools/infer/predict_det.py --image_dir="./doc/imgs/00018069.jpg" --det_m 如果输入图片的分辨率比较大,而且想使用更大的分辨率预测,可以设置det_limit_side_len 为想要的值,比如1216: ``` -python3 tools/infer/predict_det.py --image_dir="./doc/imgs/1.jpg" --det_model_dir="./inference/ch_PP-OCRv2_det_infer/" --det_limit_type=max --det_limit_side_len=1216 +python3 tools/infer/predict_det.py --image_dir="./doc/imgs/1.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --det_limit_type=max --det_limit_side_len=1216 ``` 如果想使用CPU进行预测,执行命令如下 ``` -python3 tools/infer/predict_det.py --image_dir="./doc/imgs/1.jpg" --det_model_dir="./inference/ch_PP-OCRv2_det_infer/" --use_gpu=False +python3 tools/infer/predict_det.py --image_dir="./doc/imgs/1.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --use_gpu=False ``` @@ -63,9 +63,9 @@ python3 tools/infer/predict_det.py --image_dir="./doc/imgs/1.jpg" --det_model_di ``` # 下载超轻量中文识别模型: -wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar -tar xf ch_PP-OCRv2_rec_infer.tar -python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --rec_model_dir="./ch_PP-OCRv2_rec_infer/" +wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar +tar xf ch_PP-OCRv3_rec_infer.tar +python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --rec_image_shape=3,48,320 ``` ![](../imgs_words/ch/word_4.jpg) @@ -73,7 +73,7 @@ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg" 执行命令后,上面图像的预测结果(识别的文本和得分)会打印到屏幕上,示例如下: ```bash -Predicts of ./doc/imgs_words/ch/word_4.jpg:('实力活力', 0.98458153) +Predicts of ./doc/imgs_words/ch/word_4.jpg:('实力活力', 0.9956803321838379) ``` @@ -123,13 +123,13 @@ Predicts of ./doc/imgs_words/ch/word_4.jpg:['0', 0.9999982] ```shell # 使用方向分类器 -python3 tools/infer/predict_system.py 
--image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./inference/ch_PP-OCRv2_det_infer/" --cls_model_dir="./inference/cls/" --rec_model_dir="./inference/ch_PP-OCRv2_rec_infer/" --use_angle_cls=true +python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --cls_model_dir="./cls/" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --use_angle_cls=true --rec_image_shape=3,48,320 # 不使用方向分类器 -python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./inference/ch_PP-OCRv2_det_infer/" --rec_model_dir="./inference/ch_PP-OCRv2_rec_infer/" --use_angle_cls=false +python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --use_angle_cls=false --rec_image_shape=3,48,320 # 使用多进程 -python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./inference/ch_PP-OCRv2_det_infer/" --rec_model_dir="./inference/ch_PP-OCRv2_rec_infer/" --use_angle_cls=false --use_mp=True --total_process_num=6 +python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --use_angle_cls=false --use_mp=True --total_process_num=6 --rec_image_shape=3,48,320 ``` 执行命令后,识别结果图像如下: -![](../imgs_results/system_res_00018069.jpg) +![](../imgs_results/system_res_00018069_v3.jpg) diff --git a/doc/doc_ch/quickstart.md b/doc/doc_ch/quickstart.md index ce0f6b1570f1570f7d12bf1ad24d7d9f9914f5f0..c41186a20693a12a4cdc9c1a537487ebe5c3ae84 100644 --- a/doc/doc_ch/quickstart.md +++ b/doc/doc_ch/quickstart.md @@ -65,15 +65,13 @@ cd /path/to/ppocr_img * 检测+方向分类器+识别全流程:`--use_angle_cls true`设置使用方向分类器识别180度旋转文字,`--use_gpu false`设置不使用GPU ```bash - paddleocr --image_dir ./imgs/11.jpg --use_angle_cls true --use_gpu false + paddleocr --image_dir ./imgs/11.jpg --use_angle_cls true --use_gpu false 
--rec_image_shape 3,48,320 ``` 结果是一个list,每个item包含了文本框,文字和识别置信度 ```bash - [[[24.0, 36.0], [304.0, 34.0], [304.0, 72.0], [24.0, 74.0]], ['纯臻营养护发素', 0.964739]] - [[[24.0, 80.0], [172.0, 80.0], [172.0, 104.0], [24.0, 104.0]], ['产品信息/参数', 0.98069626]] - [[[24.0, 109.0], [333.0, 109.0], [333.0, 136.0], [24.0, 136.0]], ['(45元/每公斤,100公斤起订)', 0.9676722]] + [[[28.0, 37.0], [302.0, 39.0], [302.0, 72.0], [27.0, 70.0]], ('纯臻营养护发素', 0.9658738374710083)] ...... ``` @@ -86,35 +84,34 @@ cd /path/to/ppocr_img 结果是一个list,每个item只包含文本框 ```bash - [[26.0, 457.0], [137.0, 457.0], [137.0, 477.0], [26.0, 477.0]] - [[25.0, 425.0], [372.0, 425.0], [372.0, 448.0], [25.0, 448.0]] - [[128.0, 397.0], [273.0, 397.0], [273.0, 414.0], [128.0, 414.0]] + [[27.0, 459.0], [136.0, 459.0], [136.0, 479.0], [27.0, 479.0]] + [[28.0, 429.0], [372.0, 429.0], [372.0, 445.0], [28.0, 445.0]] ...... ``` - 单独使用识别:设置`--det`为`false` ```bash - paddleocr --image_dir ./imgs_words/ch/word_1.jpg --det false + paddleocr --image_dir ./imgs_words/ch/word_1.jpg --det false --rec_image_shape 3,48,320 ``` 结果是一个list,每个item只包含识别结果和识别置信度 ```bash - ['韩国小馆', 0.9907421] + ['韩国小馆', 0.994467] ``` -如需使用2.0模型,请指定参数`--version PP-OCR`,paddleocr默认使用2.1模型(`--versioin PP-OCRv2`)。更多whl包使用可参考[whl包文档](./whl.md) +如需使用2.0模型,请指定参数`--version PP-OCR`,paddleocr默认使用PP-OCRv3模型(`--version PP-OCRv3`)。更多whl包使用可参考[whl包文档](./whl.md) #### 2.1.2 多语言模型 -Paddleocr目前支持80个语种,可以通过修改`--lang`参数进行切换,对于英文模型,指定`--lang=en`。 +Paddleocr目前支持80个语种,可以通过修改`--lang`参数进行切换,对于英文模型,指定`--lang=en`, PP-OCRv3目前只支持中文和英文模型,其他多语言模型会陆续更新。 ``` bash -paddleocr --image_dir ./imgs_en/254.jpg --lang=en +paddleocr --image_dir ./imgs_en/254.jpg --lang=en --rec_image_shape 3,48,320 ```
@@ -125,13 +122,9 @@ paddleocr --image_dir ./imgs_en/254.jpg --lang=en 结果是一个list,每个item包含了文本框,文字和识别置信度 ```text -[('PHO CAPITAL', 0.95723116), [[66.0, 50.0], [327.0, 44.0], [327.0, 76.0], [67.0, 82.0]]] -[('107 State Street', 0.96311164), [[72.0, 90.0], [451.0, 84.0], [452.0, 116.0], [73.0, 121.0]]] -[('Montpelier Vermont', 0.97389287), [[69.0, 132.0], [501.0, 126.0], [501.0, 158.0], [70.0, 164.0]]] -[('8022256183', 0.99810505), [[71.0, 175.0], [363.0, 170.0], [364.0, 202.0], [72.0, 207.0]]] -[('REG 07-24-201706:59 PM', 0.93537045), [[73.0, 299.0], [653.0, 281.0], [654.0, 318.0], [74.0, 336.0]]] -[('045555', 0.99346405), [[509.0, 331.0], [651.0, 325.0], [652.0, 356.0], [511.0, 362.0]]] -[('CT1', 0.9988654), [[535.0, 367.0], [654.0, 367.0], [654.0, 406.0], [535.0, 406.0]]] +[[[67.0, 51.0], [327.0, 46.0], [327.0, 74.0], [68.0, 80.0]], ('PHOCAPITAL', 0.9944712519645691)] +[[[72.0, 92.0], [453.0, 84.0], [454.0, 114.0], [73.0, 122.0]], ('107 State Street', 0.9744491577148438)] +[[[69.0, 135.0], [501.0, 125.0], [501.0, 156.0], [70.0, 165.0]], ('Montpelier Vermont', 0.9357033967971802)] ...... ``` @@ -181,9 +174,7 @@ im_show.save('result.jpg') 结果是一个list,每个item包含了文本框,文字和识别置信度 ```bash -[[[24.0, 36.0], [304.0, 34.0], [304.0, 72.0], [24.0, 74.0]], ['纯臻营养护发素', 0.964739]] -[[[24.0, 80.0], [172.0, 80.0], [172.0, 104.0], [24.0, 104.0]], ['产品信息/参数', 0.98069626]] -[[[24.0, 109.0], [333.0, 109.0], [333.0, 136.0], [24.0, 136.0]], ['(45元/每公斤,100公斤起订)', 0.9676722]] +[[[28.0, 37.0], [302.0, 39.0], [302.0, 72.0], [27.0, 70.0]], ('纯臻营养护发素', 0.9658738374710083)] ...... 
``` diff --git a/doc/doc_ch/whl.md b/doc/doc_ch/whl.md index b2eb4ba17cf70edeaea36b5e54fe976605de850f..ba571186f1bc249f7c5bd5cd50b381e7b183df40 100644 --- a/doc/doc_ch/whl.md +++ b/doc/doc_ch/whl.md @@ -202,43 +202,39 @@ paddleocr -h * 检测+方向分类器+识别全流程 ```bash -paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --use_angle_cls true +paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --use_angle_cls true --rec_image_shape 3,48,320 ``` 结果是一个list,每个item包含了文本框,文字和识别置信度 ```bash -[[[24.0, 36.0], [304.0, 34.0], [304.0, 72.0], [24.0, 74.0]], ['纯臻营养护发素', 0.964739]] -[[[24.0, 80.0], [172.0, 80.0], [172.0, 104.0], [24.0, 104.0]], ['产品信息/参数', 0.98069626]] -[[[24.0, 109.0], [333.0, 109.0], [333.0, 136.0], [24.0, 136.0]], ['(45元/每公斤,100公斤起订)', 0.9676722]]µ +[[[28.0, 37.0], [302.0, 39.0], [302.0, 72.0], [27.0, 70.0]], ('纯臻营养护发素', 0.9658738374710083)] ...... ``` * 检测+识别 ```bash -paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg +paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --rec_image_shape 3,48,320 ``` 结果是一个list,每个item包含了文本框,文字和识别置信度 ```bash -[[[24.0, 36.0], [304.0, 34.0], [304.0, 72.0], [24.0, 74.0]], ['纯臻营养护发素', 0.964739]] -[[[24.0, 80.0], [172.0, 80.0], [172.0, 104.0], [24.0, 104.0]], ['产品信息/参数', 0.98069626]] -[[[24.0, 109.0], [333.0, 109.0], [333.0, 136.0], [24.0, 136.0]], ['(45元/每公斤,100公斤起订)', 0.9676722]] +[[[28.0, 37.0], [302.0, 39.0], [302.0, 72.0], [27.0, 70.0]], ('纯臻营养护发素', 0.9658738374710083)] ...... 
``` * 方向分类器+识别 ```bash -paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --use_angle_cls true --det false +paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --use_angle_cls true --det false --rec_image_shape 3,48,320 ``` 结果是一个list,每个item只包含识别结果和识别置信度 ```bash -['韩国小馆', 0.9907421] +['韩国小馆', 0.994467] ``` * 单独执行检测 @@ -250,22 +246,21 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --rec false 结果是一个list,每个item只包含文本框 ```bash -[[26.0, 457.0], [137.0, 457.0], [137.0, 477.0], [26.0, 477.0]] -[[25.0, 425.0], [372.0, 425.0], [372.0, 448.0], [25.0, 448.0]] -[[128.0, 397.0], [273.0, 397.0], [273.0, 414.0], [128.0, 414.0]] +[[27.0, 459.0], [136.0, 459.0], [136.0, 479.0], [27.0, 479.0]] +[[28.0, 429.0], [372.0, 429.0], [372.0, 445.0], [28.0, 445.0]] ...... ``` * 单独执行识别 ```bash -paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --det false +paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --det false --rec_image_shape 3,48,320 ``` 结果是一个list,每个item只包含识别结果和识别置信度 ```bash -['韩国小馆', 0.9907421] +['韩国小馆', 0.994467] ``` * 单独执行方向分类器 @@ -419,5 +414,5 @@ im_show.save('result.jpg') | cls | 前向时是否启动分类 (命令行模式下使用use_angle_cls控制前向是否启动分类) | FALSE | | show_log | 是否打印logger信息 | FALSE | | type | 执行ocr或者表格结构化, 值可选['ocr','structure'] | ocr | -| ocr_version | OCR模型版本,可选PP-OCRv2, PP-OCR。PP-OCRv2 目前仅支持中文的检测和识别模型,PP-OCR支持中文的检测,识别,多语种识别,方向分类器等模型 | PP-OCRv2 | -| structure_version | 表格结构化模型版本,可选 STRUCTURE。STRUCTURE支持表格结构化模型 | STRUCTURE | +| ocr_version | OCR模型版本,可选PP-OCRv3, PP-OCRv2, PP-OCR。PP-OCRv3 目前仅支持中、英文的检测和识别模型,方向分类器模型;PP-OCRv2 目前仅支持中文的检测和识别模型;PP-OCR支持中文的检测,识别,多语种识别,方向分类器等模型 | PP-OCRv3 | +| structure_version | 表格结构化模型版本,可选 PP-STRUCTURE。PP-STRUCTURE支持表格结构化模型 | PP-STRUCTURE | diff --git a/doc/doc_en/inference_ppocr_en.md b/doc/doc_en/inference_ppocr_en.md index 8dc30d3106048575a9ad722386daf9cb658dd455..bcbe3206e3c96c35fe9e8558cbdddf7406f728b2 100755 --- a/doc/doc_en/inference_ppocr_en.md +++ b/doc/doc_en/inference_ppocr_en.md @@ -20,10 +20,10 @@ The default 
configuration is based on the inference setting of the DB text detec ``` # download DB text detection inference model -wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar -tar xf ch_PP-OCRv2_det_infer.tar +wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar +tar xf ch_PP-OCRv3_det_infer.tar # run inference -python3 tools/infer/predict_det.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv2_det_infer.tar/" +python3 tools/infer/predict_det.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" ``` The visual text detection results are saved to the ./inference_results folder by default, and the name of the result file is prefixed with 'det_res'. Examples of results are as follows: @@ -40,12 +40,12 @@ Set as `limit_type='min', det_limit_side_len=960`, it means that the shortest si If the resolution of the input picture is relatively large and you want to use a larger resolution prediction, you can set det_limit_side_len to the desired value, such as 1216: ``` -python3 tools/infer/predict_det.py --image_dir="./doc/imgs/1.jpg" --det_model_dir="./inference/ch_PP-OCRv2_det_infer/" --det_limit_type=max --det_limit_side_len=1216 +python3 tools/infer/predict_det.py --image_dir="./doc/imgs/1.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --det_limit_type=max --det_limit_side_len=1216 ``` If you want to use the CPU for prediction, execute the command as follows ``` -python3 tools/infer/predict_det.py --image_dir="./doc/imgs/1.jpg" --det_model_dir="./inference/ch_PP-OCRv2_det_infer/" --use_gpu=False +python3 tools/infer/predict_det.py --image_dir="./doc/imgs/1.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --use_gpu=False ``` @@ -60,10 +60,10 @@ For lightweight Chinese recognition model inference, you can execute the followi ``` # download CRNN text recognition inference model -wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar -tar xf 
ch_PP-OCRv2_rec_infer.tar +wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar +tar xf ch_PP-OCRv3_rec_infer.tar # run inference -python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --rec_model_dir="./ch_PP-OCRv2_rec_infer/" +python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_10.png" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --rec_image_shape=3,48,320 ``` ![](../imgs_words_en/word_10.png) @@ -71,7 +71,7 @@ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg" After executing the command, the prediction results (recognized text and score) of the above image will be printed on the screen. ```bash -Predicts of ./doc/imgs_words_en/word_10.png:('PAIN', 0.9897658) +Predicts of ./doc/imgs_words_en/word_10.png:('PAIN', 0.988671) ``` @@ -121,16 +121,16 @@ When performing prediction, you need to specify the path of a single image or a ```shell # use direction classifier -python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./inference/ch_PP-OCRv2_det_infer/" --cls_model_dir="./inference/cls/" --rec_model_dir="./inference/ch_PP-OCRv2_rec_infer/" --use_angle_cls=true +python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --cls_model_dir="./cls/" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --use_angle_cls=true --rec_image_shape=3,48,320 # not use use direction classifier -python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./inference/ch_PP-OCRv2_det_infer/" --rec_model_dir="./inference/ch_PP-OCRv2_rec_infer/" --use_angle_cls=false +python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --use_angle_cls=false --rec_image_shape=3,48,320 # use multi-process -python3 tools/infer/predict_system.py 
--image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./inference/ch_PP-OCRv2_det_infer/" --rec_model_dir="./inference/ch_PP-OCRv2_rec_infer/" --use_angle_cls=false --use_mp=True --total_process_num=6 +python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --use_angle_cls=false --use_mp=True --total_process_num=6 --rec_image_shape=3,48,320 ``` After executing the command, the recognition result image is as follows: -![](../imgs_results/system_res_00018069.jpg) +![](../imgs_results/system_res_00018069_v3.jpg) diff --git a/doc/doc_en/quickstart_en.md b/doc/doc_en/quickstart_en.md index 8a9c38069f384dcef06db60f6b1266e6eb116d84..0a420dd46e79029f01b4ede1fda12602b87c7161 100644 --- a/doc/doc_en/quickstart_en.md +++ b/doc/doc_en/quickstart_en.md @@ -80,15 +80,15 @@ If you do not use the provided test image, you can replace the following `--imag * Detection, direction classification and recognition: set the parameter`--use_gpu false` to disable the gpu device ```bash - paddleocr --image_dir ./imgs_en/img_12.jpg --use_angle_cls true --lang en --use_gpu false + paddleocr --image_dir ./imgs_en/img_12.jpg --use_angle_cls true --lang en --use_gpu false --rec_image_shape 3,48,320 ``` Output will be a list, each item contains bounding box, text and recognition confidence ```bash - [[[442.0, 173.0], [1169.0, 173.0], [1169.0, 225.0], [442.0, 225.0]], ['ACKNOWLEDGEMENTS', 0.99283075]] - [[[393.0, 340.0], [1207.0, 342.0], [1207.0, 389.0], [393.0, 387.0]], ['We would like to thank all the designers and', 0.9357758]] - [[[399.0, 398.0], [1204.0, 398.0], [1204.0, 433.0], [399.0, 433.0]], ['contributors whohave been involved in the', 0.9592447]] + [[[441.0, 174.0], [1166.0, 176.0], [1165.0, 222.0], [441.0, 221.0]], ('ACKNOWLEDGEMENTS', 0.9971134662628174)] + [[[403.0, 346.0], [1204.0, 348.0], [1204.0, 384.0], [402.0, 383.0]], ('We would like to thank all the designers and', 
0.9761400818824768)] + [[[403.0, 396.0], [1204.0, 398.0], [1204.0, 434.0], [402.0, 433.0]], ('contributors who have been involved in the', 0.9791957139968872)] ...... ``` @@ -101,33 +101,33 @@ If you do not use the provided test image, you can replace the following `--imag Output will be a list, each item only contains bounding box ```bash - [[756.0, 812.0], [805.0, 812.0], [805.0, 830.0], [756.0, 830.0]] - [[820.0, 803.0], [1085.0, 801.0], [1085.0, 836.0], [820.0, 838.0]] - [[393.0, 801.0], [715.0, 805.0], [715.0, 839.0], [393.0, 836.0]] + [[397.0, 802.0], [1092.0, 802.0], [1092.0, 841.0], [397.0, 841.0]] + [[397.0, 750.0], [1211.0, 750.0], [1211.0, 789.0], [397.0, 789.0]] + [[397.0, 702.0], [1209.0, 698.0], [1209.0, 734.0], [397.0, 738.0]] ...... ``` * Only recognition: set `--det` to `false` ```bash - paddleocr --image_dir ./imgs_words_en/word_10.png --det false --lang en + paddleocr --image_dir ./imgs_words_en/word_10.png --det false --lang en --rec_image_shape 3,48,320 ``` Output will be a list, each item contains text and recognition confidence ```bash - ['PAIN', 0.990372] + ['PAIN', 0.9934559464454651] ``` -If you need to use the 2.0 model, please specify the parameter `--version PP-OCR`, paddleocr uses the 2.1 model by default(`--versioin PP-OCRv2`). More whl package usage can be found in [whl package](./whl_en.md) +If you need to use the 2.0 model, please specify the parameter `--version PP-OCR`, paddleocr uses the PP-OCRv3 model by default(`--version PP-OCRv3`). More whl package usage can be found in [whl package](./whl_en.md) #### 2.1.2 Multi-language Model -Paddleocr currently supports 80 languages, which can be switched by modifying the `--lang` parameter. +Paddleocr currently supports 80 languages, which can be switched by modifying the `--lang` parameter. PP-OCRv3 currently only supports Chinese and English models, and other multilingual models will be updated one after another. 
``` bash -paddleocr --image_dir ./doc/imgs_en/254.jpg --lang=en +paddleocr --image_dir ./doc/imgs_en/254.jpg --lang=en --rec_image_shape 3,48,320 ```
@@ -137,13 +137,9 @@ paddleocr --image_dir ./doc/imgs_en/254.jpg --lang=en The result is a list, each item contains a text box, text and recognition confidence ```text -[('PHO CAPITAL', 0.95723116), [[66.0, 50.0], [327.0, 44.0], [327.0, 76.0], [67.0, 82.0]]] -[('107 State Street', 0.96311164), [[72.0, 90.0], [451.0, 84.0], [452.0, 116.0], [73.0, 121.0]]] -[('Montpelier Vermont', 0.97389287), [[69.0, 132.0], [501.0, 126.0], [501.0, 158.0], [70.0, 164.0]]] -[('8022256183', 0.99810505), [[71.0, 175.0], [363.0, 170.0], [364.0, 202.0], [72.0, 207.0]]] -[('REG 07-24-201706:59 PM', 0.93537045), [[73.0, 299.0], [653.0, 281.0], [654.0, 318.0], [74.0, 336.0]]] -[('045555', 0.99346405), [[509.0, 331.0], [651.0, 325.0], [652.0, 356.0], [511.0, 362.0]]] -[('CT1', 0.9988654), [[535.0, 367.0], [654.0, 367.0], [654.0, 406.0], [535.0, 406.0]]] +[[[67.0, 51.0], [327.0, 46.0], [327.0, 74.0], [68.0, 80.0]], ('PHOCAPITAL', 0.9944712519645691)] +[[[72.0, 92.0], [453.0, 84.0], [454.0, 114.0], [73.0, 122.0]], ('107 State Street', 0.9744491577148438)] +[[[69.0, 135.0], [501.0, 125.0], [501.0, 156.0], [70.0, 165.0]], ('Montpelier Vermont', 0.9357033967971802)] ...... ``` @@ -234,10 +230,10 @@ im_show.save('result.jpg') Output will be a list, each item contains bounding box, text and recognition confidence ```bash -[[[442.0, 173.0], [1169.0, 173.0], [1169.0, 225.0], [442.0, 225.0]], ['ACKNOWLEDGEMENTS', 0.99283075]] -[[[393.0, 340.0], [1207.0, 342.0], [1207.0, 389.0], [393.0, 387.0]], ['We would like to thank all the designers and', 0.9357758]] -[[[399.0, 398.0], [1204.0, 398.0], [1204.0, 433.0], [399.0, 433.0]], ['contributors whohave been involved in the', 0.9592447]] -...... 
+[[[441.0, 174.0], [1166.0, 176.0], [1165.0, 222.0], [441.0, 221.0]], ('ACKNOWLEDGEMENTS', 0.9971134662628174)] + [[[403.0, 346.0], [1204.0, 348.0], [1204.0, 384.0], [402.0, 383.0]], ('We would like to thank all the designers and', 0.9761400818824768)] + [[[403.0, 396.0], [1204.0, 398.0], [1204.0, 434.0], [402.0, 433.0]], ('contributors who have been involved in the', 0.9791957139968872)] + ...... ``` Visualization of results diff --git a/doc/doc_en/whl_en.md b/doc/doc_en/whl_en.md index 35b2b1798ad8b566ee87e921e23be84a5ecccf24..670653f1e59241e16bf569103e2dd2eaffcb4ed3 100644 --- a/doc/doc_en/whl_en.md +++ b/doc/doc_en/whl_en.md @@ -174,38 +174,38 @@ paddleocr -h * detection classification and recognition ```bash -paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg --use_angle_cls true --lang en +paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg --use_angle_cls true --lang en --rec_image_shape 3,48,320 ``` Output will be a list, each item contains bounding box, text and recognition confidence ```bash -[[[442.0, 173.0], [1169.0, 173.0], [1169.0, 225.0], [442.0, 225.0]], ['ACKNOWLEDGEMENTS', 0.99283075]] -[[[393.0, 340.0], [1207.0, 342.0], [1207.0, 389.0], [393.0, 387.0]], ['We would like to thank all the designers and', 0.9357758]] -[[[399.0, 398.0], [1204.0, 398.0], [1204.0, 433.0], [399.0, 433.0]], ['contributors whohave been involved in the', 0.9592447]] +[[[441.0, 174.0], [1166.0, 176.0], [1165.0, 222.0], [441.0, 221.0]], ('ACKNOWLEDGEMENTS', 0.9971134662628174)] +[[[403.0, 346.0], [1204.0, 348.0], [1204.0, 384.0], [402.0, 383.0]], ('We would like to thank all the designers and', 0.9761400818824768)] +[[[403.0, 396.0], [1204.0, 398.0], [1204.0, 434.0], [402.0, 433.0]], ('contributors who have been involved in the', 0.9791957139968872)] ...... 
``` * detection and recognition ```bash -paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg --lang en +paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg --lang en --rec_image_shape 3,48,320 ``` Output will be a list, each item contains bounding box, text and recognition confidence ```bash -[[[442.0, 173.0], [1169.0, 173.0], [1169.0, 225.0], [442.0, 225.0]], ['ACKNOWLEDGEMENTS', 0.99283075]] -[[[393.0, 340.0], [1207.0, 342.0], [1207.0, 389.0], [393.0, 387.0]], ['We would like to thank all the designers and', 0.9357758]] -[[[399.0, 398.0], [1204.0, 398.0], [1204.0, 433.0], [399.0, 433.0]], ['contributors whohave been involved in the', 0.9592447]] +[[[441.0, 174.0], [1166.0, 176.0], [1165.0, 222.0], [441.0, 221.0]], ('ACKNOWLEDGEMENTS', 0.9971134662628174)] +[[[403.0, 346.0], [1204.0, 348.0], [1204.0, 384.0], [402.0, 383.0]], ('We would like to thank all the designers and', 0.9761400818824768)] +[[[403.0, 396.0], [1204.0, 398.0], [1204.0, 434.0], [402.0, 433.0]], ('contributors who have been involved in the', 0.9791957139968872)] ...... 
``` * classification and recognition ```bash -paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --use_angle_cls true --det false --lang en +paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --use_angle_cls true --det false --lang en --rec_image_shape 3,48,320 ``` Output will be a list, each item contains text and recognition confidence ```bash -['PAIN', 0.990372] +['PAIN', 0.9934559464454651] ``` * only detection @@ -215,20 +215,20 @@ paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg --rec false Output will be a list, each item only contains bounding box ```bash -[[756.0, 812.0], [805.0, 812.0], [805.0, 830.0], [756.0, 830.0]] -[[820.0, 803.0], [1085.0, 801.0], [1085.0, 836.0], [820.0, 838.0]] -[[393.0, 801.0], [715.0, 805.0], [715.0, 839.0], [393.0, 836.0]] +[[397.0, 802.0], [1092.0, 802.0], [1092.0, 841.0], [397.0, 841.0]] +[[397.0, 750.0], [1211.0, 750.0], [1211.0, 789.0], [397.0, 789.0]] +[[397.0, 702.0], [1209.0, 698.0], [1209.0, 734.0], [397.0, 738.0]] ...... 
``` * only recognition ```bash -paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --det false --lang en +paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --det false --lang en --rec_image_shape 3,48,320 ``` Output will be a list, each item contains text and recognition confidence ```bash -['PAIN', 0.990372] +['PAIN', 0.9934559464454651] ``` * only classification @@ -366,5 +366,5 @@ im_show.save('result.jpg') | cls | Enable classification when `ppocr.ocr` func exec((Use use_angle_cls in command line mode to control whether to start classification in the forward direction) | FALSE | | show_log | Whether to print log| FALSE | | type | Perform ocr or table structuring, the value is selected in ['ocr','structure'] | ocr | -| ocr_version | OCR Model version number, the current model support list is as follows: PP-OCRv2 support Chinese detection and recognition model, PP-OCR support Chinese detection, recognition and direction classifier, multilingual recognition model | PP-OCRv2 | -| structure_version | table structure Model version number, the current model support list is as follows: STRUCTURE support english table structure model | STRUCTURE | +| ocr_version | OCR Model version number, the current model support list is as follows: PP-OCRv3 support Chinese and English detection and recognition model and direction classifier model, PP-OCRv2 support Chinese detection and recognition model, PP-OCR support Chinese detection, recognition and direction classifier, multilingual recognition model | PP-OCRv3 | +| structure_version | table structure Model version number, the current model support list is as follows: PP-STRUCTURE support english table structure model | PP-STRUCTURE | diff --git a/doc/imgs_results/system_res_00018069_v3.jpg b/doc/imgs_results/system_res_00018069_v3.jpg new file mode 100644 index 0000000000000000000000000000000000000000..51808ca556b52239cad3602cd5602f4d5d0ab7ce Binary files /dev/null and 
b/doc/imgs_results/system_res_00018069_v3.jpg differ diff --git a/paddleocr.py b/paddleocr.py index cb2c34f69f68d289b317d4737bd23385c77c3d95..417350839ac4d1e512c7396831f89ab4b2d6c724 100644 --- a/paddleocr.py +++ b/paddleocr.py @@ -47,16 +47,46 @@ __all__ = [ ] SUPPORT_DET_MODEL = ['DB'] -VERSION = '2.5' +VERSION = '2.5.0.1' SUPPORT_REC_MODEL = ['CRNN'] BASE_DIR = os.path.expanduser("~/.paddleocr/") -DEFAULT_OCR_MODEL_VERSION = 'PP-OCR' -SUPPORT_OCR_MODEL_VERSION = ['PP-OCR', 'PP-OCRv2'] -DEFAULT_STRUCTURE_MODEL_VERSION = 'STRUCTURE' -SUPPORT_STRUCTURE_MODEL_VERSION = ['STRUCTURE'] +DEFAULT_OCR_MODEL_VERSION = 'PP-OCRv3' +SUPPORT_OCR_MODEL_VERSION = ['PP-OCR', 'PP-OCRv2', 'PP-OCRv3'] +DEFAULT_STRUCTURE_MODEL_VERSION = 'PP-STRUCTURE' +SUPPORT_STRUCTURE_MODEL_VERSION = ['PP-STRUCTURE'] MODEL_URLS = { 'OCR': { + 'PP-OCRv3': { + 'det': { + 'ch': { + 'url': + 'https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar', + }, + 'en': { + 'url': + 'https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar', + }, + }, + 'rec': { + 'ch': { + 'url': + 'https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar', + 'dict_path': './ppocr/utils/ppocr_keys_v1.txt' + }, + 'en': { + 'url': + 'https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tar', + 'dict_path': './ppocr/utils/en_dict.txt' + }, + }, + 'cls': { + 'ch': { + 'url': + 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar', + } + }, + }, 'PP-OCRv2': { 'det': { 'ch': { @@ -72,7 +102,7 @@ MODEL_URLS = { } } }, - DEFAULT_OCR_MODEL_VERSION: { + 'PP-OCR': { 'det': { 'ch': { 'url': @@ -173,7 +203,7 @@ MODEL_URLS = { } }, 'STRUCTURE': { - DEFAULT_STRUCTURE_MODEL_VERSION: { + 'PP-STRUCTURE': { 'table': { 'en': { 'url': @@ -198,16 +228,17 @@ def parse_args(mMain=True): "--ocr_version", type=str, choices=SUPPORT_OCR_MODEL_VERSION, - default='PP-OCRv2', + default='PP-OCRv3', help='OCR Model version, the current model support list 
is as follows: ' - '1. PP-OCRv2 Support Chinese detection and recognition model. ' - '2. PP-OCR support Chinese detection, recognition and direction classifier and multilingual recognition model.' + '1. PP-OCRv3 Support Chinese and English detection and recognition model, and direction classifier model. ' + '2. PP-OCRv2 Support Chinese detection and recognition model. ' + '3. PP-OCR support Chinese detection, recognition and direction classifier and multilingual recognition model.' ) parser.add_argument( "--structure_version", type=str, choices=SUPPORT_STRUCTURE_MODEL_VERSION, - default='PP-STRUCTURE', + default='PP-STRUCTURE', help='Model version, the current model support list is as follows:' ' 1. PP-STRUCTURE Support en table structure model.')