diff --git a/deploy/cpp_infer/readme.md b/deploy/cpp_infer/readme.md
index c62fe32bc310eb3f91a6c55c3ecf25cfa53c0c61..418879063be54c02048a9473f0ca8847a7589020 100644
--- a/deploy/cpp_infer/readme.md
+++ b/deploy/cpp_infer/readme.md
@@ -220,6 +220,7 @@ Specifically,
--det=true \
--rec=true \
--cls=true \
+ --rec_img_h=48
```
##### 2. det+rec:
@@ -231,6 +232,7 @@ Specifically,
--det=true \
--rec=true \
--cls=false \
+ --rec_img_h=48
```
##### 3. det
@@ -250,6 +252,7 @@ Specifically,
--det=false \
--rec=true \
--cls=true \
+ --rec_img_h=48
```
##### 5. rec
@@ -260,6 +263,7 @@ Specifically,
--det=false \
--rec=true \
--cls=false \
+ --rec_img_h=48
```
##### 6. cls
@@ -335,10 +339,10 @@ The detection results will be shown on the screen, which is as follows.
```bash
predict img: ../../doc/imgs/12.jpg
../../doc/imgs/12.jpg
-0 det boxes: [[79,553],[399,541],[400,573],[80,585]] rec text: 打浦路252935号 rec score: 0.933757
-1 det boxes: [[31,509],[510,488],[511,529],[33,549]] rec text: 绿洲仕格维花园公寓 rec score: 0.951745
-2 det boxes: [[181,456],[395,448],[396,480],[182,488]] rec text: 打浦路15号 rec score: 0.91956
-3 det boxes: [[43,413],[480,391],[481,428],[45,450]] rec text: 上海斯格威铂尔多大酒店 rec score: 0.915914
+0 det boxes: [[74,553],[427,542],[428,571],[75,582]] rec text: 打浦路252935号 rec score: 0.947724
+1 det boxes: [[23,507],[513,488],[515,529],[24,548]] rec text: 绿洲仕格维花园公寓 rec score: 0.993728
+2 det boxes: [[187,456],[399,448],[400,480],[188,488]] rec text: 打浦路15号 rec score: 0.964994
+3 det boxes: [[42,413],[483,391],[484,428],[43,450]] rec text: 上海斯格威铂尔大酒店 rec score: 0.980086
The detection visualized image saved in ./output//12.jpg
```
diff --git a/deploy/cpp_infer/readme_ch.md b/deploy/cpp_infer/readme_ch.md
index 2a81e15a97cca45d525efe8739255acd12f8117f..cf14a6761300764a3200a7c04607d0b5212e99c7 100644
--- a/deploy/cpp_infer/readme_ch.md
+++ b/deploy/cpp_infer/readme_ch.md
@@ -229,6 +229,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
--det=true \
--rec=true \
--cls=true \
+ --rec_img_h=48
```
##### 2. 检测+识别:
@@ -240,6 +241,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
--det=true \
--rec=true \
--cls=false \
+ --rec_img_h=48
```
##### 3. 检测:
@@ -259,6 +261,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
--det=false \
--rec=true \
--cls=true \
+ --rec_img_h=48
```
##### 5. 识别:
@@ -269,6 +272,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
--det=false \
--rec=true \
--cls=false \
+ --rec_img_h=48
```
##### 6. 分类:
@@ -343,10 +347,10 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
```bash
predict img: ../../doc/imgs/12.jpg
../../doc/imgs/12.jpg
-0 det boxes: [[79,553],[399,541],[400,573],[80,585]] rec text: 打浦路252935号 rec score: 0.933757
-1 det boxes: [[31,509],[510,488],[511,529],[33,549]] rec text: 绿洲仕格维花园公寓 rec score: 0.951745
-2 det boxes: [[181,456],[395,448],[396,480],[182,488]] rec text: 打浦路15号 rec score: 0.91956
-3 det boxes: [[43,413],[480,391],[481,428],[45,450]] rec text: 上海斯格威铂尔多大酒店 rec score: 0.915914
+0 det boxes: [[74,553],[427,542],[428,571],[75,582]] rec text: 打浦路252935号 rec score: 0.947724
+1 det boxes: [[23,507],[513,488],[515,529],[24,548]] rec text: 绿洲仕格维花园公寓 rec score: 0.993728
+2 det boxes: [[187,456],[399,448],[400,480],[188,488]] rec text: 打浦路15号 rec score: 0.964994
+3 det boxes: [[42,413],[483,391],[484,428],[43,450]] rec text: 上海斯格威铂尔大酒店 rec score: 0.980086
The detection visualized image saved in ./output//12.jpg
```
diff --git a/doc/doc_ch/inference_ppocr.md b/doc/doc_ch/inference_ppocr.md
index 5fb3811eb40addd506dfa37d257c00a0c2a44258..23e9f3b6df2357f165d60600de7c7cae0662bcdc 100644
--- a/doc/doc_ch/inference_ppocr.md
+++ b/doc/doc_ch/inference_ppocr.md
@@ -19,9 +19,9 @@
```
# 下载超轻量中文检测模型:
-wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar
-tar xf ch_PP-OCRv2_det_infer.tar
-python3 tools/infer/predict_det.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv2_det_infer/"
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar
+tar xf ch_PP-OCRv3_det_infer.tar
+python3 tools/infer/predict_det.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/"
```
@@ -40,13 +40,13 @@ python3 tools/infer/predict_det.py --image_dir="./doc/imgs/00018069.jpg" --det_m
如果输入图片的分辨率比较大,而且想使用更大的分辨率预测,可以设置det_limit_side_len 为想要的值,比如1216:
```
-python3 tools/infer/predict_det.py --image_dir="./doc/imgs/1.jpg" --det_model_dir="./inference/ch_PP-OCRv2_det_infer/" --det_limit_type=max --det_limit_side_len=1216
+python3 tools/infer/predict_det.py --image_dir="./doc/imgs/1.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --det_limit_type=max --det_limit_side_len=1216
```
如果想使用CPU进行预测,执行命令如下
```
-python3 tools/infer/predict_det.py --image_dir="./doc/imgs/1.jpg" --det_model_dir="./inference/ch_PP-OCRv2_det_infer/" --use_gpu=False
+python3 tools/infer/predict_det.py --image_dir="./doc/imgs/1.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --use_gpu=False
```
@@ -63,9 +63,9 @@ python3 tools/infer/predict_det.py --image_dir="./doc/imgs/1.jpg" --det_model_di
```
# 下载超轻量中文识别模型:
-wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar
-tar xf ch_PP-OCRv2_rec_infer.tar
-python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --rec_model_dir="./ch_PP-OCRv2_rec_infer/"
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar
+tar xf ch_PP-OCRv3_rec_infer.tar
+python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --rec_image_shape=3,48,320
```
![](../imgs_words/ch/word_4.jpg)
@@ -73,7 +73,7 @@ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg"
执行命令后,上面图像的预测结果(识别的文本和得分)会打印到屏幕上,示例如下:
```bash
-Predicts of ./doc/imgs_words/ch/word_4.jpg:('实力活力', 0.98458153)
+Predicts of ./doc/imgs_words/ch/word_4.jpg:('实力活力', 0.9956803321838379)
```
@@ -123,13 +123,13 @@ Predicts of ./doc/imgs_words/ch/word_4.jpg:['0', 0.9999982]
```shell
# 使用方向分类器
-python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./inference/ch_PP-OCRv2_det_infer/" --cls_model_dir="./inference/cls/" --rec_model_dir="./inference/ch_PP-OCRv2_rec_infer/" --use_angle_cls=true
+python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --cls_model_dir="./cls/" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --use_angle_cls=true --rec_image_shape=3,48,320
# 不使用方向分类器
-python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./inference/ch_PP-OCRv2_det_infer/" --rec_model_dir="./inference/ch_PP-OCRv2_rec_infer/" --use_angle_cls=false
+python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --use_angle_cls=false --rec_image_shape=3,48,320
# 使用多进程
-python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./inference/ch_PP-OCRv2_det_infer/" --rec_model_dir="./inference/ch_PP-OCRv2_rec_infer/" --use_angle_cls=false --use_mp=True --total_process_num=6
+python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --use_angle_cls=false --use_mp=True --total_process_num=6 --rec_image_shape=3,48,320
```
执行命令后,识别结果图像如下:
-![](../imgs_results/system_res_00018069.jpg)
+![](../imgs_results/system_res_00018069_v3.jpg)
diff --git a/doc/doc_ch/quickstart.md b/doc/doc_ch/quickstart.md
index ce0f6b1570f1570f7d12bf1ad24d7d9f9914f5f0..c41186a20693a12a4cdc9c1a537487ebe5c3ae84 100644
--- a/doc/doc_ch/quickstart.md
+++ b/doc/doc_ch/quickstart.md
@@ -65,15 +65,13 @@ cd /path/to/ppocr_img
* 检测+方向分类器+识别全流程:`--use_angle_cls true`设置使用方向分类器识别180度旋转文字,`--use_gpu false`设置不使用GPU
```bash
- paddleocr --image_dir ./imgs/11.jpg --use_angle_cls true --use_gpu false
+ paddleocr --image_dir ./imgs/11.jpg --use_angle_cls true --use_gpu false --rec_image_shape 3,48,320
```
结果是一个list,每个item包含了文本框,文字和识别置信度
```bash
- [[[24.0, 36.0], [304.0, 34.0], [304.0, 72.0], [24.0, 74.0]], ['纯臻营养护发素', 0.964739]]
- [[[24.0, 80.0], [172.0, 80.0], [172.0, 104.0], [24.0, 104.0]], ['产品信息/参数', 0.98069626]]
- [[[24.0, 109.0], [333.0, 109.0], [333.0, 136.0], [24.0, 136.0]], ['(45元/每公斤,100公斤起订)', 0.9676722]]
+ [[[28.0, 37.0], [302.0, 39.0], [302.0, 72.0], [27.0, 70.0]], ('纯臻营养护发素', 0.9658738374710083)]
......
```
@@ -86,35 +84,34 @@ cd /path/to/ppocr_img
结果是一个list,每个item只包含文本框
```bash
- [[26.0, 457.0], [137.0, 457.0], [137.0, 477.0], [26.0, 477.0]]
- [[25.0, 425.0], [372.0, 425.0], [372.0, 448.0], [25.0, 448.0]]
- [[128.0, 397.0], [273.0, 397.0], [273.0, 414.0], [128.0, 414.0]]
+ [[27.0, 459.0], [136.0, 459.0], [136.0, 479.0], [27.0, 479.0]]
+ [[28.0, 429.0], [372.0, 429.0], [372.0, 445.0], [28.0, 445.0]]
......
```
- 单独使用识别:设置`--det`为`false`
```bash
- paddleocr --image_dir ./imgs_words/ch/word_1.jpg --det false
+ paddleocr --image_dir ./imgs_words/ch/word_1.jpg --det false --rec_image_shape 3,48,320
```
结果是一个list,每个item只包含识别结果和识别置信度
```bash
- ['韩国小馆', 0.9907421]
+ ['韩国小馆', 0.994467]
```
-如需使用2.0模型,请指定参数`--version PP-OCR`,paddleocr默认使用2.1模型(`--versioin PP-OCRv2`)。更多whl包使用可参考[whl包文档](./whl.md)
+如需使用2.0模型,请指定参数`--ocr_version PP-OCR`,paddleocr默认使用PP-OCRv3模型(`--ocr_version PP-OCRv3`)。更多whl包使用可参考[whl包文档](./whl.md)
#### 2.1.2 多语言模型
-Paddleocr目前支持80个语种,可以通过修改`--lang`参数进行切换,对于英文模型,指定`--lang=en`。
+Paddleocr目前支持80个语种,可以通过修改`--lang`参数进行切换,对于英文模型,指定`--lang=en`。PP-OCRv3目前只支持中文和英文模型,其他多语言模型会陆续更新。
``` bash
-paddleocr --image_dir ./imgs_en/254.jpg --lang=en
+paddleocr --image_dir ./imgs_en/254.jpg --lang=en --rec_image_shape 3,48,320
```
@@ -125,13 +122,9 @@ paddleocr --image_dir ./imgs_en/254.jpg --lang=en
结果是一个list,每个item包含了文本框,文字和识别置信度
```text
-[('PHO CAPITAL', 0.95723116), [[66.0, 50.0], [327.0, 44.0], [327.0, 76.0], [67.0, 82.0]]]
-[('107 State Street', 0.96311164), [[72.0, 90.0], [451.0, 84.0], [452.0, 116.0], [73.0, 121.0]]]
-[('Montpelier Vermont', 0.97389287), [[69.0, 132.0], [501.0, 126.0], [501.0, 158.0], [70.0, 164.0]]]
-[('8022256183', 0.99810505), [[71.0, 175.0], [363.0, 170.0], [364.0, 202.0], [72.0, 207.0]]]
-[('REG 07-24-201706:59 PM', 0.93537045), [[73.0, 299.0], [653.0, 281.0], [654.0, 318.0], [74.0, 336.0]]]
-[('045555', 0.99346405), [[509.0, 331.0], [651.0, 325.0], [652.0, 356.0], [511.0, 362.0]]]
-[('CT1', 0.9988654), [[535.0, 367.0], [654.0, 367.0], [654.0, 406.0], [535.0, 406.0]]]
+[[[67.0, 51.0], [327.0, 46.0], [327.0, 74.0], [68.0, 80.0]], ('PHOCAPITAL', 0.9944712519645691)]
+[[[72.0, 92.0], [453.0, 84.0], [454.0, 114.0], [73.0, 122.0]], ('107 State Street', 0.9744491577148438)]
+[[[69.0, 135.0], [501.0, 125.0], [501.0, 156.0], [70.0, 165.0]], ('Montpelier Vermont', 0.9357033967971802)]
......
```
@@ -181,9 +174,7 @@ im_show.save('result.jpg')
结果是一个list,每个item包含了文本框,文字和识别置信度
```bash
-[[[24.0, 36.0], [304.0, 34.0], [304.0, 72.0], [24.0, 74.0]], ['纯臻营养护发素', 0.964739]]
-[[[24.0, 80.0], [172.0, 80.0], [172.0, 104.0], [24.0, 104.0]], ['产品信息/参数', 0.98069626]]
-[[[24.0, 109.0], [333.0, 109.0], [333.0, 136.0], [24.0, 136.0]], ['(45元/每公斤,100公斤起订)', 0.9676722]]
+[[[28.0, 37.0], [302.0, 39.0], [302.0, 72.0], [27.0, 70.0]], ('纯臻营养护发素', 0.9658738374710083)]
......
```
diff --git a/doc/doc_ch/whl.md b/doc/doc_ch/whl.md
index b2eb4ba17cf70edeaea36b5e54fe976605de850f..ba571186f1bc249f7c5bd5cd50b381e7b183df40 100644
--- a/doc/doc_ch/whl.md
+++ b/doc/doc_ch/whl.md
@@ -202,43 +202,39 @@ paddleocr -h
* 检测+方向分类器+识别全流程
```bash
-paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --use_angle_cls true
+paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --use_angle_cls true --rec_image_shape 3,48,320
```
结果是一个list,每个item包含了文本框,文字和识别置信度
```bash
-[[[24.0, 36.0], [304.0, 34.0], [304.0, 72.0], [24.0, 74.0]], ['纯臻营养护发素', 0.964739]]
-[[[24.0, 80.0], [172.0, 80.0], [172.0, 104.0], [24.0, 104.0]], ['产品信息/参数', 0.98069626]]
-[[[24.0, 109.0], [333.0, 109.0], [333.0, 136.0], [24.0, 136.0]], ['(45元/每公斤,100公斤起订)', 0.9676722]]µ
+[[[28.0, 37.0], [302.0, 39.0], [302.0, 72.0], [27.0, 70.0]], ('纯臻营养护发素', 0.9658738374710083)]
......
```
* 检测+识别
```bash
-paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg
+paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --rec_image_shape 3,48,320
```
结果是一个list,每个item包含了文本框,文字和识别置信度
```bash
-[[[24.0, 36.0], [304.0, 34.0], [304.0, 72.0], [24.0, 74.0]], ['纯臻营养护发素', 0.964739]]
-[[[24.0, 80.0], [172.0, 80.0], [172.0, 104.0], [24.0, 104.0]], ['产品信息/参数', 0.98069626]]
-[[[24.0, 109.0], [333.0, 109.0], [333.0, 136.0], [24.0, 136.0]], ['(45元/每公斤,100公斤起订)', 0.9676722]]
+[[[28.0, 37.0], [302.0, 39.0], [302.0, 72.0], [27.0, 70.0]], ('纯臻营养护发素', 0.9658738374710083)]
......
```
* 方向分类器+识别
```bash
-paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --use_angle_cls true --det false
+paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --use_angle_cls true --det false --rec_image_shape 3,48,320
```
结果是一个list,每个item只包含识别结果和识别置信度
```bash
-['韩国小馆', 0.9907421]
+['韩国小馆', 0.994467]
```
* 单独执行检测
@@ -250,22 +246,21 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --rec false
结果是一个list,每个item只包含文本框
```bash
-[[26.0, 457.0], [137.0, 457.0], [137.0, 477.0], [26.0, 477.0]]
-[[25.0, 425.0], [372.0, 425.0], [372.0, 448.0], [25.0, 448.0]]
-[[128.0, 397.0], [273.0, 397.0], [273.0, 414.0], [128.0, 414.0]]
+[[27.0, 459.0], [136.0, 459.0], [136.0, 479.0], [27.0, 479.0]]
+[[28.0, 429.0], [372.0, 429.0], [372.0, 445.0], [28.0, 445.0]]
......
```
* 单独执行识别
```bash
-paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --det false
+paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --det false --rec_image_shape 3,48,320
```
结果是一个list,每个item只包含识别结果和识别置信度
```bash
-['韩国小馆', 0.9907421]
+['韩国小馆', 0.994467]
```
* 单独执行方向分类器
@@ -419,5 +414,5 @@ im_show.save('result.jpg')
| cls | 前向时是否启动分类 (命令行模式下使用use_angle_cls控制前向是否启动分类) | FALSE |
| show_log | 是否打印logger信息 | FALSE |
| type | 执行ocr或者表格结构化, 值可选['ocr','structure'] | ocr |
-| ocr_version | OCR模型版本,可选PP-OCRv2, PP-OCR。PP-OCRv2 目前仅支持中文的检测和识别模型,PP-OCR支持中文的检测,识别,多语种识别,方向分类器等模型 | PP-OCRv2 |
-| structure_version | 表格结构化模型版本,可选 STRUCTURE。STRUCTURE支持表格结构化模型 | STRUCTURE |
+| ocr_version | OCR模型版本,可选PP-OCRv3, PP-OCRv2, PP-OCR。PP-OCRv3 目前仅支持中、英文的检测和识别模型,方向分类器模型;PP-OCRv2 目前仅支持中文的检测和识别模型;PP-OCR支持中文的检测,识别,多语种识别,方向分类器等模型 | PP-OCRv3 |
+| structure_version | 表格结构化模型版本,可选 PP-STRUCTURE。PP-STRUCTURE支持表格结构化模型 | PP-STRUCTURE |
diff --git a/doc/doc_en/inference_ppocr_en.md b/doc/doc_en/inference_ppocr_en.md
index 8dc30d3106048575a9ad722386daf9cb658dd455..bcbe3206e3c96c35fe9e8558cbdddf7406f728b2 100755
--- a/doc/doc_en/inference_ppocr_en.md
+++ b/doc/doc_en/inference_ppocr_en.md
@@ -20,10 +20,10 @@ The default configuration is based on the inference setting of the DB text detec
```
# download DB text detection inference model
-wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar
-tar xf ch_PP-OCRv2_det_infer.tar
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar
+tar xf ch_PP-OCRv3_det_infer.tar
# run inference
-python3 tools/infer/predict_det.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv2_det_infer.tar/"
+python3 tools/infer/predict_det.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/"
```
The visual text detection results are saved to the ./inference_results folder by default, and the name of the result file is prefixed with 'det_res'. Examples of results are as follows:
@@ -40,12 +40,12 @@ Set as `limit_type='min', det_limit_side_len=960`, it means that the shortest si
If the resolution of the input picture is relatively large and you want to use a larger resolution prediction, you can set det_limit_side_len to the desired value, such as 1216:
```
-python3 tools/infer/predict_det.py --image_dir="./doc/imgs/1.jpg" --det_model_dir="./inference/ch_PP-OCRv2_det_infer/" --det_limit_type=max --det_limit_side_len=1216
+python3 tools/infer/predict_det.py --image_dir="./doc/imgs/1.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --det_limit_type=max --det_limit_side_len=1216
```
If you want to use the CPU for prediction, execute the command as follows
```
-python3 tools/infer/predict_det.py --image_dir="./doc/imgs/1.jpg" --det_model_dir="./inference/ch_PP-OCRv2_det_infer/" --use_gpu=False
+python3 tools/infer/predict_det.py --image_dir="./doc/imgs/1.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --use_gpu=False
```
@@ -60,10 +60,10 @@ For lightweight Chinese recognition model inference, you can execute the followi
```
# download CRNN text recognition inference model
-wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar
-tar xf ch_PP-OCRv2_rec_infer.tar
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar
+tar xf ch_PP-OCRv3_rec_infer.tar
# run inference
-python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --rec_model_dir="./ch_PP-OCRv2_rec_infer/"
+python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_10.png" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --rec_image_shape=3,48,320
```
![](../imgs_words_en/word_10.png)
@@ -71,7 +71,7 @@ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg"
After executing the command, the prediction results (recognized text and score) of the above image will be printed on the screen.
```bash
-Predicts of ./doc/imgs_words_en/word_10.png:('PAIN', 0.9897658)
+Predicts of ./doc/imgs_words_en/word_10.png:('PAIN', 0.988671)
```
@@ -121,16 +121,16 @@ When performing prediction, you need to specify the path of a single image or a
```shell
# use direction classifier
-python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./inference/ch_PP-OCRv2_det_infer/" --cls_model_dir="./inference/cls/" --rec_model_dir="./inference/ch_PP-OCRv2_rec_infer/" --use_angle_cls=true
+python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --cls_model_dir="./cls/" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --use_angle_cls=true --rec_image_shape=3,48,320
# not use use direction classifier
-python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./inference/ch_PP-OCRv2_det_infer/" --rec_model_dir="./inference/ch_PP-OCRv2_rec_infer/" --use_angle_cls=false
+python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --use_angle_cls=false --rec_image_shape=3,48,320
# use multi-process
-python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./inference/ch_PP-OCRv2_det_infer/" --rec_model_dir="./inference/ch_PP-OCRv2_rec_infer/" --use_angle_cls=false --use_mp=True --total_process_num=6
+python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --use_angle_cls=false --use_mp=True --total_process_num=6 --rec_image_shape=3,48,320
```
After executing the command, the recognition result image is as follows:
-![](../imgs_results/system_res_00018069.jpg)
+![](../imgs_results/system_res_00018069_v3.jpg)
diff --git a/doc/doc_en/quickstart_en.md b/doc/doc_en/quickstart_en.md
index 8a9c38069f384dcef06db60f6b1266e6eb116d84..0a420dd46e79029f01b4ede1fda12602b87c7161 100644
--- a/doc/doc_en/quickstart_en.md
+++ b/doc/doc_en/quickstart_en.md
@@ -80,15 +80,15 @@ If you do not use the provided test image, you can replace the following `--imag
* Detection, direction classification and recognition: set the parameter`--use_gpu false` to disable the gpu device
```bash
- paddleocr --image_dir ./imgs_en/img_12.jpg --use_angle_cls true --lang en --use_gpu false
+ paddleocr --image_dir ./imgs_en/img_12.jpg --use_angle_cls true --lang en --use_gpu false --rec_image_shape 3,48,320
```
Output will be a list, each item contains bounding box, text and recognition confidence
```bash
- [[[442.0, 173.0], [1169.0, 173.0], [1169.0, 225.0], [442.0, 225.0]], ['ACKNOWLEDGEMENTS', 0.99283075]]
- [[[393.0, 340.0], [1207.0, 342.0], [1207.0, 389.0], [393.0, 387.0]], ['We would like to thank all the designers and', 0.9357758]]
- [[[399.0, 398.0], [1204.0, 398.0], [1204.0, 433.0], [399.0, 433.0]], ['contributors whohave been involved in the', 0.9592447]]
+ [[[441.0, 174.0], [1166.0, 176.0], [1165.0, 222.0], [441.0, 221.0]], ('ACKNOWLEDGEMENTS', 0.9971134662628174)]
+ [[[403.0, 346.0], [1204.0, 348.0], [1204.0, 384.0], [402.0, 383.0]], ('We would like to thank all the designers and', 0.9761400818824768)]
+ [[[403.0, 396.0], [1204.0, 398.0], [1204.0, 434.0], [402.0, 433.0]], ('contributors who have been involved in the', 0.9791957139968872)]
......
```
@@ -101,33 +101,33 @@ If you do not use the provided test image, you can replace the following `--imag
Output will be a list, each item only contains bounding box
```bash
- [[756.0, 812.0], [805.0, 812.0], [805.0, 830.0], [756.0, 830.0]]
- [[820.0, 803.0], [1085.0, 801.0], [1085.0, 836.0], [820.0, 838.0]]
- [[393.0, 801.0], [715.0, 805.0], [715.0, 839.0], [393.0, 836.0]]
+ [[397.0, 802.0], [1092.0, 802.0], [1092.0, 841.0], [397.0, 841.0]]
+ [[397.0, 750.0], [1211.0, 750.0], [1211.0, 789.0], [397.0, 789.0]]
+ [[397.0, 702.0], [1209.0, 698.0], [1209.0, 734.0], [397.0, 738.0]]
......
```
* Only recognition: set `--det` to `false`
```bash
- paddleocr --image_dir ./imgs_words_en/word_10.png --det false --lang en
+ paddleocr --image_dir ./imgs_words_en/word_10.png --det false --lang en --rec_image_shape 3,48,320
```
Output will be a list, each item contains text and recognition confidence
```bash
- ['PAIN', 0.990372]
+ ['PAIN', 0.9934559464454651]
```
-If you need to use the 2.0 model, please specify the parameter `--version PP-OCR`, paddleocr uses the 2.1 model by default(`--versioin PP-OCRv2`). More whl package usage can be found in [whl package](./whl_en.md)
+If you need to use the 2.0 model, please specify the parameter `--ocr_version PP-OCR`; paddleocr uses the PP-OCRv3 model by default (`--ocr_version PP-OCRv3`). More whl package usage can be found in [whl package](./whl_en.md)
#### 2.1.2 Multi-language Model
-Paddleocr currently supports 80 languages, which can be switched by modifying the `--lang` parameter.
+Paddleocr currently supports 80 languages, which can be switched by modifying the `--lang` parameter. PP-OCRv3 currently only provides Chinese and English models; models for other languages will be released gradually.
``` bash
-paddleocr --image_dir ./doc/imgs_en/254.jpg --lang=en
+paddleocr --image_dir ./doc/imgs_en/254.jpg --lang=en --rec_image_shape 3,48,320
```
@@ -137,13 +137,9 @@ paddleocr --image_dir ./doc/imgs_en/254.jpg --lang=en
The result is a list, each item contains a text box, text and recognition confidence
```text
-[('PHO CAPITAL', 0.95723116), [[66.0, 50.0], [327.0, 44.0], [327.0, 76.0], [67.0, 82.0]]]
-[('107 State Street', 0.96311164), [[72.0, 90.0], [451.0, 84.0], [452.0, 116.0], [73.0, 121.0]]]
-[('Montpelier Vermont', 0.97389287), [[69.0, 132.0], [501.0, 126.0], [501.0, 158.0], [70.0, 164.0]]]
-[('8022256183', 0.99810505), [[71.0, 175.0], [363.0, 170.0], [364.0, 202.0], [72.0, 207.0]]]
-[('REG 07-24-201706:59 PM', 0.93537045), [[73.0, 299.0], [653.0, 281.0], [654.0, 318.0], [74.0, 336.0]]]
-[('045555', 0.99346405), [[509.0, 331.0], [651.0, 325.0], [652.0, 356.0], [511.0, 362.0]]]
-[('CT1', 0.9988654), [[535.0, 367.0], [654.0, 367.0], [654.0, 406.0], [535.0, 406.0]]]
+[[[67.0, 51.0], [327.0, 46.0], [327.0, 74.0], [68.0, 80.0]], ('PHOCAPITAL', 0.9944712519645691)]
+[[[72.0, 92.0], [453.0, 84.0], [454.0, 114.0], [73.0, 122.0]], ('107 State Street', 0.9744491577148438)]
+[[[69.0, 135.0], [501.0, 125.0], [501.0, 156.0], [70.0, 165.0]], ('Montpelier Vermont', 0.9357033967971802)]
......
```
@@ -234,10 +230,10 @@ im_show.save('result.jpg')
Output will be a list, each item contains bounding box, text and recognition confidence
```bash
-[[[442.0, 173.0], [1169.0, 173.0], [1169.0, 225.0], [442.0, 225.0]], ['ACKNOWLEDGEMENTS', 0.99283075]]
-[[[393.0, 340.0], [1207.0, 342.0], [1207.0, 389.0], [393.0, 387.0]], ['We would like to thank all the designers and', 0.9357758]]
-[[[399.0, 398.0], [1204.0, 398.0], [1204.0, 433.0], [399.0, 433.0]], ['contributors whohave been involved in the', 0.9592447]]
-......
+[[[441.0, 174.0], [1166.0, 176.0], [1165.0, 222.0], [441.0, 221.0]], ('ACKNOWLEDGEMENTS', 0.9971134662628174)]
+[[[403.0, 346.0], [1204.0, 348.0], [1204.0, 384.0], [402.0, 383.0]], ('We would like to thank all the designers and', 0.9761400818824768)]
+[[[403.0, 396.0], [1204.0, 398.0], [1204.0, 434.0], [402.0, 433.0]], ('contributors who have been involved in the', 0.9791957139968872)]
+......
```
Visualization of results
diff --git a/doc/doc_en/whl_en.md b/doc/doc_en/whl_en.md
index 35b2b1798ad8b566ee87e921e23be84a5ecccf24..670653f1e59241e16bf569103e2dd2eaffcb4ed3 100644
--- a/doc/doc_en/whl_en.md
+++ b/doc/doc_en/whl_en.md
@@ -174,38 +174,38 @@ paddleocr -h
* detection classification and recognition
```bash
-paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg --use_angle_cls true --lang en
+paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg --use_angle_cls true --lang en --rec_image_shape 3,48,320
```
Output will be a list, each item contains bounding box, text and recognition confidence
```bash
-[[[442.0, 173.0], [1169.0, 173.0], [1169.0, 225.0], [442.0, 225.0]], ['ACKNOWLEDGEMENTS', 0.99283075]]
-[[[393.0, 340.0], [1207.0, 342.0], [1207.0, 389.0], [393.0, 387.0]], ['We would like to thank all the designers and', 0.9357758]]
-[[[399.0, 398.0], [1204.0, 398.0], [1204.0, 433.0], [399.0, 433.0]], ['contributors whohave been involved in the', 0.9592447]]
+[[[441.0, 174.0], [1166.0, 176.0], [1165.0, 222.0], [441.0, 221.0]], ('ACKNOWLEDGEMENTS', 0.9971134662628174)]
+[[[403.0, 346.0], [1204.0, 348.0], [1204.0, 384.0], [402.0, 383.0]], ('We would like to thank all the designers and', 0.9761400818824768)]
+[[[403.0, 396.0], [1204.0, 398.0], [1204.0, 434.0], [402.0, 433.0]], ('contributors who have been involved in the', 0.9791957139968872)]
......
```
* detection and recognition
```bash
-paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg --lang en
+paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg --lang en --rec_image_shape 3,48,320
```
Output will be a list, each item contains bounding box, text and recognition confidence
```bash
-[[[442.0, 173.0], [1169.0, 173.0], [1169.0, 225.0], [442.0, 225.0]], ['ACKNOWLEDGEMENTS', 0.99283075]]
-[[[393.0, 340.0], [1207.0, 342.0], [1207.0, 389.0], [393.0, 387.0]], ['We would like to thank all the designers and', 0.9357758]]
-[[[399.0, 398.0], [1204.0, 398.0], [1204.0, 433.0], [399.0, 433.0]], ['contributors whohave been involved in the', 0.9592447]]
+[[[441.0, 174.0], [1166.0, 176.0], [1165.0, 222.0], [441.0, 221.0]], ('ACKNOWLEDGEMENTS', 0.9971134662628174)]
+[[[403.0, 346.0], [1204.0, 348.0], [1204.0, 384.0], [402.0, 383.0]], ('We would like to thank all the designers and', 0.9761400818824768)]
+[[[403.0, 396.0], [1204.0, 398.0], [1204.0, 434.0], [402.0, 433.0]], ('contributors who have been involved in the', 0.9791957139968872)]
......
```
* classification and recognition
```bash
-paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --use_angle_cls true --det false --lang en
+paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --use_angle_cls true --det false --lang en --rec_image_shape 3,48,320
```
Output will be a list, each item contains text and recognition confidence
```bash
-['PAIN', 0.990372]
+['PAIN', 0.9934559464454651]
```
* only detection
@@ -215,20 +215,20 @@ paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg --rec false
Output will be a list, each item only contains bounding box
```bash
-[[756.0, 812.0], [805.0, 812.0], [805.0, 830.0], [756.0, 830.0]]
-[[820.0, 803.0], [1085.0, 801.0], [1085.0, 836.0], [820.0, 838.0]]
-[[393.0, 801.0], [715.0, 805.0], [715.0, 839.0], [393.0, 836.0]]
+[[397.0, 802.0], [1092.0, 802.0], [1092.0, 841.0], [397.0, 841.0]]
+[[397.0, 750.0], [1211.0, 750.0], [1211.0, 789.0], [397.0, 789.0]]
+[[397.0, 702.0], [1209.0, 698.0], [1209.0, 734.0], [397.0, 738.0]]
......
```
* only recognition
```bash
-paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --det false --lang en
+paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --det false --lang en --rec_image_shape 3,48,320
```
Output will be a list, each item contains text and recognition confidence
```bash
-['PAIN', 0.990372]
+['PAIN', 0.9934559464454651]
```
* only classification
@@ -366,5 +366,5 @@ im_show.save('result.jpg')
| cls | Enable classification when `ppocr.ocr` func exec((Use use_angle_cls in command line mode to control whether to start classification in the forward direction) | FALSE |
| show_log | Whether to print log| FALSE |
| type | Perform ocr or table structuring, the value is selected in ['ocr','structure'] | ocr |
-| ocr_version | OCR Model version number, the current model support list is as follows: PP-OCRv2 support Chinese detection and recognition model, PP-OCR support Chinese detection, recognition and direction classifier, multilingual recognition model | PP-OCRv2 |
-| structure_version | table structure Model version number, the current model support list is as follows: STRUCTURE support english table structure model | STRUCTURE |
+| ocr_version | OCR model version. The currently supported versions are: PP-OCRv3 supports the Chinese and English detection and recognition models and the direction classifier model; PP-OCRv2 supports the Chinese detection and recognition models; PP-OCR supports the Chinese detection, recognition and direction classifier models and the multilingual recognition models | PP-OCRv3 |
+| structure_version | Table structure model version. The currently supported versions are: PP-STRUCTURE supports the English table structure model | PP-STRUCTURE |
diff --git a/doc/imgs_results/system_res_00018069_v3.jpg b/doc/imgs_results/system_res_00018069_v3.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..51808ca556b52239cad3602cd5602f4d5d0ab7ce
Binary files /dev/null and b/doc/imgs_results/system_res_00018069_v3.jpg differ
diff --git a/paddleocr.py b/paddleocr.py
index cb2c34f69f68d289b317d4737bd23385c77c3d95..417350839ac4d1e512c7396831f89ab4b2d6c724 100644
--- a/paddleocr.py
+++ b/paddleocr.py
@@ -47,16 +47,46 @@ __all__ = [
]
SUPPORT_DET_MODEL = ['DB']
-VERSION = '2.5'
+VERSION = '2.5.0.1'
SUPPORT_REC_MODEL = ['CRNN']
BASE_DIR = os.path.expanduser("~/.paddleocr/")
-DEFAULT_OCR_MODEL_VERSION = 'PP-OCR'
-SUPPORT_OCR_MODEL_VERSION = ['PP-OCR', 'PP-OCRv2']
-DEFAULT_STRUCTURE_MODEL_VERSION = 'STRUCTURE'
-SUPPORT_STRUCTURE_MODEL_VERSION = ['STRUCTURE']
+DEFAULT_OCR_MODEL_VERSION = 'PP-OCRv3'
+SUPPORT_OCR_MODEL_VERSION = ['PP-OCR', 'PP-OCRv2', 'PP-OCRv3']
+DEFAULT_STRUCTURE_MODEL_VERSION = 'PP-STRUCTURE'
+SUPPORT_STRUCTURE_MODEL_VERSION = ['PP-STRUCTURE']
MODEL_URLS = {
'OCR': {
+ 'PP-OCRv3': {
+ 'det': {
+ 'ch': {
+ 'url':
+ 'https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar',
+ },
+ 'en': {
+ 'url':
+ 'https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar',
+ },
+ },
+ 'rec': {
+ 'ch': {
+ 'url':
+ 'https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar',
+ 'dict_path': './ppocr/utils/ppocr_keys_v1.txt'
+ },
+ 'en': {
+ 'url':
+ 'https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tar',
+ 'dict_path': './ppocr/utils/en_dict.txt'
+ },
+ },
+ 'cls': {
+ 'ch': {
+ 'url':
+ 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar',
+ }
+ },
+ },
'PP-OCRv2': {
'det': {
'ch': {
@@ -72,7 +102,7 @@ MODEL_URLS = {
}
}
},
- DEFAULT_OCR_MODEL_VERSION: {
+ 'PP-OCR': {
'det': {
'ch': {
'url':
@@ -173,7 +203,7 @@ MODEL_URLS = {
}
},
'STRUCTURE': {
- DEFAULT_STRUCTURE_MODEL_VERSION: {
+ 'PP-STRUCTURE': {
'table': {
'en': {
'url':
@@ -198,16 +228,17 @@ def parse_args(mMain=True):
"--ocr_version",
type=str,
choices=SUPPORT_OCR_MODEL_VERSION,
- default='PP-OCRv2',
+ default='PP-OCRv3',
help='OCR Model version, the current model support list is as follows: '
- '1. PP-OCRv2 Support Chinese detection and recognition model. '
- '2. PP-OCR support Chinese detection, recognition and direction classifier and multilingual recognition model.'
+ '1. PP-OCRv3 Support Chinese and English detection and recognition model, and direction classifier model. '
+ '2. PP-OCRv2 Support Chinese detection and recognition model. '
+ '3. PP-OCR support Chinese detection, recognition and direction classifier and multilingual recognition model.'
)
parser.add_argument(
"--structure_version",
type=str,
choices=SUPPORT_STRUCTURE_MODEL_VERSION,
- default='STRUCTURE',
+ default='PP-STRUCTURE',
help='Model version, the current model support list is as follows:'
' 1. STRUCTURE Support en table structure model.')