update doc for whl and python cpp infer quick

738ff190 · andyjpaddle · 5a08a408 · 738ff190 · 738ff190 · 738ff190
10 changed file
--- a/deploy/cpp_infer/readme.md
+++ b/deploy/cpp_infer/readme.md
@@ -220,6 +220,7 @@ Specifically,
    --det=true \
    --rec=true \
    --cls=true \
+    --rec_img_h=48\
 ```
 ##### 2. det+rec：
@@ -231,6 +232,7 @@ Specifically,
    --det=true \
    --rec=true \
    --cls=false \
+    --rec_img_h=48\
 ```
 ##### 3. det
@@ -250,6 +252,7 @@ Specifically,
    --det=false \
    --rec=true \
    --cls=true \
+    --rec_img_h=48\
 ```
 ##### 5. rec
@@ -260,6 +263,7 @@ Specifically,
    --det=false \
    --rec=true \
    --cls=false \
+    --rec_img_h=48\
 ```
 ##### 6. cls
@@ -335,10 +339,10 @@ The detection results will be shown on the screen, which is as follows.
 ```bash
 predict img: ../../doc/imgs/12.jpg
 ../../doc/imgs/12.jpg
-0       det boxes: [[79,553],[399,541],[400,573],[80,585]] rec text: 打浦路252935号 rec score: 0.933757
+0       det boxes: [[74,553],[427,542],[428,571],[75,582]] rec text: 打浦路252935号 rec score: 0.947724
-1       det boxes: [[31,509],[510,488],[511,529],[33,549]] rec text: 绿洲仕格维花园公寓 rec score: 0.951745
+1       det boxes: [[23,507],[513,488],[515,529],[24,548]] rec text: 绿洲仕格维花园公寓 rec score: 0.993728
-2       det boxes: [[181,456],[395,448],[396,480],[182,488]] rec text: 打浦路15号 rec score: 0.91956
+2       det boxes: [[187,456],[399,448],[400,480],[188,488]] rec text: 打浦路15号 rec score: 0.964994
-3       det boxes: [[43,413],[480,391],[481,428],[45,450]] rec text: 上海斯格威铂尔多大酒店 rec score: 0.915914
+3       det boxes: [[42,413],[483,391],[484,428],[43,450]] rec text: 上海斯格威铂尔大酒店 rec score: 0.980086
 The detection visualized image saved in ./output//12.jpg
 ```

--- a/deploy/cpp_infer/readme_ch.md
+++ b/deploy/cpp_infer/readme_ch.md
@@ -229,6 +229,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
    --det=true \
    --rec=true \
    --cls=true \
+    --rec_img_h=48\
 ```
 ##### 2. 检测+识别：
@@ -240,6 +241,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
    --det=true \
    --rec=true \
    --cls=false \
+    --rec_img_h=48\
 ```
 ##### 3. 检测：
@@ -259,6 +261,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
    --det=false \
    --rec=true \
    --cls=true \
+    --rec_img_h=48\
 ```
 ##### 5. 识别：
@@ -269,6 +272,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
    --det=false \
    --rec=true \
    --cls=false \
+    --rec_img_h=48\
 ```
 ##### 6. 分类：
@@ -343,10 +347,10 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
 ```bash
 predict img: ../../doc/imgs/12.jpg
 ../../doc/imgs/12.jpg
-0       det boxes: [[79,553],[399,541],[400,573],[80,585]] rec text: 打浦路252935号 rec score: 0.933757
+0       det boxes: [[74,553],[427,542],[428,571],[75,582]] rec text: 打浦路252935号 rec score: 0.947724
-1       det boxes: [[31,509],[510,488],[511,529],[33,549]] rec text: 绿洲仕格维花园公寓 rec score: 0.951745
+1       det boxes: [[23,507],[513,488],[515,529],[24,548]] rec text: 绿洲仕格维花园公寓 rec score: 0.993728
-2       det boxes: [[181,456],[395,448],[396,480],[182,488]] rec text: 打浦路15号 rec score: 0.91956
+2       det boxes: [[187,456],[399,448],[400,480],[188,488]] rec text: 打浦路15号 rec score: 0.964994
-3       det boxes: [[43,413],[480,391],[481,428],[45,450]] rec text: 上海斯格威铂尔多大酒店 rec score: 0.915914
+3       det boxes: [[42,413],[483,391],[484,428],[43,450]] rec text: 上海斯格威铂尔大酒店 rec score: 0.980086
 The detection visualized image saved in ./output//12.jpg
 ```

--- a/doc/doc_ch/inference_ppocr.md
+++ b/doc/doc_ch/inference_ppocr.md
@@ -19,9 +19,9 @@
 ```
 # 下载超轻量中文检测模型：
-wget  https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar
+wget  https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar
-tar xf ch_PP-OCRv2_det_infer.tar
+tar xf ch_PP-OCRv3_det_infer.tar
-python3 tools/infer/predict_det.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv2_det_infer/"
+python3 tools/infer/predict_det.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/"
 ```
@@ -40,13 +40,13 @@ python3 tools/infer/predict_det.py --image_dir="./doc/imgs/00018069.jpg" --det_m
 如果输入图片的分辨率比较大，而且想使用更大的分辨率预测，可以设置det_limit_side_len 为想要的值，比如1216：
 ```
-python3 tools/infer/predict_det.py --image_dir="./doc/imgs/1.jpg" --det_model_dir="./inference/ch_PP-OCRv2_det_infer/" --det_limit_type=max --det_limit_side_len=1216
+python3 tools/infer/predict_det.py --image_dir="./doc/imgs/1.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --det_limit_type=max --det_limit_side_len=1216
 ```
 如果想使用CPU进行预测，执行命令如下
 ```
-python3 tools/infer/predict_det.py --image_dir="./doc/imgs/1.jpg" --det_model_dir="./inference/ch_PP-OCRv2_det_infer/"  --use_gpu=False
+python3 tools/infer/predict_det.py --image_dir="./doc/imgs/1.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/"  --use_gpu=False
 ```
@@ -63,9 +63,9 @@ python3 tools/infer/predict_det.py --image_dir="./doc/imgs/1.jpg" --det_model_di
 ```
 # 下载超轻量中文识别模型：
-wget  https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar
+wget  https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar
-tar xf ch_PP-OCRv2_rec_infer.tar
+tar xf ch_PP-OCRv3_rec_infer.tar
-python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --rec_model_dir="./ch_PP-OCRv2_rec_infer/"
+python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --rec_image_shape=3,48,320
 ```
 ![](../imgs_words/ch/word_4.jpg)
@@ -73,7 +73,7 @@ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg"
 执行命令后，上面图像的预测结果（识别的文本和得分）会打印到屏幕上，示例如下：
 ```bash
-Predicts of ./doc/imgs_words/ch/word_4.jpg:('实力活力', 0.98458153)
+Predicts of ./doc/imgs_words/ch/word_4.jpg:('实力活力', 0.9956803321838379)
 ```
 <a name="多语言模型的推理"></a>
@@ -123,13 +123,13 @@ Predicts of ./doc/imgs_words/ch/word_4.jpg:['0', 0.9999982]
 ```shell
 # 使用方向分类器
-python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./inference/ch_PP-OCRv2_det_infer/" --cls_model_dir="./inference/cls/" --rec_model_dir="./inference/ch_PP-OCRv2_rec_infer/" --use_angle_cls=true
+python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --cls_model_dir="./cls/" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --use_angle_cls=true --rec_image_shape=3,48,320
 # 不使用方向分类器
-python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./inference/ch_PP-OCRv2_det_infer/" --rec_model_dir="./inference/ch_PP-OCRv2_rec_infer/" --use_angle_cls=false
+python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --use_angle_cls=false --rec_image_shape=3,48,320
 # 使用多进程
-python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./inference/ch_PP-OCRv2_det_infer/" --rec_model_dir="./inference/ch_PP-OCRv2_rec_infer/" --use_angle_cls=false --use_mp=True --total_process_num=6
+python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --use_angle_cls=false --use_mp=True --total_process_num=6 --rec_image_shape=3,48,320
 ```
 执行命令后，识别结果图像如下：
-![](../imgs_results/system_res_00018069.jpg)
+![](../imgs_results/system_res_00018069_v3.jpg)
--- a/doc/doc_ch/quickstart.md
+++ b/doc/doc_ch/quickstart.md
@@ -65,15 +65,13 @@ cd /path/to/ppocr_img
 * 检测+方向分类器+识别全流程：`--use_angle_cls true`设置使用方向分类器识别180度旋转文字，`--use_gpu false`设置不使用GPU
  ```bash
-  paddleocr --image_dir ./imgs/11.jpg --use_angle_cls true --use_gpu false
+  paddleocr --image_dir ./imgs/11.jpg --use_angle_cls true --use_gpu false --rec_image_shape 3,48,320
  ```
  结果是一个list，每个item包含了文本框，文字和识别置信度
  ```bash
-  [[[24.0, 36.0], [304.0, 34.0], [304.0, 72.0], [24.0, 74.0]], ['纯臻营养护发素', 0.964739]]
+  [[[28.0, 37.0], [302.0, 39.0], [302.0, 72.0], [27.0, 70.0]], ('纯臻营养护发素', 0.9658738374710083)]
-  [[[24.0, 80.0], [172.0, 80.0], [172.0, 104.0], [24.0, 104.0]], ['产品信息/参数', 0.98069626]]
-  [[[24.0, 109.0], [333.0, 109.0], [333.0, 136.0], [24.0, 136.0]], ['（45元/每公斤，100公斤起订）', 0.9676722]]
  ......
  ```
@@ -86,35 +84,34 @@ cd /path/to/ppocr_img
  结果是一个list，每个item只包含文本框
  ```bash
-  [[26.0, 457.0], [137.0, 457.0], [137.0, 477.0], [26.0, 477.0]]
+  [[27.0, 459.0], [136.0, 459.0], [136.0, 479.0], [27.0, 479.0]]
-  [[25.0, 425.0], [372.0, 425.0], [372.0, 448.0], [25.0, 448.0]]
+  [[28.0, 429.0], [372.0, 429.0], [372.0, 445.0], [28.0, 445.0]]
-  [[128.0, 397.0], [273.0, 397.0], [273.0, 414.0], [128.0, 414.0]]
  ......
  ```
 - 单独使用识别：设置`--det`为`false`
  ```bash
-  paddleocr --image_dir ./imgs_words/ch/word_1.jpg --det false
+  paddleocr --image_dir ./imgs_words/ch/word_1.jpg --det false --rec_image_shape 3,48,320
  ```
  结果是一个list，每个item只包含识别结果和识别置信度
  ```bash
-  ['韩国小馆', 0.9907421]
+  ['韩国小馆', 0.994467]
  ```
-如需使用2.0模型，请指定参数`--version PP-OCR`，paddleocr默认使用2.1模型(`--versioin PP-OCRv2`)。更多whl包使用可参考[whl包文档](./whl.md)
+如需使用2.0模型，请指定参数`--version PP-OCR`，paddleocr默认使用PP-OCRv3模型(`--versioin PP-OCRv3`)。更多whl包使用可参考[whl包文档](./whl.md)
 <a name="212"></a>
 #### 2.1.2 多语言模型
-Paddleocr目前支持80个语种，可以通过修改`--lang`参数进行切换，对于英文模型，指定`--lang=en`。
+Paddleocr目前支持80个语种，可以通过修改`--lang`参数进行切换，对于英文模型，指定`--lang=en`, PP-OCRv3目前只支持中文和英文模型，其他多语言模型会陆续更新。
 ``` bash
-paddleocr --image_dir ./imgs_en/254.jpg --lang=en
+paddleocr --image_dir ./imgs_en/254.jpg --lang=en --rec_image_shape 3,48,320
 ```
 <div align="center">
@@ -125,13 +122,9 @@ paddleocr --image_dir ./imgs_en/254.jpg --lang=en
 结果是一个list，每个item包含了文本框，文字和识别置信度
 ```text
-[('PHO CAPITAL', 0.95723116), [[66.0, 50.0], [327.0, 44.0], [327.0, 76.0], [67.0, 82.0]]]
+[[[67.0, 51.0], [327.0, 46.0], [327.0, 74.0], [68.0, 80.0]], ('PHOCAPITAL', 0.9944712519645691)]
-[('107 State Street', 0.96311164), [[72.0, 90.0], [451.0, 84.0], [452.0, 116.0], [73.0, 121.0]]]
+[[[72.0, 92.0], [453.0, 84.0], [454.0, 114.0], [73.0, 122.0]], ('107 State Street', 0.9744491577148438)]
-[('Montpelier Vermont', 0.97389287), [[69.0, 132.0], [501.0, 126.0], [501.0, 158.0], [70.0, 164.0]]]
+[[[69.0, 135.0], [501.0, 125.0], [501.0, 156.0], [70.0, 165.0]], ('Montpelier Vermont', 0.9357033967971802)]
-[('8022256183', 0.99810505), [[71.0, 175.0], [363.0, 170.0], [364.0, 202.0], [72.0, 207.0]]]
-[('REG 07-24-201706:59 PM', 0.93537045), [[73.0, 299.0], [653.0, 281.0], [654.0, 318.0], [74.0, 336.0]]]
-[('045555', 0.99346405), [[509.0, 331.0], [651.0, 325.0], [652.0, 356.0], [511.0, 362.0]]]
-[('CT1', 0.9988654), [[535.0, 367.0], [654.0, 367.0], [654.0, 406.0], [535.0, 406.0]]]
 ......
 ```
@@ -181,9 +174,7 @@ im_show.save('result.jpg')
 结果是一个list，每个item包含了文本框，文字和识别置信度
 ```bash
-[[[24.0, 36.0], [304.0, 34.0], [304.0, 72.0], [24.0, 74.0]], ['纯臻营养护发素', 0.964739]]
+[[[28.0, 37.0], [302.0, 39.0], [302.0, 72.0], [27.0, 70.0]], ('纯臻营养护发素', 0.9658738374710083)]
-[[[24.0, 80.0], [172.0, 80.0], [172.0, 104.0], [24.0, 104.0]], ['产品信息/参数', 0.98069626]]
-[[[24.0, 109.0], [333.0, 109.0], [333.0, 136.0], [24.0, 136.0]], ['（45元/每公斤，100公斤起订）', 0.9676722]]
 ......
 ```

--- a/doc/doc_ch/whl.md
+++ b/doc/doc_ch/whl.md
@@ -202,43 +202,39 @@ paddleocr -h
 * 检测+方向分类器+识别全流程
 ```bash
-paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --use_angle_cls true
+paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --use_angle_cls true --rec_image_shape 3,48,320
 ```
 结果是一个list，每个item包含了文本框，文字和识别置信度
 ```bash
-[[[24.0, 36.0], [304.0, 34.0], [304.0, 72.0], [24.0, 74.0]], ['纯臻营养护发素', 0.964739]]
+[[[28.0, 37.0], [302.0, 39.0], [302.0, 72.0], [27.0, 70.0]], ('纯臻营养护发素', 0.9658738374710083)]
-[[[24.0, 80.0], [172.0, 80.0], [172.0, 104.0], [24.0, 104.0]], ['产品信息/参数', 0.98069626]]
-[[[24.0, 109.0], [333.0, 109.0], [333.0, 136.0], [24.0, 136.0]], ['（45元/每公斤，100公斤起订）', 0.9676722]]µ
 ......
 ```
 * 检测+识别
 ```bash
-paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg
+paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --rec_image_shape 3,48,320
 ```
 结果是一个list，每个item包含了文本框，文字和识别置信度
 ```bash
-[[[24.0, 36.0], [304.0, 34.0], [304.0, 72.0], [24.0, 74.0]], ['纯臻营养护发素', 0.964739]]
+[[[28.0, 37.0], [302.0, 39.0], [302.0, 72.0], [27.0, 70.0]], ('纯臻营养护发素', 0.9658738374710083)]
-[[[24.0, 80.0], [172.0, 80.0], [172.0, 104.0], [24.0, 104.0]], ['产品信息/参数', 0.98069626]]
-[[[24.0, 109.0], [333.0, 109.0], [333.0, 136.0], [24.0, 136.0]], ['（45元/每公斤，100公斤起订）', 0.9676722]]
 ......
 ```
 * 方向分类器+识别
 ```bash
-paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --use_angle_cls true --det false
+paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --use_angle_cls true --det false --rec_image_shape 3,48,320
 ```
 结果是一个list，每个item只包含识别结果和识别置信度
 ```bash
-['韩国小馆', 0.9907421]
+['韩国小馆', 0.994467]
 ```
 * 单独执行检测
@@ -250,22 +246,21 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --rec false
 结果是一个list，每个item只包含文本框
 ```bash
-[[26.0, 457.0], [137.0, 457.0], [137.0, 477.0], [26.0, 477.0]]
+[[27.0, 459.0], [136.0, 459.0], [136.0, 479.0], [27.0, 479.0]]
-[[25.0, 425.0], [372.0, 425.0], [372.0, 448.0], [25.0, 448.0]]
+[[28.0, 429.0], [372.0, 429.0], [372.0, 445.0], [28.0, 445.0]]
-[[128.0, 397.0], [273.0, 397.0], [273.0, 414.0], [128.0, 414.0]]
 ......
 ```
 * 单独执行识别
 ```bash
-paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --det false
+paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --det false --rec_image_shape 3,48,320
 ```
 结果是一个list，每个item只包含识别结果和识别置信度
 ```bash
-['韩国小馆', 0.9907421]
+['韩国小馆', 0.994467]
 ```
 * 单独执行方向分类器
@@ -419,5 +414,5 @@ im_show.save('result.jpg')
 | cls                     | 前向时是否启动分类 (命令行模式下使用use_angle_cls控制前向是否启动分类)                                                                                                                                                                                                | FALSE                    |
 | show_log                     | 是否打印logger信息                                                                                                                                               | FALSE                    |
 | type                     | 执行ocr或者表格结构化, 值可选['ocr','structure']                                                                                                                                                                                             | ocr                    |
-| ocr_version                     | OCR模型版本，可选PP-OCRv2, PP-OCR。PP-OCRv2 目前仅支持中文的检测和识别模型，PP-OCR支持中文的检测，识别，多语种识别，方向分类器等模型                                                                                                                                        | PP-OCRv2                   |
+| ocr_version                     | OCR模型版本，可选PP-OCRv3, PP-OCRv2, PP-OCR。PP-OCRv3 目前仅支持中、英文的检测和识别模型，方向分类器模型；PP-OCRv2 目前仅支持中文的检测和识别模型；PP-OCR支持中文的检测，识别，多语种识别，方向分类器等模型                                                                                                                                        | PP-OCRv3                   |
-| structure_version                     | 表格结构化模型版本，可选 STRUCTURE。STRUCTURE支持表格结构化模型                                                                                                                                                                                        | STRUCTURE                    |
+| structure_version                     | 表格结构化模型版本，可选 PP-STRUCTURE。PP-STRUCTURE支持表格结构化模型                                                                                                                                                                                        | PP-STRUCTURE                    |
--- a/doc/doc_en/inference_ppocr_en.md
+++ b/doc/doc_en/inference_ppocr_en.md
@@ -20,10 +20,10 @@ The default configuration is based on the inference setting of the DB text detec
 ```
 # download DB text detection inference model
-wget  https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar
+wget  https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar
-tar xf ch_PP-OCRv2_det_infer.tar
+tar xf ch_PP-OCRv3_det_infer.tar
 # run inference
-python3 tools/infer/predict_det.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv2_det_infer.tar/"
+python3 tools/infer/predict_det.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/"
 ```
 The visual text detection results are saved to the ./inference_results folder by default, and the name of the result file is prefixed with 'det_res'. Examples of results are as follows:
@@ -40,12 +40,12 @@ Set as `limit_type='min', det_limit_side_len=960`, it means that the shortest si
 If the resolution of the input picture is relatively large and you want to use a larger resolution prediction, you can set det_limit_side_len to the desired value, such as 1216:
 ```
-python3 tools/infer/predict_det.py --image_dir="./doc/imgs/1.jpg" --det_model_dir="./inference/ch_PP-OCRv2_det_infer/" --det_limit_type=max --det_limit_side_len=1216
+python3 tools/infer/predict_det.py --image_dir="./doc/imgs/1.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --det_limit_type=max --det_limit_side_len=1216
 ```
 If you want to use the CPU for prediction, execute the command as follows
 ```
-python3 tools/infer/predict_det.py --image_dir="./doc/imgs/1.jpg" --det_model_dir="./inference/ch_PP-OCRv2_det_infer/"  --use_gpu=False
+python3 tools/infer/predict_det.py --image_dir="./doc/imgs/1.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/"  --use_gpu=False
 ```
 <a name="RECOGNITION_MODEL_INFERENCE"></a>
@@ -60,10 +60,10 @@ For lightweight Chinese recognition model inference, you can execute the followi
 ```
 # download CRNN text recognition inference model
-wget  https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar
+wget  https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar
-tar xf ch_PP-OCRv2_rec_infer.tar
+tar xf ch_PP-OCRv3_rec_infer.tar
 # run inference
-python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --rec_model_dir="./ch_PP-OCRv2_rec_infer/"
+python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_10.png" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --rec_image_shape=3,48,320
 ```
 ![](../imgs_words_en/word_10.png)
@@ -71,7 +71,7 @@ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg"
 After executing the command, the prediction results (recognized text and score) of the above image will be printed on the screen.
 ```bash
-Predicts of ./doc/imgs_words_en/word_10.png:('PAIN', 0.9897658)
+Predicts of ./doc/imgs_words_en/word_10.png:('PAIN', 0.988671)
 ```
 <a name="MULTILINGUAL_MODEL_INFERENCE"></a>
@@ -121,16 +121,16 @@ When performing prediction, you need to specify the path of a single image or a
 ```shell
 # use direction classifier
-python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./inference/ch_PP-OCRv2_det_infer/" --cls_model_dir="./inference/cls/" --rec_model_dir="./inference/ch_PP-OCRv2_rec_infer/" --use_angle_cls=true
+python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --cls_model_dir="./cls/" --rec_model_dir="./ch_PP-OCRv2_rec_infer/" --use_angle_cls=true --rec_image_shape=3,48,320
 # not use use direction classifier
-python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./inference/ch_PP-OCRv2_det_infer/" --rec_model_dir="./inference/ch_PP-OCRv2_rec_infer/" --use_angle_cls=false
+python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv2_det_infer/" --rec_model_dir="./ch_PP-OCRv2_rec_infer/" --use_angle_cls=false --rec_image_shape=3,48,320
 # use multi-process
-python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./inference/ch_PP-OCRv2_det_infer/" --rec_model_dir="./inference/ch_PP-OCRv2_rec_infer/" --use_angle_cls=false --use_mp=True --total_process_num=6
+python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv2_det_infer/" --rec_model_dir="./ch_PP-OCRv2_rec_infer/" --use_angle_cls=false --use_mp=True --total_process_num=6 --rec_image_shape=3,48,320
 ```
 After executing the command, the recognition result image is as follows:
-![](../imgs_results/system_res_00018069.jpg)
+![](../imgs_results/system_res_00018069_v3.jpg)
--- a/doc/doc_en/quickstart_en.md
+++ b/doc/doc_en/quickstart_en.md
@@ -80,15 +80,15 @@ If you do not use the provided test image, you can replace the following `--imag
 * Detection, direction classification and recognition: set the parameter`--use_gpu false` to disable the gpu device
  ```bash
-  paddleocr --image_dir ./imgs_en/img_12.jpg --use_angle_cls true --lang en --use_gpu false
+  paddleocr --image_dir ./imgs_en/img_12.jpg --use_angle_cls true --lang en --use_gpu false --rec_image_shape 3,48,320
  ```
  Output will be a list, each item contains bounding box, text and recognition confidence
  ```bash
-  [[[442.0, 173.0], [1169.0, 173.0], [1169.0, 225.0], [442.0, 225.0]], ['ACKNOWLEDGEMENTS', 0.99283075]]
+  [[[441.0, 174.0], [1166.0, 176.0], [1165.0, 222.0], [441.0, 221.0]], ('ACKNOWLEDGEMENTS', 0.9971134662628174)]
-  [[[393.0, 340.0], [1207.0, 342.0], [1207.0, 389.0], [393.0, 387.0]], ['We would like to thank all the designers and', 0.9357758]]
+  [[[403.0, 346.0], [1204.0, 348.0], [1204.0, 384.0], [402.0, 383.0]], ('We would like to thank all the designers and', 0.9761400818824768)]
-  [[[399.0, 398.0], [1204.0, 398.0], [1204.0, 433.0], [399.0, 433.0]], ['contributors whohave been involved in the', 0.9592447]]
+  [[[403.0, 396.0], [1204.0, 398.0], [1204.0, 434.0], [402.0, 433.0]], ('contributors who have been involved in the', 0.9791957139968872)]
  ......
  ```
@@ -101,33 +101,33 @@ If you do not use the provided test image, you can replace the following `--imag
  Output will be a list, each item only contains bounding box
  ```bash
-  [[756.0, 812.0], [805.0, 812.0], [805.0, 830.0], [756.0, 830.0]]
+  [[397.0, 802.0], [1092.0, 802.0], [1092.0, 841.0], [397.0, 841.0]]
-  [[820.0, 803.0], [1085.0, 801.0], [1085.0, 836.0], [820.0, 838.0]]
+  [[397.0, 750.0], [1211.0, 750.0], [1211.0, 789.0], [397.0, 789.0]]
-  [[393.0, 801.0], [715.0, 805.0], [715.0, 839.0], [393.0, 836.0]]
+  [[397.0, 702.0], [1209.0, 698.0], [1209.0, 734.0], [397.0, 738.0]]
  ......
  ```
 * Only recognition: set `--det` to `false`
  ```bash
-  paddleocr --image_dir ./imgs_words_en/word_10.png --det false --lang en
+  paddleocr --image_dir ./imgs_words_en/word_10.png --det false --lang en --rec_image_shape 3,48,320
  ```
  Output will be a list, each item contains text and recognition confidence
  ```bash
-  ['PAIN', 0.990372]
+  ['PAIN', 0.9934559464454651]
  ```
-If you need to use the 2.0 model, please specify the parameter `--version PP-OCR`, paddleocr uses the 2.1 model by default(`--versioin PP-OCRv2`). More whl package usage can be found in [whl package](./whl_en.md)
+If you need to use the 2.0 model, please specify the parameter `--version PP-OCR`, paddleocr uses the PP-OCRv3 model by default(`--versioin PP-OCRv3`). More whl package usage can be found in [whl package](./whl_en.md)
 <a name="212-multi-language-model"></a>
 #### 2.1.2 Multi-language Model
-Paddleocr currently supports 80 languages, which can be switched by modifying the `--lang` parameter.
+Paddleocr currently supports 80 languages, which can be switched by modifying the `--lang` parameter. PP-OCRv3 currently only supports Chinese and English models, and other multilingual models will be updated one after another.
 ``` bash
-paddleocr --image_dir ./doc/imgs_en/254.jpg --lang=en
+paddleocr --image_dir ./doc/imgs_en/254.jpg --lang=en --rec_image_shape 3,48,320
 ```
 <div align="center">
@@ -137,13 +137,9 @@ paddleocr --image_dir ./doc/imgs_en/254.jpg --lang=en
 The result is a list, each item contains a text box, text and recognition confidence
 ```text
-[('PHO CAPITAL', 0.95723116), [[66.0, 50.0], [327.0, 44.0], [327.0, 76.0], [67.0, 82.0]]]
+[[[67.0, 51.0], [327.0, 46.0], [327.0, 74.0], [68.0, 80.0]], ('PHOCAPITAL', 0.9944712519645691)]
-[('107 State Street', 0.96311164), [[72.0, 90.0], [451.0, 84.0], [452.0, 116.0], [73.0, 121.0]]]
+[[[72.0, 92.0], [453.0, 84.0], [454.0, 114.0], [73.0, 122.0]], ('107 State Street', 0.9744491577148438)]
-[('Montpelier Vermont', 0.97389287), [[69.0, 132.0], [501.0, 126.0], [501.0, 158.0], [70.0, 164.0]]]
+[[[69.0, 135.0], [501.0, 125.0], [501.0, 156.0], [70.0, 165.0]], ('Montpelier Vermont', 0.9357033967971802)]
-[('8022256183', 0.99810505), [[71.0, 175.0], [363.0, 170.0], [364.0, 202.0], [72.0, 207.0]]]
-[('REG 07-24-201706:59 PM', 0.93537045), [[73.0, 299.0], [653.0, 281.0], [654.0, 318.0], [74.0, 336.0]]]
-[('045555', 0.99346405), [[509.0, 331.0], [651.0, 325.0], [652.0, 356.0], [511.0, 362.0]]]
-[('CT1', 0.9988654), [[535.0, 367.0], [654.0, 367.0], [654.0, 406.0], [535.0, 406.0]]]
 ......
 ```
@@ -234,10 +230,10 @@ im_show.save('result.jpg')
 Output will be a list, each item contains bounding box, text and recognition confidence
 ```bash
-[[[442.0, 173.0], [1169.0, 173.0], [1169.0, 225.0], [442.0, 225.0]], ['ACKNOWLEDGEMENTS', 0.99283075]]
+[[[441.0, 174.0], [1166.0, 176.0], [1165.0, 222.0], [441.0, 221.0]], ('ACKNOWLEDGEMENTS', 0.9971134662628174)]
-[[[393.0, 340.0], [1207.0, 342.0], [1207.0, 389.0], [393.0, 387.0]], ['We would like to thank all the designers and', 0.9357758]]
+  [[[403.0, 346.0], [1204.0, 348.0], [1204.0, 384.0], [402.0, 383.0]], ('We would like to thank all the designers and', 0.9761400818824768)]
-[[[399.0, 398.0], [1204.0, 398.0], [1204.0, 433.0], [399.0, 433.0]], ['contributors whohave been involved in the', 0.9592447]]
+  [[[403.0, 396.0], [1204.0, 398.0], [1204.0, 434.0], [402.0, 433.0]], ('contributors who have been involved in the', 0.9791957139968872)]
-......
+  ......
 ```
 Visualization of results

--- a/doc/doc_en/whl_en.md
+++ b/doc/doc_en/whl_en.md
@@ -174,38 +174,38 @@ paddleocr -h
 * detection classification and recognition
 ```bash
-paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg --use_angle_cls true --lang en
+paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg --use_angle_cls true --lang en --rec_image_shape 3,48,320
 ```
 Output will be a list, each item contains bounding box, text and recognition confidence
 ```bash
-[[[442.0, 173.0], [1169.0, 173.0], [1169.0, 225.0], [442.0, 225.0]], ['ACKNOWLEDGEMENTS', 0.99283075]]
+[[[441.0, 174.0], [1166.0, 176.0], [1165.0, 222.0], [441.0, 221.0]], ('ACKNOWLEDGEMENTS', 0.9971134662628174)]
-[[[393.0, 340.0], [1207.0, 342.0], [1207.0, 389.0], [393.0, 387.0]], ['We would like to thank all the designers and', 0.9357758]]
+[[[403.0, 346.0], [1204.0, 348.0], [1204.0, 384.0], [402.0, 383.0]], ('We would like to thank all the designers and', 0.9761400818824768)]
-[[[399.0, 398.0], [1204.0, 398.0], [1204.0, 433.0], [399.0, 433.0]], ['contributors whohave been involved in the', 0.9592447]]
+[[[403.0, 396.0], [1204.0, 398.0], [1204.0, 434.0], [402.0, 433.0]], ('contributors who have been involved in the', 0.9791957139968872)]
 ......
 ```
 * detection and recognition
 ```bash
-paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg --lang en
+paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg --lang en --rec_image_shape 3,48,320
 ```
 Output will be a list, each item contains bounding box, text and recognition confidence
 ```bash
-[[[442.0, 173.0], [1169.0, 173.0], [1169.0, 225.0], [442.0, 225.0]], ['ACKNOWLEDGEMENTS', 0.99283075]]
+[[[441.0, 174.0], [1166.0, 176.0], [1165.0, 222.0], [441.0, 221.0]], ('ACKNOWLEDGEMENTS', 0.9971134662628174)]
-[[[393.0, 340.0], [1207.0, 342.0], [1207.0, 389.0], [393.0, 387.0]], ['We would like to thank all the designers and', 0.9357758]]
+[[[403.0, 346.0], [1204.0, 348.0], [1204.0, 384.0], [402.0, 383.0]], ('We would like to thank all the designers and', 0.9761400818824768)]
-[[[399.0, 398.0], [1204.0, 398.0], [1204.0, 433.0], [399.0, 433.0]], ['contributors whohave been involved in the', 0.9592447]]
+[[[403.0, 396.0], [1204.0, 398.0], [1204.0, 434.0], [402.0, 433.0]], ('contributors who have been involved in the', 0.9791957139968872)]
 ......
 ```
 * classification and recognition
 ```bash
-paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --use_angle_cls true --det false --lang en
+paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --use_angle_cls true --det false --lang en --rec_image_shape 3,48,320
 ```
 Output will be a list, each item contains text and recognition confidence
 ```bash
-['PAIN', 0.990372]
+['PAIN', 0.9934559464454651]
 ```
 * only detection
@@ -215,20 +215,20 @@ paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg --rec false
 Output will be a list, each item only contains bounding box
 ```bash
-[[756.0, 812.0], [805.0, 812.0], [805.0, 830.0], [756.0, 830.0]]
+[[397.0, 802.0], [1092.0, 802.0], [1092.0, 841.0], [397.0, 841.0]]
-[[820.0, 803.0], [1085.0, 801.0], [1085.0, 836.0], [820.0, 838.0]]
+[[397.0, 750.0], [1211.0, 750.0], [1211.0, 789.0], [397.0, 789.0]]
-[[393.0, 801.0], [715.0, 805.0], [715.0, 839.0], [393.0, 836.0]]
+[[397.0, 702.0], [1209.0, 698.0], [1209.0, 734.0], [397.0, 738.0]]
 ......
 ```
 * only recognition
 ```bash
-paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --det false --lang en
+paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --det false --lang en --rec_image_shape 3,48,320
 ```
 Output will be a list, each item contains text and recognition confidence
 ```bash
-['PAIN', 0.990372]
+['PAIN', 0.9934559464454651]
 ```
 * only classification
@@ -366,5 +366,5 @@ im_show.save('result.jpg')
 | cls                     | Enable classification when `ppocr.ocr` func exec((Use use_angle_cls in command line mode to control whether to start classification in the forward direction)                                                                                                                                                                                                   | FALSE                    |
 | show_log                     | Whether to print log| FALSE                    |
 | type                     | Perform ocr or table structuring, the value is selected in ['ocr','structure']                                                                                                                                                                                             | ocr                    |
-| ocr_version                     | OCR Model version number, the current model support list is as follows: PP-OCRv2 support Chinese detection and recognition model, PP-OCR support Chinese detection, recognition and direction classifier, multilingual recognition model | PP-OCRv2                 |
+| ocr_version                     | OCR Model version number, the current model support list is as follows: PP-OCRv3 support Chinese and English detection and recognition model and direction classifier model, PP-OCRv2 support Chinese detection and recognition model, PP-OCR support Chinese detection, recognition and direction classifier, multilingual recognition model | PP-OCRv3                 |
-| structure_version                     | table structure Model version number, the current model support list is as follows: STRUCTURE support english table structure model | STRUCTURE                 |
+| structure_version                     | table structure Model version number, the current model support list is as follows: PP-STRUCTURE support english table structure model | PP-STRUCTURE                 |
--- a/doc/imgs_results/system_res_00018069_v3.jpg
+++ b/doc/imgs_results/system_res_00018069_v3.jpg
--- a/paddleocr.py
+++ b/paddleocr.py
@@ -47,16 +47,46 @@ __all__ = [
 ]
 SUPPORT_DET_MODEL = ['DB']
-VERSION = '2.5'
+VERSION = '2.5.0.1'
 SUPPORT_REC_MODEL = ['CRNN']
 BASE_DIR = os.path.expanduser("~/.paddleocr/")
-DEFAULT_OCR_MODEL_VERSION = 'PP-OCR'
+DEFAULT_OCR_MODEL_VERSION = 'PP-OCRv3'
-SUPPORT_OCR_MODEL_VERSION = ['PP-OCR', 'PP-OCRv2']
+SUPPORT_OCR_MODEL_VERSION = ['PP-OCR', 'PP-OCRv2', 'PP-OCRv3']
-DEFAULT_STRUCTURE_MODEL_VERSION = 'STRUCTURE'
+DEFAULT_STRUCTURE_MODEL_VERSION = 'PP-STRUCTURE'
-SUPPORT_STRUCTURE_MODEL_VERSION = ['STRUCTURE']
+SUPPORT_STRUCTURE_MODEL_VERSION = ['PP-STRUCTURE']
 MODEL_URLS = {
    'OCR': {
+        'PP-OCRv3': {
+            'det': {
+                'ch': {
+                    'url':
+                    'https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar',
+                },
+                'en': {
+                    'url':
+                    'https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar',
+                },
+            },
+            'rec': {
+                'ch': {
+                    'url':
+                    'https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar',
+                    'dict_path': './ppocr/utils/ppocr_keys_v1.txt'
+                },
+                'en': {
+                    'url':
+                    'https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tar',
+                    'dict_path': './ppocr/utils/en_dict.txt'
+                },
+            },
+            'cls': {
+                'ch': {
+                    'url':
+                    'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar',
+                }
+            },
+        },
        'PP-OCRv2': {
            'det': {
                'ch': {
@@ -72,7 +102,7 @@ MODEL_URLS = {
                }
            }
        },
-        DEFAULT_OCR_MODEL_VERSION: {
+        'PP-OCR': {
            'det': {
                'ch': {
                    'url':
@@ -173,7 +203,7 @@ MODEL_URLS = {
        }
    },
    'STRUCTURE': {
-        DEFAULT_STRUCTURE_MODEL_VERSION: {
+        'PP-STRUCTURE': {
            'table': {
                'en': {
                    'url':
@@ -198,16 +228,17 @@ def parse_args(mMain=True):
        "--ocr_version",
        type=str,
        choices=SUPPORT_OCR_MODEL_VERSION,
-        default='PP-OCRv2',
+        default='PP-OCRv3',
        help='OCR Model version, the current model support list is as follows: '
-        '1. PP-OCRv2 Support Chinese detection and recognition model. '
+        '1. PP-OCRv3 Support Chinese and English detection and recognition model, and direction classifier model'
-        '2. PP-OCR support Chinese detection, recognition and direction classifier and multilingual recognition model.'
+        '2. PP-OCRv2 Support Chinese detection and recognition model. '
+        '3. PP-OCR support Chinese detection, recognition and direction classifier and multilingual recognition model.'
    )
    parser.add_argument(
        "--structure_version",
        type=str,
        choices=SUPPORT_STRUCTURE_MODEL_VERSION,
-        default='STRUCTURE',
+        default='PP-STRUCTURE',
        help='Model version, the current model support list is as follows:'
        ' 1. STRUCTURE Support en table structure model.')