Commit 8bdc050c, authored by Bin Lu, committed via GitHub

Merge branch 'PaddlePaddle:dygraph' into dygraph

===========================train_params===========================
model_name:ocr_det
python:python3.7
gpu_list:0|0,1|10.21.226.181,10.21.226.133;0,1
Global.use_gpu:True|True|True
Global.auto_cast:fp32|amp
Global.epoch_num:lite_train_infer=1|whole_train_infer=300
Global.save_model_dir:./output/
Train.loader.batch_size_per_card:lite_train_infer=2|whole_train_infer=4
@@ -65,6 +65,8 @@ inference:./deploy/cpp_infer/build/ppocr det
null:null
--benchmark:True
===========================serving_params===========================
model_name:ocr_det
python:python3.7
trans_model:-m paddle_serving_client.convert
--dirname:./inference/ch_ppocr_mobile_v2.0_det_infer/
--model_filename:inference.pdmodel
@@ -82,14 +84,14 @@ pipline:pipeline_http_client.py --image_dir=../../doc/imgs
===========================kl_quant_params===========================
infer_model:./inference/ch_ppocr_mobile_v2.0_det_infer/
infer_export:tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o
infer_quant:True
inference:tools/infer/predict_det.py
--use_gpu:True|False
--enable_mkldnn:True|False
--cpu_threads:1|6
--rec_batch_num:1
--use_tensorrt:False|True
--precision:int8
--det_model_dir:
--image_dir:./inference/ch_det_data_50/all-sum-510/
null:null
......
@@ -49,4 +49,35 @@ inference:tools/infer/predict_det.py
--save_log_path:null
--benchmark:True
null:null
===========================cpp_infer_params===========================
use_opencv:True
infer_model:./inference/ch_ppocr_server_v2.0_det_infer/
infer_quant:False
inference:./deploy/cpp_infer/build/ppocr det
--use_gpu:True|False
--enable_mkldnn:True|False
--cpu_threads:1|6
--rec_batch_num:1
--use_tensorrt:False|True
--precision:fp32|fp16
--det_model_dir:
--image_dir:./inference/ch_det_data_50/all-sum-510/
null:null
--benchmark:True
===========================serving_params===========================
model_name:ocr_det_server
python:python3.7
trans_model:-m paddle_serving_client.convert
--dirname:./inference/ch_ppocr_server_v2.0_det_infer/
--model_filename:inference.pdmodel
--params_filename:inference.pdiparams
--serving_server:./deploy/pdserving/ppocr_det_mobile_2.0_serving/
--serving_client:./deploy/pdserving/ppocr_det_mobile_2.0_client/
serving_dir:./deploy/pdserving
web_service:web_service_det.py --config=config.yml --opt op.det.concurrency=1
op.det.local_service_conf.devices:null|0
op.det.local_service_conf.use_mkldnn:True|False
op.det.local_service_conf.thread_num:1|6
op.det.local_service_conf.use_trt:False|True
op.det.local_service_conf.precision:fp32|fp16|int8
pipline:pipeline_http_client.py --image_dir=../../doc/imgs
@@ -65,6 +65,8 @@ inference:./deploy/cpp_infer/build/ppocr rec
null:null
--benchmark:True
===========================serving_params===========================
model_name:ocr_rec
python:python3.7
trans_model:-m paddle_serving_client.convert
--dirname:./inference/ch_ppocr_mobile_v2.0_rec_infer/
--model_filename:inference.pdmodel
......
@@ -65,12 +65,14 @@ inference:./deploy/cpp_infer/build/ppocr rec
null:null
--benchmark:True
===========================serving_params===========================
model_name:ocr_server_rec
python:python3.7
trans_model:-m paddle_serving_client.convert
--dirname:./inference/ch_ppocr_server_v2.0_rec_infer/
--model_filename:inference.pdmodel
--params_filename:inference.pdiparams
--serving_server:./deploy/pdserving/ppocr_rec_mobile_2.0_serving/
--serving_client:./deploy/pdserving/ppocr_rec_mobile_2.0_client/
serving_dir:./deploy/pdserving
web_service:web_service_rec.py --config=config.yml --opt op.rec.concurrency=1
op.rec.local_service_conf.devices:null|0
......
## Environment Setup

This section describes how to set up the runtime environment for the basic functional tests under the PTDN directory.

Recommended environment:
- CUDA 10.1
- CUDNN 7.6
- TensorRT 6.1.0.5 / 7.1

Installation via a docker image is recommended. Create the container with the following commands; the current directory is mapped to the `/paddle` directory inside the container.
```
nvidia-docker run --name paddle -it -v $PWD:/paddle paddlepaddle/paddle:latest-dev-cuda10.1-cudnn7-gcc82 /bin/bash
cd /paddle
# install paddle built with TensorRT support
pip3.7 install https://paddle-wheel.bj.bcebos.com/with-trt/2.1.3/linux-gpu-cuda10.1-cudnn7-mkl-gcc8.2-trt6-avx/paddlepaddle_gpu-2.1.3.post101-cp37-cp37m-linux_x86_64.whl
# install AutoLog
git clone https://github.com/LDOUBLEV/AutoLog
cd AutoLog
pip3.7 install -r requirements.txt
python3.7 setup.py bdist_wheel
pip3.7 install ./dist/auto_log-1.0.0-py3-none-any.whl
# download the PaddleOCR code
cd ../
git clone https://github.com/PaddlePaddle/PaddleOCR
```
Install the PaddleOCR dependencies:
```
cd PaddleOCR
pip3.7 install -r requirements.txt
```
## FAQ

Q. You are using Paddle compiled with TensorRT, but TensorRT dynamic library is not found. Ignore this if TensorRT is not needed.

A. This usually means the installed Paddle build includes TensorRT support, but the TensorRT libraries cannot be found in the local environment. Download the TensorRT libraries, extract them, and add the lib directory to the LD_LIBRARY_PATH environment variable, for example:
```
export LD_LIBRARY_PATH=/usr/local/python3.7.0/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/paddle/package/TensorRT-6.0.1.5/lib
```
Alternatively, the downloaded TensorRT version may not match the version Paddle was compiled against; in that case download a matching TensorRT release.
# C++ Inference Functional Test

The main program for the C++ inference functional test is `test_inference_cpp.sh`, which tests model inference based on the C++ inference library.

## 1. Summary of test conclusions

Depending on whether quantization is used during training, the models covered by this test fall into `regular models` and `quantized models`. Their C++ inference capabilities are summarized below:

| Model type | device | batchsize | tensorrt | mkldnn | cpu multithreading |
| ---- | ---- | ---- | :----: | :----: | :----: |
| Regular model | GPU | 1/6 | fp32/fp16 | - | - |
| Regular model | CPU | 1/6 | - | fp32 | supported |
| Quantized model | GPU | 1/6 | int8 | - | - |
| Quantized model | CPU | 1/6 | - | int8 | supported |

## 2. Test procedure
### 2.1 Functional test

First run `prepare.sh` to prepare the data and models, then run `test_inference_cpp.sh`. Log files with the `cpp_infer_*.log` suffix are generated under the `PTDN/output` directory.
```shell
bash PTDN/prepare.sh ./PTDN/configs/ppocr_det_mobile_params.txt "cpp_infer"
# usage 1:
bash PTDN/test_inference_cpp.sh ./PTDN/configs/ppocr_det_mobile_params.txt
# usage 2: run inference on a specified GPU card; the third argument is the GPU card id
bash PTDN/test_inference_cpp.sh ./PTDN/configs/ppocr_det_mobile_params.txt '1'
```
### 2.2 Accuracy test

The compare_results.py script checks whether the model predictions match the expected results. The main steps are:
- extract the predicted coordinates from the log;
- load the previously saved coordinates from a local file;
- compare the two and report an error if the difference exceeds the configured tolerance.

#### Usage
Run:
```shell
python3.7 PTDN/compare_results.py --gt_file=./PTDN/results/cpp_*.txt --log_file=./PTDN/output/cpp_*.log --atol=1e-3 --rtol=1e-3
```
Parameters:
- gt_file: path to the previously saved prediction results; files ending in *.txt are supported and indexed automatically. By default they are stored under the PTDN/result/ folder.
- log_file: path to the prediction logs saved by the infer mode of PTDN/test_inference_cpp.sh. The logs contain the printed prediction results (text boxes, recognized text, classes, etc.). Files matching cpp_infer_*.log can also be passed in.
- atol: absolute tolerance
- rtol: relative tolerance
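At its core the check is an element-wise tolerance comparison of the two coordinate arrays. The sketch below only illustrates that idea with numpy.allclose; the function name and exact behavior are assumptions for illustration, not the actual compare_results.py implementation.

```python
import numpy as np

def check_boxes(gt_boxes, pred_boxes, atol=1e-3, rtol=1e-3):
    """Raise an error if predicted boxes drift from the saved results beyond tolerance."""
    gt = np.asarray(gt_boxes, dtype=np.float64)
    pred = np.asarray(pred_boxes, dtype=np.float64)
    if gt.shape != pred.shape:
        raise AssertionError(f"box count mismatch: {gt.shape} vs {pred.shape}")
    if not np.allclose(gt, pred, atol=atol, rtol=rtol):
        raise AssertionError("predictions differ from the saved results beyond tolerance")

# a corner shifted by 0.0005 px still passes with atol=1e-3
check_boxes([[78.0, 642.0], [409.0, 640.0], [409.0, 657.0], [78.0, 659.0]],
            [[78.0005, 642.0], [409.0, 640.0], [409.0, 657.0], [78.0, 659.0]])
```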
#### Results

A successful run produces output like the following:
<img src="compare_cpp_right.png" width="1000">

Output when the results do not match:
<img src="compare_cpp_wrong.png" width="1000">

## 3. Further reading
This document covers functional testing only; for a full C++ inference tutorial see [Server-side C++ inference](https://github.com/PaddlePaddle/PaddleOCR/tree/dygraph/deploy/cpp_infer).
# PaddleServing Functional Test

The main program for the PaddleServing functional test is `test_serving.sh`, which tests deployment based on PaddleServing.

## 1. Summary of test conclusions

Depending on whether quantization is used during training, the models covered by this test fall into `regular models` and `quantized models`. Their prediction capabilities are summarized below:

| Model type | device | batchsize | tensorrt | mkldnn | cpu multithreading |
| ---- | ---- | ---- | :----: | :----: | :----: |
| Regular model | GPU | 1/6 | fp32/fp16 | - | - |
| Regular model | CPU | 1/6 | - | fp32 | supported |
| Quantized model | GPU | 1/6 | int8 | - | - |
| Quantized model | CPU | 1/6 | - | int8 | supported |

## 2. Test procedure
### 2.1 Functional test

First run `prepare.sh` to prepare the data and models, then run `test_serving.sh`. Log files with the `serving_infer_*.log` suffix are generated under the `PTDN/output` directory.
```shell
bash PTDN/prepare.sh ./PTDN/configs/ppocr_det_mobile_params.txt "serving_infer"
# usage:
bash PTDN/test_serving.sh ./PTDN/configs/ppocr_det_mobile_params.txt
```
#### Results

The status of each test run is written to `PTDN/output/results_serving.log`.

On success the log contains lines such as:
```
Run successfully with command - python3.7 pipeline_http_client.py --image_dir=../../doc/imgs > ../../tests/output/server_infer_cpu_usemkldnn_True_threads_1_batchsize_1.log 2>&1 !
Run successfully with command - xxxxx
...
```
On failure it contains lines such as:
```
Run failed with command - python3.7 pipeline_http_client.py --image_dir=../../doc/imgs > ../../tests/output/server_infer_cpu_usemkldnn_True_threads_1_batchsize_1.log 2>&1 !
Run failed with command - python3.7 pipeline_http_client.py --image_dir=../../doc/imgs > ../../tests/output/server_infer_cpu_usemkldnn_True_threads_6_batchsize_1.log 2>&1 !
Run failed with command - xxxxx
...
```
The detailed prediction results are saved under the PTDN/output/ folder. For example, `server_infer_gpu_usetrt_True_precision_fp16_batchsize_1.log` contains the detected box coordinates:
```
{'err_no': 0, 'err_msg': '', 'key': ['dt_boxes'], 'value': ['[[[ 78. 642.]\n [409. 640.]\n [409. 657.]\n
[ 78. 659.]]\n\n [[ 75. 614.]\n [211. 614.]\n [211. 635.]\n [ 75. 635.]]\n\n
[[103. 554.]\n [135. 554.]\n [135. 575.]\n [103. 575.]]\n\n [[ 75. 531.]\n
[347. 531.]\n [347. 549.]\n [ 75. 549.] ]\n\n [[ 76. 503.]\n [309. 498.]\n
[309. 521.]\n [ 76. 526.]]\n\n [[163. 462.]\n [317. 462.]\n [317. 493.]\n
[163. 493.]]\n\n [[324. 431.]\n [414. 431.]\n [414. 452.]\n [324. 452.]]\n\n
[[ 76. 412.]\n [208. 408.]\n [209. 424.]\n [ 76. 428.]]\n\n [[307. 409.]\n
[428. 409.]\n [428. 426.]\n [307 . 426.]]\n\n [[ 74. 385.]\n [217. 382.]\n
[217. 400.]\n [ 74. 403.]]\n\n [[308. 381.]\n [427. 380.]\n [427. 400.]\n
[308. 401.]]\n\n [[ 74. 363.]\n [195. 362.]\n [195. 378.]\n [ 74. 379.]]\n\n
[[303. 359.]\n [423. 357.]\n [423. 375.]\n [303. 377.]]\n\n [[ 70. 336.]\n
[239. 334.]\n [239. 354.]\ n [ 70. 356.]]\n\n [[ 70. 312.]\n [204. 310.]\n
[204. 327.]\n [ 70. 330.]]\n\n [[303. 308.]\n [419. 306.]\n [419. 326.]\n
[303. 328.]]\n\n [[113. 2 72.]\n [246. 270.]\n [247. 299.]\n [113. 301.]]\n\n
[[361. 269.]\n [384. 269.]\n [384. 296.]\n [361. 296.]]\n\n [[ 70. 250.]\n
[243. 246.]\n [243. 265.]\n [ 70. 269.]]\n\n [[ 65. 221.]\n [187. 220.]\n
[187. 240.]\n [ 65. 241.]]\n\n [[337. 216.]\n [382. 216.]\n [382. 240.]\n
[337. 240.]]\n\n [ [ 65. 196.]\n [247. 193.]\n [247. 213.]\n [ 65. 216.]]\n\n
[[296. 197.]\n [423. 191.]\n [424. 209.]\n [296. 215.]]\n\n [[ 65. 167.]\n [244. 167.]\n
[244. 186.]\n [ 65. 186.]]\n\n [[ 67. 139.]\n [290. 139.]\n [290. 159.]\n [ 67. 159.]]\n\n
[[ 68. 113.]\n [410. 113.]\n [410. 128.]\n [ 68. 129.] ]\n\n [[277. 87.]\n [416. 87.]\n
[416. 108.]\n [277. 108.]]\n\n [[ 79. 28.]\n [132. 28.]\n [132. 62.]\n [ 79. 62.]]\n\n
[[163. 17.]\n [410. 14.]\n [410. 50.]\n [163. 53.]]]']}
```
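The `value` field above holds the detected boxes in numpy print format, i.e. an (N, 4, 2) array of corner coordinates. As a rough, illustrative sketch (not part of the test tool; it assumes a log line shaped exactly like the one above), the coordinates can be recovered like this:

```python
import ast
import re
import numpy as np

# a shortened log line in the format shown above (Python dict repr)
log_line = ("{'err_no': 0, 'err_msg': '', 'key': ['dt_boxes'], "
            "'value': ['[[[ 78. 642.]\\n [409. 640.]\\n [409. 657.]\\n [ 78. 659.]]]']}")

resp = ast.literal_eval(log_line)
result = dict(zip(resp["key"], resp["value"]))
# the boxes are serialized with numpy's print format, so pull the numbers back out
numbers = [float(x) for x in re.findall(r"-?\d+\.?\d*", result["dt_boxes"])]
dt_boxes = np.array(numbers).reshape(-1, 4, 2)   # N boxes, 4 corners, (x, y) each
print(dt_boxes.shape)                            # (1, 4, 2)
```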
## 3. Further reading
This document covers functional testing only; for a full Serving deployment tutorial see [PPOCR service deployment](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/deploy/pdserving/README_CN.md).
# Basic Training and Inference Functional Test

The main program for the basic training and inference functional test is `test_train_inference_python.sh`, which tests basic Python-based model training, evaluation and inference, including pruning, quantization and distillation.

## 1. Summary of test conclusions

- Training:

| Algorithm | Model | Single machine, single GPU | Single machine, multi-GPU | Multi-machine, multi-GPU | Model compression (single machine, multi-GPU) |
| :---- | :---- | :---- | :---- | :---- | :---- |
| DB | ch_ppocr_mobile_v2.0_det | normal training <br> mixed precision | normal training <br> mixed precision | normal training <br> mixed precision | normal training: FPGM pruning, PACT quantization <br> offline quantization (no training required) |
| DB | ch_ppocr_server_v2.0_det | normal training <br> mixed precision | normal training <br> mixed precision | normal training <br> mixed precision | normal training: FPGM pruning, PACT quantization <br> offline quantization (no training required) |
| CRNN | ch_ppocr_mobile_v2.0_rec | normal training <br> mixed precision | normal training <br> mixed precision | normal training <br> mixed precision | normal training: PACT quantization <br> offline quantization (no training required) |
| CRNN | ch_ppocr_server_v2.0_rec | normal training <br> mixed precision | normal training <br> mixed precision | normal training <br> mixed precision | normal training: PACT quantization <br> offline quantization (no training required) |
| PP-OCR | ch_ppocr_mobile_v2.0 | normal training <br> mixed precision | normal training <br> mixed precision | normal training <br> mixed precision | - |
| PP-OCR | ch_ppocr_server_v2.0 | normal training <br> mixed precision | normal training <br> mixed precision | normal training <br> mixed precision | - |
| PP-OCRv2 | ch_PP-OCRv2 | normal training <br> mixed precision | normal training <br> mixed precision | normal training <br> mixed precision | - |

- Inference: depending on whether quantization is used during training, the trained models fall into `regular models` and `quantized models`; their inference capabilities are summarized below:

| Model type | device | batchsize | tensorrt | mkldnn | cpu multithreading |
| ---- | ---- | ---- | :----: | :----: | :----: |
| Regular model | GPU | 1/6 | fp32/fp16 | - | - |
| Regular model | CPU | 1/6 | - | fp32 | supported |
| Quantized model | GPU | 1/6 | int8 | - | - |
| Quantized model | CPU | 1/6 | - | int8 | supported |
## 2. Test procedure
### 2.1 Install dependencies
- Install PaddlePaddle >= 2.0
- Install the PaddleOCR dependencies
```
@@ -46,62 +45,75 @@ Python功能测试的主程序为`test_python.sh`,可以测试基于Python的
```
### 2.2 Functional test

First run `prepare.sh` to prepare the data and models, then run `test_train_inference_python.sh`. Log files named `python_infer_*.log` are generated under the `PTDN/output` directory.
`test_train_inference_python.sh` supports 5 run modes, each using a different amount of data to test either speed or accuracy:
- Mode 1: lite_train_infer. Trains on a small amount of data to quickly verify that the training-to-inference pipeline runs end to end; accuracy and speed are not checked.
```shell
bash PTDN/prepare.sh ./PTDN/configs/ppocr_det_mobile_params.txt 'lite_train_infer'
bash PTDN/test_train_inference_python.sh ./PTDN/configs/ppocr_det_mobile_params.txt 'lite_train_infer'
```
- Mode 2: whole_infer. Trains on a small amount of data and predicts on a moderate amount, to verify that the trained model runs inference and that the inference speed is reasonable.
```shell
bash PTDN/prepare.sh ./PTDN/configs/ppocr_det_mobile_params.txt 'whole_infer'
bash PTDN/test_train_inference_python.sh ./PTDN/configs/ppocr_det_mobile_params.txt 'whole_infer'
```
- Mode 3: infer. No training; predicts on the full dataset. Exercises evaluation of the released model and dynamic-to-static export, and checks the inference model's prediction time and accuracy.
```shell
bash PTDN/prepare.sh ./PTDN/configs/ppocr_det_mobile_params.txt 'infer'
# usage 1:
bash PTDN/test_train_inference_python.sh ./PTDN/configs/ppocr_det_mobile_params.txt 'infer'
# usage 2: run inference on a specified GPU card; the third argument is the GPU card id
bash PTDN/test_train_inference_python.sh ./PTDN/configs/ppocr_det_mobile_params.txt 'infer' '1'
```
- Mode 4: whole_train_infer (CE). Trains on the full dataset and predicts on the full dataset, verifying training accuracy, prediction accuracy and prediction speed.
```shell
bash PTDN/prepare.sh ./PTDN/configs/ppocr_det_mobile_params.txt 'whole_train_infer'
bash PTDN/test_train_inference_python.sh ./PTDN/configs/ppocr_det_mobile_params.txt 'whole_train_infer'
```
- Mode 5: klquant_infer. Tests offline (post-training) quantization.
```shell
bash PTDN/prepare.sh ./PTDN/configs/ppocr_det_mobile_params.txt 'klquant_infer'
bash PTDN/test_train_inference_python.sh PTDN/configs/ppocr_det_mobile_params.txt 'klquant_infer'
```
### 2.3 Accuracy test

The compare_results.py script checks whether the model predictions match the expected results. The main steps are:
- extract the predicted coordinates from the log;
- load the previously saved coordinates from a local file;
- compare the two and report an error if the difference exceeds the configured tolerance.

#### Usage
Run:
```shell
python3.7 PTDN/compare_results.py --gt_file=./PTDN/results/python_*.txt --log_file=./PTDN/output/python_*.log --atol=1e-3 --rtol=1e-3
```
Parameters:
- gt_file: path to the previously saved prediction results; files ending in *.txt are supported and indexed automatically. By default they are stored under the PTDN/result/ folder.
- log_file: path to the prediction logs saved by the infer mode of PTDN/test_train_inference_python.sh. The logs contain the printed prediction results (text boxes, recognized text, classes, etc.). Files matching python_infer_*.log can also be passed in.
- atol: absolute tolerance
- rtol: relative tolerance

#### Results

A successful run produces output like the following:
<img src="compare_right.png" width="1000">

Output when the results do not match:
<img src="compare_wrong.png" width="1000">

## 3. Further reading
This document covers functional testing only; for more complete training and inference tutorials see:
[Model training](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_ch/training.md)
[Inference with the Python inference engine](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_ch/inference.md)
@@ -134,5 +134,5 @@ if [ ${MODE} = "serving_infer" ];then
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar
cd ./inference && tar xf ch_ppocr_mobile_v2.0_det_infer.tar && tar xf ch_ppocr_mobile_v2.0_rec_infer.tar && tar xf ch_ppocr_server_v2.0_rec_infer.tar && tar xf ch_ppocr_server_v2.0_det_infer.tar && cd ../
fi
# Inference and Deployment Navigation

## 1. Introduction

In addition to basic model training and prediction, PaddlePaddle provides high-performance inference and deployment tools for multiple devices and platforms. This document is the inference and deployment navigation for all PaddleOCR models, PTDN (Paddle Train Deploy Navigation), so users can look up the deployment coverage of each model and run one-click tests.

<div align="center">
<img src="docs/guide.png" width="1000">
</div>

## 2. Summary

The coverage is summarized below. Filled-in entries can be tested with this tool in one click; empty entries are still being supported.

**Field descriptions:**
- Basic training and inference: model training plus Paddle Inference Python prediction.
- More training modes: multi-machine multi-GPU and mixed precision.
- Model compression: pruning, offline/online quantization and distillation.
- Other inference and deployment: Paddle Inference C++ prediction, Paddle Serving deployment, Paddle-Lite deployment, etc.
For more detail on inference-acceleration features such as mkldnn and TensorRT, see the [further reading](#more) section of each test tool.

| Algorithm (paper) | Model | Type | Basic<br>train & infer | More<br>training modes | Model compression | Other deployment |
| :--- | :--- | :----: | :--------: | :---- | :---- | :---- |
| DB | ch_ppocr_mobile_v2.0_det | detection | supported | multi-machine multi-GPU <br> mixed precision | FPGM pruning <br> offline quantization | Paddle Inference: C++ <br> Paddle Serving: Python, C++ <br> Paddle-Lite: <br> (1) ARM CPU(C++) |
| DB | ch_ppocr_server_v2.0_det | detection | supported | multi-machine multi-GPU <br> mixed precision | FPGM pruning <br> offline quantization | Paddle Inference: C++ <br> Paddle Serving: Python, C++ <br> Paddle-Lite: <br> (1) ARM CPU(C++) |
| DB | ch_PP-OCRv2_det | detection |
| CRNN | ch_ppocr_mobile_v2.0_rec | recognition | supported | multi-machine multi-GPU <br> mixed precision | PACT quantization <br> offline quantization | Paddle Inference: C++ <br> Paddle Serving: Python, C++ <br> Paddle-Lite: <br> (1) ARM CPU(C++) |
| CRNN | ch_ppocr_server_v2.0_rec | recognition | supported | multi-machine multi-GPU <br> mixed precision | PACT quantization <br> offline quantization | Paddle Inference: C++ <br> Paddle Serving: Python, C++ <br> Paddle-Lite: <br> (1) ARM CPU(C++) |
| CRNN | ch_PP-OCRv2_rec | recognition |
| PP-OCR | ch_ppocr_mobile_v2.0 | detection + recognition | supported | multi-machine multi-GPU <br> mixed precision | - | Paddle Inference: C++ <br> Paddle Serving: Python, C++ <br> Paddle-Lite: <br> (1) ARM CPU(C++) |
| PP-OCR | ch_ppocr_server_v2.0 | detection + recognition | supported | multi-machine multi-GPU <br> mixed precision | - | Paddle Inference: C++ <br> Paddle Serving: Python, C++ <br> Paddle-Lite: <br> (1) ARM CPU(C++) |
| PP-OCRv2 | ch_PP-OCRv2 | detection + recognition |
| DB | det_mv3_db_v2.0 | detection |
| DB | det_r50_vd_db_v2.0 | detection |
| EAST | det_mv3_east_v2.0 | detection |
@@ -39,11 +54,11 @@
## 3. Using the one-click test tool
### Directory overview
```shell
PTDN/
├── configs/ # configuration files
├── det_mv3_db.yml # yml for training the mobile ppocr detection model
├── det_r50_vd_db.yml # yml for training the server ppocr detection model
@@ -56,14 +71,14 @@ tests/
├── ppocr_rec_server_params.txt # parameter file for testing the server ppocr recognition model
├── ...
├── results/ # pre-saved prediction results, used for accuracy comparison against the actual predictions
├── python_ppocr_det_mobile_results_fp32.txt # pre-saved fp32 results of the mobile ppocr detection model (python inference)
├── python_ppocr_det_mobile_results_fp16.txt # pre-saved fp16 results of the mobile ppocr detection model (python inference)
├── cpp_ppocr_det_mobile_results_fp32.txt # pre-saved fp32 results of the mobile ppocr detection model (c++ inference)
├── cpp_ppocr_det_mobile_results_fp16.txt # pre-saved fp16 results of the mobile ppocr detection model (c++ inference)
├── ...
├── prepare.sh # downloads the data and models required by test_*.sh
├── test_train_inference_python.sh # main program for testing python training and inference
├── test_inference_cpp.sh # main program for testing c++ inference
├── test_serving.sh # main program for testing serving deployment
├── test_lite.sh # main program for testing lite deployment
├── compare_results.py # checks whether the accuracy error between the predictions in the logs and the pre-saved results is within the allowed bounds
```
@@ -81,13 +96,15 @@ tests/
3. Use `compare_results.py` to compare the predictions in the logs with the results stored under the results directory, and check whether the prediction accuracy meets expectations (within the error bounds).

There are 4 main test programs:
- `test_train_inference_python.sh`: tests basic Python-based model training, evaluation and inference, including pruning, quantization and distillation.
- `test_inference_cpp.sh`: tests C++-based model inference.
- `test_serving.sh`: tests Paddle Serving deployment.
- `test_lite.sh`: tests Paddle-Lite on-device inference deployment.

<a name="more"></a>
#### Further reading
The functional tests involve training-related options such as mixed precision, pruning and quantization, as well as inference-related options such as mkldnn and TensorRT; follow the links below for details and tutorials:
[test_train_inference_python usage](docs/test_train_inference_python.md)
[test_inference_cpp usage](docs/test_inference_cpp.md)
[test_serving usage](docs/test_serving.md)
[test_lite usage](docs/test_lite.md)
@@ -56,7 +56,11 @@ function func_cpp_inference(){
fi
for threads in ${cpp_cpu_threads_list[*]}; do
    for batch_size in ${cpp_batch_size_list[*]}; do
        precision="fp32"
if [ ${use_mkldnn} = "False" ] && [ ${_flag_quant} = "True" ]; then
precision="int8"
fi
_save_log_path="${_log_path}/cpp_infer_cpu_usemkldnn_${use_mkldnn}_threads_${threads}_precision_${precision}_batchsize_${batch_size}.log"
set_infer_data=$(func_set_params "${cpp_image_dir_key}" "${_img_dir}")
set_benchmark=$(func_set_params "${cpp_benchmark_key}" "${cpp_benchmark_value}")
set_batchsize=$(func_set_params "${cpp_batch_size_key}" "${batch_size}")
......
#!/bin/bash
source PTDN/common_func.sh

FILENAME=$1
dataline=$(awk 'NR==67, NR==83{print}' $FILENAME)

# parser params
IFS=$'\n'
lines=(${dataline})

# parser serving
model_name=$(func_parser_value "${lines[1]}")
python=$(func_parser_value "${lines[2]}")
trans_model_py=$(func_parser_value "${lines[3]}")
infer_model_dir_key=$(func_parser_key "${lines[4]}")
infer_model_dir_value=$(func_parser_value "${lines[4]}")
model_filename_key=$(func_parser_key "${lines[5]}")
model_filename_value=$(func_parser_value "${lines[5]}")
params_filename_key=$(func_parser_key "${lines[6]}")
params_filename_value=$(func_parser_value "${lines[6]}")
serving_server_key=$(func_parser_key "${lines[7]}")
serving_server_value=$(func_parser_value "${lines[7]}")
serving_client_key=$(func_parser_key "${lines[8]}")
serving_client_value=$(func_parser_value "${lines[8]}")
serving_dir_value=$(func_parser_value "${lines[9]}")
web_service_py=$(func_parser_value "${lines[10]}")
web_use_gpu_key=$(func_parser_key "${lines[11]}")
web_use_gpu_list=$(func_parser_value "${lines[11]}")
web_use_mkldnn_key=$(func_parser_key "${lines[12]}")
web_use_mkldnn_list=$(func_parser_value "${lines[12]}")
web_cpu_threads_key=$(func_parser_key "${lines[13]}")
web_cpu_threads_list=$(func_parser_value "${lines[13]}")
web_use_trt_key=$(func_parser_key "${lines[14]}")
web_use_trt_list=$(func_parser_value "${lines[14]}")
web_precision_key=$(func_parser_key "${lines[15]}")
web_precision_list=$(func_parser_value "${lines[15]}")
pipeline_py=$(func_parser_value "${lines[16]}")

LOG_PATH="../../PTDN/output"
mkdir -p ./PTDN/output
status_log="${LOG_PATH}/results_serving.log"

function func_serving(){
    IFS='|'
    _python=$1
@@ -65,12 +65,12 @@ function func_serving(){
                continue
            fi
            for threads in ${web_cpu_threads_list[*]}; do
                _save_log_path="${LOG_PATH}/server_infer_cpu_usemkldnn_${use_mkldnn}_threads_${threads}_batchsize_1.log"
                set_cpu_threads=$(func_set_params "${web_cpu_threads_key}" "${threads}")
                web_service_cmd="${python} ${web_service_py} ${web_use_gpu_key}=${use_gpu} ${web_use_mkldnn_key}=${use_mkldnn} ${set_cpu_threads} &"
                eval $web_service_cmd
                sleep 2s
                pipeline_cmd="${python} ${pipeline_py} > ${_save_log_path} 2>&1 "
                eval $pipeline_cmd
                last_status=${PIPESTATUS[0]}
                eval "cat ${_save_log_path}"
@@ -93,13 +93,13 @@ function func_serving(){
                if [[ ${use_trt} = "False" || ${precision} =~ "int8" ]] && [[ ${_flag_quant} = "True" ]]; then
                    continue
                fi
                _save_log_path="${LOG_PATH}/server_infer_gpu_usetrt_${use_trt}_precision_${precision}_batchsize_1.log"
                set_tensorrt=$(func_set_params "${web_use_trt_key}" "${use_trt}")
                set_precision=$(func_set_params "${web_precision_key}" "${precision}")
                web_service_cmd="${python} ${web_service_py} ${web_use_gpu_key}=${use_gpu} ${set_tensorrt} ${set_precision} & "
                eval $web_service_cmd
                sleep 2s
                pipeline_cmd="${python} ${pipeline_py} > ${_save_log_path} 2>&1"
                eval $pipeline_cmd
                last_status=${PIPESTATUS[0]}
                eval "cat ${_save_log_path}"
@@ -129,3 +129,7 @@ eval $env
echo "################### run test ###################"

export Count=0
IFS="|"
func_serving "${web_service_cmd}"
@@ -5,11 +5,7 @@ FILENAME=$1
# MODE be one of ['lite_train_infer' 'whole_infer' 'whole_train_infer', 'infer', 'klquant_infer']
MODE=$2

dataline=$(awk 'NR==1, NR==51{print}' $FILENAME)

# parser params
IFS=$'\n'
@@ -93,6 +89,8 @@ infer_value1=$(func_parser_value "${lines[50]}")

# parser klquant_infer
if [ ${MODE} = "klquant_infer" ]; then
    dataline=$(awk 'NR==82, NR==98{print}' $FILENAME)
    lines=(${dataline})
    # parser inference model
    infer_model_dir_list=$(func_parser_value "${lines[1]}")
    infer_export_list=$(func_parser_value "${lines[2]}")
@@ -143,14 +141,23 @@ function func_inference(){
            fi
            for threads in ${cpu_threads_list[*]}; do
                for batch_size in ${batch_size_list[*]}; do
                    for precision in ${precision_list[*]}; do
                        if [ ${use_mkldnn} = "False" ] && [ ${precision} = "fp16" ]; then
                            continue
                        fi # skip when enable fp16 but disable mkldnn
                        if [ ${_flag_quant} = "True" ] && [ ${precision} != "int8" ]; then
                            continue
                        fi # skip when quant model inference but precision is not int8
                        set_precision=$(func_set_params "${precision_key}" "${precision}")
                        _save_log_path="${_log_path}/python_infer_cpu_usemkldnn_${use_mkldnn}_threads_${threads}_precision_${precision}_batchsize_${batch_size}.log"
                        set_infer_data=$(func_set_params "${image_dir_key}" "${_img_dir}")
                        set_benchmark=$(func_set_params "${benchmark_key}" "${benchmark_value}")
                        set_batchsize=$(func_set_params "${batch_size_key}" "${batch_size}")
                        set_cpu_threads=$(func_set_params "${cpu_threads_key}" "${threads}")
                        set_model_dir=$(func_set_params "${infer_model_key}" "${_model_dir}")
                        set_infer_params1=$(func_set_params "${infer_key1}" "${infer_value1}")
                        command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${use_mkldnn_key}=${use_mkldnn} ${set_cpu_threads} ${set_model_dir} ${set_batchsize} ${set_infer_data} ${set_benchmark} ${set_precision} ${set_infer_params1} > ${_save_log_path} 2>&1 "
                        eval $command
                        last_status=${PIPESTATUS[0]}
                        eval "cat ${_save_log_path}"
@@ -158,6 +165,7 @@ function func_inference(){
                    done
                done
            done
        done
    elif [ ${use_gpu} = "True" ] || [ ${use_gpu} = "gpu" ]; then
        for use_trt in ${use_trt_list[*]}; do
            for precision in ${precision_list[*]}; do
@@ -224,6 +232,9 @@ if [ ${MODE} = "infer" ] || [ ${MODE} = "klquant_infer" ]; then
        fi
        #run inference
        is_quant=${infer_quant_flag[Count]}
        if [ ${MODE} = "klquant_infer" ]; then
            is_quant="True"
        fi
        func_inference "${python}" "${inference_py}" "${save_infer_dir}" "${LOG_PATH}" "${infer_img_dir}" ${is_quant}
        Count=$(($Count + 1))
    done
@@ -234,6 +245,7 @@ else
    for gpu in ${gpu_list[*]}; do
        use_gpu=${USE_GPU_KEY[Count]}
        Count=$(($Count + 1))
        ips=""
        if [ ${gpu} = "-1" ];then
            env=""
        elif [ ${#gpu} -le 1 ];then
@@ -253,6 +265,11 @@ else
            env=" "
        fi
        for autocast in ${autocast_list[*]}; do
            if [ ${autocast} = "amp" ]; then
                set_amp_config="Global.use_amp=True Global.scale_loss=1024.0 Global.use_dynamic_loss_scaling=True"
            else
                set_amp_config=" "
            fi
            for trainer in ${trainer_list[*]}; do
                flag_quant=False
                if [ ${trainer} = ${pact_key} ]; then
@@ -279,7 +296,6 @@ else
                if [ ${run_train} = "null" ]; then
                    continue
                fi
                set_autocast=$(func_set_params "${autocast_key}" "${autocast}")
                set_epoch=$(func_set_params "${epoch_key}" "${epoch_num}")
                set_pretrain=$(func_set_params "${pretrain_model_key}" "${pretrain_model_value}")
@@ -295,11 +311,11 @@ else
                set_save_model=$(func_set_params "${save_model_key}" "${save_log}")
                if [ ${#gpu} -le 2 ];then  # train with cpu or single gpu
                    cmd="${python} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config} "
                elif [ ${#ips} -le 26 ];then  # train with multi-gpu
                    cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config}"
                else     # train with multi-machine
                    cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${set_use_gpu} ${run_train} ${set_save_model} ${set_pretrain} ${set_epoch} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config}"
                fi
                # run train
                eval "unset CUDA_VISIBLE_DEVICES"
......
@@ -14,7 +14,6 @@ Global:
use_visualdl: false
infer_img: doc/imgs_words/ch/word_1.jpg
character_dict_path: ppocr/utils/ppocr_keys_v1.txt
max_text_length: 25
infer_mode: false
use_space_char: true
......
@@ -14,7 +14,6 @@ Global:
use_visualdl: false
infer_img: doc/imgs_words/ch/word_1.jpg
character_dict_path: ppocr/utils/ppocr_keys_v1.txt
max_text_length: 25
infer_mode: false
use_space_char: true
......
@@ -14,7 +14,6 @@ Global:
use_visualdl: false
infer_img: doc/imgs_words/ch/word_1.jpg
character_dict_path: ppocr/utils/ppocr_keys_v1.txt
max_text_length: 25
infer_mode: false
use_space_char: true
......
@@ -15,7 +15,6 @@ Global:
infer_img: doc/imgs_words/ch/word_1.jpg
# for data or label process
character_dict_path: ppocr/utils/ppocr_keys_v1.txt
max_text_length: 25
infer_mode: False
use_space_char: True
......
@@ -15,7 +15,6 @@ Global:
infer_img: doc/imgs_words/ch/word_1.jpg
# for data or label process
character_dict_path: ppocr/utils/ppocr_keys_v1.txt
max_text_length: 25
infer_mode: False
use_space_char: True
......
@@ -15,7 +15,6 @@ Global:
use_visualdl: false
infer_img: null
character_dict_path: ppocr/utils/dict/arabic_dict.txt
max_text_length: 25
infer_mode: false
use_space_char: true
......
@@ -15,7 +15,6 @@ Global:
use_visualdl: false
infer_img: null
character_dict_path: ppocr/utils/dict/cyrillic_dict.txt
max_text_length: 25
infer_mode: false
use_space_char: true
......
@@ -15,7 +15,6 @@ Global:
use_visualdl: false
infer_img: null
character_dict_path: ppocr/utils/dict/devanagari_dict.txt
max_text_length: 25
infer_mode: false
use_space_char: true
......
@@ -16,7 +16,6 @@ Global:
infer_img:
# for data or label process
character_dict_path: ppocr/utils/en_dict.txt
max_text_length: 25
infer_mode: False
use_space_char: True
......
@@ -16,7 +16,6 @@ Global:
infer_img:
# for data or label process
character_dict_path: ppocr/utils/dict/french_dict.txt
max_text_length: 25
infer_mode: False
use_space_char: False
......
@@ -16,7 +16,6 @@ Global:
infer_img:
# for data or label process
character_dict_path: ppocr/utils/dict/german_dict.txt
max_text_length: 25
infer_mode: False
use_space_char: False
......
@@ -16,7 +16,6 @@ Global:
infer_img:
# for data or label process
character_dict_path: ppocr/utils/dict/japan_dict.txt
max_text_length: 25
infer_mode: False
use_space_char: False
......
@@ -16,7 +16,6 @@ Global:
infer_img:
# for data or label process
character_dict_path: ppocr/utils/dict/korean_dict.txt
max_text_length: 25
infer_mode: False
use_space_char: False
......
@@ -15,7 +15,6 @@ Global:
use_visualdl: false
infer_img: null
character_dict_path: ppocr/utils/dict/latin_dict.txt
max_text_length: 25
infer_mode: false
use_space_char: true
......
@@ -15,7 +15,6 @@ Global:
infer_img: doc/imgs_words_en/word_10.png
# for data or label process
character_dict_path: ppocr/utils/en_dict.txt
max_text_length: 25
infer_mode: False
use_space_char: False
......
@@ -14,11 +14,10 @@ Global:
use_visualdl: False
infer_img: doc/imgs_words_en/word_10.png
# for data or label process
character_dict_path: ppocr/utils/EN_symbol_dict.txt
max_text_length: 25
infer_mode: False
use_space_char: False
save_res_path: ./output/rec/predicts_nrtr.txt
Optimizer:
......
@@ -15,7 +15,6 @@ Global:
infer_img: doc/imgs_words_en/word_10.png
# for data or label process
character_dict_path:
max_text_length: 25
infer_mode: False
use_space_char: False
......
@@ -15,7 +15,6 @@ Global:
infer_img: doc/imgs_words_en/word_10.png
# for data or label process
character_dict_path:
max_text_length: 25
infer_mode: False
use_space_char: False
......
@@ -15,7 +15,6 @@ Global:
infer_img: doc/imgs_words/ch/word_1.jpg
# for data or label process
character_dict_path:
max_text_length: 25
infer_mode: False
use_space_char: False
......
@@ -15,7 +15,6 @@ Global:
infer_img: doc/imgs_words_en/word_10.png
# for data or label process
character_dict_path:
max_text_length: 25
infer_mode: False
use_space_char: False
......
@@ -15,7 +15,6 @@ Global:
infer_img:
# for data or label process
character_dict_path: ppocr/utils/dict90.txt
max_text_length: 30
infer_mode: False
use_space_char: False
......
@@ -15,7 +15,6 @@ Global:
infer_img: doc/imgs_words_en/word_10.png
# for data or label process
character_dict_path:
max_text_length: 25
infer_mode: False
use_space_char: False
......
@@ -15,7 +15,6 @@ Global:
infer_img: doc/imgs_words_en/word_10.png
# for data or label process
character_dict_path:
max_text_length: 25
infer_mode: False
use_space_char: False
......
@@ -15,7 +15,6 @@ Global:
infer_img: doc/imgs_words/ch/word_1.jpg
# for data or label process
character_dict_path:
max_text_length: 25
infer_mode: False
use_space_char: False
......
@@ -15,7 +15,6 @@ Global:
infer_img: doc/imgs_words_en/word_10.png
# for data or label process
character_dict_path:
max_text_length: 25
infer_mode: False
use_space_char: False
......
@@ -15,7 +15,6 @@ Global:
infer_img: doc/imgs_words/ch/word_1.jpg
# for data or label process
character_dict_path:
max_text_length: 25
num_heads: 8
infer_mode: False
......
@@ -14,8 +14,7 @@ Global:
use_visualdl: False
infer_img: doc/imgs_words_en/word_10.png
# for data or label process
character_dict_path: ppocr/utils/EN_symbol_dict.txt
max_text_length: 100
infer_mode: False
use_space_char: False
......
@@ -37,10 +37,9 @@
| checkpoints | path of model parameters to load | None | used to resume training after an interruption |
| use_visualdl | whether to enable visualdl log visualization | False | [tutorial](https://www.paddlepaddle.org.cn/paddle/visualdl) |
| infer_img | path of the image or folder to predict | ./infer_img | \ |
| character_dict_path | path of the character dictionary | ./ppocr/utils/ppocr_keys_v1.txt | if empty, lowercase letters plus digits are used as the default dictionary |
| max_text_length | maximum text length | 25 | \ |
| use_space_char | whether to recognize the space character | True | |
| label_list | angles supported by the direction classifier | ['0','180'] | only used by the direction classifier |
| save_res_path | path for saving detection results | ./output/det_db/predicts_db.txt | only used by detection models |
@@ -191,7 +190,6 @@ PaddleOCR目前已支持80种(除中文外)语种识别,`configs/rec/multi
use_gpu: True
epoch_num: 500
...
character_dict_path: {path/of/dict} # path of the dictionary file
Train:
@@ -212,17 +210,17 @@ PaddleOCR目前已支持80种(除中文外)语种识别,`configs/rec/multi
The multilingual algorithms currently supported by PaddleOCR are:

| Config file | Algorithm | backbone | trans | seq | pred | language |
| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: |
| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Traditional Chinese |
| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | English (case-sensitive) |
| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | French |
| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | German |
| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Japanese |
| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Korean |
| rec_latin_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Latin script |
| rec_arabic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Arabic script |
| rec_cyrillic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Cyrillic script |
| rec_devanagari_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Devanagari script |

For more supported languages see: [multilingual models](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_ch/multi_languages.md#%E8%AF%AD%E7%A7%8D%E7%BC%A9%E5%86%99)
...@@ -273,7 +273,7 @@ python3 tools/export_model.py -c configs/rec/rec_r34_vd_none_bilstm_ctc.yml -o G ...@@ -273,7 +273,7 @@ python3 tools/export_model.py -c configs/rec/rec_r34_vd_none_bilstm_ctc.yml -o G
CRNN 文本识别模型推理,可以执行如下命令: CRNN 文本识别模型推理,可以执行如下命令:
``` ```
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./inference/rec_crnn/" --rec_image_shape="3, 32, 100" --rec_char_type="en" python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./inference/rec_crnn/" --rec_image_shape="3, 32, 100" --rec_char_dict_path="./ppocr/utils/ic15_dict.txt"
``` ```
![](../imgs_words_en/word_336.png) ![](../imgs_words_en/word_336.png)
...@@ -288,7 +288,7 @@ Predicts of ./doc/imgs_words_en/word_336.png:('super', 0.9999073) ...@@ -288,7 +288,7 @@ Predicts of ./doc/imgs_words_en/word_336.png:('super', 0.9999073)
- 训练时采用的图像分辨率不同,训练上述模型采用的图像分辨率是[3,32,100],而中文模型训练时,为了保证长文本的识别效果,训练时采用的图像分辨率是[3, 32, 320]。预测推理程序默认的形状参数是训练中文采用的图像分辨率,即[3, 32, 320]。因此,这里推理上述英文模型时,需要通过参数rec_image_shape设置识别图像的形状。 - 训练时采用的图像分辨率不同,训练上述模型采用的图像分辨率是[3,32,100],而中文模型训练时,为了保证长文本的识别效果,训练时采用的图像分辨率是[3, 32, 320]。预测推理程序默认的形状参数是训练中文采用的图像分辨率,即[3, 32, 320]。因此,这里推理上述英文模型时,需要通过参数rec_image_shape设置识别图像的形状。
- 字符列表,DTRB论文中的实验只针对26个小写英文字母和10个数字,总共36个字符。所有大小写字符都转成了小写字符,不在上面列表中的字符都被忽略,认为是空格。因此这里没有输入字符字典,而是通过如下代码生成字典。因此在推理时需要设置参数rec_char_type,指定为英文"en"。 - 字符列表,DTRB论文中的实验只针对26个小写英文字母和10个数字,总共36个字符。所有大小写字符都转成了小写字符,不在上面列表中的字符都被忽略,认为是空格。因此这里没有输入字符字典,而是通过如下代码生成字典。因此在推理时需要设置参数rec_char_dict_path,指定为英文字典"./ppocr/utils/ic15_dict.txt"。
``` ```
self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
...@@ -303,15 +303,15 @@ dict_character = list(self.character_str) ...@@ -303,15 +303,15 @@ dict_character = list(self.character_str)
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" \ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" \
--rec_model_dir="./inference/srn/" \ --rec_model_dir="./inference/srn/" \
--rec_image_shape="1, 64, 256" \ --rec_image_shape="1, 64, 256" \
--rec_char_type="en" \ --rec_char_dict_path="./ppocr/utils/ic15_dict.txt" \
--rec_algorithm="SRN" --rec_algorithm="SRN"
``` ```
### 4. 自定义文本识别字典的推理 ### 4. 自定义文本识别字典的推理
如果训练时修改了文本的字典,在使用inference模型预测时,需要通过`--rec_char_dict_path`指定使用的字典路径,并且设置 `rec_char_type=ch` 如果训练时修改了文本的字典,在使用inference模型预测时,需要通过`--rec_char_dict_path`指定使用的字典路径
``` ```
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./your inference model" --rec_image_shape="3, 32, 100" --rec_char_type="ch" --rec_char_dict_path="your text dict path" python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./your inference model" --rec_image_shape="3, 32, 100" --rec_char_dict_path="your text dict path"
``` ```
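A custom dictionary is a plain-text file with one character per line. The sketch below is illustrative only (not PaddleOCR source); it mirrors how the label decoders in this PR read such a file into a character list, appending the space character when `use_space_char` is enabled:
```python
# Illustrative loader for a one-character-per-line dictionary file.
# Mirrors the loading logic of BaseRecLabelDecode shown later in this PR.
def load_char_dict(character_dict_path, use_space_char=False):
    character_str = []
    with open(character_dict_path, "rb") as fin:
        for line in fin.readlines():
            character_str.append(line.decode("utf-8").strip("\n").strip("\r\n"))
    if use_space_char:
        character_str.append(" ")
    return character_str

# e.g. chars = load_char_dict("ppocr/utils/dict/korean_dict.txt", use_space_char=True)
```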
<a name="多语言模型的推理"></a> <a name="多语言模型的推理"></a>
...@@ -320,7 +320,7 @@ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png ...@@ -320,7 +320,7 @@ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png
需要通过 `--vis_font_path` 指定可视化的字体路径,`doc/fonts/` 路径下有默认提供的小语种字体,例如韩文识别: 需要通过 `--vis_font_path` 指定可视化的字体路径,`doc/fonts/` 路径下有默认提供的小语种字体,例如韩文识别:
``` ```
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_type="korean" --rec_char_dict_path="ppocr/utils/dict/korean_dict.txt" --vis_font_path="doc/fonts/korean.ttf" python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_dict_path="ppocr/utils/dict/korean_dict.txt" --vis_font_path="doc/fonts/korean.ttf"
``` ```
![](../imgs_words/korean/1.jpg) ![](../imgs_words/korean/1.jpg)
...@@ -388,7 +388,7 @@ python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --de ...@@ -388,7 +388,7 @@ python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --de
下面给出基于EAST文本检测和STAR-Net文本识别执行命令: 下面给出基于EAST文本检测和STAR-Net文本识别执行命令:
``` ```
python3 tools/infer/predict_system.py --image_dir="./doc/imgs_en/img_10.jpg" --det_model_dir="./inference/det_east/" --det_algorithm="EAST" --rec_model_dir="./inference/starnet/" --rec_image_shape="3, 32, 100" --rec_char_type="en" python3 tools/infer/predict_system.py --image_dir="./doc/imgs_en/img_10.jpg" --det_model_dir="./inference/det_east/" --det_algorithm="EAST" --rec_model_dir="./inference/starnet/" --rec_image_shape="3, 32, 100" --rec_char_dict_path="./ppocr/utils/ic15_dict.txt"
``` ```
执行命令后,识别结果图像如下: 执行命令后,识别结果图像如下:
......
...@@ -159,7 +159,6 @@ PaddleOCR内置了一部分字典,可以按需使用。 ...@@ -159,7 +159,6 @@ PaddleOCR内置了一部分字典,可以按需使用。
- 自定义字典 - 自定义字典
如需自定义dict文件,请在 `configs/rec/rec_icdar15_train.yml` 中添加 `character_dict_path` 字段, 指向您的字典路径。 如需自定义dict文件,请在 `configs/rec/rec_icdar15_train.yml` 中添加 `character_dict_path` 字段, 指向您的字典路径。
并将 `character_type` 设置为 `ch`
<a name="支持空格"></a> <a name="支持空格"></a>
### 1.4 添加空格类别 ### 1.4 添加空格类别
...@@ -246,8 +245,6 @@ Global: ...@@ -246,8 +245,6 @@ Global:
... ...
# 添加自定义字典,如修改字典请将路径指向新字典 # 添加自定义字典,如修改字典请将路径指向新字典
character_dict_path: ppocr/utils/ppocr_keys_v1.txt character_dict_path: ppocr/utils/ppocr_keys_v1.txt
# 修改字符类型
character_type: ch
... ...
# 识别空格 # 识别空格
use_space_char: True use_space_char: True
...@@ -311,18 +308,18 @@ PaddleOCR目前已支持80种(除中文外)语种识别,`configs/rec/multi ...@@ -311,18 +308,18 @@ PaddleOCR目前已支持80种(除中文外)语种识别,`configs/rec/multi
按语系划分,目前PaddleOCR支持的语种有: 按语系划分,目前PaddleOCR支持的语种有:
| 配置文件 | 算法名称 | backbone | trans | seq | pred | language | character_type | | 配置文件 | 算法名称 | backbone | trans | seq | pred | language |
| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | :-----: | | :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: |
| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 中文繁体 | chinese_cht| | rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 中文繁体 |
| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 英语(区分大小写) | EN | | rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 英语(区分大小写) |
| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 法语 | french | | rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 法语 |
| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 德语 | german | | rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 德语 |
| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 日语 | japan | | rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 日语 |
| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 韩语 | korean | | rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 韩语 |
| rec_latin_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 拉丁字母 | latin | | rec_latin_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 拉丁字母 |
| rec_arabic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 阿拉伯字母 | ar | | rec_arabic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 阿拉伯字母 |
| rec_cyrillic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 斯拉夫字母 | cyrillic | | rec_cyrillic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 斯拉夫字母 |
| rec_devanagari_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 梵文字母 | devanagari | | rec_devanagari_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 梵文字母 |
更多支持语种请参考: [多语言模型](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_ch/multi_languages.md#%E8%AF%AD%E7%A7%8D%E7%BC%A9%E5%86%99) 更多支持语种请参考: [多语言模型](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_ch/multi_languages.md#%E8%AF%AD%E7%A7%8D%E7%BC%A9%E5%86%99)
......
...@@ -129,3 +129,9 @@ PaddleOCR主要聚焦通用OCR,如果有垂类需求,您可以用PaddleOCR+ ...@@ -129,3 +129,9 @@ PaddleOCR主要聚焦通用OCR,如果有垂类需求,您可以用PaddleOCR+
A:识别模型训练初期acc为0是正常的,多训一段时间指标就上来了。 A:识别模型训练初期acc为0是正常的,多训一段时间指标就上来了。
***
具体的训练教程可点击下方链接跳转:
- [文本检测模型训练](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.3/doc/doc_ch/detection.md)
- [文本识别模型训练](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.3/doc/doc_ch/recognition.md)
- [文本方向分类器训练](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.3/doc/doc_ch/angle_class.md)
\ No newline at end of file
...@@ -37,9 +37,8 @@ Take rec_chinese_lite_train_v2.0.yml as an example ...@@ -37,9 +37,8 @@ Take rec_chinese_lite_train_v2.0.yml as an example
| checkpoints | set model parameter path | None | Used to load parameters after interruption to continue training| | checkpoints | set model parameter path | None | Used to load parameters after interruption to continue training|
| use_visualdl | Set whether to enable visualdl for visual log display | False | [Tutorial](https://www.paddlepaddle.org.cn/paddle/visualdl) | | use_visualdl | Set whether to enable visualdl for visual log display | False | [Tutorial](https://www.paddlepaddle.org.cn/paddle/visualdl) |
| infer_img | Set inference image path or folder path | ./infer_img | \| | infer_img | Set inference image path or folder path | ./infer_img | \|
| character_dict_path | Set dictionary path | ./ppocr/utils/ppocr_keys_v1.txt | \ | | character_dict_path | Set dictionary path | ./ppocr/utils/ppocr_keys_v1.txt | If character_dict_path is None, the model can only recognize digits and lowercase letters |
| max_text_length | Set the maximum length of text | 25 | \ | | max_text_length | Set the maximum length of text | 25 | \ |
| character_type | Set character type | ch | en/ch, the default dict will be used for en, and the custom dict will be used for ch |
| use_space_char | Set whether to recognize spaces | True | Only support in character_type=ch mode | | use_space_char | Set whether to recognize spaces | True | Only takes effect when character_dict_path is set |
| label_list | Set the angle supported by the direction classifier | ['0','180'] | Only valid in angle classifier model | | label_list | Set the angle supported by the direction classifier | ['0','180'] | Only valid in angle classifier model |
| save_res_path | Set the save address of the test model results | ./output/det_db/predicts_db.txt | Only valid in the text detection model | | save_res_path | Set the save address of the test model results | ./output/det_db/predicts_db.txt | Only valid in the text detection model |
...@@ -196,7 +195,6 @@ Italian is made up of Latin letters, so after executing the command, you will ge ...@@ -196,7 +195,6 @@ Italian is made up of Latin letters, so after executing the command, you will ge
use_gpu: True use_gpu: True
epoch_num: 500 epoch_num: 500
... ...
character_type: it # language
character_dict_path: {path/of/dict} # path of dict character_dict_path: {path/of/dict} # path of dict
Train: Train:
...@@ -218,18 +216,18 @@ Italian is made up of Latin letters, so after executing the command, you will ge ...@@ -218,18 +216,18 @@ Italian is made up of Latin letters, so after executing the command, you will ge
Currently, the multi-language algorithms supported by PaddleOCR are: Currently, the multi-language algorithms supported by PaddleOCR are:
| Configuration file | Algorithm name | backbone | trans | seq | pred | language | character_type | | Configuration file | Algorithm name | backbone | trans | seq | pred | language |
| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | :-----: | | :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: |
| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | chinese traditional | chinese_cht| | rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | chinese traditional |
| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | English(Case sensitive) | EN | | rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | English(Case sensitive) |
| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | French | french | | rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | French |
| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | German | german | | rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | German |
| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Japanese | japan | | rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Japanese |
| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Korean | korean | | rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Korean |
| rec_latin_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Latin | latin | | rec_latin_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Latin |
| rec_arabic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | arabic | ar | | rec_arabic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | arabic |
| rec_cyrillic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | cyrillic | cyrillic | | rec_cyrillic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | cyrillic |
| rec_devanagari_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | devanagari | devanagari | | rec_devanagari_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | devanagari |
For more supported languages, please refer to : [Multi-language model](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_en/multi_languages_en.md#4-support-languages-and-abbreviations) For more supported languages, please refer to : [Multi-language model](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_en/multi_languages_en.md#4-support-languages-and-abbreviations)
......
...@@ -281,7 +281,7 @@ python3 tools/export_model.py -c configs/det/rec_r34_vd_none_bilstm_ctc.yml -o G ...@@ -281,7 +281,7 @@ python3 tools/export_model.py -c configs/det/rec_r34_vd_none_bilstm_ctc.yml -o G
For CRNN text recognition model inference, execute the following commands: For CRNN text recognition model inference, execute the following commands:
``` ```
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./inference/starnet/" --rec_image_shape="3, 32, 100" --rec_char_type="en" python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./inference/starnet/" --rec_image_shape="3, 32, 100" --rec_char_dict_path="./ppocr/utils/ic15_dict.txt"
``` ```
![](../imgs_words_en/word_336.png) ![](../imgs_words_en/word_336.png)
...@@ -314,7 +314,7 @@ with the training, such as: --rec_image_shape="1, 64, 256" ...@@ -314,7 +314,7 @@ with the training, such as: --rec_image_shape="1, 64, 256"
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" \ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" \
--rec_model_dir="./inference/srn/" \ --rec_model_dir="./inference/srn/" \
--rec_image_shape="1, 64, 256" \ --rec_image_shape="1, 64, 256" \
--rec_char_type="en" \ --rec_char_dict_path="./ppocr/utils/ic15_dict.txt" \
--rec_algorithm="SRN" --rec_algorithm="SRN"
``` ```
...@@ -323,7 +323,7 @@ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png ...@@ -323,7 +323,7 @@ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png
If the text dictionary is modified during training, when using the inference model to predict, you need to specify the dictionary path used by `--rec_char_dict_path`, and set `rec_char_type=ch` If the text dictionary is modified during training, you need to specify the dictionary path with `--rec_char_dict_path` when predicting with the inference model
``` ```
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./your inference model" --rec_image_shape="3, 32, 100" --rec_char_type="ch" --rec_char_dict_path="your text dict path" python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./your inference model" --rec_image_shape="3, 32, 100" --rec_char_dict_path="your text dict path"
``` ```
<a name="MULTILINGUAL_MODEL_INFERENCE"></a> <a name="MULTILINGUAL_MODEL_INFERENCE"></a>
...@@ -333,7 +333,7 @@ If you need to predict other language models, when using inference model predict ...@@ -333,7 +333,7 @@ If you need to predict other language models, when using inference model predict
You need to specify the visual font path through `--vis_font_path`. There are small language fonts provided by default under the `doc/fonts` path, such as Korean recognition: You need to specify the visual font path through `--vis_font_path`. There are small language fonts provided by default under the `doc/fonts` path, such as Korean recognition:
``` ```
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_type="korean" --rec_char_dict_path="ppocr/utils/dict/korean_dict.txt" --vis_font_path="doc/fonts/korean.ttf" python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_dict_path="ppocr/utils/dict/korean_dict.txt" --vis_font_path="doc/fonts/korean.ttf"
``` ```
![](../imgs_words/korean/1.jpg) ![](../imgs_words/korean/1.jpg)
...@@ -399,7 +399,7 @@ If you want to try other detection algorithms or recognition algorithms, please ...@@ -399,7 +399,7 @@ If you want to try other detection algorithms or recognition algorithms, please
The following command uses the combination of the EAST text detection and STAR-Net text recognition: The following command uses the combination of the EAST text detection and STAR-Net text recognition:
``` ```
python3 tools/infer/predict_system.py --image_dir="./doc/imgs_en/img_10.jpg" --det_model_dir="./inference/det_east/" --det_algorithm="EAST" --rec_model_dir="./inference/starnet/" --rec_image_shape="3, 32, 100" --rec_char_type="en" python3 tools/infer/predict_system.py --image_dir="./doc/imgs_en/img_10.jpg" --det_model_dir="./inference/det_east/" --det_algorithm="EAST" --rec_model_dir="./inference/starnet/" --rec_image_shape="3, 32, 100" --rec_char_dict_path="./ppocr/utils/ic15_dict.txt"
``` ```
After executing the command, the recognition result image is as follows: After executing the command, the recognition result image is as follows:
......
...@@ -161,7 +161,7 @@ The current multi-language model is still in the demo stage and will continue to ...@@ -161,7 +161,7 @@ The current multi-language model is still in the demo stage and will continue to
If you like, you can submit the dictionary file to [dict](../../ppocr/utils/dict) and we will thank you in the Repo. If you like, you can submit the dictionary file to [dict](../../ppocr/utils/dict) and we will thank you in the Repo.
To customize the dict file, please modify the `character_dict_path` field in `configs/rec/rec_icdar15_train.yml` and set `character_type` to `ch`. To customize the dict file, please modify the `character_dict_path` field in `configs/rec/rec_icdar15_train.yml`.
- Custom dictionary - Custom dictionary
...@@ -172,8 +172,6 @@ If you need to customize dic file, please add character_dict_path field in confi ...@@ -172,8 +172,6 @@ If you need to customize dic file, please add character_dict_path field in confi
If you want to support the recognition of the `space` category, please set the `use_space_char` field in the yml file to `True`. If you want to support the recognition of the `space` category, please set the `use_space_char` field in the yml file to `True`.
**Note: use_space_char only takes effect when character_type=ch**
<a name="TRAINING"></a> <a name="TRAINING"></a>
## 2.Training ## 2.Training
...@@ -250,7 +248,6 @@ Global: ...@@ -250,7 +248,6 @@ Global:
# Add a custom dictionary, such as modify the dictionary, please point the path to the new dictionary # Add a custom dictionary, such as modify the dictionary, please point the path to the new dictionary
character_dict_path: ppocr/utils/ppocr_keys_v1.txt character_dict_path: ppocr/utils/ppocr_keys_v1.txt
# Modify character type # Modify character type
character_type: ch
... ...
# Whether to recognize spaces # Whether to recognize spaces
use_space_char: True use_space_char: True
...@@ -312,18 +309,18 @@ Eval: ...@@ -312,18 +309,18 @@ Eval:
Currently, the multi-language algorithms supported by PaddleOCR are: Currently, the multi-language algorithms supported by PaddleOCR are:
| Configuration file | Algorithm name | backbone | trans | seq | pred | language | character_type | | Configuration file | Algorithm name | backbone | trans | seq | pred | language |
| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | :-----: | | :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: |
| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | chinese traditional | chinese_cht| | rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | chinese traditional |
| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | English(Case sensitive) | EN | | rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | English(Case sensitive) |
| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | French | french | | rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | French |
| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | German | german | | rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | German |
| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Japanese | japan | | rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Japanese |
| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Korean | korean | | rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Korean |
| rec_latin_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Latin | latin | | rec_latin_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Latin |
| rec_arabic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | arabic | ar | | rec_arabic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | arabic |
| rec_cyrillic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | cyrillic | cyrillic | | rec_cyrillic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | cyrillic |
| rec_devanagari_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | devanagari | devanagari | | rec_devanagari_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | devanagari |
For more supported languages, please refer to : [Multi-language model](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_en/multi_languages_en.md#4-support-languages-and-abbreviations) For more supported languages, please refer to : [Multi-language model](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_en/multi_languages_en.md#4-support-languages-and-abbreviations)
...@@ -471,6 +468,3 @@ inference/det_db/ ...@@ -471,6 +468,3 @@ inference/det_db/
``` ```
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./your inference model" --rec_image_shape="3, 32, 100" --rec_char_type="ch" --rec_char_dict_path="your text dict path" python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./your inference model" --rec_image_shape="3, 32, 100" --rec_char_dict_path="your text dict path"
``` ```
...@@ -147,3 +147,9 @@ There are several experiences for reference when constructing the data set: ...@@ -147,3 +147,9 @@ There are several experiences for reference when constructing the data set:
A: It is normal for the acc to be 0 at the beginning of recognition model training; the metric will improve after more training. A: It is normal for the acc to be 0 at the beginning of recognition model training; the metric will improve after more training.
***
Click the following links for detailed training tutorial:
- [text detection model training](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.3/doc/doc_ch/detection.md)
- [text recognition model training](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.3/doc/doc_ch/recognition.md)
- [text direction classification model training](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.3/doc/doc_ch/angle_class.md)
doc/joinus.PNG (binary image updated: 188.2 KB → 209.7 KB)
...@@ -21,6 +21,8 @@ import numpy as np ...@@ -21,6 +21,8 @@ import numpy as np
import string import string
import json import json
from ppocr.utils.logging import get_logger
class ClsLabelEncode(object): class ClsLabelEncode(object):
def __init__(self, label_list, **kwargs): def __init__(self, label_list, **kwargs):
...@@ -92,31 +94,23 @@ class BaseRecLabelEncode(object): ...@@ -92,31 +94,23 @@ class BaseRecLabelEncode(object):
def __init__(self, def __init__(self,
max_text_length, max_text_length,
character_dict_path=None, character_dict_path=None,
character_type='ch',
use_space_char=False): use_space_char=False):
support_character_type = [
'ch', 'en', 'EN_symbol', 'french', 'german', 'japan', 'korean',
'EN', 'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs',
'oc', 'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi',
'mr', 'ne', 'latin', 'arabic', 'cyrillic', 'devanagari'
]
assert character_type in support_character_type, "Only {} are supported now but get {}".format(
support_character_type, character_type)
self.max_text_len = max_text_length self.max_text_len = max_text_length
self.beg_str = "sos" self.beg_str = "sos"
self.end_str = "eos" self.end_str = "eos"
if character_type == "en": self.lower = False
if character_dict_path is None:
logger = get_logger()
logger.warning(
"The character_dict_path is None, model can only recognize number and lower letters"
)
self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
dict_character = list(self.character_str) dict_character = list(self.character_str)
elif character_type == "EN_symbol": self.lower = True
# same with ASTER setting (use 94 char). else:
self.character_str = string.printable[:-6]
dict_character = list(self.character_str)
elif character_type in support_character_type:
self.character_str = "" self.character_str = ""
assert character_dict_path is not None, "character_dict_path should not be None when character_type is {}".format(
character_type)
with open(character_dict_path, "rb") as fin: with open(character_dict_path, "rb") as fin:
lines = fin.readlines() lines = fin.readlines()
for line in lines: for line in lines:
...@@ -125,7 +119,6 @@ class BaseRecLabelEncode(object): ...@@ -125,7 +119,6 @@ class BaseRecLabelEncode(object):
if use_space_char: if use_space_char:
self.character_str += " " self.character_str += " "
dict_character = list(self.character_str) dict_character = list(self.character_str)
self.character_type = character_type
dict_character = self.add_special_char(dict_character) dict_character = self.add_special_char(dict_character)
self.dict = {} self.dict = {}
for i, char in enumerate(dict_character): for i, char in enumerate(dict_character):
...@@ -147,7 +140,7 @@ class BaseRecLabelEncode(object): ...@@ -147,7 +140,7 @@ class BaseRecLabelEncode(object):
""" """
if len(text) == 0 or len(text) > self.max_text_len: if len(text) == 0 or len(text) > self.max_text_len:
return None return None
if self.character_type == "en": if self.lower:
text = text.lower() text = text.lower()
text_list = [] text_list = []
for char in text: for char in text:
...@@ -167,13 +160,11 @@ class NRTRLabelEncode(BaseRecLabelEncode): ...@@ -167,13 +160,11 @@ class NRTRLabelEncode(BaseRecLabelEncode):
def __init__(self, def __init__(self,
max_text_length, max_text_length,
character_dict_path=None, character_dict_path=None,
character_type='EN_symbol',
use_space_char=False, use_space_char=False,
**kwargs): **kwargs):
super(NRTRLabelEncode, super(NRTRLabelEncode, self).__init__(
self).__init__(max_text_length, character_dict_path, max_text_length, character_dict_path, use_space_char)
character_type, use_space_char)
def __call__(self, data): def __call__(self, data):
text = data['label'] text = data['label']
...@@ -200,12 +191,10 @@ class CTCLabelEncode(BaseRecLabelEncode): ...@@ -200,12 +191,10 @@ class CTCLabelEncode(BaseRecLabelEncode):
def __init__(self, def __init__(self,
max_text_length, max_text_length,
character_dict_path=None, character_dict_path=None,
character_type='ch',
use_space_char=False, use_space_char=False,
**kwargs): **kwargs):
super(CTCLabelEncode, super(CTCLabelEncode, self).__init__(
self).__init__(max_text_length, character_dict_path, max_text_length, character_dict_path, use_space_char)
character_type, use_space_char)
def __call__(self, data): def __call__(self, data):
text = data['label'] text = data['label']
...@@ -231,12 +220,10 @@ class E2ELabelEncodeTest(BaseRecLabelEncode): ...@@ -231,12 +220,10 @@ class E2ELabelEncodeTest(BaseRecLabelEncode):
def __init__(self, def __init__(self,
max_text_length, max_text_length,
character_dict_path=None, character_dict_path=None,
character_type='EN',
use_space_char=False, use_space_char=False,
**kwargs): **kwargs):
super(E2ELabelEncodeTest, super(E2ELabelEncodeTest, self).__init__(
self).__init__(max_text_length, character_dict_path, max_text_length, character_dict_path, use_space_char)
character_type, use_space_char)
def __call__(self, data): def __call__(self, data):
import json import json
...@@ -305,12 +292,10 @@ class AttnLabelEncode(BaseRecLabelEncode): ...@@ -305,12 +292,10 @@ class AttnLabelEncode(BaseRecLabelEncode):
def __init__(self, def __init__(self,
max_text_length, max_text_length,
character_dict_path=None, character_dict_path=None,
character_type='ch',
use_space_char=False, use_space_char=False,
**kwargs): **kwargs):
super(AttnLabelEncode, super(AttnLabelEncode, self).__init__(
self).__init__(max_text_length, character_dict_path, max_text_length, character_dict_path, use_space_char)
character_type, use_space_char)
def add_special_char(self, dict_character): def add_special_char(self, dict_character):
self.beg_str = "sos" self.beg_str = "sos"
...@@ -353,12 +338,10 @@ class SEEDLabelEncode(BaseRecLabelEncode): ...@@ -353,12 +338,10 @@ class SEEDLabelEncode(BaseRecLabelEncode):
def __init__(self, def __init__(self,
max_text_length, max_text_length,
character_dict_path=None, character_dict_path=None,
character_type='ch',
use_space_char=False, use_space_char=False,
**kwargs): **kwargs):
super(SEEDLabelEncode, super(SEEDLabelEncode, self).__init__(
self).__init__(max_text_length, character_dict_path, max_text_length, character_dict_path, use_space_char)
character_type, use_space_char)
def add_special_char(self, dict_character): def add_special_char(self, dict_character):
self.end_str = "eos" self.end_str = "eos"
...@@ -385,12 +368,10 @@ class SRNLabelEncode(BaseRecLabelEncode): ...@@ -385,12 +368,10 @@ class SRNLabelEncode(BaseRecLabelEncode):
def __init__(self, def __init__(self,
max_text_length=25, max_text_length=25,
character_dict_path=None, character_dict_path=None,
character_type='en',
use_space_char=False, use_space_char=False,
**kwargs): **kwargs):
super(SRNLabelEncode, super(SRNLabelEncode, self).__init__(
self).__init__(max_text_length, character_dict_path, max_text_length, character_dict_path, use_space_char)
character_type, use_space_char)
def add_special_char(self, dict_character): def add_special_char(self, dict_character):
dict_character = dict_character + [self.beg_str, self.end_str] dict_character = dict_character + [self.beg_str, self.end_str]
...@@ -598,12 +579,10 @@ class SARLabelEncode(BaseRecLabelEncode): ...@@ -598,12 +579,10 @@ class SARLabelEncode(BaseRecLabelEncode):
def __init__(self, def __init__(self,
max_text_length, max_text_length,
character_dict_path=None, character_dict_path=None,
character_type='ch',
use_space_char=False, use_space_char=False,
**kwargs): **kwargs):
super(SARLabelEncode, super(SARLabelEncode, self).__init__(
self).__init__(max_text_length, character_dict_path, max_text_length, character_dict_path, use_space_char)
character_type, use_space_char)
def add_special_char(self, dict_character): def add_special_char(self, dict_character):
beg_end_str = "<BOS/EOS>" beg_end_str = "<BOS/EOS>"
......
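After this refactor the label encoders no longer take a `character_type` argument; the character set is controlled entirely by `character_dict_path` and `use_space_char`. A minimal construction sketch (module path assumed to be `ppocr/data/imaug/label_ops.py`, as in the repo layout; the dict path is just an example):
```python
from ppocr.data.imaug.label_ops import CTCLabelEncode

# No dict file: falls back to digits + lowercase letters and logs a warning.
en_encoder = CTCLabelEncode(max_text_length=25)

# With a dictionary file, the character set (plus optional space) comes from it.
ch_encoder = CTCLabelEncode(
    max_text_length=25,
    character_dict_path="./ppocr/utils/ppocr_keys_v1.txt",
    use_space_char=True)
```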
...@@ -87,17 +87,17 @@ class RecResizeImg(object): ...@@ -87,17 +87,17 @@ class RecResizeImg(object):
def __init__(self, def __init__(self,
image_shape, image_shape,
infer_mode=False, infer_mode=False,
character_type='ch', character_dict_path='./ppocr/utils/ppocr_keys_v1.txt',
padding=True, padding=True,
**kwargs): **kwargs):
self.image_shape = image_shape self.image_shape = image_shape
self.infer_mode = infer_mode self.infer_mode = infer_mode
self.character_type = character_type self.character_dict_path = character_dict_path
self.padding = padding self.padding = padding
def __call__(self, data): def __call__(self, data):
img = data['image'] img = data['image']
if self.infer_mode and self.character_type == "ch": if self.infer_mode and self.character_dict_path is not None:
norm_img = resize_norm_img_chinese(img, self.image_shape) norm_img = resize_norm_img_chinese(img, self.image_shape)
else: else:
norm_img = resize_norm_img(img, self.image_shape, self.padding) norm_img = resize_norm_img(img, self.image_shape, self.padding)
......
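Note the behavioral change here: in infer mode the Chinese-style resize is now selected whenever a dictionary path is set (and the new default points to `ppocr_keys_v1.txt`), instead of being keyed on `character_type == "ch"`. A small illustrative sketch (constructor arguments are examples, not prescribed values):
```python
from ppocr.data.imaug.rec_img_aug import RecResizeImg

# Default character_dict_path -> infer-mode resizing uses resize_norm_img_chinese.
op_default = RecResizeImg(image_shape=[3, 32, 320], infer_mode=True)

# Explicitly passing None forces the plain resize_norm_img path.
op_plain = RecResizeImg(image_shape=[3, 32, 100], infer_mode=True,
                        character_dict_path=None)
```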
...@@ -21,33 +21,15 @@ import re ...@@ -21,33 +21,15 @@ import re
class BaseRecLabelDecode(object): class BaseRecLabelDecode(object):
""" Convert between text-label and text-index """ """ Convert between text-label and text-index """
def __init__(self, def __init__(self, character_dict_path=None, use_space_char=False):
character_dict_path=None,
character_type='ch',
use_space_char=False):
support_character_type = [
'ch', 'en', 'EN_symbol', 'french', 'german', 'japan', 'korean',
'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs', 'oc',
'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi', 'mr',
'ne', 'EN', 'latin', 'arabic', 'cyrillic', 'devanagari'
]
assert character_type in support_character_type, "Only {} are supported now but get {}".format(
support_character_type, character_type)
self.beg_str = "sos" self.beg_str = "sos"
self.end_str = "eos" self.end_str = "eos"
if character_type == "en": self.character_str = []
if character_dict_path is None:
self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
dict_character = list(self.character_str) dict_character = list(self.character_str)
elif character_type == "EN_symbol": else:
# same with ASTER setting (use 94 char).
self.character_str = string.printable[:-6]
dict_character = list(self.character_str)
elif character_type in support_character_type:
self.character_str = []
assert character_dict_path is not None, "character_dict_path should not be None when character_type is {}".format(
character_type)
with open(character_dict_path, "rb") as fin: with open(character_dict_path, "rb") as fin:
lines = fin.readlines() lines = fin.readlines()
for line in lines: for line in lines:
...@@ -57,9 +39,6 @@ class BaseRecLabelDecode(object): ...@@ -57,9 +39,6 @@ class BaseRecLabelDecode(object):
self.character_str.append(" ") self.character_str.append(" ")
dict_character = list(self.character_str) dict_character = list(self.character_str)
else:
raise NotImplementedError
self.character_type = character_type
dict_character = self.add_special_char(dict_character) dict_character = self.add_special_char(dict_character)
self.dict = {} self.dict = {}
for i, char in enumerate(dict_character): for i, char in enumerate(dict_character):
...@@ -102,13 +81,10 @@ class BaseRecLabelDecode(object): ...@@ -102,13 +81,10 @@ class BaseRecLabelDecode(object):
class CTCLabelDecode(BaseRecLabelDecode): class CTCLabelDecode(BaseRecLabelDecode):
""" Convert between text-label and text-index """ """ Convert between text-label and text-index """
def __init__(self, def __init__(self, character_dict_path=None, use_space_char=False,
character_dict_path=None,
character_type='ch',
use_space_char=False,
**kwargs): **kwargs):
super(CTCLabelDecode, self).__init__(character_dict_path, super(CTCLabelDecode, self).__init__(character_dict_path,
character_type, use_space_char) use_space_char)
def __call__(self, preds, label=None, *args, **kwargs): def __call__(self, preds, label=None, *args, **kwargs):
if isinstance(preds, tuple): if isinstance(preds, tuple):
...@@ -136,13 +112,12 @@ class DistillationCTCLabelDecode(CTCLabelDecode): ...@@ -136,13 +112,12 @@ class DistillationCTCLabelDecode(CTCLabelDecode):
def __init__(self, def __init__(self,
character_dict_path=None, character_dict_path=None,
character_type='ch',
use_space_char=False, use_space_char=False,
model_name=["student"], model_name=["student"],
key=None, key=None,
**kwargs): **kwargs):
super(DistillationCTCLabelDecode, self).__init__( super(DistillationCTCLabelDecode, self).__init__(character_dict_path,
character_dict_path, character_type, use_space_char) use_space_char)
if not isinstance(model_name, list): if not isinstance(model_name, list):
model_name = [model_name] model_name = [model_name]
self.model_name = model_name self.model_name = model_name
...@@ -162,13 +137,9 @@ class DistillationCTCLabelDecode(CTCLabelDecode): ...@@ -162,13 +137,9 @@ class DistillationCTCLabelDecode(CTCLabelDecode):
class NRTRLabelDecode(BaseRecLabelDecode): class NRTRLabelDecode(BaseRecLabelDecode):
""" Convert between text-label and text-index """ """ Convert between text-label and text-index """
def __init__(self, def __init__(self, character_dict_path=None, use_space_char=True, **kwargs):
character_dict_path=None,
character_type='EN_symbol',
use_space_char=True,
**kwargs):
super(NRTRLabelDecode, self).__init__(character_dict_path, super(NRTRLabelDecode, self).__init__(character_dict_path,
character_type, use_space_char) use_space_char)
def __call__(self, preds, label=None, *args, **kwargs): def __call__(self, preds, label=None, *args, **kwargs):
...@@ -230,13 +201,10 @@ class NRTRLabelDecode(BaseRecLabelDecode): ...@@ -230,13 +201,10 @@ class NRTRLabelDecode(BaseRecLabelDecode):
class AttnLabelDecode(BaseRecLabelDecode): class AttnLabelDecode(BaseRecLabelDecode):
""" Convert between text-label and text-index """ """ Convert between text-label and text-index """
def __init__(self, def __init__(self, character_dict_path=None, use_space_char=False,
character_dict_path=None,
character_type='ch',
use_space_char=False,
**kwargs): **kwargs):
super(AttnLabelDecode, self).__init__(character_dict_path, super(AttnLabelDecode, self).__init__(character_dict_path,
character_type, use_space_char) use_space_char)
def add_special_char(self, dict_character): def add_special_char(self, dict_character):
self.beg_str = "sos" self.beg_str = "sos"
...@@ -313,13 +281,10 @@ class AttnLabelDecode(BaseRecLabelDecode): ...@@ -313,13 +281,10 @@ class AttnLabelDecode(BaseRecLabelDecode):
class SEEDLabelDecode(BaseRecLabelDecode): class SEEDLabelDecode(BaseRecLabelDecode):
""" Convert between text-label and text-index """ """ Convert between text-label and text-index """
def __init__(self, def __init__(self, character_dict_path=None, use_space_char=False,
character_dict_path=None,
character_type='ch',
use_space_char=False,
**kwargs): **kwargs):
super(SEEDLabelDecode, self).__init__(character_dict_path, super(SEEDLabelDecode, self).__init__(character_dict_path,
character_type, use_space_char) use_space_char)
def add_special_char(self, dict_character): def add_special_char(self, dict_character):
self.beg_str = "sos" self.beg_str = "sos"
...@@ -394,13 +359,10 @@ class SEEDLabelDecode(BaseRecLabelDecode): ...@@ -394,13 +359,10 @@ class SEEDLabelDecode(BaseRecLabelDecode):
class SRNLabelDecode(BaseRecLabelDecode): class SRNLabelDecode(BaseRecLabelDecode):
""" Convert between text-label and text-index """ """ Convert between text-label and text-index """
def __init__(self, def __init__(self, character_dict_path=None, use_space_char=False,
character_dict_path=None,
character_type='en',
use_space_char=False,
**kwargs): **kwargs):
super(SRNLabelDecode, self).__init__(character_dict_path, super(SRNLabelDecode, self).__init__(character_dict_path,
character_type, use_space_char) use_space_char)
self.max_text_length = kwargs.get('max_text_length', 25) self.max_text_length = kwargs.get('max_text_length', 25)
def __call__(self, preds, label=None, *args, **kwargs): def __call__(self, preds, label=None, *args, **kwargs):
...@@ -616,13 +578,10 @@ class TableLabelDecode(object): ...@@ -616,13 +578,10 @@ class TableLabelDecode(object):
class SARLabelDecode(BaseRecLabelDecode): class SARLabelDecode(BaseRecLabelDecode):
""" Convert between text-label and text-index """ """ Convert between text-label and text-index """
def __init__(self, def __init__(self, character_dict_path=None, use_space_char=False,
character_dict_path=None,
character_type='ch',
use_space_char=False,
**kwargs): **kwargs):
super(SARLabelDecode, self).__init__(character_dict_path, super(SARLabelDecode, self).__init__(character_dict_path,
character_type, use_space_char) use_space_char)
self.rm_symbol = kwargs.get('rm_symbol', False) self.rm_symbol = kwargs.get('rm_symbol', False)
......
0
1
2
3
4
5
6
7
8
9
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
A
B
C
D
E
F
G
H
I
J
K
L
M
N
O
P
Q
R
S
T
U
V
W
X
Y
Z
!
"
#
$
%
&
'
(
)
*
+
,
-
.
/
:
;
<
=
>
?
@
[
\
]
^
_
`
{
|
}
~
\ No newline at end of file
# C++预测功能测试
C++预测功能测试的主程序为`test_cpp.sh`,可以测试基于C++预测库的模型推理功能。
## 测试结论汇总
| 算法名称 | 模型名称 | device | batchsize | mkldnn | cpu多线程 | tensorrt | 离线量化 |
| ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- |
| DB |ch_ppocr_mobile_v2.0_det| CPU/GPU | 1/6 | 支持 | 支持 | fp32/fp16/int8 | 支持 |
| DB |ch_ppocr_server_v2.0_det| CPU/GPU | 1/6 | 支持 | 支持 | fp32/fp16/int8 | 支持 |
| CRNN |ch_ppocr_mobile_v2.0_rec| CPU/GPU | 1/6 | 支持 | 支持 | fp32/fp16/int8 | 支持 |
| CRNN |ch_ppocr_server_v2.0_rec| CPU/GPU | 1/6 | 支持 | 支持 | fp32/fp16/int8 | 支持 |
|PP-OCR|ch_ppocr_mobile_v2.0 | CPU/GPU | 1/6 | 支持 | 支持 | fp32/fp16/int8 | 支持 |
|PP-OCR|ch_ppocr_server_v2.0 | CPU/GPU | 1/6 | 支持 | 支持 | fp32/fp16/int8 | 支持 |
## 1. 功能测试
先运行`prepare.sh`准备数据和模型,然后运行`test_cpp.sh`进行测试,最终在`tests/output`目录下生成`cpp_infer_*.log`后缀的日志文件。
```shell
bash tests/prepare.sh ./tests/configs/ppocr_det_mobile_params.txt
# 用法1:
bash tests/test_cpp.sh ./tests/configs/ppocr_det_mobile_params.txt
# 用法2: 指定GPU卡预测,第三个传入参数为GPU卡号
bash tests/test_cpp.sh ./tests/configs/ppocr_det_mobile_params.txt '1'
```
## 2. 精度测试
使用compare_results.py脚本比较模型预测的结果是否符合预期,主要步骤包括:
- 提取日志中的预测坐标;
- 从本地文件中提取保存好的坐标结果;
- 比较上述两个结果是否符合精度预期,误差大于设置阈值时会报错。
### 使用方式
运行命令:
```shell
python3.7 tests/compare_results.py --gt_file=./tests/results/*.txt --log_file=./tests/output/infer_*.log --atol=1e-3 --rtol=1e-3
```
参数介绍:
- gt_file: 指向事先保存好的预测结果路径,支持*.txt 结尾,会自动索引*.txt格式的文件,文件默认保存在tests/result/ 文件夹下
- log_file: 指向运行tests/test.sh 脚本的infer模式保存的预测日志,预测日志中打印的有预测结果,比如:文本框,预测文本,类别等等,同样支持infer_*.log格式传入
- atol: 设置的绝对误差
- rtol: 设置的相对误差
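The tolerance check itself is conceptually simple. The sketch below is a simplified stand-in for what `compare_results.py` does, assuming the ground-truth and predicted coordinates have already been parsed into float arrays:
```python
import numpy as np

def check_close(gt_boxes, pred_boxes, atol=1e-3, rtol=1e-3):
    """Raise if the predicted coordinates drift beyond the given tolerances."""
    gt = np.array(gt_boxes, dtype=np.float64)
    pred = np.array(pred_boxes, dtype=np.float64)
    if not np.allclose(gt, pred, atol=atol, rtol=rtol):
        raise ValueError("prediction differs from the saved ground truth "
                         "beyond atol={}, rtol={}".format(atol, rtol))
```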
### 运行结果
正常运行效果如下图:
<img src="compare_right.png" width="1000">
出现不一致结果时的运行输出:
<img src="compare_wrong.png" width="1000">
...@@ -131,14 +131,9 @@ def main(args): ...@@ -131,14 +131,9 @@ def main(args):
img_list.append(img) img_list.append(img)
try: try:
img_list, cls_res, predict_time = text_classifier(img_list) img_list, cls_res, predict_time = text_classifier(img_list)
except: except Exception as E:
logger.info(traceback.format_exc()) logger.info(traceback.format_exc())
logger.info( logger.info(E)
"ERROR!!!! \n"
"Please read the FAQ:https://github.com/PaddlePaddle/PaddleOCR#faq \n"
"If your model has tps module: "
"TPS does not support variable shape.\n"
"Please set --rec_image_shape='3,32,100' and --rec_char_type='en' ")
exit() exit()
for ino in range(len(img_list)): for ino in range(len(img_list)):
logger.info("Predicts of {}:{}".format(valid_image_file_list[ino], logger.info("Predicts of {}:{}".format(valid_image_file_list[ino],
......
...@@ -38,40 +38,34 @@ logger = get_logger() ...@@ -38,40 +38,34 @@ logger = get_logger()
class TextRecognizer(object): class TextRecognizer(object):
def __init__(self, args): def __init__(self, args):
self.rec_image_shape = [int(v) for v in args.rec_image_shape.split(",")] self.rec_image_shape = [int(v) for v in args.rec_image_shape.split(",")]
self.character_type = args.rec_char_type
self.rec_batch_num = args.rec_batch_num self.rec_batch_num = args.rec_batch_num
self.rec_algorithm = args.rec_algorithm self.rec_algorithm = args.rec_algorithm
postprocess_params = { postprocess_params = {
'name': 'CTCLabelDecode', 'name': 'CTCLabelDecode',
"character_type": args.rec_char_type,
"character_dict_path": args.rec_char_dict_path, "character_dict_path": args.rec_char_dict_path,
"use_space_char": args.use_space_char "use_space_char": args.use_space_char
} }
if self.rec_algorithm == "SRN": if self.rec_algorithm == "SRN":
postprocess_params = { postprocess_params = {
'name': 'SRNLabelDecode', 'name': 'SRNLabelDecode',
"character_type": args.rec_char_type,
"character_dict_path": args.rec_char_dict_path, "character_dict_path": args.rec_char_dict_path,
"use_space_char": args.use_space_char "use_space_char": args.use_space_char
} }
elif self.rec_algorithm == "RARE": elif self.rec_algorithm == "RARE":
postprocess_params = { postprocess_params = {
'name': 'AttnLabelDecode', 'name': 'AttnLabelDecode',
"character_type": args.rec_char_type,
"character_dict_path": args.rec_char_dict_path, "character_dict_path": args.rec_char_dict_path,
"use_space_char": args.use_space_char "use_space_char": args.use_space_char
} }
elif self.rec_algorithm == 'NRTR': elif self.rec_algorithm == 'NRTR':
postprocess_params = { postprocess_params = {
'name': 'NRTRLabelDecode', 'name': 'NRTRLabelDecode',
"character_type": args.rec_char_type,
"character_dict_path": args.rec_char_dict_path, "character_dict_path": args.rec_char_dict_path,
"use_space_char": args.use_space_char "use_space_char": args.use_space_char
} }
elif self.rec_algorithm == "SAR": elif self.rec_algorithm == "SAR":
postprocess_params = { postprocess_params = {
'name': 'SARLabelDecode', 'name': 'SARLabelDecode',
"character_type": args.rec_char_type,
"character_dict_path": args.rec_char_dict_path, "character_dict_path": args.rec_char_dict_path,
"use_space_char": args.use_space_char "use_space_char": args.use_space_char
} }
......
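With `--rec_char_type` removed, the post-processing operator is chosen only by its `name` plus the dictionary settings. A hedged sketch of turning such a parameter dict into a decoder via the existing `build_post_process` factory (the dict values below are illustrative):
```python
from ppocr.postprocess import build_post_process

postprocess_params = {
    "name": "CTCLabelDecode",
    "character_dict_path": "./ppocr/utils/ppocr_keys_v1.txt",
    "use_space_char": True,
}
postprocess_op = build_post_process(postprocess_params)
```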
...@@ -74,7 +74,6 @@ def init_args(): ...@@ -74,7 +74,6 @@ def init_args():
parser.add_argument("--rec_algorithm", type=str, default='CRNN') parser.add_argument("--rec_algorithm", type=str, default='CRNN')
parser.add_argument("--rec_model_dir", type=str) parser.add_argument("--rec_model_dir", type=str)
parser.add_argument("--rec_image_shape", type=str, default="3, 32, 320") parser.add_argument("--rec_image_shape", type=str, default="3, 32, 320")
parser.add_argument("--rec_char_type", type=str, default='ch')
parser.add_argument("--rec_batch_num", type=int, default=6) parser.add_argument("--rec_batch_num", type=int, default=6)
parser.add_argument("--max_text_length", type=int, default=25) parser.add_argument("--max_text_length", type=int, default=25)
parser.add_argument( parser.add_argument(
...@@ -268,10 +267,11 @@ def create_predictor(args, mode, logger): ...@@ -268,10 +267,11 @@ def create_predictor(args, mode, logger):
# cache 10 different shapes for mkldnn to avoid memory leak # cache 10 different shapes for mkldnn to avoid memory leak
config.set_mkldnn_cache_capacity(10) config.set_mkldnn_cache_capacity(10)
config.enable_mkldnn() config.enable_mkldnn()
if args.precision == "fp16":
config.enable_mkldnn_bfloat16()
# enable memory optim # enable memory optim
config.enable_memory_optim() config.enable_memory_optim()
#config.disable_glog_info() config.disable_glog_info()
config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass") config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
if mode == 'table': if mode == 'table':
......
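The newly added `enable_mkldnn_bfloat16()` call means that on CPU, `--precision fp16` now maps to oneDNN bfloat16 kernels. A minimal standalone sketch of the relevant `paddle.inference.Config` calls (model paths are placeholders):
```python
from paddle.inference import Config, create_predictor

config = Config("inference/det/inference.pdmodel",
                "inference/det/inference.pdiparams")  # placeholder paths
config.set_mkldnn_cache_capacity(10)  # cache shapes to avoid memory growth
config.enable_mkldnn()
config.enable_mkldnn_bfloat16()       # only when --precision fp16 on CPU
config.enable_memory_optim()
config.disable_glog_info()
predictor = create_predictor(config)
```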
...@@ -159,7 +159,8 @@ def train(config, ...@@ -159,7 +159,8 @@ def train(config,
eval_class, eval_class,
pre_best_model_dict, pre_best_model_dict,
logger, logger,
vdl_writer=None): vdl_writer=None,
scaler=None):
cal_metric_during_train = config['Global'].get('cal_metric_during_train', cal_metric_during_train = config['Global'].get('cal_metric_during_train',
False) False)
log_smooth_window = config['Global']['log_smooth_window'] log_smooth_window = config['Global']['log_smooth_window']
...@@ -226,12 +227,27 @@ def train(config, ...@@ -226,12 +227,27 @@ def train(config,
images = batch[0] images = batch[0]
if use_srn: if use_srn:
model_average = True model_average = True
# use amp
if scaler:
with paddle.amp.auto_cast():
if model_type == 'table' or extra_input:
preds = model(images, data=batch[1:])
else:
preds = model(images)
else:
if model_type == 'table' or extra_input: if model_type == 'table' or extra_input:
preds = model(images, data=batch[1:]) preds = model(images, data=batch[1:])
else: else:
preds = model(images) preds = model(images)
loss = loss_class(preds, batch) loss = loss_class(preds, batch)
avg_loss = loss['loss'] avg_loss = loss['loss']
if scaler:
scaled_avg_loss = scaler.scale(avg_loss)
scaled_avg_loss.backward()
scaler.minimize(optimizer, scaled_avg_loss)
else:
avg_loss.backward() avg_loss.backward()
optimizer.step() optimizer.step()
optimizer.clear_grad() optimizer.clear_grad()
......
...@@ -102,10 +102,27 @@ def main(config, device, logger, vdl_writer): ...@@ -102,10 +102,27 @@ def main(config, device, logger, vdl_writer):
if valid_dataloader is not None: if valid_dataloader is not None:
logger.info('valid dataloader has {} iters'.format( logger.info('valid dataloader has {} iters'.format(
len(valid_dataloader))) len(valid_dataloader)))
use_amp = config["Global"].get("use_amp", False)
if use_amp:
AMP_RELATED_FLAGS_SETTING = {
'FLAGS_cudnn_batchnorm_spatial_persistent': 1,
'FLAGS_max_inplace_grad_add': 8,
}
paddle.fluid.set_flags(AMP_RELATED_FLAGS_SETTING)
scale_loss = config["Global"].get("scale_loss", 1.0)
use_dynamic_loss_scaling = config["Global"].get(
"use_dynamic_loss_scaling", False)
scaler = paddle.amp.GradScaler(
init_loss_scaling=scale_loss,
use_dynamic_loss_scaling=use_dynamic_loss_scaling)
else:
scaler = None
# start train # start train
program.train(config, train_dataloader, valid_dataloader, device, model, program.train(config, train_dataloader, valid_dataloader, device, model,
loss_class, optimizer, lr_scheduler, post_process_class, loss_class, optimizer, lr_scheduler, post_process_class,
eval_class, pre_best_model_dict, logger, vdl_writer) eval_class, pre_best_model_dict, logger, vdl_writer, scaler)
def test_reader(config, device, logger): def test_reader(config, device, logger):
......
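The `scaler` threaded through `program.train` follows the standard PaddlePaddle AMP recipe. A self-contained sketch of the same pattern (the model, loss, and data here are placeholders, not PaddleOCR components):
```python
import paddle

model = paddle.nn.Linear(10, 10)             # placeholder model
loss_fn = paddle.nn.MSELoss()                # placeholder loss
optimizer = paddle.optimizer.Adam(parameters=model.parameters())

scaler = paddle.amp.GradScaler(init_loss_scaling=1024,
                               use_dynamic_loss_scaling=True)

x = paddle.rand([4, 10])
y = paddle.rand([4, 10])
with paddle.amp.auto_cast():                 # forward pass in mixed precision
    loss = loss_fn(model(x), y)
scaled = scaler.scale(loss)                  # scale the loss before backward
scaled.backward()
scaler.minimize(optimizer, scaled)           # unscale gradients + optimizer step
optimizer.clear_grad()
```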