diff --git a/configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_dml.yml b/configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_dml.yml index 0553a65e448db21f196389f6abba7bf8808f0494..f3ad966488dbc2f6f7ca12033bc4a3d35e1b3bd7 100644 --- a/configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_dml.yml +++ b/configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_dml.yml @@ -37,7 +37,7 @@ Architecture: Head: name: DBHead k: 50 - Student2: + Teacher: pretrained: ./pretrain_models/MobileNetV3_large_x0_5_pretrained freeze_params: false return_all_feats: false @@ -62,15 +62,15 @@ Loss: loss_config_list: - DistillationDMLLoss: model_name_pairs: - - ["Student", "Student2"] + - ["Student", "Teacher"] maps_name: "thrink_maps" weight: 1.0 act: "softmax" - model_name_pairs: ["Student", "Student2"] + model_name_pairs: ["Student", "Teacher"] key: maps - DistillationDBLoss: weight: 1.0 - model_name_list: ["Student", "Student2"] + model_name_list: ["Student", "Teacher"] # key: maps name: DBLoss balance_loss: true @@ -94,7 +94,7 @@ Optimizer: PostProcess: name: DistillationDBPostProcess - model_name: ["Student", "Student2"] + model_name: ["Student", "Teacher"] key: head_out thresh: 0.3 box_thresh: 0.6 diff --git a/configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml b/configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml new file mode 100644 index 0000000000000000000000000000000000000000..ff536edec4d6e7a85a6e6c189d56a23ffabc5583 --- /dev/null +++ b/configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml @@ -0,0 +1,131 @@ +Global: + debug: false + use_gpu: true + epoch_num: 500 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/v3_en_mobile + save_epoch_step: 3 + eval_batch_step: [0, 2000] + cal_metric_during_train: true + pretrained_model: + checkpoints: + save_inference_dir: + use_visualdl: false + infer_img: doc/imgs_words/ch/word_1.jpg + character_dict_path: ppocr/utils/en_dict.txt + max_text_length: &max_text_length 25 + infer_mode: false + use_space_char: true + distributed: true + save_res_path: ./output/rec/predicts_ppocrv3_en.txt + + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + name: Cosine + learning_rate: 0.001 + warmup_epoch: 5 + regularizer: + name: L2 + factor: 3.0e-05 + + +Architecture: + model_type: rec + algorithm: SVTR + Transform: + Backbone: + name: MobileNetV1Enhance + scale: 0.5 + last_conv_stride: [1, 2] + last_pool_type: avg + Head: + name: MultiHead + head_list: + - CTCHead: + Neck: + name: svtr + dims: 64 + depth: 2 + hidden_dims: 120 + use_guide: True + Head: + fc_decay: 0.00001 + - SARHead: + enc_dim: 512 + max_text_length: *max_text_length + +Loss: + name: MultiLoss + loss_config_list: + - CTCLoss: + - SARLoss: + +PostProcess: + name: CTCLabelDecode + +Metric: + name: RecMetric + main_indicator: acc + ignore_space: False + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ + ext_op_transform_idx: 1 + label_file_list: + - ./train_data/train_list.txt + transforms: + - DecodeImage: + img_mode: BGR + channel_first: false + - RecConAug: + prob: 0.5 + ext_data_num: 2 + image_shape: [48, 320, 3] + - RecAug: + - MultiLabelEncode: + - RecResizeImg: + image_shape: [3, 48, 320] + - KeepKeys: + keep_keys: + - image + - label_ctc + - label_sar + - length + - valid_ratio + loader: + shuffle: true + batch_size_per_card: 128 + drop_last: true + num_workers: 4 +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data + label_file_list: + - ./train_data/val_list.txt + transforms: + - DecodeImage: + img_mode: BGR + channel_first: false + - MultiLabelEncode: + - RecResizeImg: + image_shape: [3, 48, 320] + - KeepKeys: + keep_keys: + - image + - label_ctc + - label_sar + - length + - valid_ratio + loader: + shuffle: false + drop_last: false + batch_size_per_card: 128 + num_workers: 4 diff --git a/deploy/README.md b/deploy/README.md index 69e2438996a1329e801f842ef78d2d6e115c5831..3a77ff22a15b624ace56027f606eff61e9efb66c 100644 --- a/deploy/README.md +++ b/deploy/README.md @@ -23,10 +23,9 @@ PP-OCR has supported muti deployment schemes. Click the link to get the specific - [Python Inference](../doc/doc_en/inference_ppocr_en.md) - [C++ Inference](./cpp_infer/readme.md) - [Serving (Python/C++)](./pdserving/README.md) -- [Paddle-Lite (ARM CPU/OpenCL ARM GPU/Metal ARM GPU)](./lite/readme.md) +- [Paddle-Lite (ARM CPU/OpenCL ARM GPU)](./lite/readme.md) - [Paddle.js](./paddlejs/README.md) - [Jetson Inference]() -- [XPU Inference]() - [Paddle2ONNX](./paddle2onnx/readme.md) -If you need the deployment tutorial of academic algorithm models other than PP-OCR, please directly enter the main page of corresponding algorithms, [entrance](../doc/doc_en/algorithm_overview_en.md)。 \ No newline at end of file +If you need the deployment tutorial of academic algorithm models other than PP-OCR, please directly enter the main page of corresponding algorithms, [entrance](../doc/doc_en/algorithm_overview_en.md)。 diff --git a/deploy/README_ch.md b/deploy/README_ch.md index 63ae59537316480a302dca7c3714db3c1003553e..9ec90bd36a05a53ae0f27386f6d2eba798e40842 100644 --- a/deploy/README_ch.md +++ b/deploy/README_ch.md @@ -23,10 +23,9 @@ PP-OCR模型已打通多种场景部署方案,点击链接获取具体的使 - [Python 推理](../doc/doc_ch/inference_ppocr.md) - [C++ 推理](./cpp_infer/readme_ch.md) - [Serving 服务化部署(Python/C++)](./pdserving/README_CN.md) -- [Paddle-Lite 端侧部署(ARM CPU/OpenCL ARM GPU/Metal ARM GPU)](./lite/readme_ch.md) +- [Paddle-Lite 端侧部署(ARM CPU/OpenCL ARM GPU)](./lite/readme_ch.md) - [Paddle.js 部署](./paddlejs/README_ch.md) - [Jetson 推理]() -- [XPU 推理]() - [Paddle2ONNX 推理](./paddle2onnx/readme_ch.md) -需要PP-OCR以外的学术算法模型的推理部署,请直接进入相应算法主页面,[入口](../doc/doc_ch/algorithm_overview.md)。 \ No newline at end of file +需要PP-OCR以外的学术算法模型的推理部署,请直接进入相应算法主页面,[入口](../doc/doc_ch/algorithm_overview.md)。 diff --git a/deploy/lite/readme.md b/deploy/lite/readme.md index 4225f8256af85373f5ed8d0213aa902531139c5a..9926e2dd8c973b25b5397fd5825f790528ede279 100644 --- a/deploy/lite/readme.md +++ b/deploy/lite/readme.md @@ -1,19 +1,18 @@ -- [Tutorial of PaddleOCR Mobile deployment](#tutorial-of-paddleocr-mobile-deployment) - - [1. Preparation](#1-preparation) +# Mobile deployment based on Paddle-Lite + +- [1. Preparation](#1-preparation) - [Preparation environment](#preparation-environment) - [1.1 Prepare the cross-compilation environment](#11-prepare-the-cross-compilation-environment) - [1.2 Prepare Paddle-Lite library](#12-prepare-paddle-lite-library) - - [2 Run](#2-run) +- [2. Run](#2-run) - [2.1 Inference Model Optimization](#21-inference-model-optimization) - [2.2 Run optimized model on Phone](#22-run-optimized-model-on-phone) - - [注意:](#注意) - - [FAQ](#faq) +- [FAQ](#faq) -# Tutorial of PaddleOCR Mobile deployment -This tutorial will introduce how to use [Paddle Lite](https://github.com/PaddlePaddle/Paddle-Lite) to deploy PaddleOCR ultra-lightweight Chinese and English detection models on mobile phones. +This tutorial will introduce how to use [Paddle-Lite](https://github.com/PaddlePaddle/Paddle-Lite) to deploy PaddleOCR ultra-lightweight Chinese and English detection models on mobile phones. -paddle-lite is a lightweight inference engine for PaddlePaddle. It provides efficient inference capabilities for mobile phones and IoT, and extensively integrates cross-platform hardware to provide lightweight deployment solutions for end-side deployment issues. +Paddle-Lite is a lightweight inference engine for PaddlePaddle. It provides efficient inference capabilities for mobile phones and IoT, and extensively integrates cross-platform hardware to provide lightweight deployment solutions for end-side deployment issues. ## 1. Preparation @@ -223,7 +222,7 @@ demo/cxx/ocr/ |-- ocr_db_crnn.cc C++ main code ``` -#### 注意: +**Note**: 1. `ppocr_keys_v1.txt` is a Chinese dictionary file. If the nb model is used for English recognition or other language recognition, dictionary file should be replaced with a dictionary of the corresponding language. PaddleOCR provides a variety of dictionaries under ppocr/utils/, including: ``` dict/french_dict.txt # french diff --git a/deploy/lite/readme_ch.md b/deploy/lite/readme_ch.md index 0acc66f9513e54394cae6f4dc7c6a21b194d79e0..99a543d0d60455443dd872c56a5832c8ca0ff4e9 100644 --- a/deploy/lite/readme_ch.md +++ b/deploy/lite/readme_ch.md @@ -1,15 +1,14 @@ -- [端侧部署](#端侧部署) - - [1. 准备环境](#1-准备环境) +# 端侧部署 + +- [1. 准备环境](#1-准备环境) - [运行准备](#运行准备) - [1.1 准备交叉编译环境](#11-准备交叉编译环境) - [1.2 准备预测库](#12-准备预测库) - - [2 开始运行](#2-开始运行) +- [2 开始运行](#2-开始运行) - [2.1 模型优化](#21-模型优化) - [2.2 与手机联调](#22-与手机联调) - - [注意:](#注意) - - [FAQ](#faq) +- [FAQ](#faq) -# 端侧部署 本教程将介绍基于[Paddle Lite](https://github.com/PaddlePaddle/Paddle-Lite) 在移动端部署PaddleOCR超轻量中文检测、识别模型的详细步骤。 diff --git a/deploy/paddle2onnx/readme_ch.md b/deploy/paddle2onnx/readme_ch.md index 8e821892142d65caddd6fa3bd8ff24a372fe9a5d..5004cab8338ee7e809033dcf5b8f2184b0da065a 100644 --- a/deploy/paddle2onnx/readme_ch.md +++ b/deploy/paddle2onnx/readme_ch.md @@ -39,14 +39,14 @@ python3.7 -m pip install onnxruntime==1.9.0 有两种方式获取Paddle静态图模型:在 [model_list](../../doc/doc_ch/models_list.md) 中下载PaddleOCR提供的预测模型; 参考[模型导出说明](../../doc/doc_ch/inference.md#训练模型转inference模型)把训练好的权重转为 inference_model。 -以 ppocr 中文检测、识别、分类模型为例: +以 PP-OCRv3 中文检测、识别、分类模型为例: ``` -wget -nc -P ./inference https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar -cd ./inference && tar xf ch_PP-OCRv2_det_infer.tar && cd .. +wget -nc -P ./inference https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar +cd ./inference && tar xf ch_PP-OCRv3_det_infer.tar && cd .. -wget -nc -P ./inference https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar -cd ./inference && tar xf ch_PP-OCRv2_rec_infer.tar && cd .. +wget -nc -P ./inference https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar +cd ./inference && tar xf ch_PP-OCRv3_rec_infer.tar && cd .. wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar cd ./inference && tar xf ch_ppocr_mobile_v2.0_cls_infer.tar && cd .. @@ -57,7 +57,7 @@ cd ./inference && tar xf ch_ppocr_mobile_v2.0_cls_infer.tar && cd .. 使用 Paddle2ONNX 将Paddle静态图模型转换为ONNX模型格式: ``` -paddle2onnx --model_dir ./inference/ch_PP-OCRv2_det_infer \ +paddle2onnx --model_dir ./inference/ch_PP-OCRv3_det_infer \ --model_filename inference.pdmodel \ --params_filename inference.pdiparams \ --save_file ./inference/det_onnx/model.onnx \ @@ -65,7 +65,7 @@ paddle2onnx --model_dir ./inference/ch_PP-OCRv2_det_infer \ --input_shape_dict="{'x':[-1,3,-1,-1]}" \ --enable_onnx_checker True -paddle2onnx --model_dir ./inference/ch_PP-OCRv2_rec_infer \ +paddle2onnx --model_dir ./inference/ch_PP-OCRv3_rec_infer \ --model_filename inference.pdmodel \ --params_filename inference.pdiparams \ --save_file ./inference/rec_onnx/model.onnx \ @@ -105,8 +105,8 @@ python3.7 tools/infer/predict_system.py --use_gpu=False --use_onnx=True \ ``` python3.7 tools/infer/predict_system.py --use_gpu=False \ --cls_model_dir=./inference/ch_ppocr_mobile_v2.0_cls_infer \ ---rec_model_dir=./inference/ch_PP-OCRv2_rec_infer \ ---det_model_dir=./inference/ch_PP-OCRv2_det_infer \ +--rec_model_dir=./inference/ch_PP-OCRv3_rec_infer \ +--det_model_dir=./inference/ch_PP-OCRv3_det_infer \ --image_dir=./deploy/lite/imgs/lite_demo.png ``` diff --git a/deploy/pdserving/README.md b/deploy/pdserving/README.md index d3ba7d4cfbabb111831a6ecbce28c1ac352066fe..55e03c4c2654f336ed942ae03e61e88b61940006 100644 --- a/deploy/pdserving/README.md +++ b/deploy/pdserving/README.md @@ -15,6 +15,14 @@ Some Key Features of Paddle Serving: - Industrial serving features supported, such as models management, online loading, online A/B testing etc. - Highly concurrent and efficient communication between clients and servers supported. +PaddleServing supports deployment in multiple languages. In this example, two deployment methods, python pipeline and C++, are provided. The comparison between the two is as follows: + +| Language | Speed | Secondary development | Do you need to compile | +|-----|-----|---------|------------| +| C++ | fast | Slightly difficult | Single model prediction does not need to be compiled, multi-model concatenation needs to be compiled | +| python | general | easy | single-model/multi-model no compilation required | + + The introduction and tutorial of Paddle Serving service deployment framework reference [document](https://github.com/PaddlePaddle/Serving/blob/develop/README.md). @@ -25,6 +33,7 @@ The introduction and tutorial of Paddle Serving service deployment framework ref - [Environmental preparation](#environmental-preparation) - [Model conversion](#model-conversion) - [Paddle Serving pipeline deployment](#paddle-serving-pipeline-deployment) + - [Paddle Serving C++ deployment](#C++) - [WINDOWS Users](#windows-users) - [FAQ](#faq) @@ -41,23 +50,23 @@ PaddleOCR operating environment and Paddle Serving operating environment are nee ```bash # Install serving which used to start the service -wget https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-0.7.0.post102-py3-none-any.whl -pip3 install paddle_serving_server_gpu-0.7.0.post102-py3-none-any.whl +wget https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-0.8.3.post102-py3-none-any.whl +pip3 install paddle_serving_server_gpu-0.8.3.post102-py3-none-any.whl # Install paddle-serving-server for cuda10.1 -# wget https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-0.7.0.post101-py3-none-any.whl -# pip3 install paddle_serving_server_gpu-0.7.0.post101-py3-none-any.whl +# wget https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-0.8.3.post101-py3-none-any.whl +# pip3 install paddle_serving_server_gpu-0.8.3.post101-py3-none-any.whl # Install serving which used to start the service -wget https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_client-0.7.0-cp37-none-any.whl -pip3 install paddle_serving_client-0.7.0-cp37-none-any.whl +wget https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_client-0.8.3-cp37-none-any.whl +pip3 install paddle_serving_client-0.8.3-cp37-none-any.whl # Install serving-app -wget https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_app-0.7.0-py3-none-any.whl -pip3 install paddle_serving_app-0.7.0-py3-none-any.whl +wget https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_app-0.8.3-py3-none-any.whl +pip3 install paddle_serving_app-0.8.3-py3-none-any.whl ``` - **note:** If you want to install the latest version of PaddleServing, refer to [link](https://github.com/PaddlePaddle/Serving/blob/v0.7.0/doc/Latest_Packages_CN.md). + **note:** If you want to install the latest version of PaddleServing, refer to [link](https://github.com/PaddlePaddle/Serving/blob/v0.8.3/doc/Latest_Packages_CN.md). @@ -67,37 +76,37 @@ When using PaddleServing for service deployment, you need to convert the saved i Firstly, download the [inference model](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.3/README_ch.md#pp-ocr%E7%B3%BB%E5%88%97%E6%A8%A1%E5%9E%8B%E5%88%97%E8%A1%A8%E6%9B%B4%E6%96%B0%E4%B8%AD) of PPOCR ``` # Download and unzip the OCR text detection model -wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar -O ch_PP-OCRv2_det_infer.tar && tar -xf ch_PP-OCRv2_det_infer.tar +wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar -O ch_PP-OCRv3_det_infer.tar && tar -xf ch_PP-OCRv3_det_infer.tar # Download and unzip the OCR text recognition model -wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar -O ch_PP-OCRv2_rec_infer.tar && tar -xf ch_PP-OCRv2_rec_infer.tar +wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar -O ch_PP-OCRv3_rec_infer.tar && tar -xf ch_PP-OCRv3_rec_infer.tar ``` Then, you can use installed paddle_serving_client tool to convert inference model to mobile model. ``` # Detection model conversion -python3 -m paddle_serving_client.convert --dirname ./ch_PP-OCRv2_det_infer/ \ +python3 -m paddle_serving_client.convert --dirname ./ch_PP-OCRv3_det_infer/ \ --model_filename inference.pdmodel \ --params_filename inference.pdiparams \ - --serving_server ./ppocr_det_mobile_2.0_serving/ \ - --serving_client ./ppocr_det_mobile_2.0_client/ + --serving_server ./ppocr_det_v3_serving/ \ + --serving_client ./ppocr_det_v3_client/ # Recognition model conversion -python3 -m paddle_serving_client.convert --dirname ./ch_PP-OCRv2_rec_infer/ \ +python3 -m paddle_serving_client.convert --dirname ./ch_PP-OCRv3_rec_infer/ \ --model_filename inference.pdmodel \ --params_filename inference.pdiparams \ - --serving_server ./ppocr_rec_mobile_2.0_serving/ \ - --serving_client ./ppocr_rec_mobile_2.0_client/ + --serving_server ./ppocr_rec_v3_serving/ \ + --serving_client ./ppocr_rec_v3_client/ ``` -After the detection model is converted, there will be additional folders of `ppocr_det_mobile_2.0_serving` and `ppocr_det_mobile_2.0_client` in the current folder, with the following format: +After the detection model is converted, there will be additional folders of `ppocr_det_v3_serving` and `ppocr_det_v3_client` in the current folder, with the following format: ``` -|- ppocr_det_mobile_2.0_serving/ +|- ppocr_det_v3_serving/ |- __model__ |- __params__ |- serving_server_conf.prototxt |- serving_server_conf.stream.prototxt -|- ppocr_det_mobile_2.0_client +|- ppocr_det_v3_client |- serving_client_conf.prototxt |- serving_client_conf.stream.prototxt @@ -193,16 +202,13 @@ The recognition model is the same. 2021-05-13 03:42:36,979 chl2(In: ['rec'], Out: ['@DAGExecutor']) size[0/0] ``` + ## C++ Serving Service deployment based on python obviously has the advantage of convenient secondary development. However, the real application often needs to pursue better performance. PaddleServing also provides a more performant C++ deployment version. The C++ service deployment is the same as python in the environment setup and data preparation stages, the difference is when the service is started and the client sends requests. -| Language | Speed ​​| Secondary development | Do you need to compile | -|-----|-----|---------|------------| -| C++ | fast | Slightly difficult | Single model prediction does not need to be compiled, multi-model concatenation needs to be compiled | -| python | general | easy | single-model/multi-model no compilation required | 1. Compile Serving @@ -211,7 +217,7 @@ The C++ service deployment is the same as python in the environment setup and da 2. Run the following command to start the service. ``` # Start the service and save the running log in log.txt - python3 -m paddle_serving_server.serve --model ppocrv2_det_serving ppocrv2_rec_serving --op GeneralDetectionOp GeneralInferOp --port 9293 &>log.txt & + python3 -m paddle_serving_server.serve --model ppocr_det_v3_serving ppocr_rec_v3_serving --op GeneralDetectionOp GeneralInferOp --port 9293 &>log.txt & ``` After the service is successfully started, a log similar to the following will be printed in log.txt ![](./imgs/start_server.png) @@ -219,7 +225,7 @@ The C++ service deployment is the same as python in the environment setup and da 3. Send service request Due to the need for pre and post-processing in the C++Server part, in order to speed up the input to the C++Server is only the base64 encoded string of the picture, it needs to be manually modified - Change the feed_type field and shape field in ppocrv2_det_client/serving_client_conf.prototxt to the following: + Change the feed_type field and shape field in ppocr_det_v3_client/serving_client_conf.prototxt to the following: ``` feed_var { @@ -234,7 +240,7 @@ The C++ service deployment is the same as python in the environment setup and da start the client: ``` - python3 ocr_cpp_client.py ppocrv2_det_client ppocrv2_rec_client + python3 ocr_cpp_client.py ppocr_det_v3_client ppocr_rec_v3_client ``` After successfully running, the predicted result of the model will be printed in the cmd window. An example of the result is: ![](./imgs/results.png) diff --git a/deploy/pdserving/README_CN.md b/deploy/pdserving/README_CN.md index 7d6169569f92d927312ec6ba8ff667d613c4bfa7..0891611db5f39d322473354f7d988b10afa78cbd 100644 --- a/deploy/pdserving/README_CN.md +++ b/deploy/pdserving/README_CN.md @@ -9,13 +9,21 @@ PaddleOCR提供2种服务部署方式: # 基于PaddleServing的服务部署 -本文档将介绍如何使用[PaddleServing](https://github.com/PaddlePaddle/Serving/blob/develop/README_CN.md)工具部署PP-OCR动态图模型的pipeline在线服务。 +本文档将介绍如何使用[PaddleServing](https://github.com/PaddlePaddle/Serving/blob/develop/README_CN.md) 工具部署PP-OCR动态图模型的pipeline在线服务。 相比较于hubserving部署,PaddleServing具备以下优点: - 支持客户端和服务端之间高并发和高效通信 - 支持 工业级的服务能力 例如模型管理,在线加载,在线A/B测试等 - 支持 多种编程语言 开发客户端,例如C++, Python和Java +PaddleServing 支持多种语言部署,本例中提供了python pipeline 和 C++ 两种部署方式,两者的对比如下: + +| 语言 | 速度 | 二次开发 | 是否需要编译 | +|-----|-----|---------|------------| +| C++ | 很快 | 略有难度 | 单模型预测无需编译,多模型串联需要编译 | +| python | 一般 | 容易 | 单模型/多模型 均无需编译| + + 更多有关PaddleServing服务化部署框架介绍和使用教程参考[文档](https://github.com/PaddlePaddle/Serving/blob/develop/README_CN.md)。 AIStudio演示案例可参考 [基于PaddleServing的OCR服务化部署实战](https://aistudio.baidu.com/aistudio/projectdetail/3630726)。 @@ -24,6 +32,7 @@ AIStudio演示案例可参考 [基于PaddleServing的OCR服务化部署实战](h - [环境准备](#环境准备) - [模型转换](#模型转换) - [Paddle Serving pipeline部署](#部署) +- [Paddle Serving C++部署](#C++) - [Windows用户](#Windows用户) - [FAQ](#FAQ) @@ -34,26 +43,33 @@ AIStudio演示案例可参考 [基于PaddleServing的OCR服务化部署实战](h - 准备PaddleOCR的运行环境[链接](../../doc/doc_ch/installation.md) + ``` + git clone https://github.com/PaddlePaddle/PaddleOCR + + # 进入到工作目录 + cd PaddleOCR/deploy/pdserving/ + ``` + - 准备PaddleServing的运行环境,步骤如下 ```bash # 安装serving,用于启动服务 -wget https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-0.7.0.post102-py3-none-any.whl -pip3 install paddle_serving_server_gpu-0.7.0.post102-py3-none-any.whl +wget https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-0.8.3.post102-py3-none-any.whl +pip3 install paddle_serving_server_gpu-0.8.3.post102-py3-none-any.whl # 如果是cuda10.1环境,可以使用下面的命令安装paddle-serving-server -# wget https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-0.7.0.post101-py3-none-any.whl -# pip3 install paddle_serving_server_gpu-0.7.0.post101-py3-none-any.whl +# wget https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-0.8.3.post101-py3-none-any.whl +# pip3 install paddle_serving_server_gpu-0.8.3.post101-py3-none-any.whl # 安装client,用于向服务发送请求 -wget https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_client-0.7.0-cp37-none-any.whl -pip3 install paddle_serving_client-0.7.0-cp37-none-any.whl +wget https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_client-0.8.3-cp37-none-any.whl +pip3 install paddle_serving_client-0.8.3-cp37-none-any.whl # 安装serving-app -wget https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_app-0.7.0-py3-none-any.whl -pip3 install paddle_serving_app-0.7.0-py3-none-any.whl +wget https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_app-0.8.3-py3-none-any.whl +pip3 install paddle_serving_app-0.8.3-py3-none-any.whl ``` -**Note:** 如果要安装最新版本的PaddleServing参考[链接](https://github.com/PaddlePaddle/Serving/blob/v0.7.0/doc/Latest_Packages_CN.md)。 +**Note:** 如果要安装最新版本的PaddleServing参考[链接](https://github.com/PaddlePaddle/Serving/blob/v0.8.3/doc/Latest_Packages_CN.md)。 ## 模型转换 @@ -64,38 +80,38 @@ pip3 install paddle_serving_app-0.7.0-py3-none-any.whl ```bash # 下载并解压 OCR 文本检测模型 -wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar -O ch_PP-OCRv2_det_infer.tar && tar -xf ch_PP-OCRv2_det_infer.tar +wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar -O ch_PP-OCRv3_det_infer.tar && tar -xf ch_PP-OCRv3_det_infer.tar # 下载并解压 OCR 文本识别模型 -wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar -O ch_PP-OCRv2_rec_infer.tar && tar -xf ch_PP-OCRv2_rec_infer.tar +wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar -O ch_PP-OCRv3_rec_infer.tar && tar -xf ch_PP-OCRv3_rec_infer.tar ``` 接下来,用安装的paddle_serving_client把下载的inference模型转换成易于server部署的模型格式。 ```bash # 转换检测模型 -python3 -m paddle_serving_client.convert --dirname ./ch_PP-OCRv2_det_infer/ \ +python3 -m paddle_serving_client.convert --dirname ./ch_PP-OCRv3_det_infer/ \ --model_filename inference.pdmodel \ --params_filename inference.pdiparams \ - --serving_server ./ppocr_det_mobile_2.0_serving/ \ - --serving_client ./ppocr_det_mobile_2.0_client/ + --serving_server ./ppocr_det_v3_serving/ \ + --serving_client ./ppocr_det_v3_client/ # 转换识别模型 -python3 -m paddle_serving_client.convert --dirname ./ch_PP-OCRv2_rec_infer/ \ +python3 -m paddle_serving_client.convert --dirname ./ch_PP-OCRv3_rec_infer/ \ --model_filename inference.pdmodel \ --params_filename inference.pdiparams \ - --serving_server ./ppocr_rec_mobile_2.0_serving/ \ - --serving_client ./ppocr_rec_mobile_2.0_client/ + --serving_server ./ppocr_rec_v3_serving/ \ + --serving_client ./ppocr_rec_v3_client/ ``` -检测模型转换完成后,会在当前文件夹多出`ppocr_det_mobile_2.0_serving` 和`ppocr_det_mobile_2.0_client`的文件夹,具备如下格式: +检测模型转换完成后,会在当前文件夹多出`ppocr_det_v3_serving` 和`ppocr_det_v3_client`的文件夹,具备如下格式: ``` -|- ppocr_det_mobile_2.0_serving/ +|- ppocr_det_v3_serving/ |- __model__ |- __params__ |- serving_server_conf.prototxt |- serving_server_conf.stream.prototxt -|- ppocr_det_mobile_2.0_client +|- ppocr_det_v3_client |- serving_client_conf.prototxt |- serving_client_conf.stream.prototxt @@ -105,13 +121,8 @@ python3 -m paddle_serving_client.convert --dirname ./ch_PP-OCRv2_rec_infer/ \ ## Paddle Serving pipeline部署 -1. 下载PaddleOCR代码,若已下载可跳过此步骤 - ``` - git clone https://github.com/PaddlePaddle/PaddleOCR +1. 确认工作目录下文件结构: - # 进入到工作目录 - cd PaddleOCR/deploy/pdserving/ - ``` pdserver目录包含启动pipeline服务和发送预测请求的代码,包括: ``` __init__.py @@ -196,16 +207,12 @@ python3 -m paddle_serving_client.convert --dirname ./ch_PP-OCRv2_rec_infer/ \ C++ 服务部署在环境搭建和数据准备阶段与 python 相同,区别在于启动服务和客户端发送请求时不同。 -| 语言 | 速度 | 二次开发 | 是否需要编译 | -|-----|-----|---------|------------| -| C++ | 很快 | 略有难度 | 单模型预测无需编译,多模型串联需要编译 | -| python | 一般 | 容易 | 单模型/多模型 均无需编译| - 1. 准备 Serving 环境 为了提高预测性能,C++ 服务同样提供了多模型串联服务。与python pipeline服务不同,多模型串联的过程中需要将模型前后处理代码写在服务端,因此需要在本地重新编译生成serving。 首先需要下载Serving代码库, 把OCR文本检测预处理相关代码替换到Serving库中 + ``` git clone https://github.com/PaddlePaddle/Serving @@ -223,7 +230,7 @@ cp -rf general_detection_op.cpp Serving/core/general-server/op ``` # 启动服务,运行日志保存在log.txt - python3 -m paddle_serving_server.serve --model ppocrv2_det_serving ppocrv2_rec_serving --op GeneralDetectionOp GeneralInferOp --port 9293 &>log.txt & + python3 -m paddle_serving_server.serve --model ppocr_det_v3_serving ppocr_rec_v3_serving --op GeneralDetectionOp GeneralInferOp --port 9293 &>log.txt & ``` 成功启动服务后,log.txt中会打印类似如下日志 ![](./imgs/start_server.png) @@ -231,7 +238,7 @@ cp -rf general_detection_op.cpp Serving/core/general-server/op 3. 发送服务请求: 由于需要在C++Server部分进行前后处理,为了加速传入C++Server的仅仅是图片的base64编码的字符串,故需要手动修改 - ppocrv2_det_client/serving_client_conf.prototxt 中 feed_type 字段 和 shape 字段,修改成如下内容: + ppocr_det_v3_client/serving_client_conf.prototxt 中 feed_type 字段 和 shape 字段,修改成如下内容: ``` feed_var { name: "x" @@ -243,7 +250,7 @@ cp -rf general_detection_op.cpp Serving/core/general-server/op ``` 启动客户端 ``` - python3 ocr_cpp_client.py ppocrv2_det_client ppocrv2_rec_client + python3 ocr_cpp_client.py ppocr_det_v3_client ppocr_rec_v3_client ``` 成功运行后,模型预测的结果会打印在cmd窗口中,结果示例为: diff --git a/deploy/pdserving/config.yml b/deploy/pdserving/config.yml index 2aae922dfa12f46d1c0ebd352e8d3a7077065cf8..6e30a626d0cdb0b4e5fe6feb737ea46c2bc59f90 100644 --- a/deploy/pdserving/config.yml +++ b/deploy/pdserving/config.yml @@ -34,7 +34,7 @@ op: client_type: local_predictor #det模型路径 - model_config: ./ppocr_det_mobile_2.0_serving + model_config: ./ppocr_det_v3_serving #Fetch结果列表,以client_config中fetch_var的alias_name为准 fetch_list: ["save_infer_model/scale_0.tmp_1"] @@ -60,10 +60,10 @@ op: client_type: local_predictor #rec模型路径 - model_config: ./ppocr_rec_mobile_2.0_serving + model_config: ./ppocr_rec_v3_serving #Fetch结果列表,以client_config中fetch_var的alias_name为准 - fetch_list: ["save_infer_model/scale_0.tmp_1"] + fetch_list: ["softmax_5.tmp_0"] #计算硬件ID,当devices为""或不写时为CPU预测;当devices为"0", "0,1,2"时为GPU预测,表示使用的GPU卡 devices: "0" diff --git a/deploy/pdserving/ocr_reader.py b/deploy/pdserving/ocr_reader.py index 67099786ea73b66412dac8f965e20201f0ac1fdc..6a2d57b679d69ab11ac6f0fd74c47a342b391545 100644 --- a/deploy/pdserving/ocr_reader.py +++ b/deploy/pdserving/ocr_reader.py @@ -392,38 +392,8 @@ class OCRReader(object): return norm_img_batch[0] - def postprocess_old(self, outputs, with_score=False): - rec_res = [] - rec_idx_lod = outputs["ctc_greedy_decoder_0.tmp_0.lod"] - rec_idx_batch = outputs["ctc_greedy_decoder_0.tmp_0"] - if with_score: - predict_lod = outputs["softmax_0.tmp_0.lod"] - for rno in range(len(rec_idx_lod) - 1): - beg = rec_idx_lod[rno] - end = rec_idx_lod[rno + 1] - if isinstance(rec_idx_batch, list): - rec_idx_tmp = [x[0] for x in rec_idx_batch[beg:end]] - else: #nd array - rec_idx_tmp = rec_idx_batch[beg:end, 0] - preds_text = self.char_ops.decode(rec_idx_tmp) - if with_score: - beg = predict_lod[rno] - end = predict_lod[rno + 1] - if isinstance(outputs["softmax_0.tmp_0"], list): - outputs["softmax_0.tmp_0"] = np.array(outputs[ - "softmax_0.tmp_0"]).astype(np.float32) - probs = outputs["softmax_0.tmp_0"][beg:end, :] - ind = np.argmax(probs, axis=1) - blank = probs.shape[1] - valid_ind = np.where(ind != (blank - 1))[0] - score = np.mean(probs[valid_ind, ind[valid_ind]]) - rec_res.append([preds_text, score]) - else: - rec_res.append([preds_text]) - return rec_res - def postprocess(self, outputs, with_score=False): - preds = outputs["save_infer_model/scale_0.tmp_1"] + preds = outputs["softmax_5.tmp_0"] try: preds = preds.numpy() except: diff --git a/deploy/pdserving/win/ocr_reader.py b/deploy/pdserving/win/ocr_reader.py index 3f219784fca79715d09ae9353a32d95e2e427cb6..18b9385aa0c7adf7c3e0cd38efd1160655881f0e 100644 --- a/deploy/pdserving/win/ocr_reader.py +++ b/deploy/pdserving/win/ocr_reader.py @@ -392,38 +392,8 @@ class OCRReader(object): return norm_img_batch[0] - def postprocess_old(self, outputs, with_score=False): - rec_res = [] - rec_idx_lod = outputs["ctc_greedy_decoder_0.tmp_0.lod"] - rec_idx_batch = outputs["ctc_greedy_decoder_0.tmp_0"] - if with_score: - predict_lod = outputs["softmax_0.tmp_0.lod"] - for rno in range(len(rec_idx_lod) - 1): - beg = rec_idx_lod[rno] - end = rec_idx_lod[rno + 1] - if isinstance(rec_idx_batch, list): - rec_idx_tmp = [x[0] for x in rec_idx_batch[beg:end]] - else: #nd array - rec_idx_tmp = rec_idx_batch[beg:end, 0] - preds_text = self.char_ops.decode(rec_idx_tmp) - if with_score: - beg = predict_lod[rno] - end = predict_lod[rno + 1] - if isinstance(outputs["softmax_0.tmp_0"], list): - outputs["softmax_0.tmp_0"] = np.array(outputs[ - "softmax_0.tmp_0"]).astype(np.float32) - probs = outputs["softmax_0.tmp_0"][beg:end, :] - ind = np.argmax(probs, axis=1) - blank = probs.shape[1] - valid_ind = np.where(ind != (blank - 1))[0] - score = np.mean(probs[valid_ind, ind[valid_ind]]) - rec_res.append([preds_text, score]) - else: - rec_res.append([preds_text]) - return rec_res - def postprocess(self, outputs, with_score=False): - preds = outputs["save_infer_model/scale_0.tmp_1"] + preds = outputs["softmax_5.tmp_0"] try: preds = preds.numpy() except: diff --git a/doc/doc_ch/algorithm_rec_crnn.md b/doc/doc_ch/algorithm_rec_crnn.md new file mode 100644 index 0000000000000000000000000000000000000000..70aadd3d684e40ebd1d6e627a26b95b35b544d75 --- /dev/null +++ b/doc/doc_ch/algorithm_rec_crnn.md @@ -0,0 +1,140 @@ +# CRNN + +- [1. 算法简介](#1) +- [2. 环境配置](#2) +- [3. 模型训练、评估、预测](#3) + - [3.1 训练](#3-1) + - [3.2 评估](#3-2) + - [3.3 预测](#3-3) +- [4. 推理部署](#4) + - [4.1 Python推理](#4-1) + - [4.2 C++推理](#4-2) + - [4.3 Serving服务化部署](#4-3) + - [4.4 更多推理部署](#4-4) +- [5. FAQ](#5) + + +## 1. 算法简介 + +论文信息: +> [An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition](https://arxiv.org/abs/1507.05717) + +> Baoguang Shi, Xiang Bai, Cong Yao + +> IEEE, 2015 + +参考[DTRB](https://arxiv.org/abs/1904.01906) 文字识别训练和评估流程,使用MJSynth和SynthText两个文字识别数据集训练,在IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE数据集上进行评估,算法效果如下: + +|模型|骨干网络|Avg Accuracy|配置文件|下载链接| +|---|---|---|---|---| +|CRNN|Resnet34_vd|81.04%|[configs/rec/rec_r34_vd_none_bilstm_ctc.yml](../../configs/rec/rec_r34_vd_none_bilstm_ctc.yml)|[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_bilstm_ctc_v2.0_train.tar)| +|CRNN|MobileNetV3|77.95%|[configs/rec/rec_mv3_none_bilstm_ctc.yml](../../configs/rec/rec_mv3_none_bilstm_ctc.yml)|[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_bilstm_ctc_v2.0_train.tar)| + + + +## 2. 环境配置 +请先参考[《运行环境准备》](./environment.md)配置PaddleOCR运行环境,参考[《项目克隆》](./clone.md)克隆项目代码。 + + + +## 3. 模型训练、评估、预测 + +请参考[文本识别训练教程](./recognition.md)。PaddleOCR对代码进行了模块化,训练不同的识别模型只需要**更换配置文件**即可。 + +- 训练 + +在完成数据准备后,便可以启动训练,训练命令如下: + +``` +#单卡训练(训练周期长,不建议) +python3 tools/train.py -c configs/rec/rec_r34_vd_none_bilstm_ctc.yml + +#多卡训练,通过--gpus参数指定卡号 +python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c rec_r34_vd_none_bilstm_ctc.yml + +``` + +- 评估 + +``` +# GPU 评估, Global.pretrained_model 为待测权重 +python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_r34_vd_none_bilstm_ctc.yml -o Global.pretrained_model={path/to/weights}/best_accuracy +``` + +- 预测: + +``` +# 预测使用的配置文件必须与训练一致 +python3 tools/infer_rec.py -c configs/rec/rec_r34_vd_none_bilstm_ctc.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/en/word_1.png +``` + + +## 4. 推理部署 + + +### 4.1 Python推理 + +首先将 CRNN 文本识别训练过程中保存的模型,转换成inference model。以基于Resnet34_vd骨干网络,使用MJSynth和SynthText两个英文文本识别合成数据集训练的[模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_bilstm_ctc_v2.0_train.tar) 为例,可以使用如下命令进行转换: +```shell +python3 tools/export_model.py -c configs/rec/rec_r34_vd_none_bilstm_ctc.yml -o Global.pretrained_model=./rec_r34_vd_none_bilstm_ctc_v2.0_train/best_accuracy Global.save_inference_dir=./inference/rec_crnn +``` +CRNN 文本识别模型推理,可以执行如下命令: + +```shell +python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./inference/rec_crnn/" --rec_image_shape="3, 32, 100" --rec_char_dict_path="./ppocr/utils/ic15_dict.txt" +``` + +![](../imgs_words_en/word_336.png) + +执行命令后,上面图像的识别结果如下: + +```bash +Predicts of ./doc/imgs_words_en/word_336.png:('super', 0.9999073) +``` + +**注意**:由于上述模型是参考[DTRB](https://arxiv.org/abs/1904.01906)文本识别训练和评估流程,与超轻量级中文识别模型训练有两方面不同: + +- 训练时采用的图像分辨率不同,训练上述模型采用的图像分辨率是[3,32,100],而中文模型训练时,为了保证长文本的识别效果,训练时采用的图像分辨率是[3, 32, 320]。预测推理程序默认的的形状参数是训练中文采用的图像分辨率,即[3, 32, 320]。因此,这里推理上述英文模型时,需要通过参数rec_image_shape设置识别图像的形状。 + +- 字符列表,DTRB论文中实验只是针对26个小写英文本母和10个数字进行实验,总共36个字符。所有大小字符都转成了小写字符,不在上面列表的字符都忽略,认为是空格。因此这里没有输入字符字典,而是通过如下命令生成字典.因此在推理时需要设置参数rec_char_dict_path,指定为英文字典"./ppocr/utils/ic15_dict.txt"。 + +``` +self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" +dict_character = list(self.character_str) +``` + + + +### 4.2 C++推理 + +准备好推理模型后,参考[cpp infer](../../deploy/cpp_infer/)教程进行操作即可。 + + +### 4.3 Serving服务化部署 + +准备好推理模型后,参考[pdserving](../../deploy/pdserving/)教程进行Serving服务化部署,包括Python Serving和C++ Serving两种模式。 + + +### 4.4 更多推理部署 + +CRNN模型还支持以下推理部署方式: + +- Paddle2ONNX推理:准备好推理模型后,参考[paddle2onnx](../../deploy/paddle2onnx/)教程操作。 + + +## 5. FAQ + + +## 引用 + +```bibtex +@ARTICLE{7801919, + author={Shi, Baoguang and Bai, Xiang and Yao, Cong}, + journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, + title={An End-to-End Trainable Neural Network for Image-Based Sequence Recognition and Its Application to Scene Text Recognition}, + year={2017}, + volume={39}, + number={11}, + pages={2298-2304}, + doi={10.1109/TPAMI.2016.2646371}} +``` diff --git a/doc/doc_ch/algorithm_rec_seed.md b/doc/doc_ch/algorithm_rec_seed.md new file mode 100644 index 0000000000000000000000000000000000000000..94c877ffac3f9716786cdf6618d335511d38325a --- /dev/null +++ b/doc/doc_ch/algorithm_rec_seed.md @@ -0,0 +1,113 @@ +# SEED + +- [1. 算法简介](#1) +- [2. 环境配置](#2) +- [3. 模型训练、评估、预测](#3) + - [3.1 训练](#3-1) + - [3.2 评估](#3-2) + - [3.3 预测](#3-3) +- [4. 推理部署](#4) + - [4.1 Python推理](#4-1) + - [4.2 C++推理](#4-2) + - [4.3 Serving服务化部署](#4-3) + - [4.4 更多推理部署](#4-4) +- [5. FAQ](#5) + + +## 1. 算法简介 + +论文信息: +> [SEED: Semantics Enhanced Encoder-Decoder Framework for Scene Text Recognition](https://arxiv.org/pdf/2005.10977.pdf) + +> Qiao, Zhi and Zhou, Yu and Yang, Dongbao and Zhou, Yucan and Wang, Weiping + +> CVPR, 2020 + +参考[DTRB](https://arxiv.org/abs/1904.01906) 文字识别训练和评估流程,使用MJSynth和SynthText两个文字识别数据集训练,在IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE数据集上进行评估,算法效果如下: + +|模型|骨干网络|Avg Accuracy|配置文件|下载链接| +|---|---|---|---|---| +|SEED|Aster_Resnet| 85.2% | [configs/rec/rec_resnet_stn_bilstm_att.yml](../../configs/rec/rec_resnet_stn_bilstm_att.yml) | [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.1/rec/rec_resnet_stn_bilstm_att.tar) | + + +## 2. 环境配置 +请先参考[《运行环境准备》](./environment.md)配置PaddleOCR运行环境,参考[《项目克隆》](./clone.md)克隆项目代码。 + + + +## 3. 模型训练、评估、预测 + +请参考[文本识别训练教程](./recognition.md)。PaddleOCR对代码进行了模块化,训练不同的识别模型只需要**更换配置文件**即可。 + +- 训练 + +SEED模型需要额外加载FastText训练好的[语言模型](https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.en.300.bin.gz) ,并且安装 fasttext 依赖: + +``` +python3 -m pip install fasttext==0.9.1 +``` + +然后,在完成数据准备后,便可以启动训练,训练命令如下: + +``` +#单卡训练(训练周期长,不建议) +python3 tools/train.py -c configs/rec/rec_resnet_stn_bilstm_att.yml + +#多卡训练,通过--gpus参数指定卡号 +python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c rec_resnet_stn_bilstm_att.yml + +``` + +- 评估 + +``` +# GPU 评估, Global.pretrained_model 为待测权重 +python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_resnet_stn_bilstm_att.yml -o Global.pretrained_model={path/to/weights}/best_accuracy +``` + +- 预测: + +``` +# 预测使用的配置文件必须与训练一致 +python3 tools/infer_rec.py -c configs/rec/rec_resnet_stn_bilstm_att.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/en/word_1.png +``` + + +## 4. 推理部署 + + +### 4.1 Python推理 + +comming soon + + + +### 4.2 C++推理 + +comming soon + + +### 4.3 Serving服务化部署 + +comming soon + + +### 4.4 更多推理部署 + +comming soon + + +## 5. FAQ + + +## 引用 + +```bibtex +@inproceedings{qiao2020seed, + title={Seed: Semantics enhanced encoder-decoder framework for scene text recognition}, + author={Qiao, Zhi and Zhou, Yu and Yang, Dongbao and Zhou, Yucan and Wang, Weiping}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + pages={13528--13537}, + year={2020} +} +``` diff --git a/doc/doc_ch/algorithm_rec_starnet.md b/doc/doc_ch/algorithm_rec_starnet.md new file mode 100644 index 0000000000000000000000000000000000000000..c9d7706988763a8ac257129ab54915afe11250ac --- /dev/null +++ b/doc/doc_ch/algorithm_rec_starnet.md @@ -0,0 +1,139 @@ +# STAR-Net + +- [1. 算法简介](#1) +- [2. 环境配置](#2) +- [3. 模型训练、评估、预测](#3) + - [3.1 训练](#3-1) + - [3.2 评估](#3-2) + - [3.3 预测](#3-3) +- [4. 推理部署](#4) + - [4.1 Python推理](#4-1) + - [4.2 C++推理](#4-2) + - [4.3 Serving服务化部署](#4-3) + - [4.4 更多推理部署](#4-4) +- [5. FAQ](#5) + + +## 1. 算法简介 + +论文信息: +> [STAR-Net: a spatial attention residue network for scene text recognition.](http://www.bmva.org/bmvc/2016/papers/paper043/paper043.pdf) + +> Wei Liu, Chaofeng Chen, Kwan-Yee K. Wong, Zhizhong Su and Junyu Han. + +> BMVC, pages 43.1-43.13, 2016 + +参考[DTRB](https://arxiv.org/abs/1904.01906) 文字识别训练和评估流程,使用MJSynth和SynthText两个文字识别数据集训练,在IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE数据集上进行评估,算法效果如下: + +|模型|骨干网络|Avg Accuracy|配置文件|下载链接| +|---|---|---|---|---| +|StarNet|Resnet34_vd|84.44%|[configs/rec/rec_r34_vd_tps_bilstm_ctc.yml](../../configs/rec/rec_r34_vd_tps_bilstm_ctc.yml)|[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_tps_bilstm_ctc_v2.0_train.tar)| +|StarNet|MobileNetV3|81.42%|[configs/rec/rec_mv3_tps_bilstm_ctc.yml](../../configs/rec/rec_mv3_tps_bilstm_ctc.yml)|[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_tps_bilstm_ctc_v2.0_train.tar)| + + + +## 2. 环境配置 +请先参考[《运行环境准备》](./environment.md)配置PaddleOCR运行环境,参考[《项目克隆》](./clone.md)克隆项目代码。 + + + +## 3. 模型训练、评估、预测 + +请参考[文本识别训练教程](./recognition.md)。PaddleOCR对代码进行了模块化,训练不同的识别模型只需要**更换配置文件**即可。 + +- 训练 + +在完成数据准备后,便可以启动训练,训练命令如下: + +``` +#单卡训练(训练周期长,不建议) +python3 tools/train.py -c configs/rec/rec_r34_vd_tps_bilstm_ctc.yml + +#多卡训练,通过--gpus参数指定卡号 +python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c rec_r34_vd_tps_bilstm_ctc.yml + +``` + +- 评估 + +``` +# GPU 评估, Global.pretrained_model 为待测权重 +python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_r34_vd_tps_bilstm_ctc.yml -o Global.pretrained_model={path/to/weights}/best_accuracy +``` + +- 预测: + +``` +# 预测使用的配置文件必须与训练一致 +python3 tools/infer_rec.py -c configs/rec/rec_r34_vd_tps_bilstm_ctc.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/en/word_1.png +``` + + +## 4. 推理部署 + + +### 4.1 Python推理 + +首先将 STAR-Net 文本识别训练过程中保存的模型,转换成inference model。以基于Resnet34_vd骨干网络,使用MJSynth和SynthText两个英文文本识别合成数据集训练的[模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_bilstm_ctc_v2.0_train.tar) 为例,可以使用如下命令进行转换: +```shell +python3 tools/export_model.py -c configs/rec/rec_r34_vd_tps_bilstm_ctc.yml -o Global.pretrained_model=./rec_r34_vd_tps_bilstm_ctc_v2.0_train/best_accuracy Global.save_inference_dir=./inference/rec_starnet +``` +STAR-Net 文本识别模型推理,可以执行如下命令: + +```shell +python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./inference/rec_starnet/" --rec_image_shape="3, 32, 100" --rec_char_dict_path="./ppocr/utils/ic15_dict.txt" +``` + +![](../imgs_words_en/word_336.png) + +执行命令后,上面图像的识别结果如下: + +```bash +Predicts of ./doc/imgs_words_en/word_336.png:('super', 0.9999073) +``` + +**注意**:由于上述模型是参考[DTRB](https://arxiv.org/abs/1904.01906)文本识别训练和评估流程,与超轻量级中文识别模型训练有两方面不同: + +- 训练时采用的图像分辨率不同,训练上述模型采用的图像分辨率是[3,32,100],而中文模型训练时,为了保证长文本的识别效果,训练时采用的图像分辨率是[3, 32, 320]。预测推理程序默认的的形状参数是训练中文采用的图像分辨率,即[3, 32, 320]。因此,这里推理上述英文模型时,需要通过参数rec_image_shape设置识别图像的形状。 + +- 字符列表,DTRB论文中实验只是针对26个小写英文本母和10个数字进行实验,总共36个字符。所有大小字符都转成了小写字符,不在上面列表的字符都忽略,认为是空格。因此这里没有输入字符字典,而是通过如下命令生成字典.因此在推理时需要设置参数rec_char_dict_path,指定为英文字典"./ppocr/utils/ic15_dict.txt"。 + +``` +self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" +dict_character = list(self.character_str) +``` + + + +### 4.2 C++推理 + +准备好推理模型后,参考[cpp infer](../../deploy/cpp_infer/)教程进行操作即可。 + + +### 4.3 Serving服务化部署 + +准备好推理模型后,参考[pdserving](../../deploy/pdserving/)教程进行Serving服务化部署,包括Python Serving和C++ Serving两种模式。 + + +### 4.4 更多推理部署 + +STAR-Net模型还支持以下推理部署方式: + +- Paddle2ONNX推理:准备好推理模型后,参考[paddle2onnx](../../deploy/paddle2onnx/)教程操作。 + + +## 5. FAQ + + +## 引用 + +```bibtex +@inproceedings{liu2016star, + title={STAR-Net: a spatial attention residue network for scene text recognition.}, + author={Liu, Wei and Chen, Chaofeng and Wong, Kwan-Yee K and Su, Zhizhong and Han, Junyu}, + booktitle={BMVC}, + volume={2}, + pages={7}, + year={2016} +} +``` diff --git a/doc/doc_ch/multi_languages.md b/doc/doc_ch/multi_languages.md index af9ff82e357e5945bfddf10337d0af3cd04390a0..6838b350403a7044e629e5fcc5893bced98af9d3 100644 --- a/doc/doc_ch/multi_languages.md +++ b/doc/doc_ch/multi_languages.md @@ -176,8 +176,62 @@ ppocr 还支持方向分类, 更多使用方式请参考:[whl包使用说明 ppocr 支持使用自己的数据进行自定义训练或finetune, 其中识别模型可以参考 [法语配置文件](../../configs/rec/multi_language/rec_french_lite_train.yml) 修改训练数据路径、字典等参数。 -具体数据准备、训练过程可参考:[文本检测](../doc_ch/detection.md)、[文本识别](../doc_ch/recognition.md),更多功能如预测部署、 -数据标注等功能可以阅读完整的[文档教程](../../README_ch.md)。 +详细数据准备、训练过程可参考:[文本识别](../doc_ch/recognition.md)、[文本检测](../doc_ch/detection.md)。 + +假设已经准备好了训练数据,可根据以下步骤快速启动训练: + +- 修改配置文件 + +以 `rec_french_lite_train.yml` 为例: + +``` +Global: + ... + # 添加自定义字典,如修改字典请将路径指向新字典 + character_dict_path: ./ppocr/utils/dict/french_dict.txt + ... + # 识别空格 + use_space_char: True + +... + +Train: + dataset: + # 数据集格式,支持LMDBDataSet以及SimpleDataSet + name: SimpleDataSet + # 数据集路径 + data_dir: ./train_data/ + # 训练集标签文件 + label_file_list: ["./train_data/french_train.txt"] + ... + +Eval: + dataset: + # 数据集格式,支持LMDBDataSet以及SimpleDataSet + name: SimpleDataSet + # 数据集路径 + data_dir: ./train_data + # 验证集标签文件 + label_file_list: ["./train_data/french_val.txt"] + ... +``` + +- 启动训练: + +``` +# 下载预训练模型 +wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/french_mobile_v2.0_rec_train.tar +tar -xf french_mobile_v2.0_rec_train.tar + +#加载预训练模型 单卡训练 +python3 tools/train.py -c configs/rec/rec_french_lite_train.yml -o Global.pretrained_model=french_mobile_v2.0_rec_train/best_accuracy + +#加载预训练模型 多卡训练,通过--gpus参数指定卡号 +python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/rec/rec_french_lite_train.yml -o Global.pretrained_model=french_mobile_v2.0_rec_train/best_accuracy +``` + + +更多功能如预测部署、数据标注等功能可以阅读完整的[文档教程](../../README_ch.md)。 ## 4 预测部署 diff --git a/doc/doc_ch/ppocr_introduction.md b/doc/doc_ch/ppocr_introduction.md index 7fcad84aa1dda2bcb71cadb69f425f7ec2d5c348..c316302649664727479cb88a8f054e4f898352c8 100644 --- a/doc/doc_ch/ppocr_introduction.md +++ b/doc/doc_ch/ppocr_introduction.md @@ -52,6 +52,27 @@ PP-OCRv3文本检测从网络结构、蒸馏训练策略两个方向做了进一 |4|1 + 2 + LKPAN|4.6M|86.0|156ms| +PP-OCRv3识别从网络结构、训练策略、数据增强三个方向做了进一步优化: +- 网络结构上:使用[SVTR](todo:add_link)中的 Transformer block 替换LSTM,提升模型精度和预测速度; +- 训练策略上:参考 [GTC](https://arxiv.org/pdf/2002.01276.pdf) 策略,使用注意力机制模块指导CTC训练,定位和识别字符,提升不规则文本的识别精度;设计方向分类前序任务,获取更优预训练模型,加速模型收敛过程,提升精度。 +- 数据增强上:使用[RecConAug](todo:add_link)数据增广方法,随机结合图片,提升训练数据的上下文信息丰富度,增强模型鲁棒性。 + +基于上述策略,PP-OCRv3识别模型相比上一版本,速度加速30%,精度进一步提升4.5%。 具体消融实验: + +| id | 策略 | 模型大小 | 精度 | CPU+mkldnn 预测耗时 | +|-----|-----|--------|----|------------| +| 01 | PP-OCRv2 | 8M | 69.3% | 26ms | +| 02 | SVTR_tiny | 19M | 80.1% | - | +| 03 | LCNet_SVTR_G6 | 8.2M | 76% | - | +| 04 | LCNet_SVTR_G1 | - | - | - | +| 05 | PP-OCRv3 | 12M | 71.9% | 19ms | +| 06 | + GTC | 12M | 75.8% | 19ms | +| 07 | + RecConAug | 12M | 76.3% | 19ms | +| 08 | + SSL pretrain | 12M | 76.9% | 19ms | +| 09 | + UDML | 12M | 78.4% | 19ms | +| 10 | + unlabeled data | 12M | 79.4% | 19ms | + + ## 2. 特性 @@ -129,7 +150,7 @@ PP-OCR中英文模型列表如下: | 模型简介 | 模型名称 | 推荐场景 | 检测模型 | 方向分类器 | 识别模型 | | ------------------------------------- | ----------------------- | --------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | | 中英文超轻量PP-OCRv3模型(16.2M) | ch_PP-OCRv3_xx | 移动端&服务器端 | [推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_train.tar) | -| 英文超轻量PP-OCRv3模型(13.4M) | en_PP-OCRv3_xx | 移动端&服务器端 | [推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_distill_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/ch_ppocr_mobile_v2.0_cls_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/ch_ppocr_mobile_v2.0_cls_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_train.tar) | +| 英文超轻量PP-OCRv3模型(13.4M) | en_PP-OCRv3_xx | 移动端&服务器端 | [推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_distill_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_train.tar) | | 中英文超轻量PP-OCRv2模型(13.0M) | ch_PP-OCRv2_xx | 移动端&服务器端 | [推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_distill_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_train.tar) | | 中英文超轻量PP-OCR mobile模型(9.4M) | ch_ppocr_mobile_v2.0_xx | 移动端&服务器端 | [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_pre.tar) | | 中英文通用PP-OCR server模型(143.4M) | ch_ppocr_server_v2.0_xx | 服务器端 | [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_pre.tar) | diff --git a/doc/doc_ch/recognition.md b/doc/doc_ch/recognition.md index 34a462f7ab704ce7c57fc7b8ef7f0fb3f1fb8931..0b58beb0b008e28d7fecbef8fea84e3d8fcb5964 100644 --- a/doc/doc_ch/recognition.md +++ b/doc/doc_ch/recognition.md @@ -105,8 +105,6 @@ train_data/rec/train/word_002.jpg 用科技让复杂的世界更简单 若您本地没有数据集,可以在官网下载 [ICDAR2015](http://rrc.cvc.uab.es/?ch=4&com=downloads) 数据,用于快速验证。也可以参考[DTRB](https://github.com/clovaai/deep-text-recognition-benchmark#download-lmdb-dataset-for-traininig-and-evaluation-from-here) ,下载 benchmark 所需的lmdb格式数据集。 -如果希望复现SAR的论文指标,需要下载[SynthAdd](https://pan.baidu.com/share/init?surl=uV0LtoNmcxbO-0YA7Ch4dg), 提取码:627x。此外,真实数据集icdar2013, icdar2015, cocotext, IIIT5也作为训练数据的一部分。具体数据细节可以参考论文SAR。 - 如果你使用的是icdar2015的公开数据集,PaddleOCR 提供了一份用于训练 ICDAR2015 数据集的标签文件,通过以下方式下载: ``` @@ -169,12 +167,13 @@ PaddleOCR内置了一部分字典,可以按需使用。 `ppocr/utils/en_dict.txt` 是一个包含96个字符的英文字典 + 目前的多语言模型仍处在demo阶段,会持续优化模型并补充语种,**非常欢迎您为我们提供其他语言的字典和字体**, 如您愿意可将字典文件提交至 [dict](../../ppocr/utils/dict),我们会在Repo中感谢您。 - 自定义字典 -如需自定义dic文件,请在 `configs/rec/rec_icdar15_train.yml` 中添加 `character_dict_path` 字段, 指向您的字典路径。 +如需自定义dic文件,请在 `configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml` 中添加 `character_dict_path` 字段, 指向您的字典路径。 ## 1.4 添加空格类别 @@ -195,7 +194,7 @@ PaddleOCR提供了多种数据增强方式,默认配置文件中已经添加 # 2. 开始训练 -PaddleOCR提供了训练脚本、评估脚本和预测脚本,本节将以 CRNN 识别模型为例: +PaddleOCR提供了训练脚本、评估脚本和预测脚本,本节将以 PP-OCRv3 英文识别模型为例: ## 2.1 启动训练 @@ -204,11 +203,11 @@ PaddleOCR提供了训练脚本、评估脚本和预测脚本,本节将以 CRNN ``` cd PaddleOCR/ -# 下载MobileNetV3的预训练模型 -wget -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_bilstm_ctc_v2.0_train.tar +# 下载英文PP-OCRv3的预训练模型 +wget -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_train.tar # 解压模型参数 cd pretrain_models -tar -xf rec_mv3_none_bilstm_ctc_v2.0_train.tar && rm -rf rec_mv3_none_bilstm_ctc_v2.0_train.tar +tar -xf en_PP-OCRv3_rec_train.tar && rm -rf en_PP-OCRv3_rec_train.tar ``` 开始训练: @@ -220,44 +219,23 @@ tar -xf rec_mv3_none_bilstm_ctc_v2.0_train.tar && rm -rf rec_mv3_none_bilstm_ctc # 训练icdar15英文数据 训练日志会自动保存为 "{save_model_dir}" 下的train.log #单卡训练(训练周期长,不建议) -python3 tools/train.py -c configs/rec/rec_icdar15_train.yml +python3 tools/train.py -c configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml -o Global.pretrained_model=./pretrain_models/en_PP-OCRv3_rec_train/best_accuracy #多卡训练,通过--gpus参数指定卡号 -python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/rec/rec_icdar15_train.yml +python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml -o Global.pretrained_model=./pretrain_models/en_PP-OCRv3_rec_train/best_accuracy ``` -PaddleOCR支持训练和评估交替进行, 可以在 `configs/rec/rec_icdar15_train.yml` 中修改 `eval_batch_step` 设置评估频率,默认每500个iter评估一次。评估过程中默认将最佳acc模型,保存为 `output/rec_CRNN/best_accuracy` 。 +PaddleOCR支持训练和评估交替进行, 可以在 `configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml` 中修改 `eval_batch_step` 设置评估频率,默认每500个iter评估一次。评估过程中默认将最佳acc模型,保存为 `output/en_PP-OCRv3_rec/best_accuracy` 。 如果验证集很大,测试将会比较耗时,建议减少评估次数,或训练完再进行评估。 -**提示:** 可通过 -c 参数选择 `configs/rec/` 路径下的多种模型配置进行训练,PaddleOCR支持的识别算法有: +**提示:** 可通过 -c 参数选择 `configs/rec/` 路径下的多种模型配置进行训练,PaddleOCR支持的识别算法可以参考[前沿算法列表](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_ch/algorithm_overview.md#12-%E6%96%87%E6%9C%AC%E8%AF%86%E5%88%AB%E7%AE%97%E6%B3%95): -| 配置文件 | 算法名称 | backbone | trans | seq | pred | -| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | -| [rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml) | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | -| [rec_chinese_common_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yml) | CRNN | ResNet34_vd | None | BiLSTM | ctc | -| rec_icdar15_train.yml | CRNN | Mobilenet_v3 large 0.5 | None | BiLSTM | ctc | -| rec_mv3_none_bilstm_ctc.yml | CRNN | Mobilenet_v3 large 0.5 | None | BiLSTM | ctc | -| rec_mv3_none_none_ctc.yml | Rosetta | Mobilenet_v3 large 0.5 | None | None | ctc | -| rec_r34_vd_none_bilstm_ctc.yml | CRNN | Resnet34_vd | None | BiLSTM | ctc | -| rec_r34_vd_none_none_ctc.yml | Rosetta | Resnet34_vd | None | None | ctc | -| rec_mv3_tps_bilstm_att.yml | CRNN | Mobilenet_v3 | TPS | BiLSTM | att | -| rec_r34_vd_tps_bilstm_att.yml | CRNN | Resnet34_vd | TPS | BiLSTM | att | -| rec_r50fpn_vd_none_srn.yml | SRN | Resnet50_fpn_vd | None | rnn | srn | -| rec_mtb_nrtr.yml | NRTR | nrtr_mtb | None | transformer encoder | transformer decoder | -| rec_r31_sar.yml | SAR | ResNet31 | None | LSTM encoder | LSTM decoder | -| rec_resnet_stn_bilstm_att.yml | SEED | Aster_Resnet | STN | BiLSTM | att | +训练中文数据,推荐使用[ch_PP-OCRv3_rec_distillation.yml](../../configs/rec/PP-OCRv3/ch_PP-OCRv3_rec_distillation.yml),如您希望尝试其他算法在中文数据集上的效果,请参考下列说明修改配置文件: -*其中SEED模型需要额外加载FastText训练好的[语言模型](https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.en.300.bin.gz) ,并且安装 fasttext 依赖: -``` -python3.7 -m pip install fasttext==0.9.1 -``` - -训练中文数据,推荐使用[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml),如您希望尝试其他算法在中文数据集上的效果,请参考下列说明修改配置文件: - -以 `rec_chinese_lite_train_v2.0.yml` 为例: +以 `ch_PP-OCRv3_rec_distillation.yml` 为例: ``` Global: ... @@ -290,7 +268,7 @@ Train: ... - RecResizeImg: # 修改 image_shape 以适应长文本 - image_shape: [3, 32, 320] + image_shape: [3, 48, 320] ... loader: ... @@ -310,7 +288,7 @@ Eval: ... - RecResizeImg: # 修改 image_shape 以适应长文本 - image_shape: [3, 32, 320] + image_shape: [3, 48, 320] ... loader: # 单卡验证的batch_size @@ -325,7 +303,7 @@ Eval: 如果训练程序中断,如果希望加载训练中断的模型从而恢复训练,可以通过指定Global.checkpoints指定要加载的模型路径: ```shell -python3 tools/train.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints=./your/trained/model +python3 tools/train.py -c configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml -o Global.checkpoints=./your/trained/model ``` **注意**:`Global.checkpoints`的优先级高于`Global.pretrained_model`的优先级,即同时指定两个参数时,优先加载`Global.checkpoints`指定的模型,如果`Global.checkpoints`指定的模型路径有误,会加载`Global.pretrained_model`指定的模型。 @@ -383,8 +361,8 @@ args1: args1 如果您想进一步加快训练速度,可以使用[自动混合精度训练](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/01_paddle2.0_introduction/basic_concept/amp_cn.html), 以单机单卡为例,命令如下: ```shell -python3 tools/train.py -c configs/rec/rec_icdar15_train.yml \ - -o Global.pretrained_model=./pretrain_models/rec_mv3_none_bilstm_ctc_v2.0_train \ +python3 tools/train.py -c configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml \ + -o Global.pretrained_model=./pretrain_models/en_PP-OCRv3_rec_train/best_accuracy \ Global.use_amp=True Global.scale_loss=1024.0 Global.use_dynamic_loss_scaling=True ``` @@ -394,8 +372,8 @@ python3 tools/train.py -c configs/rec/rec_icdar15_train.yml \ 多机多卡训练时,通过 `--ips` 参数设置使用的机器IP地址,通过 `--gpus` 参数设置使用的GPU ID: ```bash -python3 -m paddle.distributed.launch --ips="xx.xx.xx.xx,xx.xx.xx.xx" --gpus '0,1,2,3' tools/train.py -c configs/rec/rec_icdar15_train.yml \ - -o Global.pretrained_model=./pretrain_models/rec_mv3_none_bilstm_ctc_v2.0_train +python3 -m paddle.distributed.launch --ips="xx.xx.xx.xx,xx.xx.xx.xx" --gpus '0,1,2,3' tools/train.py -c configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml \ + -o Global.pretrained_model=./pretrain_models/en_PP-OCRv3_rec_train/best_accuracy ``` **注意:** 采用多机多卡训练时,需要替换上面命令中的ips值为您机器的地址,机器之间需要能够相互ping通。另外,训练时需要在多个机器上分别启动命令。查看机器ip地址的命令为`ifconfig`。 @@ -485,11 +463,12 @@ DCU设备上运行需要设置环境变量 `export HIP_VISIBLE_DEVICES=0,1,2,3` ## 3.1 指标评估 -训练中模型参数默认保存在`Global.save_model_dir`目录下。在评估指标时,需要设置`Global.checkpoints`指向保存的参数文件。评估数据集可以通过 `configs/rec/rec_icdar15_train.yml` 修改Eval中的 `label_file_path` 设置。 +训练中模型参数默认保存在`Global.save_model_dir`目录下。在评估指标时,需要设置`Global.checkpoints`指向保存的参数文件。评估数据集可以通过 `configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml` 修改Eval中的 `label_file_path` 设置。 + ``` # GPU 评估, Global.checkpoints 为待测权重 -python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy +python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml -o Global.checkpoints={path/to/weights}/best_accuracy ``` @@ -519,7 +498,7 @@ output/rec/ ``` # 预测英文结果 -python3 tools/infer_rec.py -c configs/rec/rec_icdar15_train.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/en/word_1.png +python3 tools/infer_rec.py -c configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/en/word_1.png ``` 预测图片: @@ -538,7 +517,7 @@ infer_img: doc/imgs_words/en/word_1.png ``` # 预测中文结果 -python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/ch/word_1.jpg +python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/ch/word_1.jpg ``` 预测图片: @@ -569,15 +548,15 @@ inference 模型(`paddle.jit.save`保存的模型) # Global.pretrained_model 参数设置待转换的训练模型地址,不用添加文件后缀 .pdmodel,.pdopt或.pdparams。 # Global.save_inference_dir参数设置转换的模型将保存的地址。 -python3 tools/export_model.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model=./ch_lite/ch_ppocr_mobile_v2.0_rec_train/best_accuracy Global.save_inference_dir=./inference/rec_crnn/ +python3 tools/export_model.py -c configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml -o Global.pretrained_model=./pretrain_models/en_PP-OCRv3_rec_train/best_accuracy Global.save_inference_dir=./inference/en_PP-OCRv3_rec/ ``` -**注意:**如果您是在自己的数据集上训练的模型,并且调整了中文字符的字典文件,请注意修改配置文件中的`character_dict_path`是否是所需要的字典文件。 +**注意:**如果您是在自己的数据集上训练的模型,并且调整了中文字符的字典文件,请注意修改配置文件中的`character_dict_path`为自定义字典文件。 转换成功后,在目录下有三个文件: ``` -/inference/rec_crnn/ +inference/en_PP-OCRv3_rec/ ├── inference.pdiparams # 识别inference模型的参数文件 ├── inference.pdiparams.info # 识别inference模型的参数信息,可忽略 └── inference.pdmodel # 识别inference模型的program文件 @@ -588,7 +567,7 @@ python3 tools/export_model.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_trai 如果训练时修改了文本的字典,在使用inference模型预测时,需要通过`--rec_char_dict_path`指定使用的字典路径 ``` - python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./your inference model" --rec_image_shape="3, 32, 100" --rec_char_dict_path="your text dict path" + python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./your inference model" --rec_image_shape="3, 48, 320" --rec_char_dict_path="your text dict path" ``` diff --git a/doc/doc_en/algorithm_rec_aster_en.md b/doc/doc_en/algorithm_rec_aster_en.md new file mode 100644 index 0000000000000000000000000000000000000000..1540681a19f94160e221c37173510395d0fd407f --- /dev/null +++ b/doc/doc_en/algorithm_rec_aster_en.md @@ -0,0 +1,122 @@ +# STAR-Net + +- [1. Introduction](#1) +- [2. Environment](#2) +- [3. Model Training / Evaluation / Prediction](#3) + - [3.1 Training](#3-1) + - [3.2 Evaluation](#3-2) + - [3.3 Prediction](#3-3) +- [4. Inference and Deployment](#4) + - [4.1 Python Inference](#4-1) + - [4.2 C++ Inference](#4-2) + - [4.3 Serving](#4-3) + - [4.4 More](#4-4) +- [5. FAQ](#5) + + +## 1. Introduction + +Paper: +> [STAR-Net: a spatial attention residue network for scene text recognition.](http://www.bmva.org/bmvc/2016/papers/paper043/paper043.pdf) + +> Wei Liu, Chaofeng Chen, Kwan-Yee K. Wong, Zhizhong Su and Junyu Han. + +> BMVC, pages 43.1-43.13, 2016 + +Using MJSynth and SynthText two text recognition datasets for training, and evaluating on IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE datasets, the algorithm reproduction effect is as follows: + +|Model|Backbone|ACC|config|Download link| +| --- | --- | --- | --- | --- | +|---|---|---|---|---| +|StarNet|Resnet34_vd|84.44%|[configs/rec/rec_r34_vd_tps_bilstm_ctc.yml](../../configs/rec/rec_r34_vd_tps_bilstm_ctc.yml)|[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_tps_bilstm_ctc_v2.0_train.tar)| +|StarNet|MobileNetV3|81.42%|[configs/rec/rec_mv3_tps_bilstm_ctc.yml](../../configs/rec/rec_mv3_tps_bilstm_ctc.yml)|[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_tps_bilstm_ctc_v2.0_train.tar)| + + +## 2. Environment +Please refer to ["Environment Preparation"](./environment.md) to configure the PaddleOCR environment, and refer to ["Project Clone"](./clone.md) to clone the project code. + + + +## 3. Model Training / Evaluation / Prediction + +Please refer to [Text Recognition Tutorial](./recognition.md). PaddleOCR modularizes the code, and training different recognition models only requires **changing the configuration file**. + +Training: + +Specifically, after the data preparation is completed, the training can be started. The training command is as follows: + +``` +#Single GPU training (long training period, not recommended) +python3 tools/train.py -c configs/rec/rec_r34_vd_tps_bilstm_ctc.yml + +#Multi GPU training, specify the gpu number through the --gpus parameter +python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c rec_r34_vd_tps_bilstm_ctc.yml +``` + +Evaluation: + +``` +# GPU evaluation +python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_r34_vd_tps_bilstm_ctc.yml -o Global.pretrained_model={path/to/weights}/best_accuracy +``` + +Prediction: + +``` +# The configuration file used for prediction must match the training +python3 tools/infer_rec.py -c configs/rec/rec_r34_vd_tps_bilstm_ctc.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/en/word_1.png +``` + + +## 4. Inference and Deployment + + +### 4.1 Python Inference +First, the model saved during the STAR-Net text recognition training process is converted into an inference model. ( [Model download link](https://paddleocr.bj.bcebos.com/dygraph_v2.1/rec/rec_r31_STAR-Net_train.tar) ), you can use the following command to convert: + +``` +python3 tools/export_model.py -c configs/rec/rec_r34_vd_tps_bilstm_ctc.yml -o Global.pretrained_model=./rec_r34_vd_tps_bilstm_ctc_v2.0_train/best_accuracy Global.save_inference_dir=./inference/rec_starnet +``` + +For STAR-Net text recognition model inference, the following commands can be executed: + +``` +python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./inference/rec_starnet/" --rec_image_shape="3, 32, 100" --rec_char_dict_path="./ppocr/utils/ic15_dict.txt" +``` + + +### 4.2 C++ Inference + +With the inference model prepared, refer to the [cpp infer](../../deploy/cpp_infer/) tutorial for C++ inference. + + + +### 4.3 Serving + +With the inference model prepared, refer to the [pdserving](../../deploy/pdserving/) tutorial for service deployment by Paddle Serving. + + + +### 4.4 More + +More deployment schemes supported for STAR-Net: + +- Paddle2ONNX: with the inference model prepared, please refer to the [paddle2onnx](../../deploy/paddle2onnx/) tutorial. + + + +## 5. FAQ + + +## Citation + +```bibtex +@inproceedings{liu2016star, + title={STAR-Net: a spatial attention residue network for scene text recognition.}, + author={Liu, Wei and Chen, Chaofeng and Wong, Kwan-Yee K and Su, Zhizhong and Han, Junyu}, + booktitle={BMVC}, + volume={2}, + pages={7}, + year={2016} +} +``` diff --git a/doc/doc_en/algorithm_rec_crnn_en.md b/doc/doc_en/algorithm_rec_crnn_en.md new file mode 100644 index 0000000000000000000000000000000000000000..571569ee445d756ca7bdfeea6d5f960187a5a666 --- /dev/null +++ b/doc/doc_en/algorithm_rec_crnn_en.md @@ -0,0 +1,123 @@ +# CRNN + +- [1. Introduction](#1) +- [2. Environment](#2) +- [3. Model Training / Evaluation / Prediction](#3) + - [3.1 Training](#3-1) + - [3.2 Evaluation](#3-2) + - [3.3 Prediction](#3-3) +- [4. Inference and Deployment](#4) + - [4.1 Python Inference](#4-1) + - [4.2 C++ Inference](#4-2) + - [4.3 Serving](#4-3) + - [4.4 More](#4-4) +- [5. FAQ](#5) + + +## 1. Introduction + +Paper: +> [An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition](https://arxiv.org/abs/1507.05717) + +> Baoguang Shi, Xiang Bai, Cong Yao + +> IEEE, 2015 + +Using MJSynth and SynthText two text recognition datasets for training, and evaluating on IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE datasets, the algorithm reproduction effect is as follows: + +|Model|Backbone|ACC|config|Download link| +| --- | --- | --- | --- | --- | +|---|---|---|---|---| +|CRNN|Resnet34_vd|81.04%|[configs/rec/rec_r34_vd_none_bilstm_ctc.yml](../../configs/rec/rec_r34_vd_none_bilstm_ctc.yml)|[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_bilstm_ctc_v2.0_train.tar)| +|CRNN|MobileNetV3|77.95%|[configs/rec/rec_mv3_none_bilstm_ctc.yml](../../configs/rec/rec_mv3_none_bilstm_ctc.yml)|[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_bilstm_ctc_v2.0_train.tar)| + + +## 2. Environment +Please refer to ["Environment Preparation"](./environment.md) to configure the PaddleOCR environment, and refer to ["Project Clone"](./clone.md) to clone the project code. + + + +## 3. Model Training / Evaluation / Prediction + +Please refer to [Text Recognition Tutorial](./recognition.md). PaddleOCR modularizes the code, and training different recognition models only requires **changing the configuration file**. + +Training: + +Specifically, after the data preparation is completed, the training can be started. The training command is as follows: + +``` +#Single GPU training (long training period, not recommended) +python3 tools/train.py -c configs/rec/rec_r34_vd_none_bilstm_ctc.yml + +#Multi GPU training, specify the gpu number through the --gpus parameter +python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/rec/rec_r34_vd_none_bilstm_ctc.yml +``` + +Evaluation: + +``` +# GPU evaluation +python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_r34_vd_none_bilstm_ctc.yml -o Global.pretrained_model={path/to/weights}/best_accuracy +``` + +Prediction: + +``` +# The configuration file used for prediction must match the training +python3 tools/infer_rec.py -c configs/rec/rec_r34_vd_none_bilstm_ctc.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/en/word_1.png +``` + + +## 4. Inference and Deployment + + +### 4.1 Python Inference +First, the model saved during the CRNN text recognition training process is converted into an inference model. ( [Model download link](https://paddleocr.bj.bcebos.com/dygraph_v2.1/rec/rec_r31_CRNN_train.tar) ), you can use the following command to convert: + +``` +python3 tools/export_model.py -c configs/rec/rec_r34_vd_none_bilstm_ctc.yml -o Global.pretrained_model=./rec_r34_vd_none_bilstm_ctc_v2.0_train/best_accuracy Global.save_inference_dir=./inference/rec_crnn +``` + +For CRNN text recognition model inference, the following commands can be executed: + +``` +python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./inference/rec_crnn/" --rec_image_shape="3, 32, 100" --rec_char_dict_path="./ppocr/utils/ic15_dict.txt" +``` + + +### 4.2 C++ Inference + +With the inference model prepared, refer to the [cpp infer](../../deploy/cpp_infer/) tutorial for C++ inference. + + + +### 4.3 Serving + +With the inference model prepared, refer to the [pdserving](../../deploy/pdserving/) tutorial for service deployment by Paddle Serving. + + + +### 4.4 More + +More deployment schemes supported for CRNN: + +- Paddle2ONNX: with the inference model prepared, please refer to the [paddle2onnx](../../deploy/paddle2onnx/) tutorial. + + + +## 5. FAQ + + +## Citation + +```bibtex +@ARTICLE{7801919, + author={Shi, Baoguang and Bai, Xiang and Yao, Cong}, + journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, + title={An End-to-End Trainable Neural Network for Image-Based Sequence Recognition and Its Application to Scene Text Recognition}, + year={2017}, + volume={39}, + number={11}, + pages={2298-2304}, + doi={10.1109/TPAMI.2016.2646371}} +``` diff --git a/doc/doc_en/algorithm_rec_seed_en.md b/doc/doc_en/algorithm_rec_seed_en.md new file mode 100644 index 0000000000000000000000000000000000000000..21679f42fd6302228804db49d731f9b69ec692b2 --- /dev/null +++ b/doc/doc_en/algorithm_rec_seed_en.md @@ -0,0 +1,111 @@ +# SEED + +- [1. Introduction](#1) +- [2. Environment](#2) +- [3. Model Training / Evaluation / Prediction](#3) + - [3.1 Training](#3-1) + - [3.2 Evaluation](#3-2) + - [3.3 Prediction](#3-3) +- [4. Inference and Deployment](#4) + - [4.1 Python Inference](#4-1) + - [4.2 C++ Inference](#4-2) + - [4.3 Serving](#4-3) + - [4.4 More](#4-4) +- [5. FAQ](#5) + + +## 1. Introduction + +Paper: +> [SEED: Semantics Enhanced Encoder-Decoder Framework for Scene Text Recognition](https://arxiv.org/pdf/2005.10977.pdf) + +> Qiao, Zhi and Zhou, Yu and Yang, Dongbao and Zhou, Yucan and Wang, Weiping + +> CVPR, 2020 + +Using MJSynth and SynthText two text recognition datasets for training, and evaluating on IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE datasets, the algorithm reproduction effect is as follows: + +|Model|Backbone|ACC|config|Download link| +| --- | --- | --- | --- | --- | +|SEED|Aster_Resnet| 85.2% | [configs/rec/rec_resnet_stn_bilstm_att.yml](../../configs/rec/rec_resnet_stn_bilstm_att.yml) | [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.1/rec/rec_resnet_stn_bilstm_att.tar) | + + +## 2. Environment +Please refer to ["Environment Preparation"](./environment.md) to configure the PaddleOCR environment, and refer to ["Project Clone"](./clone.md) to clone the project code. + + + +## 3. Model Training / Evaluation / Prediction + +Please refer to [Text Recognition Tutorial](./recognition.md). PaddleOCR modularizes the code, and training different recognition models only requires **changing the configuration file**. + +Training: + +The SEED model needs to additionally load the [language model](https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.en.300.bin.gz) trained by FastText, and install the fasttext dependencies: + +``` +python3 -m pip install fasttext==0.9.1 +``` + +Specifically, after the data preparation is completed, the training can be started. The training command is as follows: + +``` +#Single GPU training (long training period, not recommended) +python3 tools/train.py -c configs/rec/rec_resnet_stn_bilstm_att.yml + +#Multi GPU training, specify the gpu number through the --gpus parameter +python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c rec_resnet_stn_bilstm_att.yml +``` + +Evaluation: + +``` +# GPU evaluation +python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_resnet_stn_bilstm_att.yml -o Global.pretrained_model={path/to/weights}/best_accuracy +``` + +Prediction: + +``` +# The configuration file used for prediction must match the training +python3 tools/infer_rec.py -c configs/rec/rec_resnet_stn_bilstm_att.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/en/word_1.png +``` + + +## 4. Inference and Deployment + + +### 4.1 Python Inference + +Not support + + +### 4.2 C++ Inference + +Not support + + +### 4.3 Serving + +Not support + + +### 4.4 More + +Not support + + +## 5. FAQ + + +## Citation + +```bibtex +@inproceedings{qiao2020seed, + title={Seed: Semantics enhanced encoder-decoder framework for scene text recognition}, + author={Qiao, Zhi and Zhou, Yu and Yang, Dongbao and Zhou, Yucan and Wang, Weiping}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + pages={13528--13537}, + year={2020} +} +``` diff --git a/doc/doc_en/ppocr_introduction_en.md b/doc/doc_en/ppocr_introduction_en.md index 173b02006a128d2258246e473de088548745d7ff..c0b3c1c96a1de6a5953984c845f2fcb735f390f6 100644 --- a/doc/doc_en/ppocr_introduction_en.md +++ b/doc/doc_en/ppocr_introduction_en.md @@ -121,7 +121,7 @@ For more tutorials, including model training, model compression, deployment, etc | Model introduction | Model name | Recommended scene | Detection model | Direction classifier | Recognition model | | ------------------------------------------------------------ | ---------------------------- | ----------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | | Chinese and English ultra-lightweight PP-OCRv3 model(16.2M) | ch_PP-OCRv3_xx | Mobile & Server | [inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar) | [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) | [inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_train.tar) | -| English ultra-lightweight PP-OCRv3 model(13.4M) | en_PP-OCRv3_xx | Mobile & Server | [inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_distill_train.tar) | [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/ch_ppocr_mobile_v2.0_cls_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/ch_ppocr_mobile_v2.0_cls_train.tar) | [inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_train.tar) | +| English ultra-lightweight PP-OCRv3 model(13.4M) | en_PP-OCRv3_xx | Mobile & Server | [inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_distill_train.tar) | [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) | [inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_train.tar) | | Chinese and English ultra-lightweight PP-OCRv2 model(11.6M) | ch_PP-OCRv2_xx |Mobile & Server|[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_distill_train.tar)| [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_train.tar)| | Chinese and English ultra-lightweight PP-OCR model (9.4M) | ch_ppocr_mobile_v2.0_xx | Mobile & server |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar)|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar) | | Chinese and English general PP-OCR model (143.4M) | ch_ppocr_server_v2.0_xx | Server |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_train.tar) | diff --git a/doc/doc_en/recognition_en.md b/doc/doc_en/recognition_en.md index a6b255f9501a0c2d34c31162385bcf03ff578aa3..68ffc39121eee4dde4eecdd577200effdb3bdb05 100644 --- a/doc/doc_en/recognition_en.md +++ b/doc/doc_en/recognition_en.md @@ -28,7 +28,34 @@ To prepare datasets, refer to [ocr_datasets](./dataset/ocr_datasets.md) . -If you want to reproduce the paper SAR, you need to download extra dataset [SynthAdd](https://pan.baidu.com/share/init?surl=uV0LtoNmcxbO-0YA7Ch4dg), extraction code: 627x. Besides, icdar2013, icdar2015, cocotext, IIIT5k datasets are also used to train. For specific details, please refer to the paper SAR. +PaddleOCR provides label files for training the icdar2015 dataset, which can be downloaded in the following ways: + +``` +# Training set label +wget -P ./train_data/ic15_data https://paddleocr.bj.bcebos.com/dataset/rec_gt_train.txt +# Test Set Label +wget -P ./train_data/ic15_data https://paddleocr.bj.bcebos.com/dataset/rec_gt_test.txt +``` + +PaddleOCR also provides a data format conversion script, which can convert ICDAR official website label to a data format +supported by PaddleOCR. The data conversion tool is in `ppocr/utils/gen_label.py`, here is the training set as an example: + +``` +# convert the official gt to rec_gt_label.txt +python gen_label.py --mode="rec" --input_path="{path/of/origin/label}" --output_label="rec_gt_label.txt" +``` + +The data format is as follows, (a) is the original picture, (b) is the Ground Truth text file corresponding to each picture: + +![](../datasets/icdar_rec.png) + + +- Multilingual dataset + +The multi-language model training method is the same as the Chinese model. The training data set is 100w synthetic data. A small amount of fonts and test data can be downloaded using the following two methods. +* [Baidu Netdisk](https://pan.baidu.com/s/1bS_u207Rm7YbY33wOECKDA) ,Extraction code:frgi. +* [Google drive](https://drive.google.com/file/d/18cSWX7wXSy4G0tbKJ0d9PuIaiwRLHpjA/view) + ### 1.2 Dictionary @@ -101,11 +128,11 @@ First download the pretrain model, you can download the trained model to finetun ``` cd PaddleOCR/ -# Download the pre-trained model of MobileNetV3 -wget -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_bilstm_ctc_v2.0_train.tar +# Download the pre-trained model of en_PP-OCRv3 +wget -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_train.tar # Decompress model parameters cd pretrain_models -tar -xf rec_mv3_none_bilstm_ctc_v2.0_train.tar && rm -rf rec_mv3_none_bilstm_ctc_v2.0_train.tar +tar -xf en_PP-OCRv3_rec_train.tar && rm -rf en_PP-OCRv3_rec_train.tar ``` Start training: @@ -115,9 +142,10 @@ Start training: # Training icdar15 English data and The training log will be automatically saved as train.log under "{save_model_dir}" #specify the single card training(Long training time, not recommended) -python3 tools/train.py -c configs/rec/rec_icdar15_train.yml +python3 tools/train.py -c configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml -o Global.pretrained_model=en_PP-OCRv3_rec_train/best_accuracy + #specify the card number through --gpus -python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/rec/rec_icdar15_train.yml +python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml -o Global.pretrained_model=en_PP-OCRv3_rec_train/best_accuracy ``` @@ -125,31 +153,13 @@ PaddleOCR supports alternating training and evaluation. You can modify `eval_bat If the evaluation set is large, the test will be time-consuming. It is recommended to reduce the number of evaluations, or evaluate after training. -* Tip: You can use the `-c` parameter to select multiple model configurations under the `configs/rec/` path for training. The recognition algorithms supported by PaddleOCR are: - - -| Configuration file | Algorithm | backbone | trans | seq | pred | -| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | -| [rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml) | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | -| [rec_chinese_common_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yml) | CRNN | ResNet34_vd | None | BiLSTM | ctc | -| rec_chinese_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | -| rec_chinese_common_train.yml | CRNN | ResNet34_vd | None | BiLSTM | ctc | -| rec_icdar15_train.yml | CRNN | Mobilenet_v3 large 0.5 | None | BiLSTM | ctc | -| rec_mv3_none_bilstm_ctc.yml | CRNN | Mobilenet_v3 large 0.5 | None | BiLSTM | ctc | -| rec_mv3_none_none_ctc.yml | Rosetta | Mobilenet_v3 large 0.5 | None | None | ctc | -| rec_r34_vd_none_bilstm_ctc.yml | CRNN | Resnet34_vd | None | BiLSTM | ctc | -| rec_r34_vd_none_none_ctc.yml | Rosetta | Resnet34_vd | None | None | ctc | -| rec_mv3_tps_bilstm_att.yml | CRNN | Mobilenet_v3 | TPS | BiLSTM | att | -| rec_r34_vd_tps_bilstm_att.yml | CRNN | Resnet34_vd | TPS | BiLSTM | att | -| rec_r50fpn_vd_none_srn.yml | SRN | Resnet50_fpn_vd | None | rnn | srn | -| rec_mtb_nrtr.yml | NRTR | nrtr_mtb | None | transformer encoder | transformer decoder | -| rec_r31_sar.yml | SAR | ResNet31 | None | LSTM encoder | LSTM decoder | +* Tip: You can use the `-c` parameter to select multiple model configurations under the `configs/rec/` path for training. The recognition algorithms supported at [rec_algorithm](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_en/algorithm_overview.md): For training Chinese data, it is recommended to use -[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml). If you want to try the result of other algorithms on the Chinese data set, please refer to the following instructions to modify the configuration file: -co -Take `rec_chinese_lite_train_v2.0.yml` as an example: +[ch_PP-OCRv3_rec_distillation.yml](../../configs/rec/PP-OCRv3/ch_PP-OCRv3_rec_distillation.yml). If you want to try the result of other algorithms on the Chinese data set, please refer to the following instructions to modify the configuration file: + +Take `ch_PP-OCRv3_rec_distillation.yml` as an example: ``` Global: ... @@ -183,7 +193,7 @@ Train: ... - RecResizeImg: # Modify image_shape to fit long text - image_shape: [3, 32, 320] + image_shape: [3, 48, 320] ... loader: ... @@ -203,7 +213,7 @@ Eval: ... - RecResizeImg: # Modify image_shape to fit long text - image_shape: [3, 32, 320] + image_shape: [3, 48, 320] ... loader: # Eval batch_size for Single card @@ -380,11 +390,12 @@ Running on a DCU device requires setting the environment variable `export HIP_VI ### 3.1 Evaluation -The model parameters during training are saved in the `Global.save_model_dir` directory by default. When evaluating indicators, you need to set `Global.checkpoints` to point to the saved parameter file. The evaluation dataset can be set by modifying the `Eval.dataset.label_file_list` field in the `configs/rec/rec_icdar15_train.yml` file. +The model parameters during training are saved in the `Global.save_model_dir` directory by default. When evaluating indicators, you need to set `Global.checkpoints` to point to the saved parameter file. The evaluation dataset can be set by modifying the `Eval.dataset.label_file_list` field in the `configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml` file. + ``` # GPU evaluation, Global.checkpoints is the weight to be tested -python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy +python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml -o Global.checkpoints={path/to/weights}/best_accuracy ``` @@ -417,7 +428,7 @@ Among them, best_accuracy.* is the best model on the evaluation set; iter_epoch_ ``` # Predict English results -python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/en/word_1.jpg +python3 tools/infer_rec.py -c configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/en/word_1.png ``` @@ -436,7 +447,7 @@ The configuration file used for prediction must be consistent with the training. ``` # Predict Chinese results -python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/ch/word_1.jpg +python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/ch/word_1.jpg ``` Input image: @@ -467,7 +478,7 @@ The recognition model is converted to the inference model in the same way as the # Global.pretrained_model parameter Set the training model address to be converted without adding the file suffix .pdmodel, .pdopt or .pdparams. # Global.save_inference_dir Set the address where the converted model will be saved. -python3 tools/export_model.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model=./ch_lite/ch_ppocr_mobile_v2.0_rec_train/best_accuracy Global.save_inference_dir=./inference/rec_crnn/ +python3 tools/export_model.py -c configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml -o Global.pretrained_model=en_PP-OCRv3_rec_train/best_accuracy Global.save_inference_dir=./inference/en_PP-OCRv3_rec/ ``` If you have a model trained on your own dataset with a different dictionary file, please make sure that you modify the `character_dict_path` in the configuration file to your dictionary file path. @@ -475,7 +486,8 @@ If you have a model trained on your own dataset with a different dictionary file After the conversion is successful, there are three files in the model save directory: ``` -inference/rec_crnn/ + +inference/en_PP-OCRv3_rec/ ├── inference.pdiparams # The parameter file of recognition inference model ├── inference.pdiparams.info # The parameter information of recognition inference model, which can be ignored └── inference.pdmodel # The program file of recognition model diff --git a/ppocr/optimizer/optimizer.py b/ppocr/optimizer/optimizer.py index c450a3a3684eb44cdc758a2b27783b5a81945c38..dd8544e2e7d39be33a9096cad16c4d58eb58bcad 100644 --- a/ppocr/optimizer/optimizer.py +++ b/ppocr/optimizer/optimizer.py @@ -43,12 +43,15 @@ class Momentum(object): self.grad_clip = grad_clip def __call__(self, model): + train_params = [ + param for param in model.parameters() if param.trainable is True + ] opt = optim.Momentum( learning_rate=self.learning_rate, momentum=self.momentum, weight_decay=self.weight_decay, grad_clip=self.grad_clip, - parameters=model.parameters()) + parameters=train_params) return opt @@ -76,6 +79,9 @@ class Adam(object): self.lazy_mode = lazy_mode def __call__(self, model): + train_params = [ + param for param in model.parameters() if param.trainable is True + ] opt = optim.Adam( learning_rate=self.learning_rate, beta1=self.beta1, @@ -85,7 +91,7 @@ class Adam(object): grad_clip=self.grad_clip, name=self.name, lazy_mode=self.lazy_mode, - parameters=model.parameters()) + parameters=train_params) return opt @@ -118,6 +124,9 @@ class RMSProp(object): self.grad_clip = grad_clip def __call__(self, model): + train_params = [ + param for param in model.parameters() if param.trainable is True + ] opt = optim.RMSProp( learning_rate=self.learning_rate, momentum=self.momentum, @@ -125,7 +134,7 @@ class RMSProp(object): epsilon=self.epsilon, weight_decay=self.weight_decay, grad_clip=self.grad_clip, - parameters=model.parameters()) + parameters=train_params) return opt @@ -149,6 +158,9 @@ class Adadelta(object): self.name = name def __call__(self, model): + train_params = [ + param for param in model.parameters() if param.trainable is True + ] opt = optim.Adadelta( learning_rate=self.learning_rate, epsilon=self.epsilon, @@ -156,7 +168,7 @@ class Adadelta(object): weight_decay=self.weight_decay, grad_clip=self.grad_clip, name=self.name, - parameters=model.parameters()) + parameters=train_params) return opt @@ -190,17 +202,20 @@ class AdamW(object): self.one_dim_param_no_weight_decay = one_dim_param_no_weight_decay def __call__(self, model): - parameters = model.parameters() + parameters = [ + param for param in model.parameters() if param.trainable is True + ] self.no_weight_decay_param_name_list = [ - p.name for n, p in model.named_parameters() if any(nd in n for nd in self.no_weight_decay_name_list) + p.name for n, p in model.named_parameters() + if any(nd in n for nd in self.no_weight_decay_name_list) ] if self.one_dim_param_no_weight_decay: self.no_weight_decay_param_name_list += [ - p.name for n, p in model.named_parameters() if len(p.shape) == 1 + p.name for n, p in model.named_parameters() if len(p.shape) == 1 ] - + opt = optim.AdamW( learning_rate=self.learning_rate, beta1=self.beta1, @@ -216,4 +231,4 @@ class AdamW(object): return opt def _apply_decay_param_fun(self, name): - return name not in self.no_weight_decay_param_name_list \ No newline at end of file + return name not in self.no_weight_decay_param_name_list diff --git a/ppocr/utils/utility.py b/ppocr/utils/utility.py index 48a84cfdf91555523da9ff882ee463d0e3d2d9b7..4a25ff8b2fa182faaf4f4ce8909c9ec2e9b55ccc 100755 --- a/ppocr/utils/utility.py +++ b/ppocr/utils/utility.py @@ -60,7 +60,7 @@ def get_image_file_list(img_file): raise Exception("not found any img file in {}".format(img_file)) img_end = {'jpg', 'bmp', 'png', 'jpeg', 'rgb', 'tif', 'tiff', 'gif'} - if os.path.isfile(img_file) and _check_image_file(file_path): + if os.path.isfile(img_file) and _check_image_file(img_file): imgs_lists.append(img_file) elif os.path.isdir(img_file): for single_file in os.listdir(img_file): diff --git a/tools/eval.py b/tools/eval.py index 7fd4fa7ada7b1550bcca8766f5acb9b4d4ed2049..cab28334396c54f1526f830044de0772b5402a11 100755 --- a/tools/eval.py +++ b/tools/eval.py @@ -20,8 +20,8 @@ import os import sys __dir__ = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(__dir__) -sys.path.append(os.path.abspath(os.path.join(__dir__, '..'))) +sys.path.insert(0, __dir__) +sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '..'))) from ppocr.data import build_dataloader from ppocr.modeling.architectures import build_model diff --git a/tools/infer/predict_system.py b/tools/infer/predict_system.py index 63b635c1114d079478e34682d7c94c433bd21ffa..534f08fbcb3b90eea6a371d9f0ad128e276874c4 100755 --- a/tools/infer/predict_system.py +++ b/tools/infer/predict_system.py @@ -195,7 +195,7 @@ def main(args): text_sys.text_detector.autolog.report() text_sys.text_recognizer.autolog.report() - with open(os.path.join(draw_img_save_dir, "system_results.txt"), 'w') as f: + with open(os.path.join(draw_img_save_dir, "system_results.txt"), 'w', encoding='utf-8') as f: f.writelines(save_results)