diff --git a/.gitignore b/.gitignore index 1a2dd675e961f1804fa58e2e2e49118536b84ce9..9eecb4f1056fc040d4c9579d593bee2cc4013837 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,7 @@ output/ *.log .clang-format .clang_format.hook + +build/ +dist/ +paddleocr.egg-info/ \ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000000000000000000000000000000000000..388882df0c3701780dd6371bc91887356a7bca40 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,8 @@ +include LICENSE.txt +include README.md + +recursive-include ppocr/utils *.txt utility.py character.py check.py +recursive-include ppocr/data/det *.py +recursive-include ppocr/postprocess *.py +recursive-include ppocr/postprocess/lanms *.* +recursive-include tools/infer *.py diff --git a/README.md b/README.md index 08a27d8e7e2dd0a1ddcc774b0dd19189fcfb248b..a92100720acf4a5492779e741d519cccd038a223 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ English | [简体中文](README_cn.md) PaddleOCR aims to create rich, leading, and practical OCR tools that help users train better models and apply them into practice. 
**Recent updates** +- 2020.8.24 Support the use of PaddleOCR through whl package installation, please refer to [PaddleOCR Package](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_en/whl_en.md) - 2020.8.16, Release text detection algorithm [SAST](https://arxiv.org/abs/1908.05498) and text recognition algorithm [SRN](https://arxiv.org/abs/2003.12294) - 2020.7.23, Release the playback and PPT of live class on BiliBili station, PaddleOCR Introduction, [address](https://aistudio.baidu.com/aistudio/course/introduce/1519) - 2020.7.15, Add mobile App demo , support both iOS and Android ( based on easyedge and Paddle Lite) diff --git a/README_cn.md b/README_cn.md index 10bcbc505ac25d856c60ec16ad758be7011af751..8bdcce627efda890c147583be4a983ec6ff7df1a 100644 --- a/README_cn.md +++ b/README_cn.md @@ -4,11 +4,11 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力使用者训练出更好的模型,并应用落地。 **近期更新** +- 2020.8.24 支持通过whl包安装使用PaddleOCR,具体参考[Paddleocr Package使用说明](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/whl.md) +- 2020.8.21 更新8月18日B站直播课回放和PPT,课节2,易学易用的OCR工具大礼包,[获取地址](https://aistudio.baidu.com/aistudio/education/group/info/1519) - 2020.8.16 开源文本检测算法[SAST](https://arxiv.org/abs/1908.05498)和文本识别算法[SRN](https://arxiv.org/abs/2003.12294) -- 2020.7.23 发布7月21日B站直播课回放和PPT,PaddleOCR开源大礼包全面解读,[获取地址](https://aistudio.baidu.com/aistudio/course/introduce/1519) +- 2020.7.23 发布7月21日B站直播课回放和PPT,课节1,PaddleOCR开源大礼包全面解读,[获取地址](https://aistudio.baidu.com/aistudio/course/introduce/1519) - 2020.7.15 添加基于EasyEdge和Paddle-Lite的移动端DEMO,支持iOS和Android系统 -- 2020.7.15 完善预测部署,添加基于C++预测引擎推理、服务化部署和端侧部署方案,以及超轻量级中文OCR模型预测耗时Benchmark -- 2020.7.15 整理OCR相关数据集、常用数据标注以及合成工具 - [more](./doc/doc_ch/update.md) diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7d94f66be072067172d56da13d8bb27d9aeac431 --- /dev/null +++ b/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__all__ = ['PaddleOCR', 'draw_ocr'] +from .paddleocr import PaddleOCR +from .tools.infer.utility import draw_ocr diff --git a/deploy/cpp_infer/include/config.h b/deploy/cpp_infer/include/config.h index c5257d8ade72bfc7a68c5ca5c2c78fd5b6c1983c..8db693b121f1f91e30672de53e9b969babb49f8b 100644 --- a/deploy/cpp_infer/include/config.h +++ b/deploy/cpp_infer/include/config.h @@ -41,6 +41,8 @@ public: this->use_mkldnn = bool(stoi(config_map_["use_mkldnn"])); + this->use_zero_copy_run = bool(stoi(config_map_["use_zero_copy_run"])); + this->max_side_len = stoi(config_map_["max_side_len"]); this->det_db_thresh = stod(config_map_["det_db_thresh"]); @@ -68,6 +70,8 @@ public: bool use_mkldnn = false; + bool use_zero_copy_run = false; + int max_side_len = 960; double det_db_thresh = 0.3; diff --git a/deploy/cpp_infer/include/ocr_det.h b/deploy/cpp_infer/include/ocr_det.h index ed2667eecfea9a09d7da77df37f43a7b9e9bb349..0308d07f3bac67a275452500184e0959b16e8003 100644 --- a/deploy/cpp_infer/include/ocr_det.h +++ b/deploy/cpp_infer/include/ocr_det.h @@ -39,8 +39,8 @@ public: explicit DBDetector(const std::string &model_dir, const bool &use_gpu, const int &gpu_id, const int &gpu_mem, const int &cpu_math_library_num_threads, - const bool &use_mkldnn, const int &max_side_len, - const double &det_db_thresh, + const bool &use_mkldnn, const bool &use_zero_copy_run, + const int &max_side_len, const double &det_db_thresh, const double 
&det_db_box_thresh, const double &det_db_unclip_ratio, const bool &visualize) { @@ -49,6 +49,7 @@ public: this->gpu_mem_ = gpu_mem; this->cpu_math_library_num_threads_ = cpu_math_library_num_threads; this->use_mkldnn_ = use_mkldnn; + this->use_zero_copy_run_ = use_zero_copy_run; this->max_side_len_ = max_side_len; @@ -75,6 +76,7 @@ private: int gpu_mem_ = 4000; int cpu_math_library_num_threads_ = 4; bool use_mkldnn_ = false; + bool use_zero_copy_run_ = false; int max_side_len_ = 960; diff --git a/deploy/cpp_infer/include/ocr_rec.h b/deploy/cpp_infer/include/ocr_rec.h index 471aeb58758d1de1d48b4da1067c8532457ddc92..520f0f2879dcec6b30861755b119227efa11b29c 100644 --- a/deploy/cpp_infer/include/ocr_rec.h +++ b/deploy/cpp_infer/include/ocr_rec.h @@ -38,12 +38,14 @@ public: explicit CRNNRecognizer(const std::string &model_dir, const bool &use_gpu, const int &gpu_id, const int &gpu_mem, const int &cpu_math_library_num_threads, - const bool &use_mkldnn, const string &label_path) { + const bool &use_mkldnn, const bool &use_zero_copy_run, + const string &label_path) { this->use_gpu_ = use_gpu; this->gpu_id_ = gpu_id; this->gpu_mem_ = gpu_mem; this->cpu_math_library_num_threads_ = cpu_math_library_num_threads; this->use_mkldnn_ = use_mkldnn; + this->use_zero_copy_run_ = use_zero_copy_run; this->label_list_ = Utility::ReadDict(label_path); this->label_list_.push_back(" "); @@ -64,6 +66,7 @@ private: int gpu_mem_ = 4000; int cpu_math_library_num_threads_ = 4; bool use_mkldnn_ = false; + bool use_zero_copy_run_ = false; std::vector label_list_; diff --git a/deploy/cpp_infer/src/main.cpp b/deploy/cpp_infer/src/main.cpp index 27c98e5b84367de09f95c901d168c2d318902c43..1dd33b301e8b7da1df2a6325cedb10b8156c43d2 100644 --- a/deploy/cpp_infer/src/main.cpp +++ b/deploy/cpp_infer/src/main.cpp @@ -48,14 +48,15 @@ int main(int argc, char **argv) { cv::Mat srcimg = cv::imread(img_path, cv::IMREAD_COLOR); - DBDetector det(config.det_model_dir, config.use_gpu, config.gpu_id, - config.gpu_mem, 
config.cpu_math_library_num_threads, - config.use_mkldnn, config.max_side_len, config.det_db_thresh, - config.det_db_box_thresh, config.det_db_unclip_ratio, - config.visualize); + DBDetector det( + config.det_model_dir, config.use_gpu, config.gpu_id, config.gpu_mem, + config.cpu_math_library_num_threads, config.use_mkldnn, + config.use_zero_copy_run, config.max_side_len, config.det_db_thresh, + config.det_db_box_thresh, config.det_db_unclip_ratio, config.visualize); CRNNRecognizer rec(config.rec_model_dir, config.use_gpu, config.gpu_id, config.gpu_mem, config.cpu_math_library_num_threads, - config.use_mkldnn, config.char_list_file); + config.use_mkldnn, config.use_zero_copy_run, + config.char_list_file); auto start = std::chrono::system_clock::now(); std::vector>> boxes; diff --git a/deploy/cpp_infer/src/ocr_det.cpp b/deploy/cpp_infer/src/ocr_det.cpp index c87b653ceab011ef0593e7fb87358325deaf882b..56fbace8cc6fa27f8172bed248573f15d0c98dac 100644 --- a/deploy/cpp_infer/src/ocr_det.cpp +++ b/deploy/cpp_infer/src/ocr_det.cpp @@ -31,7 +31,8 @@ void DBDetector::LoadModel(const std::string &model_dir) { } // false for zero copy tensor - config.SwitchUseFeedFetchOps(false); + // true for commom tensor + config.SwitchUseFeedFetchOps(!this->use_zero_copy_run_); // true for multiple input config.SwitchSpecifyInputNames(true); @@ -59,12 +60,22 @@ void DBDetector::Run(cv::Mat &img, std::vector input(1 * 3 * resize_img.rows * resize_img.cols, 0.0f); this->permute_op_.Run(&resize_img, input.data()); - auto input_names = this->predictor_->GetInputNames(); - auto input_t = this->predictor_->GetInputTensor(input_names[0]); - input_t->Reshape({1, 3, resize_img.rows, resize_img.cols}); - input_t->copy_from_cpu(input.data()); - - this->predictor_->ZeroCopyRun(); + // Inference. 
+ if (this->use_zero_copy_run_) { + auto input_names = this->predictor_->GetInputNames(); + auto input_t = this->predictor_->GetInputTensor(input_names[0]); + input_t->Reshape({1, 3, resize_img.rows, resize_img.cols}); + input_t->copy_from_cpu(input.data()); + this->predictor_->ZeroCopyRun(); + } else { + paddle::PaddleTensor input_t; + input_t.shape = {1, 3, resize_img.rows, resize_img.cols}; + input_t.data = + paddle::PaddleBuf(input.data(), input.size() * sizeof(float)); + input_t.dtype = PaddleDType::FLOAT32; + std::vector outputs; + this->predictor_->Run({input_t}, &outputs, 1); + } std::vector out_data; auto output_names = this->predictor_->GetOutputNames(); diff --git a/deploy/cpp_infer/src/ocr_rec.cpp b/deploy/cpp_infer/src/ocr_rec.cpp index bbd7b9b2269ca776c2433d502893986ebda809c3..a3486db46f6eb6ad0df49619744924e6ef70dd01 100644 --- a/deploy/cpp_infer/src/ocr_rec.cpp +++ b/deploy/cpp_infer/src/ocr_rec.cpp @@ -39,18 +39,29 @@ void CRNNRecognizer::Run(std::vector>> boxes, this->permute_op_.Run(&resize_img, input.data()); - auto input_names = this->predictor_->GetInputNames(); - auto input_t = this->predictor_->GetInputTensor(input_names[0]); - input_t->Reshape({1, 3, resize_img.rows, resize_img.cols}); - input_t->copy_from_cpu(input.data()); - - this->predictor_->ZeroCopyRun(); + // Inference. 
+ if (this->use_zero_copy_run_) { + auto input_names = this->predictor_->GetInputNames(); + auto input_t = this->predictor_->GetInputTensor(input_names[0]); + input_t->Reshape({1, 3, resize_img.rows, resize_img.cols}); + input_t->copy_from_cpu(input.data()); + this->predictor_->ZeroCopyRun(); + } else { + paddle::PaddleTensor input_t; + input_t.shape = {1, 3, resize_img.rows, resize_img.cols}; + input_t.data = + paddle::PaddleBuf(input.data(), input.size() * sizeof(float)); + input_t.dtype = PaddleDType::FLOAT32; + std::vector outputs; + this->predictor_->Run({input_t}, &outputs, 1); + } std::vector rec_idx; auto output_names = this->predictor_->GetOutputNames(); auto output_t = this->predictor_->GetOutputTensor(output_names[0]); auto rec_idx_lod = output_t->lod(); auto shape_out = output_t->shape(); + int out_num = std::accumulate(shape_out.begin(), shape_out.end(), 1, std::multiplies()); @@ -120,7 +131,8 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) { } // false for zero copy tensor - config.SwitchUseFeedFetchOps(false); + // true for commom tensor + config.SwitchUseFeedFetchOps(!this->use_zero_copy_run_); // true for multiple input config.SwitchSpecifyInputNames(true); diff --git a/deploy/cpp_infer/tools/config.txt b/deploy/cpp_infer/tools/config.txt index a049fc7d9dfaac88e69581b7c0aad8af8a9efaab..40beea3a2e6f0260a42202d6411ffb10907bf871 100644 --- a/deploy/cpp_infer/tools/config.txt +++ b/deploy/cpp_infer/tools/config.txt @@ -4,6 +4,7 @@ gpu_id 0 gpu_mem 4000 cpu_math_library_num_threads 10 use_mkldnn 0 +use_zero_copy_run 1 # det config max_side_len 960 diff --git a/doc/doc_ch/quickstart.md b/doc/doc_ch/quickstart.md index fead57f3d12395c6b4a2417fe8a23b1e00a4579b..701b50ed36fc69a6285550e6f53f6f3a09a1a63d 100644 --- a/doc/doc_ch/quickstart.md +++ b/doc/doc_ch/quickstart.md @@ -5,6 +5,8 @@ 请先参考[快速安装](./installation.md)配置PaddleOCR运行环境。 +*注意:也可以通过 whl 包安装使用PaddleOCR,具体参考[Paddleocr 
Package使用说明](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/whl.md)。* + ## 2.inference模型下载 |模型名称|模型简介|检测模型地址|识别模型地址|支持空格的识别模型地址| diff --git a/doc/doc_ch/update.md b/doc/doc_ch/update.md index 1cd7788511c29df8934efe2c1462aaca68c9b92b..23a47df580da065af0ab62aca2c50e507f564f05 100644 --- a/doc/doc_ch/update.md +++ b/doc/doc_ch/update.md @@ -1,6 +1,8 @@ # 更新 +- 2020.8.24 支持通过whl包安装使用PaddleOCR,具体参考[Paddleocr Package使用说明](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/whl.md) +- 2020.8.21 更新8月18日B站直播课回放和PPT,课节2,易学易用的OCR工具大礼包,[获取地址](https://aistudio.baidu.com/aistudio/education/group/info/1519) - 2020.8.16 开源文本检测算法[SAST](https://arxiv.org/abs/1908.05498)和文本识别算法[SRN](https://arxiv.org/abs/2003.12294) -- 2020.7.23 发布7月21日B站直播课回放和PPT,PaddleOCR开源大礼包全面解读,[获取地址](https://aistudio.baidu.com/aistudio/course/introduce/1519) +- 2020.7.23 发布7月21日B站直播课回放和PPT,课节1,PaddleOCR开源大礼包全面解读,[获取地址](https://aistudio.baidu.com/aistudio/course/introduce/1519) - 2020.7.15 添加基于EasyEdge和Paddle-Lite的移动端DEMO,支持iOS和Android系统 - 2020.7.15 完善预测部署,添加基于C++预测引擎推理、服务化部署和端侧部署方案,以及超轻量级中文OCR模型预测耗时Benchmark - 2020.7.15 整理OCR相关数据集、常用数据标注以及合成工具 diff --git a/doc/doc_ch/whl.md b/doc/doc_ch/whl.md new file mode 100644 index 0000000000000000000000000000000000000000..280cc2f62ec40ec2228128c9ddd95088904f647b --- /dev/null +++ b/doc/doc_ch/whl.md @@ -0,0 +1,194 @@ +# paddleocr package使用说明 + +## 快速上手 + +### 安装whl包 + +pip安装 +```bash +pip install paddleocr +``` + +本地构建并安装 +```bash +python setup.py bdist_wheel +pip install dist/paddleocr-0.0.3-py3-none-any.whl +``` +### 1. 
代码使用 + +* 检测+识别全流程 +```python +from paddleocr import PaddleOCR, draw_ocr +ocr = PaddleOCR() # need to run only once to download and load model into memory +img_path = 'PaddleOCR/doc/imgs/11.jpg' +result = ocr.ocr(img_path) +for line in result: + print(line) + +# 显示结果 +from PIL import Image +image = Image.open(img_path).convert('RGB') +boxes = [line[0] for line in result] +txts = [line[1][0] for line in result] +scores = [line[1][1] for line in result] +im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc/simfang.ttf') +im_show = Image.fromarray(im_show) +im_show.save('result.jpg') +``` +结果是一个list,每个item包含了文本框,文字和识别置信度 +```bash +[[[24.0, 36.0], [304.0, 34.0], [304.0, 72.0], [24.0, 74.0]], ['纯臻营养护发素', 0.964739]] +[[[24.0, 80.0], [172.0, 80.0], [172.0, 104.0], [24.0, 104.0]], ['产品信息/参数', 0.98069626]] +[[[24.0, 109.0], [333.0, 109.0], [333.0, 136.0], [24.0, 136.0]], ['(45元/每公斤,100公斤起订)', 0.9676722]] +...... +``` +结果可视化 + +
+ +
+ +* 单独执行检测 +```python +from paddleocr import PaddleOCR, draw_ocr +ocr = PaddleOCR() # need to run only once to download and load model into memory +img_path = 'PaddleOCR/doc/imgs/11.jpg' +result = ocr.ocr(img_path,rec=False) +for line in result: + print(line) + +# 显示结果 +from PIL import Image + +image = Image.open(img_path).convert('RGB') +im_show = draw_ocr(image, result, txts=None, scores=None, font_path='/path/to/PaddleOCR/doc/simfang.ttf') +im_show = Image.fromarray(im_show) +im_show.save('result.jpg') +``` +结果是一个list,每个item只包含文本框 +```bash +[[26.0, 457.0], [137.0, 457.0], [137.0, 477.0], [26.0, 477.0]] +[[25.0, 425.0], [372.0, 425.0], [372.0, 448.0], [25.0, 448.0]] +[[128.0, 397.0], [273.0, 397.0], [273.0, 414.0], [128.0, 414.0]] +...... +``` +结果可视化 + + +
+ +
+ +* 单独执行识别 +```python +from paddleocr import PaddleOCR +ocr = PaddleOCR() # need to run only once to download and load model into memory +img_path = 'PaddleOCR/doc/imgs_words/ch/word_1.jpg' +result = ocr.ocr(img_path,det=False) +for line in result: + print(line) +``` +结果是一个list,每个item只包含识别结果和识别置信度 +```bash +['韩国小馆', 0.9907421] +``` + +### 通过命令行使用 + +查看帮助信息 +```bash +paddleocr -h +``` + +* 检测+识别全流程 +```bash +paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg +``` +结果是一个list,每个item包含了文本框,文字和识别置信度 +```bash +[[[24.0, 36.0], [304.0, 34.0], [304.0, 72.0], [24.0, 74.0]], ['纯臻营养护发素', 0.964739]] +[[[24.0, 80.0], [172.0, 80.0], [172.0, 104.0], [24.0, 104.0]], ['产品信息/参数', 0.98069626]] +[[[24.0, 109.0], [333.0, 109.0], [333.0, 136.0], [24.0, 136.0]], ['(45元/每公斤,100公斤起订)', 0.9676722]] +...... +``` + +* 单独执行检测 +```bash +paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --rec false +``` +结果是一个list,每个item只包含文本框 +```bash +[[26.0, 457.0], [137.0, 457.0], [137.0, 477.0], [26.0, 477.0]] +[[25.0, 425.0], [372.0, 425.0], [372.0, 448.0], [25.0, 448.0]] +[[128.0, 397.0], [273.0, 397.0], [273.0, 414.0], [128.0, 414.0]] +...... 
+``` + +* 单独执行识别 +```bash +paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --det false +``` + +结果是一个list,每个item只包含识别结果和识别置信度 +```bash +['韩国小馆', 0.9907421] +``` + +## 自定义模型 +当内置模型无法满足需求时,需要使用到自己训练的模型。 +首先,参照[inference.md](./inference.md) 第一节转换将检测和识别模型转换为inference模型,然后按照如下方式使用 + +### 代码使用 +```python +from paddleocr import PaddleOCR, draw_ocr +# 检测模型和识别模型路径下必须含有model和params文件 +ocr = PaddleOCR(det_model_dir='{your_det_model_dir}',rec_model_dir='{your_rec_model_dir}') +img_path = 'PaddleOCR/doc/imgs/11.jpg' +result = ocr.ocr(img_path) +for line in result: + print(line) + +# 显示结果 +from PIL import Image +image = Image.open(img_path).convert('RGB') +boxes = [line[0] for line in result] +txts = [line[1][0] for line in result] +scores = [line[1][1] for line in result] +im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc/simfang.ttf') +im_show = Image.fromarray(im_show) +im_show.save('result.jpg') +``` + +### 通过命令行使用 + +```bash +paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_dir} --rec_model_dir {your_rec_model_dir} +``` + +## 参数说明 + +| 字段 | 说明 | 默认值 | +|-------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------| +| use_gpu | 是否使用GPU | TRUE | +| gpu_mem | 初始化占用的GPU内存大小 | 8000M | +| image_dir | 通过命令行调用时执行预测的图片或文件夹路径 | | +| det_algorithm | 使用的检测算法类型 | DB | +| det_model_dir | 检测模型所在文件夹。传参方式有两种,1. 
None: 自动下载内置模型到 `~/.paddleocr/det`;2.自己转换好的inference模型路径,模型路径下必须包含model和params文件 | None | +| det_max_side_len | 检测算法前向时图片长边的最大尺寸,当长边超出这个值时会将长边resize到这个大小,短边等比例缩放 | 960 | +| det_db_thresh | DB模型输出预测图的二值化阈值 | 0.3 | +| det_db_box_thresh | DB模型输出框的阈值,低于此值的预测框会被丢弃 | 0.5 | +| det_db_unclip_ratio | DB模型输出框扩大的比例 | 2 | +| det_east_score_thresh | EAST模型输出预测图的二值化阈值 | 0.8 | +| det_east_cover_thresh | EAST模型输出框的阈值,低于此值的预测框会被丢弃 | 0.1 | +| det_east_nms_thresh | EAST模型输出框NMS的阈值 | 0.2 | +| rec_algorithm | 使用的识别算法类型 | CRNN | +| rec_model_dir | 识别模型所在文件夹。传参方式有两种,1. None: 自动下载内置模型到 `~/.paddleocr/rec`;2.自己转换好的inference模型路径,模型路径下必须包含model和params文件 | None | +| rec_image_shape | 识别算法的输入图片尺寸 | "3,32,320" | +| rec_char_type | 识别算法的字符类型,中文(ch)或英文(en) | ch | +| rec_batch_num | 进行识别时,同时前向的图片数 | 30 | +| max_text_length | 识别算法能识别的最大文字长度 | 25 | +| rec_char_dict_path | 识别模型字典路径,当rec_model_dir使用方式2传参时需要修改为自己的字典路径 | ./ppocr/utils/ppocr_keys_v1.txt | +| use_space_char | 是否识别空格 | TRUE | +| enable_mkldnn | 是否启用mkldnn | FALSE | +| det | 前向时是否启动检测 | TRUE | +| rec | 前向时是否启动识别 | TRUE | diff --git a/doc/doc_en/quickstart_en.md b/doc/doc_en/quickstart_en.md index bf22f22fee75a028e5f5effd6f7e36b08c194222..d1fa1683fcfea14be477c910fb2a8dc7709c5d36 100644 --- a/doc/doc_en/quickstart_en.md +++ b/doc/doc_en/quickstart_en.md @@ -5,6 +5,7 @@ Please refer to [quick installation](./installation_en.md) to configure the PaddleOCR operating environment. 
+*Note: Support the use of PaddleOCR through whl package installation, please refer to [PaddleOCR Package](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_en/whl_en.md).* ## 2.inference models diff --git a/doc/doc_en/update_en.md b/doc/doc_en/update_en.md index dc839d8955afcfa2d1efbee5e02d35f384d6c627..ca050370989ba3cded8c7211b7ab297ebe239c5f 100644 --- a/doc/doc_en/update_en.md +++ b/doc/doc_en/update_en.md @@ -1,4 +1,5 @@ # RECENT UPDATES +- 2020.8.24 Support the use of PaddleOCR through whl package installation, please refer to [PaddleOCR Package](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_en/whl_en.md) - 2020.8.16 Release text detection algorithm [SAST](https://arxiv.org/abs/1908.05498) and text recognition algorithm [SRN](https://arxiv.org/abs/2003.12294) - 2020.7.23, Release the playback and PPT of live class on BiliBili station, PaddleOCR Introduction, [address](https://aistudio.baidu.com/aistudio/course/introduce/1519) - 2020.7.15, Add mobile App demo , support both iOS and Android ( based on easyedge and Paddle Lite) diff --git a/doc/doc_en/whl_en.md b/doc/doc_en/whl_en.md new file mode 100644 index 0000000000000000000000000000000000000000..73ab78c111fd4c59a7866ba061877cc91100fb93 --- /dev/null +++ b/doc/doc_en/whl_en.md @@ -0,0 +1,199 @@ +# paddleocr package + +## Get started quickly +### install package +install by pypi +```bash +pip install paddleocr +``` + +build own whl package and install +```bash +python setup.py bdist_wheel +pip install dist/paddleocr-0.0.3-py3-none-any.whl +``` +### 1. 
Use by code + +* detection and recognition +```python +from paddleocr import PaddleOCR,draw_ocr +ocr = PaddleOCR() # need to run only once to download and load model into memory +img_path = 'PaddleOCR/doc/imgs_en/img_12.jpg' +result = ocr.ocr(img_path) +for line in result: + print(line) + +# draw result +from PIL import Image +image = Image.open(img_path).convert('RGB') +boxes = [line[0] for line in result] +txts = [line[1][0] for line in result] +scores = [line[1][1] for line in result] +im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc/simfang.ttf') +im_show = Image.fromarray(im_show) +im_show.save('result.jpg') +``` + +Output will be a list, each item contains bounding box, text and recognition confidence +```bash +[[[442.0, 173.0], [1169.0, 173.0], [1169.0, 225.0], [442.0, 225.0]], ['ACKNOWLEDGEMENTS', 0.99283075]] +[[[393.0, 340.0], [1207.0, 342.0], [1207.0, 389.0], [393.0, 387.0]], ['We would like to thank all the designers and', 0.9357758]] +[[[399.0, 398.0], [1204.0, 398.0], [1204.0, 433.0], [399.0, 433.0]], ['contributors whohave been involved in the', 0.9592447]] +...... +``` + +Visualization of results + +
+ +
+ +* only detection +```python +from paddleocr import PaddleOCR,draw_ocr +ocr = PaddleOCR() # need to run only once to download and load model into memory +img_path = 'PaddleOCR/doc/imgs_en/img_12.jpg' +result = ocr.ocr(img_path,rec=False) +for line in result: + print(line) + +# draw result +from PIL import Image + +image = Image.open(img_path).convert('RGB') +im_show = draw_ocr(image, result, txts=None, scores=None, font_path='/path/to/PaddleOCR/doc/simfang.ttf') +im_show = Image.fromarray(im_show) +im_show.save('result.jpg') +``` + +Output will be a list, each item only contains bounding box +```bash +[[756.0, 812.0], [805.0, 812.0], [805.0, 830.0], [756.0, 830.0]] +[[820.0, 803.0], [1085.0, 801.0], [1085.0, 836.0], [820.0, 838.0]] +[[393.0, 801.0], [715.0, 805.0], [715.0, 839.0], [393.0, 836.0]] +...... +``` + +Visualization of results + +
+ +
+ +* only recognition +```python +from paddleocr import PaddleOCR +ocr = PaddleOCR() # need to run only once to load model into memory +img_path = 'PaddleOCR/doc/imgs_words_en/word_10.png' +result = ocr.ocr(img_path,det=False) +for line in result: + print(line) +``` + +Output will be a list, each item contains text and recognition confidence +```bash +['PAIN', 0.990372] +``` + +### Use by command line + +show help information +```bash +paddleocr -h +``` + +* detection and recognition +```bash +paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg +``` + +Output will be a list, each item contains bounding box, text and recognition confidence +```bash +[[[442.0, 173.0], [1169.0, 173.0], [1169.0, 225.0], [442.0, 225.0]], ['ACKNOWLEDGEMENTS', 0.99283075]] +[[[393.0, 340.0], [1207.0, 342.0], [1207.0, 389.0], [393.0, 387.0]], ['We would like to thank all the designers and', 0.9357758]] +[[[399.0, 398.0], [1204.0, 398.0], [1204.0, 433.0], [399.0, 433.0]], ['contributors whohave been involved in the', 0.9592447]] +...... +``` + +* only detection +```bash +paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg --rec false +``` + +Output will be a list, each item only contains bounding box +```bash +[[756.0, 812.0], [805.0, 812.0], [805.0, 830.0], [756.0, 830.0]] +[[820.0, 803.0], [1085.0, 801.0], [1085.0, 836.0], [820.0, 838.0]] +[[393.0, 801.0], [715.0, 805.0], [715.0, 839.0], [393.0, 836.0]] +...... +``` + +* only recognition +```bash +paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --det false +``` + +Output will be a list, each item contains text and recognition confidence +```bash +['PAIN', 0.990372] +``` + +## Use custom model +When the built-in model cannot meet the needs, you need to use your own trained model. +First, refer to the first section of [inference_en.md](./inference_en.md) to convert your det and rec model to inference model, and then use it as follows + +### 1. 
Use by code + +```python +from paddleocr import PaddleOCR,draw_ocr +# The path of detection and recognition model must contain model and params files +ocr = PaddleOCR(det_model_dir='{your_det_model_dir}',rec_model_dir='{your_rec_model_dir}') +img_path = 'PaddleOCR/doc/imgs_en/img_12.jpg' +result = ocr.ocr(img_path) +for line in result: + print(line) + +# draw result +from PIL import Image +image = Image.open(img_path).convert('RGB') +boxes = [line[0] for line in result] +txts = [line[1][0] for line in result] +scores = [line[1][1] for line in result] +im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc/simfang.ttf') +im_show = Image.fromarray(im_show) +im_show.save('result.jpg') +``` + +### Use by command line + +```bash +paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_dir} --rec_model_dir {your_rec_model_dir} +``` + +## Parameter Description + +| Parameter | Description | Default value | +|-------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------| +| use_gpu | use GPU or not | TRUE | +| gpu_mem | GPU memory size used for initialization | 8000M | +| image_dir | The images path or folder path for predicting when used by the command line | | +| det_algorithm | Type of detection algorithm selected | DB | +| det_model_dir | the text detection inference model folder. There are two ways to transfer parameters, 1. None: Automatically download the built-in model to `~/.paddleocr/det`; 2. The path of the inference model converted by yourself, the model and params files must be included in the model path | None | +| det_max_side_len | The maximum size of the long side of the image. 
When the long side exceeds this value, the long side will be resized to this size, and the short side will be scaled proportionally | 960 | +| det_db_thresh | Binarization threshold value of DB output map | 0.3 | +| det_db_box_thresh | The threshold value of the DB output box. Boxes score lower than this value will be discarded | 0.5 | +| det_db_unclip_ratio | The expanded ratio of DB output box | 2 | +| det_east_score_thresh | Binarization threshold value of EAST output map | 0.8 | +| det_east_cover_thresh | The threshold value of the EAST output box. Boxes score lower than this value will be discarded | 0.1 | +| det_east_nms_thresh | The NMS threshold value of EAST model output box | 0.2 | +| rec_algorithm | Type of recognition algorithm selected | CRNN | +| rec_model_dir | the text recognition inference model folder. There are two ways to transfer parameters, 1. None: Automatically download the built-in model to `~/.paddleocr/rec`; 2. The path of the inference model converted by yourself, the model and params files must be included in the model path | None | +| rec_image_shape | image shape of recognition algorithm | "3,32,320" | +| rec_char_type | Character type of recognition algorithm, Chinese (ch) or English (en) | ch | +| rec_batch_num | When performing recognition, the batchsize of forward images | 30 | +| max_text_length | The maximum text length that the recognition algorithm can recognize | 25 | +| rec_char_dict_path | the alphabet path which needs to be modified to your own path when `rec_model_dir` use mode 2 | ./ppocr/utils/ppocr_keys_v1.txt | +| use_space_char | Whether to recognize spaces | TRUE | +| enable_mkldnn | Whether to enable mkldnn | FALSE | +| det | Enable detection when `ppocr.ocr` func exec | TRUE | +| rec | Enable recognition when `ppocr.ocr` func exec | TRUE | diff --git a/doc/imgs_results/whl/11_det.jpg b/doc/imgs_results/whl/11_det.jpg new file mode 100644 index 
0000000000000000000000000000000000000000..fe0cd23cc24457f5d7084fff0c63c239d09c9969 Binary files /dev/null and b/doc/imgs_results/whl/11_det.jpg differ diff --git a/doc/imgs_results/whl/11_det_rec.jpg b/doc/imgs_results/whl/11_det_rec.jpg new file mode 100644 index 0000000000000000000000000000000000000000..31c566478fd874d10a61dcd54635453e34c20e4c Binary files /dev/null and b/doc/imgs_results/whl/11_det_rec.jpg differ diff --git a/doc/imgs_results/whl/12_det.jpg b/doc/imgs_results/whl/12_det.jpg new file mode 100644 index 0000000000000000000000000000000000000000..1d5ccf2a6b5d3fa9516560e0cb2646ad6b917da6 Binary files /dev/null and b/doc/imgs_results/whl/12_det.jpg differ diff --git a/doc/imgs_results/whl/12_det_rec.jpg b/doc/imgs_results/whl/12_det_rec.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9db8b57e1279362db2c9f3d6a3ba36b77bf13775 Binary files /dev/null and b/doc/imgs_results/whl/12_det_rec.jpg differ diff --git a/paddleocr.py b/paddleocr.py new file mode 100644 index 0000000000000000000000000000000000000000..65bca7ae243e15e4788b5b637be65d57cf9504e5 --- /dev/null +++ b/paddleocr.py @@ -0,0 +1,212 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import sys + +__dir__ = os.path.dirname(__file__) +sys.path.append(os.path.join(__dir__, '')) + +import cv2 +import numpy as np +from pathlib import Path +import tarfile +import requests +from tqdm import tqdm + +from tools.infer import predict_system +from ppocr.utils.utility import initial_logger + +logger = initial_logger() +from ppocr.utils.utility import check_and_read_gif, get_image_file_list + +__all__ = ['PaddleOCR'] + +model_params = { + 'det': 'https://paddleocr.bj.bcebos.com/ch_models/ch_det_mv3_db_infer.tar', + 'rec': + 'https://paddleocr.bj.bcebos.com/ch_models/ch_rec_mv3_crnn_enhance_infer.tar', +} + +SUPPORT_DET_MODEL = ['DB'] +SUPPORT_REC_MODEL = ['CRNN'] +BASE_DIR = os.path.expanduser("~/.paddleocr/") + + +def download_with_progressbar(url, save_path): + response = requests.get(url, stream=True) + total_size_in_bytes = int(response.headers.get('content-length', 0)) + block_size = 1024 # 1 Kibibyte + progress_bar = tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True) + with open(save_path, 'wb') as file: + for data in response.iter_content(block_size): + progress_bar.update(len(data)) + file.write(data) + progress_bar.close() + if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes: + logger.error("ERROR, something went wrong") + sys.exit(0) + + +def maybe_download(model_storage_directory, url): + # using custom model + if not os.path.exists(os.path.join( + model_storage_directory, 'model')) or not os.path.exists( + os.path.join(model_storage_directory, 'params')): + tmp_path = os.path.join(model_storage_directory, url.split('/')[-1]) + print('download {} to {}'.format(url, tmp_path)) + os.makedirs(model_storage_directory, exist_ok=True) + download_with_progressbar(url, tmp_path) + with tarfile.open(tmp_path, 'r') as tarObj: + for member in tarObj.getmembers(): + if "model" in member.name: + filename = 'model' + elif "params" in member.name: + filename = 'params' + else: + continue + file = 
tarObj.extractfile(member) + with open( + os.path.join(model_storage_directory, filename), + 'wb') as f: + f.write(file.read()) + os.remove(tmp_path) + + +def parse_args(): + import argparse + + def str2bool(v): + return v.lower() in ("true", "t", "1") + + parser = argparse.ArgumentParser() + # params for prediction engine + parser.add_argument("--use_gpu", type=str2bool, default=True) + parser.add_argument("--ir_optim", type=str2bool, default=True) + parser.add_argument("--use_tensorrt", type=str2bool, default=False) + parser.add_argument("--gpu_mem", type=int, default=8000) + + # params for text detector + parser.add_argument("--image_dir", type=str) + parser.add_argument("--det_algorithm", type=str, default='DB') + parser.add_argument("--det_model_dir", type=str, default=None) + parser.add_argument("--det_max_side_len", type=float, default=960) + + # DB parmas + parser.add_argument("--det_db_thresh", type=float, default=0.3) + parser.add_argument("--det_db_box_thresh", type=float, default=0.5) + parser.add_argument("--det_db_unclip_ratio", type=float, default=2.0) + + # EAST parmas + parser.add_argument("--det_east_score_thresh", type=float, default=0.8) + parser.add_argument("--det_east_cover_thresh", type=float, default=0.1) + parser.add_argument("--det_east_nms_thresh", type=float, default=0.2) + + # params for text recognizer + parser.add_argument("--rec_algorithm", type=str, default='CRNN') + parser.add_argument("--rec_model_dir", type=str, default=None) + parser.add_argument("--rec_image_shape", type=str, default="3, 32, 320") + parser.add_argument("--rec_char_type", type=str, default='ch') + parser.add_argument("--rec_batch_num", type=int, default=30) + parser.add_argument("--max_text_length", type=int, default=25) + parser.add_argument( + "--rec_char_dict_path", + type=str, + default="./ppocr/utils/ppocr_keys_v1.txt") + parser.add_argument("--use_space_char", type=bool, default=True) + parser.add_argument("--enable_mkldnn", type=bool, default=False) + + 
parser.add_argument("--det", type=str2bool, default=True) + parser.add_argument("--rec", type=str2bool, default=True) + return parser.parse_args() + + +class PaddleOCR(predict_system.TextSystem): + def __init__(self, **kwargs): + """ + paddleocr package + args: + **kwargs: other params show in paddleocr --help + """ + postprocess_params = parse_args() + postprocess_params.__dict__.update(**kwargs) + + # init model dir + if postprocess_params.det_model_dir is None: + postprocess_params.det_model_dir = os.path.join(BASE_DIR, 'det') + if postprocess_params.rec_model_dir is None: + postprocess_params.rec_model_dir = os.path.join(BASE_DIR, 'rec') + print(postprocess_params) + # download model + maybe_download(postprocess_params.det_model_dir, model_params['det']) + maybe_download(postprocess_params.rec_model_dir, model_params['rec']) + + if postprocess_params.det_algorithm not in SUPPORT_DET_MODEL: + logger.error('det_algorithm must in {}'.format(SUPPORT_DET_MODEL)) + sys.exit(0) + if postprocess_params.rec_algorithm not in SUPPORT_REC_MODEL: + logger.error('rec_algorithm must in {}'.format(SUPPORT_REC_MODEL)) + sys.exit(0) + + postprocess_params.rec_char_dict_path = Path( + __file__).parent / postprocess_params.rec_char_dict_path + + # init det_model and rec_model + super().__init__(postprocess_params) + + def ocr(self, img, det=True, rec=True): + """ + ocr with paddleocr + args: + img: img for ocr, support ndarray, img_path and list or ndarray + det: use text detection or not, if false, only rec will be exec. default is True + rec: use text recognition or not, if false, only det will be exec. 
default is True + """ + assert isinstance(img, (np.ndarray, list, str)) + if isinstance(img, str): + image_file = img + img, flag = check_and_read_gif(image_file) + if not flag: + img = cv2.imread(image_file) + if img is None: + logger.error("error in loading image:{}".format(image_file)) + return None + if det and rec: + dt_boxes, rec_res = self.__call__(img) + return [[box.tolist(), res] for box, res in zip(dt_boxes, rec_res)] + elif det and not rec: + dt_boxes, elapse = self.text_detector(img) + if dt_boxes is None: + return None + return [box.tolist() for box in dt_boxes] + else: + if not isinstance(img, list): + img = [img] + rec_res, elapse = self.text_recognizer(img) + return rec_res + + +def main(): + # for com + args = parse_args() + image_file_list = get_image_file_list(args.image_dir) + if len(image_file_list) == 0: + logger.error('no images find in {}'.format(args.image_dir)) + return + ocr_engine = PaddleOCR() + for img_path in image_file_list: + print(img_path) + result = ocr_engine.ocr(img_path, det=args.det, rec=args.rec) + for line in result: + print(line) diff --git a/requirments.txt b/requirments.txt index 94e8478ffad88a6e5cd69424c6aa485400cfae06..ec538138beaed70ec8f5285ea0c4114f22e3b0ef 100644 --- a/requirments.txt +++ b/requirments.txt @@ -1,4 +1,6 @@ shapely imgaug pyclipper -lmdb \ No newline at end of file +lmdb +tqdm +numpy \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..7141f170f3afa2be5217faff66a2aeb12dbefcbe --- /dev/null +++ b/setup.py @@ -0,0 +1,56 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from setuptools import setup +from io import open + +with open('requirments.txt', encoding="utf-8-sig") as f: + requirements = f.readlines() + requirements.append('tqdm') + + +def readme(): + with open('doc/doc_en/whl_en.md', encoding="utf-8-sig") as f: + README = f.read() + return README + + +setup( + name='paddleocr', + packages=['paddleocr'], + package_dir={'paddleocr': ''}, + include_package_data=True, + entry_points={"console_scripts": ["paddleocr= paddleocr.paddleocr:main"]}, + version='0.0.3', + install_requires=requirements, + license='Apache License 2.0', + description='Awesome OCR toolkits based on PaddlePaddle (8.6M ultra-lightweight pre-trained model, support training and deployment among server, mobile, embeded and IoT devices', + long_description=readme(), + long_description_content_type='text/markdown', + url='https://github.com/PaddlePaddle/PaddleOCR', + download_url='https://github.com/PaddlePaddle/PaddleOCR.git', + keywords=[ + 'ocr textdetection textrecognition paddleocr crnn east star-net rosetta ocrlite db chineseocr chinesetextdetection chinesetextrecognition' + ], + classifiers=[ + 'Intended Audience :: Developers', 'Operating System :: OS Independent', + 'Natural Language :: Chinese (Simplified)', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.2', + 'Programming Language :: Python :: 3.3', + 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', 'Topic :: Utilities' + ], ) diff --git 
a/tools/infer/predict_det.py b/tools/infer/predict_det.py index 82877c0ef71b56e6afb4ea43725981640c0e8c64..625f87abc39fc0e9d7683f72dafec1d53324873a 100755 --- a/tools/infer/predict_det.py +++ b/tools/infer/predict_det.py @@ -17,28 +17,32 @@ __dir__ = os.path.dirname(os.path.abspath(__file__)) sys.path.append(__dir__) sys.path.append(os.path.abspath(os.path.join(__dir__, '../..'))) +import cv2 +import copy +import numpy as np +import math +import time +import sys + +import paddle.fluid as fluid + import tools.infer.utility as utility from ppocr.utils.utility import initial_logger logger = initial_logger() from ppocr.utils.utility import get_image_file_list, check_and_read_gif -import cv2 from ppocr.data.det.sast_process import SASTProcessTest from ppocr.data.det.east_process import EASTProcessTest from ppocr.data.det.db_process import DBProcessTest from ppocr.postprocess.db_postprocess import DBPostProcess from ppocr.postprocess.east_postprocess import EASTPostPocess from ppocr.postprocess.sast_postprocess import SASTPostProcess -import copy -import numpy as np -import math -import time -import sys class TextDetector(object): def __init__(self, args): max_side_len = args.det_max_side_len self.det_algorithm = args.det_algorithm + self.use_zero_copy_run = args.use_zero_copy_run preprocess_params = {'max_side_len': max_side_len} postprocess_params = {} if self.det_algorithm == "DB": @@ -127,7 +131,7 @@ class TextDetector(object): dt_boxes_new.append(box) dt_boxes = np.array(dt_boxes_new) return dt_boxes - + def __call__(self, img): ori_im = img.copy() im, ratio_list = self.preprocess_op(img) @@ -135,8 +139,12 @@ class TextDetector(object): return None, 0 im = im.copy() starttime = time.time() - self.input_tensor.copy_from_cpu(im) - self.predictor.zero_copy_run() + if self.use_zero_copy_run: + self.input_tensor.copy_from_cpu(im) + self.predictor.zero_copy_run() + else: + im = fluid.core.PaddleTensor(im) + self.predictor.run([im]) outputs = [] for output_tensor in 
self.output_tensors: output = output_tensor.copy_to_cpu() @@ -152,7 +160,7 @@ class TextDetector(object): outs_dict['f_tvo'] = outputs[3] else: outs_dict['maps'] = outputs[0] - + dt_boxes_list = self.postprocess_op(outs_dict, [ratio_list]) dt_boxes = dt_boxes_list[0] if self.det_algorithm == "SAST" and self.det_sast_polygon: diff --git a/tools/infer/predict_rec.py b/tools/infer/predict_rec.py index c81b4eb2560ee5ad66a85c96efe4de935a2beee1..6a379853a4a7d62cbffcbebbf09e2fb3e2207b27 100755 --- a/tools/infer/predict_rec.py +++ b/tools/infer/predict_rec.py @@ -17,15 +17,18 @@ __dir__ = os.path.dirname(os.path.abspath(__file__)) sys.path.append(__dir__) sys.path.append(os.path.abspath(os.path.join(__dir__, '../..'))) -import tools.infer.utility as utility -from ppocr.utils.utility import initial_logger -logger = initial_logger() -from ppocr.utils.utility import get_image_file_list, check_and_read_gif import cv2 import copy import numpy as np import math import time + +import paddle.fluid as fluid + +import tools.infer.utility as utility +from ppocr.utils.utility import initial_logger +logger = initial_logger() +from ppocr.utils.utility import get_image_file_list, check_and_read_gif from ppocr.utils.character import CharacterOps @@ -37,6 +40,7 @@ class TextRecognizer(object): self.character_type = args.rec_char_type self.rec_batch_num = args.rec_batch_num self.rec_algorithm = args.rec_algorithm + self.use_zero_copy_run = args.use_zero_copy_run char_ops_params = { "character_type": args.rec_char_type, "character_dict_path": args.rec_char_dict_path, @@ -102,8 +106,12 @@ class TextRecognizer(object): norm_img_batch = np.concatenate(norm_img_batch) norm_img_batch = norm_img_batch.copy() starttime = time.time() - self.input_tensor.copy_from_cpu(norm_img_batch) - self.predictor.zero_copy_run() + if self.use_zero_copy_run: + self.input_tensor.copy_from_cpu(norm_img_batch) + self.predictor.zero_copy_run() + else: + norm_img_batch = fluid.core.PaddleTensor(norm_img_batch) + 
self.predictor.run([norm_img_batch]) if self.loss_type == "ctc": rec_idx_batch = self.output_tensors[0].copy_to_cpu() diff --git a/tools/infer/predict_system.py b/tools/infer/predict_system.py index f8a62679bc17d10380983319a3f239d4a7339646..647a76b20496335cd059242890f86fffe1e3ac1a 100755 --- a/tools/infer/predict_system.py +++ b/tools/infer/predict_system.py @@ -157,7 +157,6 @@ def main(args): boxes, txts, scores, - draw_txt=True, drop_score=drop_score) draw_img_save = "./inference_results/" if not os.path.exists(draw_img_save): diff --git a/tools/infer/utility.py b/tools/infer/utility.py index 392bc4dfa5831ab64c0ed920ea3e9bfdea04925d..9d7ce13d37567ac80e194a6500a0f629ede4b1d4 100755 --- a/tools/infer/utility.py +++ b/tools/infer/utility.py @@ -71,6 +71,7 @@ def parse_args(): default="./ppocr/utils/ppocr_keys_v1.txt") parser.add_argument("--use_space_char", type=bool, default=True) parser.add_argument("--enable_mkldnn", type=bool, default=False) + parser.add_argument("--use_zero_copy_run", type=bool, default=False) return parser.parse_args() @@ -105,9 +106,12 @@ def create_predictor(args, mode): #config.enable_memory_optim() config.disable_glog_info() - # use zero copy - config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass") - config.switch_use_feed_fetch_ops(False) + if args.use_zero_copy_run: + config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass") + config.switch_use_feed_fetch_ops(False) + else: + config.switch_use_feed_fetch_ops(True) + predictor = create_paddle_predictor(config) input_names = predictor.get_input_names() input_tensor = predictor.get_input_tensor(input_names[0]) @@ -139,7 +143,12 @@ def resize_img(img, input_size=600): return im -def draw_ocr(image, boxes, txts, scores, draw_txt=True, drop_score=0.5): +def draw_ocr(image, + boxes, + txts=None, + scores=None, + drop_score=0.5, + font_path="./doc/simfang.ttf"): """ Visualize the results of OCR detection and recognition args: @@ -147,23 +156,29 @@ def draw_ocr(image, boxes, txts, 
scores, draw_txt=True, drop_score=0.5): boxes(list): boxes with shape(N, 4, 2) txts(list): the texts scores(list): txxs corresponding scores - draw_txt(bool): whether draw text or not drop_score(float): only scores greater than drop_threshold will be visualized + font_path: the path of font which is used to draw text return(array): the visualized img """ if scores is None: scores = [1] * len(boxes) - for (box, score) in zip(boxes, scores): - if score < drop_score or math.isnan(score): + box_num = len(boxes) + for i in range(box_num): + if scores is not None and (scores[i] < drop_score or + math.isnan(scores[i])): continue - box = np.reshape(np.array(box), [-1, 1, 2]).astype(np.int64) + box = np.reshape(np.array(boxes[i]), [-1, 1, 2]).astype(np.int64) image = cv2.polylines(np.array(image), [box], True, (255, 0, 0), 2) - - if draw_txt: + if txts is not None: img = np.array(resize_img(image, input_size=600)) txt_img = text_visual( - txts, scores, img_h=img.shape[0], img_w=600, threshold=drop_score) + txts, + scores, + img_h=img.shape[0], + img_w=600, + threshold=drop_score, + font_path=font_path) img = np.concatenate([np.array(img), np.array(txt_img)], axis=1) return img return image @@ -241,7 +256,12 @@ def str_count(s): return s_len - math.ceil(en_dg_count / 2) -def text_visual(texts, scores, img_h=400, img_w=600, threshold=0.): +def text_visual(texts, + scores, + img_h=400, + img_w=600, + threshold=0., + font_path="./doc/simfang.ttf"): """ create new blank img and draw txt on it args: @@ -249,6 +269,7 @@ def text_visual(texts, scores, img_h=400, img_w=600, threshold=0.): scores(list|None): corresponding score of each txt img_h(int): the height of blank img img_w(int): the width of blank img + font_path: the path of font which is used to draw text return(array): """ @@ -267,7 +288,7 @@ def text_visual(texts, scores, img_h=400, img_w=600, threshold=0.): font_size = 20 txt_color = (0, 0, 0) - font = ImageFont.truetype("./doc/simfang.ttf", font_size, encoding="utf-8") 
+ font = ImageFont.truetype(font_path, font_size, encoding="utf-8") gap = font_size + 5 txt_img_list = [] @@ -348,6 +369,6 @@ if __name__ == '__main__': txts.append(dic['transcription']) scores.append(round(dic['scores'], 3)) - new_img = draw_ocr(image, boxes, txts, scores, draw_txt=True) + new_img = draw_ocr(image, boxes, txts, scores) cv2.imwrite(img_name, new_img)