diff --git a/.gitignore b/.gitignore index 1a2dd675e961f1804fa58e2e2e49118536b84ce9..9eecb4f1056fc040d4c9579d593bee2cc4013837 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,7 @@ output/ *.log .clang-format .clang_format.hook + +build/ +dist/ +paddleocr.egg-info/ \ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000000000000000000000000000000000000..388882df0c3701780dd6371bc91887356a7bca40 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,8 @@ +include LICENSE.txt +include README.md + +recursive-include ppocr/utils *.txt utility.py character.py check.py +recursive-include ppocr/data/det *.py +recursive-include ppocr/postprocess *.py +recursive-include ppocr/postprocess/lanms *.* +recursive-include tools/infer *.py diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7d94f66be072067172d56da13d8bb27d9aeac431 --- /dev/null +++ b/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +__all__ = ['PaddleOCR', 'draw_ocr'] +from .paddleocr import PaddleOCR +from .tools.infer.utility import draw_ocr diff --git a/doc/doc_ch/whl.md b/doc/doc_ch/whl.md new file mode 100644 index 0000000000000000000000000000000000000000..9424346253f8615377c7a125f95209c3e4eb503d --- /dev/null +++ b/doc/doc_ch/whl.md @@ -0,0 +1,251 @@ +# paddleocr package使用说明 + +## 快速上手 + +### 安装whl包 + +pip安装 +```bash +pip install paddleocr +``` + +本地构建并安装 +```bash +python setup.py bdist_wheel +pip install dist/paddleocr-0.0.1-py3-none-any.whl +``` +### 1. 代码使用 + +* 检测+识别全流程 +```python +from paddleocr import PaddleOCR, draw_ocr +ocr = PaddleOCR(model_storage_directory='./model') # need to run only once to load model into memory +img_path = 'PaddleOCR/doc/imgs/11.jpg' +result = ocr.ocr(img_path) +for line in result: + print(line) + +# 显示结果 +from PIL import Image +image = Image.open(img_path).convert('RGB') +boxes = [line[0] for line in result] +txts = [line[1][0] for line in result] +scores = [line[1][1] for line in result] +im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc/simfang.ttf') +im_show = Image.fromarray(im_show) +im_show.save('result.jpg') +``` +结果是一个list,每个item包含了文本框,文字和识别置信度 +```bash +[[[24.0, 36.0], [304.0, 34.0], [304.0, 72.0], [24.0, 74.0]], ['纯臻营养护发素', 0.964739]] +[[[24.0, 80.0], [172.0, 80.0], [172.0, 104.0], [24.0, 104.0]], ['产品信息/参数', 0.98069626]] +[[[24.0, 109.0], [333.0, 109.0], [333.0, 136.0], [24.0, 136.0]], ['(45元/每公斤,100公斤起订)', 0.9676722]] +[[[22.0, 140.0], [284.0, 140.0], [284.0, 167.0], [22.0, 167.0]], ['每瓶22元,1000瓶起订)', 0.97444016]] +[[[22.0, 174.0], [85.0, 174.0], [85.0, 198.0], [22.0, 198.0]], ['【品牌】', 0.8187138]] +[[[89.0, 176.0], [301.0, 176.0], [301.0, 196.0], [89.0, 196.0]], [':代加工方式/OEMODM', 0.9421848]] +[[[23.0, 205.0], [85.0, 205.0], [85.0, 229.0], [23.0, 229.0]], ['【品名】', 0.76008326]] +[[[88.0, 204.0], [235.0, 206.0], [235.0, 229.0], [88.0, 227.0]], [':纯臻营养护发素', 0.9633639]] +[[[23.0, 236.0], [121.0, 236.0], [121.0, 
261.0], [23.0, 261.0]], ['【产品编号】', 0.84101385]] +[[[110.0, 239.0], [239.0, 239.0], [239.0, 256.0], [110.0, 256.0]], ['1:YM-X-3011', 0.8621878]] +[[[414.0, 233.0], [430.0, 233.0], [430.0, 304.0], [414.0, 304.0]], ['ODM OEM', 0.9084018]] +[[[23.0, 268.0], [183.0, 268.0], [183.0, 292.0], [23.0, 292.0]], ['【净含量】:220ml', 0.9278281]] +[[[24.0, 301.0], [118.0, 301.0], [118.0, 321.0], [24.0, 321.0]], ['【适用人群】', 0.90901047]] +[[[127.0, 300.0], [254.0, 300.0], [254.0, 323.0], [127.0, 323.0]], [':适合所有肤质', 0.95465785]] +[[[24.0, 332.0], [117.0, 332.0], [117.0, 353.0], [24.0, 353.0]], ['【主要成分】', 0.88936955]] +[[[139.0, 332.0], [236.0, 332.0], [236.0, 352.0], [139.0, 352.0]], ['鲸蜡硬脂醇', 0.9447544]] +[[[248.0, 332.0], [345.0, 332.0], [345.0, 352.0], [248.0, 352.0]], ['燕麦B-葡聚', 0.89748293]] +[[[54.0, 363.0], [232.0, 363.0], [232.0, 383.0], [54.0, 383.0]], [' 椰油酰胺丙基甜菜碱', 0.902023]] +[[[25.0, 364.0], [64.0, 364.0], [64.0, 383.0], [25.0, 383.0]], ['糖、', 0.985203]] +[[[244.0, 363.0], [281.0, 363.0], [281.0, 382.0], [244.0, 382.0]], ['泛服', 0.44537082]] +[[[367.0, 367.0], [475.0, 367.0], [475.0, 388.0], [367.0, 388.0]], ['(成品包材)', 0.9834532]] +[[[24.0, 395.0], [120.0, 395.0], [120.0, 416.0], [24.0, 416.0]], ['【主要功能】', 0.88684446]] +[[[128.0, 397.0], [273.0, 397.0], [273.0, 414.0], [128.0, 414.0]], [':可紧致头发磷层', 0.9342501]] +[[[265.0, 395.0], [361.0, 395.0], [361.0, 415.0], [265.0, 415.0]], ['琴,从而达到', 0.8253762]] +[[[25.0, 425.0], [372.0, 425.0], [372.0, 448.0], [25.0, 448.0]], ['即时持久改善头发光泽的效果,给干燥的头', 0.97785276]] +[[[26.0, 457.0], [137.0, 457.0], [137.0, 477.0], [26.0, 477.0]], ['发足够的滋养', 0.9577897]] +``` +结果可视化 + +
+ +
+ +* 单独执行检测 +```python +from paddleocr import PaddleOCR, draw_ocr +ocr = PaddleOCR(model_storage_directory='./model') # need to run only once to load model into memory +img_path = 'PaddleOCR/doc/imgs/11.jpg' +result = ocr.ocr(img_path,rec=False) +for line in result: + print(line) + +# 显示结果 +from PIL import Image + +image = Image.open(img_path).convert('RGB') +im_show = draw_ocr(image, result, txts=None, scores=None, font_path='/path/to/PaddleOCR/doc/simfang.ttf') +im_show = Image.fromarray(im_show) +im_show.save('result.jpg') +``` +结果是一个list,每个item只包含文本框 +```bash +[[26.0, 457.0], [137.0, 457.0], [137.0, 477.0], [26.0, 477.0]] +[[25.0, 425.0], [372.0, 425.0], [372.0, 448.0], [25.0, 448.0]] +[[128.0, 397.0], [273.0, 397.0], [273.0, 414.0], [128.0, 414.0]] +[[265.0, 395.0], [361.0, 395.0], [361.0, 415.0], [265.0, 415.0]] +[[24.0, 395.0], [120.0, 395.0], [120.0, 416.0], [24.0, 416.0]] +[[367.0, 367.0], [475.0, 367.0], [475.0, 388.0], [367.0, 388.0]] +[[54.0, 363.0], [232.0, 363.0], [232.0, 383.0], [54.0, 383.0]] +[[25.0, 364.0], [64.0, 364.0], [64.0, 383.0], [25.0, 383.0]] +[[244.0, 363.0], [281.0, 363.0], [281.0, 382.0], [244.0, 382.0]] +[[248.0, 332.0], [345.0, 332.0], [345.0, 352.0], [248.0, 352.0]] +[[139.0, 332.0], [236.0, 332.0], [236.0, 352.0], [139.0, 352.0]] +[[24.0, 332.0], [117.0, 332.0], [117.0, 353.0], [24.0, 353.0]] +[[127.0, 300.0], [254.0, 300.0], [254.0, 323.0], [127.0, 323.0]] +[[24.0, 301.0], [118.0, 301.0], [118.0, 321.0], [24.0, 321.0]] +[[23.0, 268.0], [183.0, 268.0], [183.0, 292.0], [23.0, 292.0]] +[[110.0, 239.0], [239.0, 239.0], [239.0, 256.0], [110.0, 256.0]] +[[23.0, 236.0], [121.0, 236.0], [121.0, 261.0], [23.0, 261.0]] +[[414.0, 233.0], [430.0, 233.0], [430.0, 304.0], [414.0, 304.0]] +[[88.0, 204.0], [235.0, 206.0], [235.0, 229.0], [88.0, 227.0]] +[[23.0, 205.0], [85.0, 205.0], [85.0, 229.0], [23.0, 229.0]] +[[89.0, 176.0], [301.0, 176.0], [301.0, 196.0], [89.0, 196.0]] +[[22.0, 174.0], [85.0, 174.0], [85.0, 198.0], [22.0, 198.0]] +[[22.0, 
140.0], [284.0, 140.0], [284.0, 167.0], [22.0, 167.0]] +[[24.0, 109.0], [333.0, 109.0], [333.0, 136.0], [24.0, 136.0]] +[[24.0, 80.0], [172.0, 80.0], [172.0, 104.0], [24.0, 104.0]] +[[24.0, 36.0], [304.0, 34.0], [304.0, 72.0], [24.0, 74.0]] +``` +结果可视化 + + +
+ +
+ +* 单独执行识别 +```python +from paddleocr import PaddleOCR +ocr = PaddleOCR(model_storage_directory='./model') # need to run only once to load model into memory +img_path = 'PaddleOCR/doc/imgs_words/ch/word_1.jpg' +result = ocr.ocr(img_path,det=False) +for line in result: + print(line) +``` +结果是一个list,每个item只包含识别结果和识别置信度 +```bash +['韩国小馆', 0.9907421] +``` + +### 通过命令行使用 + +查看帮助信息 +```bash +paddleocr -h +``` + +* 检测+识别全流程 +```bash +paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg +``` +结果是一个list,每个item包含了文本框,文字和识别置信度 +```bash +[[[24.0, 36.0], [304.0, 34.0], [304.0, 72.0], [24.0, 74.0]], ['纯臻营养护发素', 0.964739]] +[[[24.0, 80.0], [172.0, 80.0], [172.0, 104.0], [24.0, 104.0]], ['产品信息/参数', 0.98069626]] +[[[24.0, 109.0], [333.0, 109.0], [333.0, 136.0], [24.0, 136.0]], ['(45元/每公斤,100公斤起订)', 0.9676722]] +[[[22.0, 140.0], [284.0, 140.0], [284.0, 167.0], [22.0, 167.0]], ['每瓶22元,1000瓶起订)', 0.97444016]] +[[[22.0, 174.0], [85.0, 174.0], [85.0, 198.0], [22.0, 198.0]], ['【品牌】', 0.8187138]] +[[[89.0, 176.0], [301.0, 176.0], [301.0, 196.0], [89.0, 196.0]], [':代加工方式/OEMODM', 0.9421848]] +[[[23.0, 205.0], [85.0, 205.0], [85.0, 229.0], [23.0, 229.0]], ['【品名】', 0.76008326]] +[[[88.0, 204.0], [235.0, 206.0], [235.0, 229.0], [88.0, 227.0]], [':纯臻营养护发素', 0.9633639]] +[[[23.0, 236.0], [121.0, 236.0], [121.0, 261.0], [23.0, 261.0]], ['【产品编号】', 0.84101385]] +[[[110.0, 239.0], [239.0, 239.0], [239.0, 256.0], [110.0, 256.0]], ['1:YM-X-3011', 0.8621878]] +[[[414.0, 233.0], [430.0, 233.0], [430.0, 304.0], [414.0, 304.0]], ['ODM OEM', 0.9084018]] +[[[23.0, 268.0], [183.0, 268.0], [183.0, 292.0], [23.0, 292.0]], ['【净含量】:220ml', 0.9278281]] +[[[24.0, 301.0], [118.0, 301.0], [118.0, 321.0], [24.0, 321.0]], ['【适用人群】', 0.90901047]] +[[[127.0, 300.0], [254.0, 300.0], [254.0, 323.0], [127.0, 323.0]], [':适合所有肤质', 0.95465785]] +[[[24.0, 332.0], [117.0, 332.0], [117.0, 353.0], [24.0, 353.0]], ['【主要成分】', 0.88936955]] +[[[139.0, 332.0], [236.0, 332.0], [236.0, 352.0], [139.0, 352.0]], ['鲸蜡硬脂醇', 0.9447544]] 
+[[[248.0, 332.0], [345.0, 332.0], [345.0, 352.0], [248.0, 352.0]], ['燕麦B-葡聚', 0.89748293]] +[[[54.0, 363.0], [232.0, 363.0], [232.0, 383.0], [54.0, 383.0]], [' 椰油酰胺丙基甜菜碱', 0.902023]] +[[[25.0, 364.0], [64.0, 364.0], [64.0, 383.0], [25.0, 383.0]], ['糖、', 0.985203]] +[[[244.0, 363.0], [281.0, 363.0], [281.0, 382.0], [244.0, 382.0]], ['泛服', 0.44537082]] +[[[367.0, 367.0], [475.0, 367.0], [475.0, 388.0], [367.0, 388.0]], ['(成品包材)', 0.9834532]] +[[[24.0, 395.0], [120.0, 395.0], [120.0, 416.0], [24.0, 416.0]], ['【主要功能】', 0.88684446]] +[[[128.0, 397.0], [273.0, 397.0], [273.0, 414.0], [128.0, 414.0]], [':可紧致头发磷层', 0.9342501]] +[[[265.0, 395.0], [361.0, 395.0], [361.0, 415.0], [265.0, 415.0]], ['琴,从而达到', 0.8253762]] +[[[25.0, 425.0], [372.0, 425.0], [372.0, 448.0], [25.0, 448.0]], ['即时持久改善头发光泽的效果,给干燥的头', 0.97785276]] +[[[26.0, 457.0], [137.0, 457.0], [137.0, 477.0], [26.0, 477.0]], ['发足够的滋养', 0.9577897]] +``` + +* 单独执行检测 +```bash +paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --rec false +``` +结果是一个list,每个item只包含文本框 +```bash +[[26.0, 457.0], [137.0, 457.0], [137.0, 477.0], [26.0, 477.0]] +[[25.0, 425.0], [372.0, 425.0], [372.0, 448.0], [25.0, 448.0]] +[[128.0, 397.0], [273.0, 397.0], [273.0, 414.0], [128.0, 414.0]] +[[265.0, 395.0], [361.0, 395.0], [361.0, 415.0], [265.0, 415.0]] +[[24.0, 395.0], [120.0, 395.0], [120.0, 416.0], [24.0, 416.0]] +[[367.0, 367.0], [475.0, 367.0], [475.0, 388.0], [367.0, 388.0]] +[[54.0, 363.0], [232.0, 363.0], [232.0, 383.0], [54.0, 383.0]] +[[25.0, 364.0], [64.0, 364.0], [64.0, 383.0], [25.0, 383.0]] +[[244.0, 363.0], [281.0, 363.0], [281.0, 382.0], [244.0, 382.0]] +[[248.0, 332.0], [345.0, 332.0], [345.0, 352.0], [248.0, 352.0]] +[[139.0, 332.0], [236.0, 332.0], [236.0, 352.0], [139.0, 352.0]] +[[24.0, 332.0], [117.0, 332.0], [117.0, 353.0], [24.0, 353.0]] +[[127.0, 300.0], [254.0, 300.0], [254.0, 323.0], [127.0, 323.0]] +[[24.0, 301.0], [118.0, 301.0], [118.0, 321.0], [24.0, 321.0]] +[[23.0, 268.0], [183.0, 268.0], [183.0, 292.0], [23.0, 
292.0]] +[[110.0, 239.0], [239.0, 239.0], [239.0, 256.0], [110.0, 256.0]] +[[23.0, 236.0], [121.0, 236.0], [121.0, 261.0], [23.0, 261.0]] +[[414.0, 233.0], [430.0, 233.0], [430.0, 304.0], [414.0, 304.0]] +[[88.0, 204.0], [235.0, 206.0], [235.0, 229.0], [88.0, 227.0]] +[[23.0, 205.0], [85.0, 205.0], [85.0, 229.0], [23.0, 229.0]] +[[89.0, 176.0], [301.0, 176.0], [301.0, 196.0], [89.0, 196.0]] +[[22.0, 174.0], [85.0, 174.0], [85.0, 198.0], [22.0, 198.0]] +[[22.0, 140.0], [284.0, 140.0], [284.0, 167.0], [22.0, 167.0]] +[[24.0, 109.0], [333.0, 109.0], [333.0, 136.0], [24.0, 136.0]] +[[24.0, 80.0], [172.0, 80.0], [172.0, 104.0], [24.0, 104.0]] +[[24.0, 36.0], [304.0, 34.0], [304.0, 72.0], [24.0, 74.0]] +``` + +* 单独执行识别 +```bash +paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --det false +``` + +结果是一个list,每个item只包含识别结果和识别置信度 +```bash +['韩国小馆', 0.9907421] +``` + +## 参数说明 + +| 字段 | 说明 | 默认值 | +|-------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------| +| use_gpu | 是否使用GPU | TRUE | +| gpu_mem | 初始化占用的GPU内存大小 | 8000M | +| image_dir | 通过命令行调用时执行预测的图片或文件夹路径 | | +| det_algorithm | 使用的检测算法类型 | DB | +| det_model_name | 有两种使用方式: 1. 检测算法名称,此名称必须在支持列表内(目前只内置了ch_det_mv3_db),传入错误参数时会显示支持的列表 2. 自己转换好的inference模型路径,此时模型路径下必须包含model和params文件。选择此方式时,需要手动指定det_algorithm的值 | ch_det_mv3_db | +| det_max_side_len | 检测算法前向时图片长边的最大尺寸,当长边超出这个值时会将长边resize到这个大小,短边等比例缩放 | 960 | +| det_db_thresh | DB模型输出预测图的二值化阈值 | 0.3 | +| det_db_box_thresh | DB模型输出框的阈值,低于此值的预测框会被丢弃 | 0.5 | +| det_db_unclip_ratio | DB模型输出框扩大的比例 | 2 | +| det_east_score_thresh | EAST模型输出预测图的二值化阈值 | 0.8 | +| det_east_cover_thresh | EAST模型输出框的阈值,低于此值的预测框会被丢弃 | 0.1 | +| det_east_nms_thresh | EAST模型输出框NMS的阈值 | 0.2 | +| rec_algorithm | 使用的识别算法类型 | CRNN | +| rec_model_name | 有两种使用方式: 1. 
识别算法名称,此名称必须在支持列表内(目前支持CRNN,Rosetta,STARNet,RARE等算法,但是内置的只有ch_rec_mv3_crnn_enhance),传入错误参数时会显示支持的列表 2. 自己转换好的inference模型路径,此时模型路径下必须包含model和params文件。选择此方式时,需要手动指定rec_algorithm的值 | ch_rec_mv3_crnn_enhance | +| rec_image_shape | 识别算法的输入图片尺寸 | "3,32,320" | +| rec_char_type | 识别算法的字符类型,中文(ch)或英文(en) | ch | +| rec_batch_num | 进行识别时,同时前向的图片数 | 30 | +| rec_char_dict_path | 识别模型字典路径,当rec_model_name使用方式2传参时需要修改为自己的路径 | | +| use_space_char | 是否识别空格 | TRUE | +| enable_mkldnn | 是否启用mkldnn | FALSE | +| model_storage_directory | 下载模型保存路径 | ~/.paddleocr/model | +| det | 前向时是否启动检测 | TRUE | +| rec | 前向时是否启动识别 | TRUE | diff --git a/doc/doc_en/whl.md b/doc/doc_en/whl.md new file mode 100644 index 0000000000000000000000000000000000000000..ad2125a423afccebb5d084d924d110dad3c1a04c --- /dev/null +++ b/doc/doc_en/whl.md @@ -0,0 +1,211 @@ +# paddleocr package + +## Get started quickly +### install package +install by pypi +```bash +pip install paddleocr +``` + +build own whl package and install +```bash +python setup.py bdist_wheel +pip install dist/paddleocr-0.0.1-py3-none-any.whl +``` +### 1. 
Use by code + +* detection and recognition +```python +from paddleocr import PaddleOCR,draw_ocr +ocr = PaddleOCR(model_storage_directory='./model') # need to run only once to load model into memory +img_path = 'PaddleOCR/doc/imgs_en/img_12.jpg' +result = ocr.ocr(img_path) +for line in result: + print(line) + +# draw result +from PIL import Image +image = Image.open(img_path).convert('RGB') +boxes = [line[0] for line in result] +txts = [line[1][0] for line in result] +scores = [line[1][1] for line in result] +im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc/simfang.ttf') +im_show = Image.fromarray(im_show) +im_show.save('result.jpg') +``` + +Output will be a list, each item contains bounding box, text and recognition confidence +```bash +[[[442.0, 173.0], [1169.0, 173.0], [1169.0, 225.0], [442.0, 225.0]], ['ACKNOWLEDGEMENTS', 0.99283075]] +[[[393.0, 340.0], [1207.0, 342.0], [1207.0, 389.0], [393.0, 387.0]], ['We would like to thank all the designers and', 0.9357758]] +[[[399.0, 398.0], [1204.0, 398.0], [1204.0, 433.0], [399.0, 433.0]], ['contributors whohave been involved in the', 0.9592447]] +[[[395.0, 443.0], [1211.0, 443.0], [1211.0, 489.0], [395.0, 489.0]], ['production of this book;their contributions', 0.9713175]] +[[[395.0, 497.0], [1209.0, 495.0], [1209.0, 531.0], [395.0, 533.0]], ['have been indispensable to its creation.We', 0.96009934]] +[[[393.0, 545.0], [1212.0, 545.0], [1212.0, 591.0], [393.0, 591.0]], ['would also like to express our gratitude to al', 0.9371007]] +[[[393.0, 595.0], [1212.0, 593.0], [1212.0, 635.0], [393.0, 637.0]], ['the producers for their invaluable opinions', 0.96872145]] +[[[393.0, 645.0], [1209.0, 645.0], [1209.0, 685.0], [393.0, 685.0]], ['and assistance throughout this proiect.Andto', 0.94448787]] +[[[392.0, 697.0], [1212.0, 693.0], [1212.0, 735.0], [392.0, 739.0]], ['the many others whose names are not credited', 0.93633145]] +[[[397.0, 753.0], [689.0, 755.0], [689.0, 786.0], [397.0, 784.0]], 
['buthavemades', 0.99324507]] +[[[813.0, 749.0], [1212.0, 747.0], [1212.0, 784.0], [813.0, 786.0]], ['inputin this book, we', 0.9166398]] +[[[675.0, 760.0], [799.0, 755.0], [799.0, 778.0], [675.0, 784.0]], ['speciti', 0.9063535]] +[[[393.0, 801.0], [715.0, 805.0], [715.0, 839.0], [393.0, 836.0]], ['thankyouforyoul', 0.92475533]] +[[[756.0, 812.0], [805.0, 812.0], [805.0, 830.0], [756.0, 830.0]], ['P', 0.14887337]] +[[[820.0, 803.0], [1085.0, 801.0], [1085.0, 836.0], [820.0, 838.0]], ['nuoussupport', 0.9898951]] +``` + +Visualization of results + +
+ +
+ +* only detection +```python +from paddleocr import PaddleOCR,draw_ocr +ocr = PaddleOCR(model_storage_directory='./model') # need to run only once to load model into memory +img_path = 'PaddleOCR/doc/imgs_en/img_12.jpg' +result = ocr.ocr(img_path,rec=False) +for line in result: + print(line) + +# draw result +from PIL import Image + +image = Image.open(img_path).convert('RGB') +im_show = draw_ocr(image, result, txts=None, scores=None, font_path='/path/to/PaddleOCR/doc/simfang.ttf') +im_show = Image.fromarray(im_show) +im_show.save('result.jpg') +``` + +Output will be a list, each item only contains bounding box +```bash +[[756.0, 812.0], [805.0, 812.0], [805.0, 830.0], [756.0, 830.0]] +[[820.0, 803.0], [1085.0, 801.0], [1085.0, 836.0], [820.0, 838.0]] +[[393.0, 801.0], [715.0, 805.0], [715.0, 839.0], [393.0, 836.0]] +[[675.0, 760.0], [799.0, 755.0], [799.0, 778.0], [675.0, 784.0]] +[[397.0, 753.0], [689.0, 755.0], [689.0, 786.0], [397.0, 784.0]] +[[813.0, 749.0], [1212.0, 747.0], [1212.0, 784.0], [813.0, 786.0]] +[[392.0, 697.0], [1212.0, 693.0], [1212.0, 735.0], [392.0, 739.0]] +[[393.0, 645.0], [1209.0, 645.0], [1209.0, 685.0], [393.0, 685.0]] +[[393.0, 595.0], [1212.0, 593.0], [1212.0, 635.0], [393.0, 637.0]] +[[393.0, 545.0], [1212.0, 545.0], [1212.0, 591.0], [393.0, 591.0]] +[[395.0, 497.0], [1209.0, 495.0], [1209.0, 531.0], [395.0, 533.0]] +[[395.0, 443.0], [1211.0, 443.0], [1211.0, 489.0], [395.0, 489.0]] +[[399.0, 398.0], [1204.0, 398.0], [1204.0, 433.0], [399.0, 433.0]] +[[393.0, 340.0], [1207.0, 342.0], [1207.0, 389.0], [393.0, 387.0]] +[[442.0, 173.0], [1169.0, 173.0], [1169.0, 225.0], [442.0, 225.0]] +``` + +Visualization of results + +
+ +
+ +* only recognition +```python +from paddleocr import PaddleOCR +ocr = PaddleOCR(model_storage_directory='./model') # need to run only once to load model into memory +img_path = 'PaddleOCR/doc/imgs_words_en/word_10.png' +result = ocr.ocr(img_path,det=False) +for line in result: + print(line) +``` + +Output will be a list, each item contains text and recognition confidence +```bash +['PAIN', 0.990372] +``` + +### Use by command line + +show help information +```bash +paddleocr -h +``` + +* detection and recognition +```bash +paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg +``` + +Output will be a list, each item contains bounding box, text and recognition confidence +```bash +[[[442.0, 173.0], [1169.0, 173.0], [1169.0, 225.0], [442.0, 225.0]], ['ACKNOWLEDGEMENTS', 0.99283075]] +[[[393.0, 340.0], [1207.0, 342.0], [1207.0, 389.0], [393.0, 387.0]], ['We would like to thank all the designers and', 0.9357758]] +[[[399.0, 398.0], [1204.0, 398.0], [1204.0, 433.0], [399.0, 433.0]], ['contributors whohave been involved in the', 0.9592447]] +[[[395.0, 443.0], [1211.0, 443.0], [1211.0, 489.0], [395.0, 489.0]], ['production of this book;their contributions', 0.9713175]] +[[[395.0, 497.0], [1209.0, 495.0], [1209.0, 531.0], [395.0, 533.0]], ['have been indispensable to its creation.We', 0.96009934]] +[[[393.0, 545.0], [1212.0, 545.0], [1212.0, 591.0], [393.0, 591.0]], ['would also like to express our gratitude to al', 0.9371007]] +[[[393.0, 595.0], [1212.0, 593.0], [1212.0, 635.0], [393.0, 637.0]], ['the producers for their invaluable opinions', 0.96872145]] +[[[393.0, 645.0], [1209.0, 645.0], [1209.0, 685.0], [393.0, 685.0]], ['and assistance throughout this proiect.Andto', 0.94448787]] +[[[392.0, 697.0], [1212.0, 693.0], [1212.0, 735.0], [392.0, 739.0]], ['the many others whose names are not credited', 0.93633145]] +[[[397.0, 753.0], [689.0, 755.0], [689.0, 786.0], [397.0, 784.0]], ['buthavemades', 0.99324507]] +[[[813.0, 749.0], [1212.0, 747.0], [1212.0, 784.0], 
[813.0, 786.0]], ['inputin this book, we', 0.9166398]] +[[[675.0, 760.0], [799.0, 755.0], [799.0, 778.0], [675.0, 784.0]], ['speciti', 0.9063535]] +[[[393.0, 801.0], [715.0, 805.0], [715.0, 839.0], [393.0, 836.0]], ['thankyouforyoul', 0.92475533]] +[[[756.0, 812.0], [805.0, 812.0], [805.0, 830.0], [756.0, 830.0]], ['P', 0.14887337]] +[[[820.0, 803.0], [1085.0, 801.0], [1085.0, 836.0], [820.0, 838.0]], ['nuoussupport', 0.9898951]] +``` + +* only detection +```bash +paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg --rec false +``` + +Output will be a list, each item only contains bounding box +```bash +[[756.0, 812.0], [805.0, 812.0], [805.0, 830.0], [756.0, 830.0]] +[[820.0, 803.0], [1085.0, 801.0], [1085.0, 836.0], [820.0, 838.0]] +[[393.0, 801.0], [715.0, 805.0], [715.0, 839.0], [393.0, 836.0]] +[[675.0, 760.0], [799.0, 755.0], [799.0, 778.0], [675.0, 784.0]] +[[397.0, 753.0], [689.0, 755.0], [689.0, 786.0], [397.0, 784.0]] +[[813.0, 749.0], [1212.0, 747.0], [1212.0, 784.0], [813.0, 786.0]] +[[392.0, 697.0], [1212.0, 693.0], [1212.0, 735.0], [392.0, 739.0]] +[[393.0, 645.0], [1209.0, 645.0], [1209.0, 685.0], [393.0, 685.0]] +[[393.0, 595.0], [1212.0, 593.0], [1212.0, 635.0], [393.0, 637.0]] +[[393.0, 545.0], [1212.0, 545.0], [1212.0, 591.0], [393.0, 591.0]] +[[395.0, 497.0], [1209.0, 495.0], [1209.0, 531.0], [395.0, 533.0]] +[[395.0, 443.0], [1211.0, 443.0], [1211.0, 489.0], [395.0, 489.0]] +[[399.0, 398.0], [1204.0, 398.0], [1204.0, 433.0], [399.0, 433.0]] +[[393.0, 340.0], [1207.0, 342.0], [1207.0, 389.0], [393.0, 387.0]] +[[442.0, 173.0], [1169.0, 173.0], [1169.0, 225.0], [442.0, 225.0]] +``` + +* only recognition +```bash +paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --det false +``` + +Output will be a list, each item contains text and recognition confidence +```bash +['PAIN', 0.990372] +``` + +## Parameter Description + +| Parameter | Description | Default value | 
+|-------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------| +| use_gpu | use GPU or not | TRUE | +| gpu_mem | GPU memory size used for initialization | 8000M | +| image_dir | The images path or folder path for predicting when used by the command line | | +| det_algorithm | Type of detection algorithm selected | DB | +| det_model_name | There are two ways to use: 1. The name of the detection algorithm which must be in the support list(only ch_det_mv3_db is built in currently), and the supported list will be displayed when the wrong parameter is passed in. 2. The path of the inference model that has been converted by yourself. At this time, the model path must contains model and params files. When choosing this method, you need to give the name of det_algorithm | ch_det_mv3_db | +| det_max_side_len | The maximum size of the long side of the image. When the long side exceeds this value, the long side will be resized to this size, and the short side will be scaled proportionally | 960 | +| det_db_thresh | Binarization threshold value of DB output map | 0.3 | +| det_db_box_thresh | The threshold value of the DB output box. Boxes score lower than this value will be discarded | 0.5 | +| det_db_unclip_ratio | The expanded ratio of DB output box | 2 | +| det_east_score_thresh | Binarization threshold value of EAST output map | 0.8 | +| det_east_cover_thresh | The threshold value of the EAST output box. Boxes score lower than this value will be discarded | 0.1 | +| det_east_nms_thresh | The NMS threshold value of EAST model output box | 0.2 | +| rec_algorithm | Type of recognition algorithm selected | CRNN | +| rec_model_name | There are two ways to use: 1. 
The name of the recognition algorithm, which must be in the supported list (CRNN, Rosetta, STARNet, RARE and other algorithms are currently supported, but only ch_rec_mv3_crnn_enhance is built in); the supported list will be displayed when a wrong parameter is passed in. 2. The path of an inference model that you have converted yourself. In this case, the model path must contain the model and params files. When choosing this method, you need to specify the value of rec_algorithm | ch_rec_mv3_crnn_enhance | +| rec_image_shape | image shape of recognition algorithm | "3,32,320" | +| rec_char_type | Character type of recognition algorithm, Chinese (ch) or English (en) | ch | +| rec_batch_num | When performing recognition, the batch size of forward images | 30 | +| rec_char_dict_path | the alphabet path, which needs to be changed to your own path when `rec_model_name` uses mode 2 | | +| use_space_char | Whether to recognize spaces | TRUE | +| enable_mkldnn | Whether to enable mkldnn | FALSE | +| model_storage_directory | Download model save path when det_model_name or rec_model_name use mode 1 | ~/.paddleocr/model | +| det | Enable detection when the `PaddleOCR.ocr` method is executed | TRUE | +| rec | Enable recognition when the `PaddleOCR.ocr` method is executed | TRUE | diff --git a/doc/imgs_results/whl/11_det.jpg b/doc/imgs_results/whl/11_det.jpg new file mode 100644 index 0000000000000000000000000000000000000000..fe0cd23cc24457f5d7084fff0c63c239d09c9969 Binary files /dev/null and b/doc/imgs_results/whl/11_det.jpg differ diff --git a/doc/imgs_results/whl/11_det_rec.jpg b/doc/imgs_results/whl/11_det_rec.jpg new file mode 100644 index 0000000000000000000000000000000000000000..31c566478fd874d10a61dcd54635453e34c20e4c Binary files /dev/null and b/doc/imgs_results/whl/11_det_rec.jpg differ diff --git a/doc/imgs_results/whl/12_det.jpg b/doc/imgs_results/whl/12_det.jpg new file mode 100644 index 0000000000000000000000000000000000000000..1d5ccf2a6b5d3fa9516560e0cb2646ad6b917da6 Binary files /dev/null and 
b/doc/imgs_results/whl/12_det.jpg differ diff --git a/doc/imgs_results/whl/12_det_rec.jpg b/doc/imgs_results/whl/12_det_rec.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9db8b57e1279362db2c9f3d6a3ba36b77bf13775 Binary files /dev/null and b/doc/imgs_results/whl/12_det_rec.jpg differ diff --git a/paddleocr.py b/paddleocr.py new file mode 100644 index 0000000000000000000000000000000000000000..3c356bb3ee698c483163a488859e2d4525a2aa46 --- /dev/null +++ b/paddleocr.py @@ -0,0 +1,231 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import sys + +__dir__ = os.path.dirname(__file__) +sys.path.append(os.path.join(__dir__, '')) + +import cv2 +import numpy as np +from pathlib import Path +import tarfile +import requests +from tqdm import tqdm + +from tools.infer import predict_system +from ppocr.utils.utility import initial_logger + +logger = initial_logger() +from ppocr.utils.utility import check_and_read_gif + +__all__ = ['PaddleOCR'] + +model_params = { + 'ch_det_mv3_db': { + 'url': + 'https://paddleocr.bj.bcebos.com/ch_models/ch_det_mv3_db_infer.tar', + 'algorithm': 'DB', + }, + 'ch_rec_mv3_crnn_enhance': { + 'url': + 'https://paddleocr.bj.bcebos.com/ch_models/ch_rec_mv3_crnn_enhance_infer.tar', + 'algorithm': 'CRNN' + }, +} + +SUPPORT_DET_MODEL = ['DB'] +SUPPORT_REC_MODEL = ['Rosetta', 'CRNN', 'STARNet', 'RARE'] + + +def download_with_progressbar(url, save_path): + response = requests.get(url, stream=True) + total_size_in_bytes = int(response.headers.get('content-length', 0)) + block_size = 1024 # 1 Kibibyte + progress_bar = tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True) + with open(save_path, 'wb') as file: + for data in response.iter_content(block_size): + progress_bar.update(len(data)) + file.write(data) + progress_bar.close() + if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes: + logger.error("ERROR, something went wrong") + sys.exit(0) + + +def download_and_unzip(url, model_storage_directory): + tmp_path = os.path.join(model_storage_directory, url.split('/')[-1]) + print('download {} to {}'.format(url, tmp_path)) + os.makedirs(model_storage_directory, exist_ok=True) + download_with_progressbar(url, tmp_path) + with tarfile.open(tmp_path, 'r') as tarObj: + for filename in tarObj.getnames(): + tarObj.extract(filename, model_storage_directory) + os.remove(tmp_path) + + +def maybe_download(model_storage_directory, model_name, mode='det'): + algorithm = None + # using custom model + if os.path.exists(os.path.join(model_name, 'model')) and 
os.path.exists( + os.path.join(model_name, 'params')): + return model_name, algorithm + # using the model of paddleocr + model_path = os.path.join(model_storage_directory, model_name) + if not os.path.exists(os.path.join(model_path, + 'model')) or not os.path.exists( + os.path.join(model_path, 'params')): + assert model_name in model_params, 'model must in {}'.format( + model_params.keys()) + download_and_unzip(model_params[model_name]['url'], + model_storage_directory) + algorithm = model_params[model_name]['algorithm'] + return model_path, algorithm + + +def parse_args(): + import argparse + + def str2bool(v): + return v.lower() in ("true", "t", "1") + + parser = argparse.ArgumentParser() + # params for prediction engine + parser.add_argument("--use_gpu", type=str2bool, default=True) + parser.add_argument("--ir_optim", type=str2bool, default=True) + parser.add_argument("--use_tensorrt", type=str2bool, default=False) + parser.add_argument("--gpu_mem", type=int, default=8000) + + # params for text detector + parser.add_argument("--image_dir", type=str) + parser.add_argument("--det_algorithm", type=str, default='DB') + parser.add_argument("--det_model_name", type=str, default='ch_det_mv3_db') + parser.add_argument("--det_max_side_len", type=float, default=960) + + # DB parmas + parser.add_argument("--det_db_thresh", type=float, default=0.3) + parser.add_argument("--det_db_box_thresh", type=float, default=0.5) + parser.add_argument("--det_db_unclip_ratio", type=float, default=2.0) + + # EAST parmas + parser.add_argument("--det_east_score_thresh", type=float, default=0.8) + parser.add_argument("--det_east_cover_thresh", type=float, default=0.1) + parser.add_argument("--det_east_nms_thresh", type=float, default=0.2) + + # params for text recognizer + parser.add_argument("--rec_algorithm", type=str, default='CRNN') + parser.add_argument( + "--rec_model_name", type=str, default='ch_rec_mv3_crnn_enhance') + parser.add_argument("--rec_image_shape", type=str, default="3, 
32, 320") + parser.add_argument("--rec_char_type", type=str, default='ch') + parser.add_argument("--rec_batch_num", type=int, default=30) + parser.add_argument( + "--rec_char_dict_path", + type=str, + default="./ppocr/utils/ppocr_keys_v1.txt") + parser.add_argument("--use_space_char", type=bool, default=True) + parser.add_argument("--enable_mkldnn", type=bool, default=False) + + parser.add_argument("--model_storage_directory", type=str, default=False) + parser.add_argument("--det", type=str2bool, default=True) + parser.add_argument("--rec", type=str2bool, default=True) + return parser.parse_args() + + +class PaddleOCR(predict_system.TextSystem): + def __init__(self, + det_model_name='ch_det_mv3_db', + rec_model_name='ch_rec_mv3_crnn_enhance', + model_storage_directory=None, + log_level=20, + **kwargs): + """ + paddleocr package + args: + det_model_name: det_model name, keep same with filename in paddleocr. default is ch_det_mv3_db + det_model_name: rec_model name, keep same with filename in paddleocr. default is ch_rec_mv3_crnn_enhance + model_storage_directory: model save path. 
def ocr(self, img, det=True, rec=True):
    """
    Run OCR on one image (or, for recognition only, on a list of images).

    args:
        img: a numpy ndarray, the path of an image file (str), or a list.
            A list is only accepted together with det=False; it is handed
            to the text recognizer unchanged.
        det: use text detection or not. If False, only recognition is
            executed, whatever the value of rec. default is True
        rec: use text recognition or not. If False while det is True,
            only detection is executed. default is True
    return:
        det and rec: list of [box, rec_result], box converted with tolist()
        det only:    list of boxes, each converted with tolist()
        det=False:   the result returned by the text recognizer
        None when the image file cannot be loaded, when a list is given
        with det=True, or when detection returns None instead of boxes.
    """
    assert isinstance(img, (np.ndarray, list, str)), \
        'img must be a numpy.ndarray, a list or a str'
    if isinstance(img, list) and det:
        # Only the recognition-only branch below wraps/accepts a list.
        # NOTE(review): the detection branches appear to expect a single
        # image - confirm against the detector before relaxing this.
        logger.error('When input a list of images, det must be false')
        return None
    if isinstance(img, str):
        image_file = img
        # try the gif reader first, fall back to OpenCV for other formats
        img, flag = check_and_read_gif(image_file)
        if not flag:
            img = cv2.imread(image_file)
        if img is None:
            logger.error("error in loading image:{}".format(image_file))
            return None
    if det and rec:
        dt_boxes, rec_res = self.__call__(img)
        # same guard as the detection-only branch: no boxes, no result
        if dt_boxes is None:
            return None
        return [[box.tolist(), res] for box, res in zip(dt_boxes, rec_res)]
    if det:
        dt_boxes, _ = self.text_detector(img)
        if dt_boxes is None:
            return None
        return [box.tolist() for box in dt_boxes]
    # recognition only: the recognizer is always called with a list
    if not isinstance(img, list):
        img = [img]
    rec_res, _ = self.text_recognizer(img)
    return rec_res
from setuptools import setup
from io import open

# Read the runtime dependencies, one requirement per line. Lines are
# stripped and blank ones dropped so that no trailing newline or empty
# entry ends up in install_requires.
with open('requirments.txt', encoding="utf-8-sig") as f:
    requirements = [line.strip() for line in f if line.strip()]
# tqdm is needed at runtime; add it only when the requirements file does
# not already list it, to avoid declaring the dependency twice.
if 'tqdm' not in requirements:
    requirements.append('tqdm')


def readme():
    """Return the English wheel guide, used as the long description."""
    with open('doc/doc_en/whl.md', encoding="utf-8-sig") as f:
        README = f.read()
    return README


setup(
    name='paddleocr',
    packages=['paddleocr'],
    # the repository root itself is the `paddleocr` package
    package_dir={'paddleocr': ''},
    include_package_data=True,
    entry_points={"console_scripts": ["paddleocr= paddleocr.paddleocr:main"]},
    version='0.0.3',
    install_requires=requirements,
    license='Apache License 2.0',
    description='Awesome OCR toolkits based on PaddlePaddle (8.6M ultra-lightweight pre-trained model, support training and deployment among server, mobile, embedded and IoT devices)',
    long_description=readme(),
    long_description_content_type='text/markdown',
    author='Baidu PaddlePaddle',
    url='https://github.com/PaddlePaddle/PaddleOCR',
    download_url='https://github.com/PaddlePaddle/PaddleOCR.git',
    keywords=[
        'ocr textdetection textrecognition paddleocr crnn east star-net rosetta ocrlite db chineseocr chinesetextdetection chinesetextrecognition'
    ],
    classifiers=[
        'Intended Audience :: Developers',
        'Operating System :: OS Independent',
        'Natural Language :: Chinese (Simplified)',
        'Programming Language :: Python',
        'Programming Language :: Python :: 2',
        'Programming Language :: Python :: 2.5',
        'Programming Language :: Python :: 2.6',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.2',
        'Programming Language :: Python :: 3.3',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Topic :: Utilities',
    ], )
b/tools/infer/utility.py @@ -134,7 +134,12 @@ def resize_img(img, input_size=600): return im -def draw_ocr(image, boxes, txts, scores, draw_txt=True, drop_score=0.5): +def draw_ocr(image, + boxes, + txts=None, + scores=None, + drop_score=0.5, + font_path="./doc/simfang.ttf"): """ Visualize the results of OCR detection and recognition args: @@ -142,23 +147,29 @@ def draw_ocr(image, boxes, txts, scores, draw_txt=True, drop_score=0.5): boxes(list): boxes with shape(N, 4, 2) txts(list): the texts scores(list): txxs corresponding scores - draw_txt(bool): whether draw text or not drop_score(float): only scores greater than drop_threshold will be visualized + font_path: the path of font which is used to draw text return(array): the visualized img """ if scores is None: scores = [1] * len(boxes) - for (box, score) in zip(boxes, scores): - if score < drop_score or math.isnan(score): + box_num = len(boxes) + for i in range(box_num): + if scores is not None and (scores[i] < drop_score or + math.isnan(scores[i])): continue - box = np.reshape(np.array(box), [-1, 1, 2]).astype(np.int64) + box = np.reshape(np.array(boxes[i]), [-1, 1, 2]).astype(np.int64) image = cv2.polylines(np.array(image), [box], True, (255, 0, 0), 2) - - if draw_txt: + if txts is not None: img = np.array(resize_img(image, input_size=600)) txt_img = text_visual( - txts, scores, img_h=img.shape[0], img_w=600, threshold=drop_score) + txts, + scores, + img_h=img.shape[0], + img_w=600, + threshold=drop_score, + font_path=font_path) img = np.concatenate([np.array(img), np.array(txt_img)], axis=1) return img return image @@ -236,7 +247,12 @@ def str_count(s): return s_len - math.ceil(en_dg_count / 2) -def text_visual(texts, scores, img_h=400, img_w=600, threshold=0.): +def text_visual(texts, + scores, + img_h=400, + img_w=600, + threshold=0., + font_path="./doc/simfang.ttf"): """ create new blank img and draw txt on it args: @@ -244,6 +260,7 @@ def text_visual(texts, scores, img_h=400, img_w=600, threshold=0.): 
scores(list|None): corresponding score of each txt img_h(int): the height of blank img img_w(int): the width of blank img + font_path: the path of font which is used to draw text return(array): """ @@ -262,7 +279,7 @@ def text_visual(texts, scores, img_h=400, img_w=600, threshold=0.): font_size = 20 txt_color = (0, 0, 0) - font = ImageFont.truetype("./doc/simfang.ttf", font_size, encoding="utf-8") + font = ImageFont.truetype(font_path, font_size, encoding="utf-8") gap = font_size + 5 txt_img_list = [] @@ -343,6 +360,6 @@ if __name__ == '__main__': txts.append(dic['transcription']) scores.append(round(dic['scores'], 3)) - new_img = draw_ocr(image, boxes, txts, scores, draw_txt=True) + new_img = draw_ocr(image, boxes, txts, scores) cv2.imwrite(img_name, new_img)