From a5694968084a2c24e536d1eb85b8255963ec0fce Mon Sep 17 00:00:00 2001 From: WenmuZhou Date: Tue, 3 Aug 2021 15:37:32 +0800 Subject: [PATCH] fix bug when running inference on a network image --- __init__.py | 2 +- doc/doc_ch/whl.md | 94 ++++++++++++++++++++++++++++++++++++-------- doc/doc_en/whl_en.md | 6 ++- paddleocr.py | 2 +- 4 files changed, 84 insertions(+), 20 deletions(-) diff --git a/__init__.py b/__init__.py index 504aeca6..e22e466a 100644 --- a/__init__.py +++ b/__init__.py @@ -15,4 +15,4 @@ import paddleocr from .paddleocr import * __version__ = paddleocr.VERSION -__all__ = ['PaddleOCR', 'PPStructure', 'draw_ocr', 'draw_structure_result', 'save_structure_res'] +__all__ = ['PaddleOCR', 'PPStructure', 'draw_ocr', 'draw_structure_result', 'save_structure_res','download_with_progressbar'] diff --git a/doc/doc_ch/whl.md b/doc/doc_ch/whl.md index edacb5b2..167ed7b2 100644 --- a/doc/doc_ch/whl.md +++ b/doc/doc_ch/whl.md @@ -5,26 +5,32 @@ ### 1.1 安装whl包 pip安装 + ```bash pip install "paddleocr>=2.0.1" # 推荐使用2.0.1+版本 ``` 本地构建并安装 + ```bash python3 setup.py bdist_wheel pip3 install dist/paddleocr-x.x.x-py3-none-any.whl # x.x.x是paddleocr的版本号 ``` ## 2 使用 + ### 2.1 代码使用 + paddleocr whl包会自动下载ppocr轻量级模型作为默认模型,可以根据第3节**自定义模型**进行自定义更换。 * 检测+方向分类器+识别全流程 + ```python from paddleocr import PaddleOCR, draw_ocr + # Paddleocr目前支持中英文、英文、法语、德语、韩语、日语,可以通过修改lang参数进行切换 # 参数依次为`ch`, `en`, `french`, `german`, `korean`, `japan`。 -ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to download and load model into memory +ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to download and load model into memory img_path = 'PaddleOCR/doc/imgs/11.jpg' result = ocr.ocr(img_path, cls=True) for line in result: @@ -32,6 +38,7 @@ for line in result: # 显示结果 from PIL import Image + image = Image.open(img_path).convert('RGB') boxes = [line[0] for line in result] txts = [line[1][0] for line in result] @@ -40,31 +47,36 @@ im_show = draw_ocr(image, boxes, txts, scores, 
font_path='/path/to/PaddleOCR/doc im_show = Image.fromarray(im_show) im_show.save('result.jpg') ``` + 结果是一个list,每个item包含了文本框,文字和识别置信度 + ```bash [[[24.0, 36.0], [304.0, 34.0], [304.0, 72.0], [24.0, 74.0]], ['纯臻营养护发素', 0.964739]] [[[24.0, 80.0], [172.0, 80.0], [172.0, 104.0], [24.0, 104.0]], ['产品信息/参数', 0.98069626]] [[[24.0, 109.0], [333.0, 109.0], [333.0, 136.0], [24.0, 136.0]], ['(45元/每公斤,100公斤起订)', 0.9676722]] ...... ``` + 结果可视化
- * 检测+识别 + ```python from paddleocr import PaddleOCR, draw_ocr -ocr = PaddleOCR() # need to run only once to download and load model into memory + +ocr = PaddleOCR() # need to run only once to download and load model into memory img_path = 'PaddleOCR/doc/imgs/11.jpg' -result = ocr.ocr(img_path,cls=False) +result = ocr.ocr(img_path, cls=False) for line in result: print(line) # 显示结果 from PIL import Image + image = Image.open(img_path).convert('RGB') boxes = [line[0] for line in result] txts = [line[1][0] for line in result] @@ -73,38 +85,46 @@ im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc im_show = Image.fromarray(im_show) im_show.save('result.jpg') ``` + 结果是一个list,每个item包含了文本框,文字和识别置信度 + ```bash [[[24.0, 36.0], [304.0, 34.0], [304.0, 72.0], [24.0, 74.0]], ['纯臻营养护发素', 0.964739]] [[[24.0, 80.0], [172.0, 80.0], [172.0, 104.0], [24.0, 104.0]], ['产品信息/参数', 0.98069626]] [[[24.0, 109.0], [333.0, 109.0], [333.0, 136.0], [24.0, 136.0]], ['(45元/每公斤,100公斤起订)', 0.9676722]] ...... ``` + 结果可视化
- * 方向分类器+识别 + ```python from paddleocr import PaddleOCR -ocr = PaddleOCR(use_angle_cls=True) # need to run only once to download and load model into memory + +ocr = PaddleOCR(use_angle_cls=True) # need to run only once to download and load model into memory img_path = 'PaddleOCR/doc/imgs_words/ch/word_1.jpg' result = ocr.ocr(img_path, det=False, cls=True) for line in result: print(line) ``` + 结果是一个list,每个item只包含识别结果和识别置信度 + ```bash ['韩国小馆', 0.9907421] ``` * 单独执行检测 + ```python from paddleocr import PaddleOCR, draw_ocr -ocr = PaddleOCR() # need to run only once to download and load model into memory + +ocr = PaddleOCR() # need to run only once to download and load model into memory img_path = 'PaddleOCR/doc/imgs/11.jpg' result = ocr.ocr(img_path, rec=False) for line in result: @@ -118,13 +138,16 @@ im_show = draw_ocr(image, result, txts=None, scores=None, font_path='/path/to/Pa im_show = Image.fromarray(im_show) im_show.save('result.jpg') ``` + 结果是一个list,每个item只包含文本框 + ```bash [[26.0, 457.0], [137.0, 457.0], [137.0, 477.0], [26.0, 477.0]] [[25.0, 425.0], [372.0, 425.0], [372.0, 448.0], [25.0, 448.0]] [[128.0, 397.0], [273.0, 397.0], [273.0, 414.0], [128.0, 414.0]] ...... 
``` + 结果可视化 @@ -133,29 +156,37 @@ im_show.save('result.jpg') * 单独执行识别 + ```python from paddleocr import PaddleOCR -ocr = PaddleOCR() # need to run only once to download and load model into memory + +ocr = PaddleOCR() # need to run only once to download and load model into memory img_path = 'PaddleOCR/doc/imgs_words/ch/word_1.jpg' result = ocr.ocr(img_path, det=False) for line in result: print(line) ``` + 结果是一个list,每个item只包含识别结果和识别置信度 + ```bash ['韩国小馆', 0.9907421] ``` * 单独执行方向分类器 + ```python from paddleocr import PaddleOCR -ocr = PaddleOCR(use_angle_cls=True) # need to run only once to download and load model into memory + +ocr = PaddleOCR(use_angle_cls=True) # need to run only once to download and load model into memory img_path = 'PaddleOCR/doc/imgs_words/ch/word_1.jpg' result = ocr.ocr(img_path, det=False, rec=False, cls=True) for line in result: print(line) ``` + 结果是一个list,每个item只包含分类结果和分类置信度 + ```bash ['0', 0.9999924] ``` @@ -163,15 +194,19 @@ for line in result: ### 2.2 通过命令行使用 查看帮助信息 + ```bash paddleocr -h ``` * 检测+方向分类器+识别全流程 + ```bash paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --use_angle_cls true ``` + 结果是一个list,每个item包含了文本框,文字和识别置信度 + ```bash [[[24.0, 36.0], [304.0, 34.0], [304.0, 72.0], [24.0, 74.0]], ['纯臻营养护发素', 0.964739]] [[[24.0, 80.0], [172.0, 80.0], [172.0, 104.0], [24.0, 104.0]], ['产品信息/参数', 0.98069626]] @@ -180,10 +215,13 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --use_angle_cls true ``` * 检测+识别 + ```bash paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg ``` + 结果是一个list,每个item包含了文本框,文字和识别置信度 + ```bash [[[24.0, 36.0], [304.0, 34.0], [304.0, 72.0], [24.0, 74.0]], ['纯臻营养护发素', 0.964739]] [[[24.0, 80.0], [172.0, 80.0], [172.0, 104.0], [24.0, 104.0]], ['产品信息/参数', 0.98069626]] @@ -192,20 +230,25 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg ``` * 方向分类器+识别 + ```bash paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --use_angle_cls true --det false ``` 结果是一个list,每个item只包含识别结果和识别置信度 + ```bash ['韩国小馆', 0.9907421] ``` * 单独执行检测 + 
```bash paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --rec false ``` + 结果是一个list,每个item只包含文本框 + ```bash [[26.0, 457.0], [137.0, 457.0], [137.0, 477.0], [26.0, 477.0]] [[25.0, 425.0], [372.0, 425.0], [372.0, 448.0], [25.0, 448.0]] @@ -214,34 +257,42 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --rec false ``` * 单独执行识别 + ```bash paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --det false ``` 结果是一个list,每个item只包含识别结果和识别置信度 + ```bash ['韩国小馆', 0.9907421] ``` * 单独执行方向分类器 + ```bash paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --use_angle_cls true --det false --rec false ``` 结果是一个list,每个item只包含分类结果和分类置信度 + ```bash ['0', 0.9999924] ``` ## 3 自定义模型 -当内置模型无法满足需求时,需要使用到自己训练的模型。 -首先,参照[inference.md](./inference.md) 第一节转换将检测、分类和识别模型转换为inference模型,然后按照如下方式使用 + +当内置模型无法满足需求时,需要使用到自己训练的模型。 首先,参照[inference.md](./inference.md) 第一节转换将检测、分类和识别模型转换为inference模型,然后按照如下方式使用 ### 3.1 代码使用 + ```python from paddleocr import PaddleOCR, draw_ocr + # 模型路径下必须含有model和params文件 -ocr = PaddleOCR(det_model_dir='{your_det_model_dir}', rec_model_dir='{your_rec_model_dir}', rec_char_dict_path='{your_rec_char_dict_path}', cls_model_dir='{your_cls_model_dir}', use_angle_cls=True) +ocr = PaddleOCR(det_model_dir='{your_det_model_dir}', rec_model_dir='{your_rec_model_dir}', + rec_char_dict_path='{your_rec_char_dict_path}', cls_model_dir='{your_cls_model_dir}', + use_angle_cls=True) img_path = 'PaddleOCR/doc/imgs/11.jpg' result = ocr.ocr(img_path, cls=True) for line in result: @@ -249,6 +300,7 @@ for line in result: # 显示结果 from PIL import Image + image = Image.open(img_path).convert('RGB') boxes = [line[0] for line in result] txts = [line[1][0] for line in result] @@ -269,11 +321,13 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_ ### 4.1 网络图片 - 代码使用 + ```python -from paddleocr import PaddleOCR, draw_ocr +from paddleocr import PaddleOCR, draw_ocr, download_with_progressbar + # Paddleocr目前支持中英文、英文、法语、德语、韩语、日语,可以通过修改lang参数进行切换 # 参数依次为`ch`, 
`en`, `french`, `german`, `korean`, `japan`。 -ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to download and load model into memory +ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to download and load model into memory img_path = 'http://n.sinaimg.cn/ent/transform/w630h933/20171222/o111-fypvuqf1838418.jpg' result = ocr.ocr(img_path, cls=True) for line in result: @@ -281,7 +335,9 @@ for line in result: # 显示结果 from PIL import Image -image = Image.open(img_path).convert('RGB') + +download_with_progressbar(img_path, 'tmp.jpg') +image = Image.open('tmp.jpg').convert('RGB') boxes = [line[0] for line in result] txts = [line[1][0] for line in result] scores = [line[1][1] for line in result] @@ -289,19 +345,24 @@ im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc im_show = Image.fromarray(im_show) im_show.save('result.jpg') ``` + - 命令行模式 + ```bash paddleocr --image_dir http://n.sinaimg.cn/ent/transform/w630h933/20171222/o111-fypvuqf1838418.jpg --use_angle_cls=true ``` ### 4.2 numpy数组 + 仅通过代码使用时支持numpy数组作为输入 + ```python import cv2 from paddleocr import PaddleOCR, draw_ocr + # Paddleocr目前支持中英文、英文、法语、德语、韩语、日语,可以通过修改lang参数进行切换 # 参数依次为`ch`, `en`, `french`, `german`, `korean`, `japan`。 -ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to download and load model into memory +ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to download and load model into memory img_path = 'PaddleOCR/doc/imgs/11.jpg' img = cv2.imread(img_path) # img = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY), 如果你自己训练的模型支持灰度图,可以将这句话的注释取消 @@ -311,6 +372,7 @@ for line in result: # 显示结果 from PIL import Image + image = Image.open(img_path).convert('RGB') boxes = [line[0] for line in result] txts = [line[1][0] for line in result] diff --git a/doc/doc_en/whl_en.md b/doc/doc_en/whl_en.md index 79bc9b0b..c8c8353a 100644 --- a/doc/doc_en/whl_en.md +++ b/doc/doc_en/whl_en.md @@ -306,7 +306,7 @@ Support numpy 
array as input only when used by code ```python import cv2 -from paddleocr import PaddleOCR, draw_ocr +from paddleocr import PaddleOCR, draw_ocr, download_with_progressbar ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to download and load model into memory img_path = 'PaddleOCR/doc/imgs/11.jpg' img = cv2.imread(img_path) @@ -317,7 +317,9 @@ for line in result: # show result from PIL import Image -image = Image.open(img_path).convert('RGB') + +download_with_progressbar(img_path, 'tmp.jpg') +image = Image.open('tmp.jpg').convert('RGB') boxes = [line[0] for line in result] txts = [line[1][0] for line in result] scores = [line[1][1] for line in result] diff --git a/paddleocr.py b/paddleocr.py index 5ae812fa..c52737f5 100644 --- a/paddleocr.py +++ b/paddleocr.py @@ -33,7 +33,7 @@ from tools.infer.utility import draw_ocr, str2bool from ppstructure.utility import init_args, draw_structure_result from ppstructure.predict_system import OCRSystem, save_structure_res -__all__ = ['PaddleOCR', 'PPStructure', 'draw_ocr', 'draw_structure_result', 'save_structure_res'] +__all__ = ['PaddleOCR', 'PPStructure', 'draw_ocr', 'draw_structure_result', 'save_structure_res','download_with_progressbar'] model_urls = { 'det': { -- GitLab