diff --git a/README.md b/README.md index 95326fa08cc5c55a8b9932146a35a1cba800b715..4814fbb9e857c9bcbb1513c5622500454477b942 100644 --- a/README.md +++ b/README.md @@ -212,3 +212,4 @@ We welcome all the contributions to PaddleOCR and appreciate for your feedback v - Many thanks to [lyl120117](https://github.com/lyl120117) for contributing the code for printing the network structure. - Thanks [xiangyubo](https://github.com/xiangyubo) for contributing the handwritten Chinese OCR datasets. - Thanks [authorfu](https://github.com/authorfu) for contributing Android demo and [xiadeye](https://github.com/xiadeye) contributing iOS demo, respectively. +- Thanks [BeyondYourself](https://github.com/BeyondYourself) for contributing many great suggestions and simplifying part of the code style. diff --git a/README_cn.md b/README_cn.md index 47bddd5d6793b97a63eb037860bf59aac4dd0ed9..cc5cb00a38d97fd3dba46b30a76f2dc606e8d027 100644 --- a/README_cn.md +++ b/README_cn.md @@ -32,7 +32,7 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力 上图是超轻量级中文OCR模型效果展示,更多效果图请见[效果展示页面](./doc/doc_ch/visualization.md)。 - 超轻量级中文OCR在线体验地址:https://www.paddlepaddle.org.cn/hub/scene/ocr -- 移动端DEMO体验(基于EasyEdge和Paddle-Lite, 支持iOS和Android系统):[安装包二维码获取地址](https://ai.baidu.com/easyedge/app/openSource?from=paddlelite) +- 移动端DEMO体验(基于EasyEdge和Paddle-Lite, 支持iOS和Android系统):[安装包二维码获取地址](https://ai.baidu.com/easyedge/app/openSource?from=paddlelite) Android手机也可以扫描下面二维码安装体验。 @@ -205,8 +205,9 @@ PaddleOCR文本识别算法的训练和使用请参考文档教程中[模型训 ## 贡献代码 我们非常欢迎你为PaddleOCR贡献代码,也十分感谢你的反馈。 -- 非常感谢 [Khanh Tran](https://github.com/xxxpsyduck) 贡献了英文文档。 +- 非常感谢 [Khanh Tran](https://github.com/xxxpsyduck) 贡献了英文文档 - 非常感谢 [zhangxin](https://github.com/ZhangXinNan)([Blog](https://blog.csdn.net/sdlypyzq)) 贡献新的可视化方式、添加.gitgnore、处理手动设置PYTHONPATH环境变量的问题 - 非常感谢 [lyl120117](https://github.com/lyl120117) 贡献打印网络结构的代码 - 非常感谢 [xiangyubo](https://github.com/xiangyubo) 贡献手写中文OCR数据集 - 非常感谢 [authorfu](https://github.com/authorfu) 
贡献Android和[xiadeye](https://github.com/xiadeye) 贡献IOS的demo代码 +- 非常感谢 [BeyondYourself](https://github.com/BeyondYourself) 给PaddleOCR提了很多非常棒的建议,并简化了PaddleOCR的部分代码风格。 diff --git a/deploy/android_demo/README.md b/deploy/android_demo/README.md index 4d85dee99ab3616594b4ff3a17acb97a6267b12d..e35e757914aa355c97293662652b1e02676e32eb 100644 --- a/deploy/android_demo/README.md +++ b/deploy/android_demo/README.md @@ -1,6 +1,6 @@ # 如何快速测试 ### 1. 安装最新版本的Android Studio -可以从https://developer.android.com/studio下载。本Demo使用是4.0版本Android Studio编写。 +可以从https://developer.android.com/studio 下载。本Demo使用是4.0版本Android Studio编写。 ### 2. 按照NDK 20 以上版本 Demo测试的时候使用的是NDK 20b版本,20版本以上均可以支持编译成功。 diff --git a/doc/doc_ch/installation.md b/doc/doc_ch/installation.md index 7a51c5616c470e58ef4f186c4e3c809cf181e494..226af740270788ca99e886d7cdde0419e8703008 100644 --- a/doc/doc_ch/installation.md +++ b/doc/doc_ch/installation.md @@ -7,7 +7,7 @@ PaddleOCR 工作环境 - glibc 2.23 - cuDNN 7.6+ (GPU) -建议使用我们提供的docker运行PaddleOCR,有关docker使用请参考[链接](https://docs.docker.com/get-started/)。 +建议使用我们提供的docker运行PaddleOCR,有关docker、nvidia-docker使用请参考[链接](https://docs.docker.com/get-started/)。 *如您希望使用 mac 或 windows直接运行预测代码,可以从第2步开始执行。* diff --git a/doc/doc_ch/recognition.md b/doc/doc_ch/recognition.md index b23837bedeae7368f750cbd1f1413c189abd6923..eda456c5c5d7573fd89de2a8ac0c1042a5b3a59b 100644 --- a/doc/doc_ch/recognition.md +++ b/doc/doc_ch/recognition.md @@ -21,12 +21,11 @@ ln -sf /train_data/dataset * 使用自己数据集: 若您希望使用自己的数据进行训练,请参考下文组织您的数据。 - - 训练集 首先请将训练图片放入同一个文件夹(train_images),并用一个txt文件(rec_gt_train.txt)记录图片路径和标签。 -* 注意: 默认请将图片路径和图片标签用 \t 分割,如用其他方式分割将造成训练报错 +**注意:** 默认请将图片路径和图片标签用 \t 分割,如用其他方式分割将造成训练报错 ``` " 图像文件名 图像标注信息 " @@ -41,12 +40,9 @@ PaddleOCR 提供了一份用于训练 icdar2015 数据集的标签文件,通 wget -P ./train_data/ic15_data https://paddleocr.bj.bcebos.com/dataset/rec_gt_train.txt # 测试集标签 wget -P ./train_data/ic15_data https://paddleocr.bj.bcebos.com/dataset/rec_gt_test.txt - - ``` 最终训练集应有如下文件结构: - ``` |-train_data |-ic15_data @@ 
-150,7 +146,7 @@ PaddleOCR支持训练和评估交替进行, 可以在 `configs/rec/rec_icdar15_t 如果验证集很大,测试将会比较耗时,建议减少评估次数,或训练完再进行评估。 -* 提示: 可通过 -c 参数选择 `configs/rec/` 路径下的多种模型配置进行训练,PaddleOCR支持的识别算法有: +**提示:** 可通过 -c 参数选择 `configs/rec/` 路径下的多种模型配置进行训练,PaddleOCR支持的识别算法有: | 配置文件 | 算法名称 | backbone | trans | seq | pred | diff --git a/ppocr/data/det/db_process.py b/ppocr/data/det/db_process.py index b64b8c8d227f293aff0eff90d1d85dee8dd85fce..9534c59ef69d830a8d991f421539c5e4e5bb3d39 100644 --- a/ppocr/data/det/db_process.py +++ b/ppocr/data/det/db_process.py @@ -17,7 +17,7 @@ import cv2 import numpy as np import json import sys -from ppocr.utils.utility import initial_logger +from ppocr.utils.utility import initial_logger, check_and_read_gif logger = initial_logger() from .data_augment import AugmentData @@ -100,7 +100,9 @@ class DBProcessTrain(object): def __call__(self, label_infor): img_path, gt_label = self.convert_label_infor(label_infor) - imgvalue = cv2.imread(img_path) + imgvalue, flag = check_and_read_gif(img_path) + if not flag: + imgvalue = cv2.imread(img_path) if imgvalue is None: logger.info("{} does not exist!".format(img_path)) return None diff --git a/ppocr/data/rec/dataset_traversal.py b/ppocr/data/rec/dataset_traversal.py index 510a028451302a92ebc179792ecbcb1ff8649807..1e9dbb9c1db64ea4e572e321de3d6d624b29a3a9 100755 --- a/ppocr/data/rec/dataset_traversal.py +++ b/ppocr/data/rec/dataset_traversal.py @@ -233,7 +233,7 @@ class SimpleReader(object): img_num = len(label_infor_list) img_id_list = list(range(img_num)) random.shuffle(img_id_list) - if sys.platform == "win32": + if sys.platform == "win32" and self.num_workers != 1: print("multiprocess is not fully compatible with Windows." 
def check_and_read_gif(img_path):
    """Read the first frame of a GIF file, if ``img_path`` names one.

    A path is treated as a GIF when the last three characters of its base
    name are ``gif`` or ``GIF``. Any other path is left untouched so the
    caller can load it with its usual reader (the call sites fall back to
    ``cv2.imread`` when the returned flag is False).

    Args:
        img_path: Path of the image file to inspect.

    Returns:
        A ``(image, flag)`` tuple. ``flag`` is True only when the path was
        handled as a GIF and its first frame was decoded; ``image`` is then
        that frame with its last (channel) axis reversed. In every other
        case the result is ``(None, False)``.
    """
    if os.path.basename(img_path)[-3:] not in ('gif', 'GIF'):
        return None, False

    gif = cv2.VideoCapture(img_path)
    try:
        ret, frame = gif.read()
    finally:
        # Free the capture handle whether or not a frame was decoded;
        # otherwise one handle leaks per GIF processed.
        gif.release()

    if not ret:
        # The path must be passed to format(); without it the log shows a
        # literal "{}" and the unreadable file cannot be identified.
        logging.info("Cannot read {}. This gif image maybe corrupted.".format(
            img_path))
        return None, False

    if len(frame.shape) == 2 or frame.shape[-1] == 1:
        # Expand single-channel frames to three channels.
        frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
    # NOTE(review): the channel axis is reversed here, while callers treat
    # the result interchangeably with cv2.imread output -- presumably this
    # yields RGB where the rest of the pipeline expects BGR; confirm before
    # changing, since existing callers may rely on the current ordering.
    imgvalue = frame[:, :, ::-1]
    return imgvalue, True
image_file_list: - img = cv2.imread(image_file, cv2.IMREAD_COLOR) + img, flag = check_and_read_gif(image_file) + if not flag: + img = cv2.imread(image_file) if img is None: logger.info("error in loading image:{}".format(image_file)) continue diff --git a/tools/infer/predict_system.py b/tools/infer/predict_system.py index 1b7aff1ca8213633b7eff73e13d0ea75eeaedc0b..f8a62679bc17d10380983319a3f239d4a7339646 100755 --- a/tools/infer/predict_system.py +++ b/tools/infer/predict_system.py @@ -27,7 +27,7 @@ import copy import numpy as np import math import time -from ppocr.utils.utility import get_image_file_list +from ppocr.utils.utility import get_image_file_list, check_and_read_gif from PIL import Image from tools.infer.utility import draw_ocr from tools.infer.utility import draw_ocr_box_txt @@ -49,18 +49,23 @@ class TextSystem(object): points[:, 0] = points[:, 0] - left points[:, 1] = points[:, 1] - top ''' - img_crop_width = int(max(np.linalg.norm(points[0] - points[1]), - np.linalg.norm(points[2] - points[3]))) - img_crop_height = int(max(np.linalg.norm(points[0] - points[3]), - np.linalg.norm(points[1] - points[2]))) - pts_std = np.float32([[0, 0], - [img_crop_width, 0], + img_crop_width = int( + max( + np.linalg.norm(points[0] - points[1]), + np.linalg.norm(points[2] - points[3]))) + img_crop_height = int( + max( + np.linalg.norm(points[0] - points[3]), + np.linalg.norm(points[1] - points[2]))) + pts_std = np.float32([[0, 0], [img_crop_width, 0], [img_crop_width, img_crop_height], [0, img_crop_height]]) M = cv2.getPerspectiveTransform(points, pts_std) - dst_img = cv2.warpPerspective(img, M, (img_crop_width, img_crop_height), - borderMode=cv2.BORDER_REPLICATE, - flags=cv2.INTER_CUBIC) + dst_img = cv2.warpPerspective( + img, + M, (img_crop_width, img_crop_height), + borderMode=cv2.BORDER_REPLICATE, + flags=cv2.INTER_CUBIC) dst_img_height, dst_img_width = dst_img.shape[0:2] if dst_img_height * 1.0 / dst_img_width >= 1.5: dst_img = np.rot90(dst_img) @@ -119,25 +124,27 @@ 
def main(args): is_visualize = True tackle_img_num = 0 for image_file in image_file_list: - img = cv2.imread(image_file) + img, flag = check_and_read_gif(image_file) + if not flag: + img = cv2.imread(image_file) if img is None: logger.info("error in loading image:{}".format(image_file)) continue starttime = time.time() - tackle_img_num += 1 - if not args.use_gpu and args.enable_mkldnn and tackle_img_num % 30 == 0: + tackle_img_num += 1 + if not args.use_gpu and args.enable_mkldnn and tackle_img_num % 30 == 0: text_sys = TextSystem(args) dt_boxes, rec_res = text_sys(img) elapse = time.time() - starttime print("Predict time of %s: %.3fs" % (image_file, elapse)) + + drop_score = 0.5 dt_num = len(dt_boxes) - dt_boxes_final = [] for dno in range(dt_num): text, score = rec_res[dno] - if score >= 0.5: + if score >= drop_score: text_str = "%s, %.3f" % (text, score) print(text_str) - dt_boxes_final.append(dt_boxes[dno]) if is_visualize: image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) @@ -146,7 +153,12 @@ def main(args): scores = [rec_res[i][1] for i in range(len(rec_res))] draw_img = draw_ocr( - image, boxes, txts, scores, draw_txt=True, drop_score=0.5) + image, + boxes, + txts, + scores, + draw_txt=True, + drop_score=drop_score) draw_img_save = "./inference_results/" if not os.path.exists(draw_img_save): os.makedirs(draw_img_save)