From 2bd552c801b81a0859efcc489d218e9199974f6a Mon Sep 17 00:00:00 2001 From: WilliamQf <40328063+WilliamQf-AI@users.noreply.github.com> Date: Mon, 21 Aug 2023 10:25:47 +0800 Subject: [PATCH] =?UTF-8?q?=E5=AE=9E=E7=8E=B0=E5=8A=9F=E8=83=BD=EF=BC=9A?= =?UTF-8?q?=E5=BD=93--savefile=E4=B8=BAtrue=E6=97=B6=EF=BC=8C=E5=9C=A8--ou?= =?UTF-8?q?tput=E4=B8=8B=E4=BB=A5=E5=BD=93=E5=89=8D=E5=9B=BE=E7=89=87?= =?UTF-8?q?=E5=90=8D=E7=A7=B0=E5=90=8E=E6=8E=A5=E2=80=9C.txt=E2=80=9D?= =?UTF-8?q?=E4=B8=BA=E6=96=87=E4=BB=B6=E5=90=8D=E4=BF=9D=E5=AD=98ocr?= =?UTF-8?q?=E6=8E=A8=E7=90=86=E7=BB=93=E6=9E=9C=EF=BC=8C=E8=A7=A3=E5=86=B3?= =?UTF-8?q?=E4=BA=86issues=EF=BC=9A=20(#10628)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 实现功能:当--savefile为true时,在--output下以当前图片名称后接“.txt”为文件名保存ocr推理结果,解决了issues: https://github.com/PaddlePaddle/PaddleOCR/issues/10533 * 删除了乱码字符 * 1.删除了乱码字符 * 1.删除了乱码字符 --- paddleocr.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/paddleocr.py b/paddleocr.py index 549419a1..c76f09a4 100644 --- a/paddleocr.py +++ b/paddleocr.py @@ -408,6 +408,7 @@ def parse_args(mMain=True): parser.add_argument("--det", type=str2bool, default=True) parser.add_argument("--rec", type=str2bool, default=True) parser.add_argument("--type", type=str, default='ocr') + parser.add_argument("--savefile", type=str2bool, default=False) parser.add_argument( "--ocr_version", type=str, @@ -619,7 +620,7 @@ class PaddleOCR(predict_system.TextSystem): def ocr(self, img, det=True, rec=True, cls=True): """ ocr with paddleocr - args: + args: img: img for ocr, support ndarray, img_path and list or ndarray det: use text detection or not. If false, only rec will be exec. Default is True rec: use text recognition or not. If false, only det will be exec. Default is True @@ -768,10 +769,25 @@ def main(): rec=args.rec, cls=args.use_angle_cls) if result is not None: + lines = [] for idx in range(len(result)): res = result[idx] for line in res: logger.info(line) + val = '[' + for box in line[0]: + val += str(box[0]) + ',' + str(box[1]) + ',' + + val = val[:-1] + val += '],' + line[1][0] + ',' + str(line[1][1]) + '\n' + lines.append(val) + if args.savefile: + if os.path.exists(args.output) is False: + os.mkdir(args.output) + outfile = args.output + '/' + img_name + '.txt' + with open(outfile,'w',encoding='utf-8') as f: + f.writelines(lines) + elif args.type == 'structure': img, flag_gif, flag_pdf = check_and_read(img_path) if not flag_gif and not flag_pdf: -- GitLab