未验证 提交 83beede6 编写于 作者: A andyj 提交者: GitHub

Update check img (#8558)

* add finetune en doc & test=document_fix

* fix dead link & test=document_fix

* fix dead link & test=document_fix

* update check img
上级 c44ae8f4
...@@ -26,6 +26,9 @@ import cv2 ...@@ -26,6 +26,9 @@ import cv2
import logging import logging
import numpy as np import numpy as np
from pathlib import Path from pathlib import Path
import base64
from io import BytesIO
from PIL import Image
tools = importlib.import_module('.', 'tools') tools = importlib.import_module('.', 'tools')
ppocr = importlib.import_module('.', 'ppocr') ppocr = importlib.import_module('.', 'ppocr')
...@@ -431,7 +434,25 @@ def check_img(img): ...@@ -431,7 +434,25 @@ def check_img(img):
img, flag_gif, flag_pdf = check_and_read(image_file) img, flag_gif, flag_pdf = check_and_read(image_file)
if not flag_gif and not flag_pdf: if not flag_gif and not flag_pdf:
with open(image_file, 'rb') as f: with open(image_file, 'rb') as f:
img = img_decode(f.read()) img_str = f.read()
img = img_decode(img_str)
if img is None:
try:
buf = BytesIO()
image = BytesIO(img_str)
im = Image.open(image)
rgb = im.convert('RGB')
rgb.save(buf, 'jpeg')
buf.seek(0)
image_bytes = buf.read()
data_base64 = str(base64.b64encode(image_bytes),
encoding="utf-8")
image_decode = base64.b64decode(data_base64)
img_array = np.frombuffer(image_decode, np.uint8)
img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
except:
logger.error("error in loading image:{}".format(image_file))
return None
if img is None: if img is None:
logger.error("error in loading image:{}".format(image_file)) logger.error("error in loading image:{}".format(image_file))
return None return None
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册