Merge pull request #7478 from ZhangXinNan/zxdev_dyg

增加draw_ocr_box_txt2:解决文本框倾斜时文字画在外边的情况

Merge pull request #7478 from ZhangXinNan/zxdev_dyg
增加draw_ocr_box_txt2:解决文本框倾斜时文字画在外边的情况
2e6a7f1f · Double_V · GitHub · 1323983c · 51d771ce · 2e6a7f1f
隐藏空白更改
内联并排

Showing 1 changed file with 53 additions and 34 deletions

tools/infer/utility.py tools/infer/utility.py +53 -34

未找到文件。
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -23,6 +23,7 @@ from PIL import Image, ImageDraw, ImageFont
 import math
 from paddle import inference
 import time
+import random
 from ppocr.utils.logging import get_logger
@@ -397,56 +398,74 @@ def draw_ocr(image,
 def draw_ocr_box_txt(image,
                     boxes,
-                     txts,
+                     txts=None,
                     scores=None,
                     drop_score=0.5,
-                     font_path="./doc/simfang.ttf"):
+                     font_path="./doc/fonts/simfang.ttf"):
    h, w = image.height, image.width
    img_left = image.copy()
-    img_right = Image.new('RGB', (w, h), (255, 255, 255))
+    img_right = np.ones((h, w, 3), dtype=np.uint8) * 255
-    import random
    random.seed(0)
    draw_left = ImageDraw.Draw(img_left)
-    draw_right = ImageDraw.Draw(img_right)
+    if txts is None or len(txts) != len(boxes):
+        txts = [None] * len(boxes)
    for idx, (box, txt) in enumerate(zip(boxes, txts)):
        if scores is not None and scores[idx] < drop_score:
            continue
-        color = (random.randint(0, 255), random.randint(0, 255),
+        color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
-                 random.randint(0, 255))
        draw_left.polygon(box, fill=color)
-        draw_right.polygon(
+        img_right_text = draw_box_txt_fine((w, h), box, txt, font_path)
-            [
+        pts = np.array(box, np.int32).reshape((-1, 1, 2))
-                box[0][0], box[0][1], box[1][0], box[1][1], box[2][0],
+        cv2.polylines(img_right_text, [pts], True, color, 1)
-                box[2][1], box[3][0], box[3][1]
+        img_right = cv2.bitwise_and(img_right, img_right_text)
-            ],
-            outline=color)
-        box_height = math.sqrt((box[0][0] - box[3][0])**2 + (box[0][1] - box[3][
-            1])**2)
-        box_width = math.sqrt((box[0][0] - box[1][0])**2 + (box[0][1] - box[1][
-            1])**2)
-        if box_height > 2 * box_width:
-            font_size = max(int(box_width * 0.9), 10)
-            font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
-            cur_y = box[0][1]
-            for c in txt:
-                char_size = font.getsize(c)
-                draw_right.text(
-                    (box[0][0] + 3, cur_y), c, fill=(0, 0, 0), font=font)
-                cur_y += char_size[1]
-        else:
-            font_size = max(int(box_height * 0.8), 10)
-            font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
-            draw_right.text(
-                [box[0][0], box[0][1]], txt, fill=(0, 0, 0), font=font)
    img_left = Image.blend(image, img_left, 0.5)
    img_show = Image.new('RGB', (w * 2, h), (255, 255, 255))
    img_show.paste(img_left, (0, 0, w, h))
-    img_show.paste(img_right, (w, 0, w * 2, h))
+    img_show.paste(Image.fromarray(img_right), (w, 0, w * 2, h))
    return np.array(img_show)
+def draw_box_txt_fine(img_size, box, txt, font_path="./doc/fonts/simfang.ttf"):
+    box_height = int(math.sqrt((box[0][0] - box[3][0])**2 + (box[0][1] - box[3][1])**2))
+    box_width = int(math.sqrt((box[0][0] - box[1][0])**2 + (box[0][1] - box[1][1])**2))
+    if box_height > 2 * box_width and box_height > 30:
+        img_text = Image.new('RGB', (box_height, box_width), (255, 255, 255))
+        draw_text = ImageDraw.Draw(img_text)
+        if txt:
+            font = create_font(txt, (box_height, box_width), font_path)
+            draw_text.text([0, 0], txt, fill=(0, 0, 0), font=font)
+        img_text = img_text.transpose(Image.ROTATE_270)
+    else:
+        img_text = Image.new('RGB', (box_width, box_height), (255, 255, 255))
+        draw_text = ImageDraw.Draw(img_text)
+        if txt:
+            font = create_font(txt, (box_width, box_height), font_path)
+            draw_text.text([0, 0], txt, fill=(0, 0, 0), font=font)
+    pts1 = np.float32([[0, 0], [box_width, 0], [box_width, box_height], [0, box_height]])
+    pts2 = np.array(box, dtype=np.float32)
+    M = cv2.getPerspectiveTransform(pts1, pts2)
+    img_text = np.array(img_text, dtype=np.uint8)
+    img_right_text = cv2.warpPerspective(img_text, M, img_size,
+                                         flags=cv2.INTER_NEAREST,
+                                         borderMode=cv2.BORDER_CONSTANT,
+                                         borderValue=(255, 255, 255))
+    return img_right_text
+def create_font(txt, sz, font_path="./doc/fonts/simfang.ttf"):
+    font_size = int(sz[1] * 0.99)
+    font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
+    length = font.getsize(txt)[0]
+    if length > sz[0]:
+        font_size = int(font_size * sz[0] / length)
+        font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
+    return font
 def str_count(s):
    """
    Count the number of Chinese characters,