[cherry-pick] fix_textbbox in ImageDraw (#8580)

* [BugFix] fix_textbbox in ImageDraw, cherry-picked from #8455: * fix_textbbox * compact_py37 * update_req * support Chinese text in draw_bbox --------- Co-authored-by: N shiyutang <34859558+shiyutang@users.noreply.github.com>

[cherry-pick] fix_textbbox in ImageDraw (#8580)
* [BugFix] fix_textbbox in ImageDraw, cherry-picked from #8455: * fix_textbbox * compact_py37 * update_req * support Chinese text in draw_bbox --------- Co-authored-by: N shiyutang <34859558+shiyutang@users.noreply.github.com>
0da52bfb · Tingquan Gao · GitHub · c48d6bfb · 0da52bfb · 0da52bfb
8 changed files
--- a/deploy/pptracking/python/mot/visualize.py
+++ b/deploy/pptracking/python/mot/visualize.py
@@ -20,6 +20,8 @@ import numpy as np
 from PIL import Image, ImageDraw, ImageFile
 ImageFile.LOAD_TRUNCATED_IMAGES = True
 from collections import deque
+from ppdet.utils.compact import imagedraw_textsize_c
 def visualize_box_mask(im, results, labels, threshold=0.5):
@@ -109,7 +111,7 @@ def draw_box(im, np_boxes, labels, threshold=0.5):
        # draw label
        text = "{} {:.4f}".format(labels[clsid], score)
-        tw, th = draw.textsize(text)
+        tw, th = imagedraw_textsize_c(draw, text)
        draw.rectangle(
            [(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill=color)
        draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255))
@@ -202,7 +204,7 @@ def plot_tracking_dict(image,
    im = np.ascontiguousarray(np.copy(image))
    im_h, im_w = im.shape[:2]
    if do_break_in_counting or do_illegal_parking_recognition:
-        entrance = np.array(entrance[:-1])  # last pair is [im_w, im_h] 
+        entrance = np.array(entrance[:-1])  # last pair is [im_w, im_h]
    text_scale = max(0.5, image.shape[1] / 3000.)
    text_thickness = 2

--- a/deploy/python/visualize.py
+++ b/deploy/python/visualize.py
@@ -20,6 +20,7 @@ import numpy as np
 from PIL import Image, ImageDraw, ImageFile
 ImageFile.LOAD_TRUNCATED_IMAGES = True
 import math
+from ppdet.utils.compact import imagedraw_textsize_c
 def visualize_box_mask(im, results, labels, threshold=0.5):
@@ -159,7 +160,7 @@ def draw_box(im, np_boxes, labels, threshold=0.5):
        # draw label
        text = "{} {:.4f}".format(labels[clsid], score)
-        tw, th = draw.textsize(text)
+        tw, th = imagedraw_textsize_c(draw, text)
        draw.rectangle(
            [(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill=color)
        draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255))
@@ -389,7 +390,7 @@ def visualize_action(im,
                id_action_dict[pid] = id_action_dict.get(pid, [])
                id_action_dict[pid].append(action_type)
        for mot_box in mot_boxes:
-            # mot_box is a format with [mot_id, class, score, xmin, ymin, w, h] 
+            # mot_box is a format with [mot_id, class, score, xmin, ymin, w, h]
            if mot_box[0] in id_action_dict:
                text_position = (int(mot_box[3] + mot_box[5] * 0.75),
                                 int(mot_box[4] - 10))
@@ -497,7 +498,7 @@ def draw_press_box_lanes(im, np_boxes, labels, threshold=0.5):
        # draw label
        text = "{}".format(labels[clsid])
-        tw, th = draw.textsize(text)
+        tw, th = imagedraw_textsize_c(draw, text)
        draw.rectangle(
            [(xmin + 1, ymax - th), (xmin + tw + 1, ymax)], fill=color)
        draw.text((xmin + 1, ymax - th), text, fill=(0, 0, 255))
@@ -570,7 +571,7 @@ def visualize_vehicle_retrograde(im, mot_res, vehicle_retrograde_res):
            # draw label
            text = "retrograde"
-            tw, th = draw.textsize(text)
+            tw, th = imagedraw_textsize_c(draw, text)
            draw.rectangle(
                [(xmax + 1, ymin - th), (xmax + tw + 1, ymin)],
                fill=(0, 255, 0))
@@ -636,4 +637,4 @@ def imshow_lanes(img, lanes, show=False, out_file=None, width=4):
    if out_file:
        if not os.path.exists(os.path.dirname(out_file)):
            os.makedirs(os.path.dirname(out_file))
        cv2.imwrite(out_file, img)
\ No newline at end of file
--- a/ppdet/data/transform/operators.py
+++ b/ppdet/data/transform/operators.py
@@ -51,6 +51,8 @@ from .op_helper import (satisfy_sample_constraint, filter_and_process,
                        is_poly, get_border)
 from ppdet.utils.logger import setup_logger
+from ppdet.utils.compact import imagedraw_textsize_c
 from ppdet.modeling.keypoint_utils import get_affine_transform, affine_transform
 logger = setup_logger(__name__)
@@ -838,7 +840,7 @@ class RandomFlip(BaseOperator):
 class Resize(BaseOperator):
    def __init__(self, target_size, keep_ratio, interp=cv2.INTER_LINEAR):
        """
-        Resize image to target size. if keep_ratio is True, 
+        Resize image to target size. if keep_ratio is True,
        resize the image's long side to the maximum of target_size
        if keep_ratio is False, resize the image to target size(h, w)
        Args:
@@ -1108,7 +1110,7 @@ class RandomResize(BaseOperator):
            target_size (int, list, tuple): image target size, if random size is True, must be list or tuple
            keep_ratio (bool): whether keep_raio or not, default true
            interp (int): the interpolation method
-            random_range (bool): whether random select target size of image, the target_size must be 
+            random_range (bool): whether random select target size of image, the target_size must be
                a [[min_short_edge, long_edge], [max_short_edge, long_edge]]
            random_size (bool): whether random select target size of image
            random_interp (bool): whether random select interpolation method
@@ -1903,7 +1905,7 @@ class RandomScaledCrop(BaseOperator):
 @register_op
 class Cutmix(BaseOperator):
    def __init__(self, alpha=1.5, beta=1.5):
-        """ 
+        """
        CutMix: Regularization Strategy to Train Strong Classifiers with Localizable Features, see https://arxiv.org/abs/1905.04899
        Cutmix image and gt_bbbox/gt_score
        Args:
@@ -2212,7 +2214,7 @@ class DebugVisibleImage(BaseOperator):
                fill='green')
            # draw label
            text = str(gt_class[i][0])
-            tw, th = draw.textsize(text)
+            tw, th = imagedraw_textsize_c(draw, text)
            draw.rectangle(
                [(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill='green')
            draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255))
@@ -2417,7 +2419,7 @@ class Poly2Mask(BaseOperator):
 @register_op
 class AugmentHSV(BaseOperator):
-    """ 
+    """
    Augment the SV channel of image data.
    Args:
        fraction (float): the fraction for augment. Default: 0.5.
@@ -2531,7 +2533,7 @@ class RandomResizeCrop(BaseOperator):
        'long', resize the image's long side to the maximum of target_size, if keep_ratio is
        True and mode is 'short', resize the image's short side to the minimum of target_size.
        cropsizes (list): crop sizes after resize, [(min_crop_1, max_crop_1), ...]
-        mode (str): resize mode, `long` or `short`. Details see resizes. 
+        mode (str): resize mode, `long` or `short`. Details see resizes.
        prob (float): probability of this op.
        keep_ratio (bool): whether keep_ratio or not, default true
        interp (int): the interpolation method

--- a/ppdet/data/transform/rotated_operators.py
+++ b/ppdet/data/transform/rotated_operators.py
@@ -31,6 +31,7 @@ import copy
 from .operators import register_op, BaseOperator
 from ppdet.modeling.rbox_utils import poly2rbox_le135_np, poly2rbox_oc_np, rbox2poly_np
 from ppdet.utils.logger import setup_logger
+from ppdet.utils.compact import imagedraw_textsize_c
 logger = setup_logger(__name__)
@@ -107,7 +108,7 @@ class RRotate(BaseOperator):
        matrix, h, w = self.get_rotated_matrix(self.angle, self.scale, h, w)
        sample['image'] = self.apply_image(image, matrix, h, w)
        polys = sample['gt_poly']
-        # TODO: segment or keypoint to be processed 
+        # TODO: segment or keypoint to be processed
        if len(polys) > 0:
            pts = self.apply_pts(polys, matrix, h, w)
            sample['gt_poly'] = pts
@@ -257,7 +258,7 @@ class Poly2Array(BaseOperator):
 class RResize(BaseOperator):
    def __init__(self, target_size, keep_ratio, interp=cv2.INTER_LINEAR):
        """
-        Resize image to target size. if keep_ratio is True, 
+        Resize image to target size. if keep_ratio is True,
        resize the image's long side to the maximum of target_size
        if keep_ratio is False, resize the image to target size(h, w)
        Args:
@@ -433,7 +434,7 @@ class VisibleRBox(BaseOperator):
            xmin = min(x1, x2, x3, x4)
            ymin = min(y1, y2, y3, y4)
            text = str(gt_class[i][0])
-            tw, th = draw.textsize(text)
+            tw, th = imagedraw_textsize_c(draw, text)
            draw.rectangle(
                [(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill='green')
            draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255))

--- a/ppdet/utils/compact.py
+++ b/ppdet/utils/compact.py
+import PIL
+def imagedraw_textsize_c(draw, text, font=None):
+    """Measure rendered text on any Pillow release.
+
+    ``ImageDraw.textsize`` is gone in Pillow 10, so ``textbbox`` is used
+    whenever the ``draw`` object provides it and ``textsize`` is only a
+    fallback for older releases.
+
+    Args:
+        draw: ``PIL.ImageDraw.ImageDraw``-like object used for measuring.
+        text (str): Text to measure.
+        font: Optional font forwarded to Pillow as ``font=``.
+
+    Returns:
+        tuple: ``(tw, th)`` in pixels, where ``th`` is measured from the
+            drawing origin down to the lowest glyph pixel.
+    """
+    # Feature-detect rather than parse ``PIL.__version__``: this avoids the
+    # deprecated ``textsize`` wherever ``textbbox`` already exists and does
+    # not depend on the layout of the version string.
+    if hasattr(draw, 'textbbox'):
+        left, _, right, bottom = draw.textbbox((0, 0), text, font=font)
+        # The box is relative to the (0, 0) anchor, so ``bottom`` already
+        # contains the gap above the glyphs. Callers draw the text at
+        # ``ymin - th`` and fill a background down to ``ymin``; returning
+        # ``bottom - top`` would let the glyphs overflow that background.
+        # NOTE(review): this is meant to match the height legacy
+        # ``textsize`` reported -- confirm against the Pillow sources.
+        return right - left, bottom
+    tw, th = draw.textsize(text, font=font)
+    return tw, th
--- a/ppdet/utils/download.py
+++ b/ppdet/utils/download.py
@@ -237,7 +237,7 @@ def create_voc_list(data_dir, devkit_subdir='VOCdevkit'):
    years = ['2007', '2012']
    # NOTE: since using auto download VOC
-    # dataset, VOC default label list should be used, 
+    # dataset, VOC default label list should be used,
    # do not generate label_list.txt here. For default
    # label, see ../data/source/voc.py
    create_list(devkit_dir, years, data_dir)
@@ -296,7 +296,7 @@ def get_path(url, root_dir, md5sum=None, check_exist=True):
    # new weights format which postfix is 'pdparams' not
    # need to decompress
-    if osp.splitext(fullname)[-1] not in ['.pdparams', '.yml']:
+    if osp.splitext(fullname)[-1] not in ['.pdparams', '.yml', '.ttf']:
        _decompress_dist(fullname)
    return fullpath, False
@@ -411,7 +411,7 @@ def _download_dist(url, path, md5sum=None):
            must_mkdirs(path)
            if not osp.exists(fullname):
-                with open(lock_path, 'w'):  # touch    
+                with open(lock_path, 'w'):  # touch
                    os.utime(lock_path, None)
                if rank_id_curr_node == 0:
                    _download(url, path, md5sum)
@@ -425,7 +425,7 @@ def _download_dist(url, path, md5sum=None):
 def _check_exist_file_md5(filename, md5sum, url):
-    # if md5sum is None, and file to check is weights file, 
+    # if md5sum is None, and file to check is weights file,
    # read md5um from url and check, else check md5sum directly
    return _md5check_from_url(filename, url) if md5sum is None \
            and filename.endswith('pdparams') \
@@ -525,7 +525,7 @@ def _decompress_dist(fname):
            # trainer pipeline in order
            # **change this if you have more elegent methods**
            if ParallelEnv().current_endpoint in unique_endpoints:
-                with open(lock_path, 'w'):  # touch    
+                with open(lock_path, 'w'):  # touch
                    os.utime(lock_path, None)
                _decompress(fname)
                os.remove(lock_path)

--- a/ppdet/utils/visualizer.py
+++ b/ppdet/utils/visualizer.py
@@ -17,13 +17,16 @@ from __future__ import division
 from __future__ import print_function
 from __future__ import unicode_literals
+import os
 import numpy as np
-from PIL import Image, ImageDraw
+from PIL import Image, ImageDraw, ImageFont
 import cv2
 import math
 from .colormap import colormap
 from ppdet.utils.logger import setup_logger
+from ppdet.utils.compact import imagedraw_textsize_c
+from ppdet.utils.download import get_path
 logger = setup_logger(__name__)
 __all__ = ['visualize_results']
@@ -85,6 +88,11 @@ def draw_bbox(image, im_id, catid2name, bboxes, threshold):
    """
    Draw bbox on image
    """
+    font_url = "https://paddledet.bj.bcebos.com/simfang.ttf"
+    font_path , _ = get_path(font_url, "~/.cache/paddle/")
+    font_size = 18
+    font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
    draw = ImageDraw.Draw(image)
    catid2color = {}
@@ -125,10 +133,10 @@ def draw_bbox(image, im_id, catid2name, bboxes, threshold):
        # draw label
        text = "{} {:.2f}".format(catid2name[catid], score)
-        tw, th = draw.textsize(text)
+        tw, th = imagedraw_textsize_c(draw, text, font=font)
        draw.rectangle(
            [(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill=color)
-        draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255))
+        draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255), font=font)
    return image

--- a/requirements.txt
+++ b/requirements.txt
@@ -10,7 +10,7 @@ terminaltables
 Cython
 pycocotools
 setuptools
-Pillow <= 9.5.0
+Pillow
 # for MOT evaluation and inference
 lap
@@ -21,4 +21,4 @@ sklearn==0.0
 pyclipper
 # for culane data augumetation
 imgaug>=0.4.0
\ No newline at end of file