# -*- coding:utf-8 -*- from __future__ import absolute_import from __future__ import division from __future__ import print_function import math from PIL import Image, ImageDraw, ImageFont import base64 import cv2 import numpy as np def draw_ocr(image, boxes, txts, scores, font_file, draw_txt=True, drop_score=0.5): """ Visualize the results of OCR detection and recognition args: image(Image|array): RGB image boxes(list): boxes with shape(N, 4, 2) txts(list): the texts scores(list): txxs corresponding scores draw_txt(bool): whether draw text or not drop_score(float): only scores greater than drop_threshold will be visualized return(array): the visualized img """ if scores is None: scores = [1] * len(boxes) for (box, score) in zip(boxes, scores): if score < drop_score or math.isnan(score): continue box = np.reshape(np.array(box), [-1, 1, 2]).astype(np.int64) image = cv2.polylines(np.array(image), [box], True, (255, 0, 0), 2) if draw_txt: img = np.array(resize_img(image, input_size=600)) txt_img = text_visual( txts, scores, font_file, img_h=img.shape[0], img_w=600, threshold=drop_score) img = np.concatenate([np.array(img), np.array(txt_img)], axis=1) return img return image def text_visual(texts, scores, font_file, img_h=400, img_w=600, threshold=0.): """ create new blank img and draw txt on it args: texts(list): the text will be draw scores(list|None): corresponding score of each txt img_h(int): the height of blank img img_w(int): the width of blank img return(array): """ if scores is not None: assert len(texts) == len( scores), "The number of txts and corresponding scores must match" def create_blank_img(): blank_img = np.ones(shape=[img_h, img_w], dtype=np.int8) * 255 blank_img[:, img_w - 1:] = 0 blank_img = Image.fromarray(blank_img).convert("RGB") draw_txt = ImageDraw.Draw(blank_img) return blank_img, draw_txt blank_img, draw_txt = create_blank_img() font_size = 20 txt_color = (0, 0, 0) font = ImageFont.truetype(font_file, font_size, encoding="utf-8") gap = font_size + 5 txt_img_list = [] count, index = 1, 0 for idx, txt in enumerate(texts): index += 1 if scores[idx] < threshold or math.isnan(scores[idx]): index -= 1 continue first_line = True while str_count(txt) >= img_w // font_size - 4: tmp = txt txt = tmp[:img_w // font_size - 4] if first_line: new_txt = str(index) + ': ' + txt first_line = False else: new_txt = ' ' + txt draw_txt.text((0, gap * count), new_txt, txt_color, font=font) txt = tmp[img_w // font_size - 4:] if count >= img_h // gap - 1: txt_img_list.append(np.array(blank_img)) blank_img, draw_txt = create_blank_img() count = 0 count += 1 if first_line: new_txt = str(index) + ': ' + txt + ' ' + '%.3f' % (scores[idx]) else: new_txt = " " + txt + " " + '%.3f' % (scores[idx]) draw_txt.text((0, gap * count), new_txt, txt_color, font=font) # whether add new blank img or not if count >= img_h // gap - 1 and idx + 1 < len(texts): txt_img_list.append(np.array(blank_img)) blank_img, draw_txt = create_blank_img() count = 0 count += 1 txt_img_list.append(np.array(blank_img)) if len(txt_img_list) == 1: blank_img = np.array(txt_img_list[0]) else: blank_img = np.concatenate(txt_img_list, axis=1) return np.array(blank_img) def str_count(s): """ Count the number of Chinese characters, a single English character and a single number equal to half the length of Chinese characters. args: s(string): the input of string return(int): the number of Chinese characters """ import string count_zh = count_pu = 0 s_len = len(s) en_dg_count = 0 for c in s: if c in string.ascii_letters or c.isdigit() or c.isspace(): en_dg_count += 1 elif c.isalpha(): count_zh += 1 else: count_pu += 1 return s_len - math.ceil(en_dg_count / 2) def resize_img(img, input_size=600): img = np.array(img) im_shape = img.shape im_size_min = np.min(im_shape[0:2]) im_size_max = np.max(im_shape[0:2]) im_scale = float(input_size) / float(im_size_max) im = cv2.resize(img, None, None, fx=im_scale, fy=im_scale) return im def get_image_ext(image): if image.shape[2] == 4: return ".png" return ".jpg" def sorted_boxes(dt_boxes): """ Sort text boxes in order from top to bottom, left to right args: dt_boxes(array):detected text boxes with shape [4, 2] return: sorted boxes(array) with shape [4, 2] """ num_boxes = dt_boxes.shape[0] sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0])) _boxes = list(sorted_boxes) for i in range(num_boxes - 1): if abs(_boxes[i + 1][0][1] - _boxes[i][0][1]) < 10 and \ (_boxes[i + 1][0][0] < _boxes[i][0][0]): tmp = _boxes[i] _boxes[i] = _boxes[i + 1] _boxes[i + 1] = tmp return _boxes def base64_to_cv2(b64str): data = base64.b64decode(b64str.encode('utf8')) data = np.fromstring(data, np.uint8) data = cv2.imdecode(data, cv2.IMREAD_COLOR) return data