import os
import time

import cv2
import numpy as np

# Fallback ground-truth directories used when the caller does not supply one.
# They are machine-specific paths; pass ``gt_dir`` explicitly wherever possible.
_DEFAULT_E2E_GT_DIR = "/Users/hongyongjie/Downloads/part_vgg_synth/train"
_DEFAULT_DIRECTION_GT_DIR = '/home/vis/huangzuming/data/SYNTH_DATA/part_vgg_synth_icdar/processed/val/poly'


def _image_prefix(image_fn):
    """Return the file name of ``image_fn`` without directory and extension."""
    return os.path.splitext(os.path.basename(image_fn))[0]


def _read_label_lines(text_path):
    """Return the stripped, non-empty lines of the label file ``text_path``."""
    with open(text_path, 'r') as fid:
        stripped = (line.strip() for line in fid)
        return [line for line in stripped if line]


def _ensure_dir(path):
    """Create directory ``path`` (including parents) if it does not exist."""
    if not os.path.exists(path):
        os.makedirs(path)


def _parse_polygon(coord_field, point_num=None):
    """Parse ``'x1,y1,x2,y2,...'`` into an int32 array of shape (1, N, 2).

    When ``point_num`` is given the polygon must have exactly that many
    points, otherwise N is inferred from the number of coordinates.
    """
    coords = [float(token) for token in coord_field.split(',')]
    rows = -1 if point_num is None else point_num
    return np.array(coords).reshape(1, rows, 2).astype(np.int32)


def _to_int_point(point):
    """Convert a 2D point to a tuple of Python ints for OpenCV drawing calls."""
    return tuple(int(value) for value in point)


def _resize_to_stride(im, ratio, max_stride=128):
    """Scale ``im`` by ``ratio`` and round both sides up to ``max_stride``.

    :return: the resized image and the effective ``(ratio_h, ratio_w)``.
    """
    h, w = im.shape[:2]
    # int() truncates; clamp to 1 so that a very thin side rounds up to one
    # stride instead of collapsing to 0, which cv2.resize rejects.
    resize_h = max(int(h * ratio), 1)
    resize_w = max(int(w * ratio), 1)
    resize_h = (resize_h + max_stride - 1) // max_stride * max_stride
    resize_w = (resize_w + max_stride - 1) // max_stride * max_stride
    im = cv2.resize(im, (int(resize_w), int(resize_h)))
    return im, (resize_h / float(h), resize_w / float(w))


def visualize_e2e_result(im_fn,
                         poly_list,
                         seq_strs,
                         src_im,
                         valid_set='partvgg',
                         gt_dir=None,
                         result_path='./out'):
    """
    Draw ground-truth polygons, detected polygons and recognized strings on a
    copy of ``src_im`` and save it as ``<result_path>/<prefix>_detection.jpg``.

    :param im_fn: path of the image; its base name (without extension) selects
        the label file ``<gt_dir>/<prefix>.txt`` and names the output file
    :param poly_list: detected polygons, each an array-like of shape (N, 2)
    :param seq_strs: recognized strings, paired with ``poly_list`` by position
    :param src_im: image to draw on (it is copied, not modified)
    :param valid_set: label flavour; ``'partvgg'`` expects exactly 4 points per
        line, ``'totaltext'`` accepts any number of points
    :param gt_dir: directory holding the label files; ``None`` selects the
        built-in default directory
    :param result_path: output directory, created if missing
    :raises ValueError: if ``valid_set`` is not one of the supported values
    """
    if valid_set == 'partvgg':
        point_num = 4
    elif valid_set == 'totaltext':
        point_num = None
    else:
        raise ValueError("unsupported valid_set: {!r}".format(valid_set))
    if gt_dir is None:
        gt_dir = _DEFAULT_E2E_GT_DIR

    im_prefix = _image_prefix(im_fn)
    vis_det_img = src_im.copy()

    # Ground truth: each line is "x1,y1,...<TAB>text"; only the coordinates
    # before the first tab are used here.
    text_path = os.path.join(gt_dir, im_prefix + '.txt')
    for line in _read_label_lines(text_path):
        gt_poly = _parse_polygon(line.split('\t')[0], point_num)
        cv2.polylines(
            vis_det_img,
            gt_poly,
            isClosed=True,
            color=(255, 0, 0),
            thickness=2)

    for detected_poly, recognized_str in zip(poly_list, seq_strs):
        detected_poly = np.asarray(detected_poly)
        cv2.polylines(
            vis_det_img,
            detected_poly[np.newaxis, ...].astype(np.int32),
            isClosed=True,
            color=(0, 0, 255),
            thickness=2)
        # The text is anchored at the first vertex of the detected polygon.
        cv2.putText(
            vis_det_img,
            recognized_str,
            org=(int(detected_poly[0, 0]), int(detected_poly[0, 1])),
            fontFace=cv2.FONT_HERSHEY_COMPLEX,
            fontScale=0.7,
            color=(0, 255, 0),
            thickness=1)

    _ensure_dir(result_path)
    cv2.imwrite("{}/{}_detection.jpg".format(result_path, im_prefix),
                vis_det_img)


def visualization_output(src_image,
                         f_tcl,
                         f_chars,
                         output_dir,
                         image_prefix=None):
    """
    Save two debug images: the source image with ``f_tcl`` written into
    channel 1, and the source image with a character mask written into
    channel 1.

    :param src_image: normalized image in CHW layout with 3 channels; it is
        de-normalized on a copy, so the caller's array is left untouched
    :param f_tcl: 2D map, resized to the image size and scaled by 255
    :param f_chars: per-pixel class scores in HWC layout (argmax is taken over
        axis 2); class index 95 is rendered as 0, lower indices as 1
        (presumably 95 is the background class -- confirm against the model)
    :param output_dir: output directory, created if missing
    :param image_prefix: file name prefix; defaults to the current time in
        milliseconds
    """
    im_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
    im_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))

    # Undo the normalization on a copy; the in-place operators keep the
    # dtype of the input exactly as before.
    restored = src_image.copy()
    restored *= im_std
    restored += im_mean
    # CHW -> HWC, then reverse the channel order and scale to [0, 255].
    restored = restored.transpose([1, 2, 0])
    restored = restored[:, :, ::-1] * 255
    height, width = restored.shape[:2]

    if image_prefix is not None:
        file_prefix = image_prefix
    else:
        file_prefix = str(int(time.time() * 1000))
    _ensure_dir(output_dir)

    # visualization f_tcl
    tcl_file_name = os.path.join(output_dir, file_prefix + '_0_tcl.jpg')
    vis_tcl_img = restored.copy()
    vis_tcl_img[:, :, 1] = cv2.resize(f_tcl, dsize=(width, height)) * 255
    cv2.imwrite(tcl_file_name, vis_tcl_img)

    # visualization char maps
    char_file_name = os.path.join(output_dir, file_prefix + '_1_chars.jpg')
    vis_char_img = restored.copy()
    char_mask = np.argmax(f_chars, axis=2)[:, :, np.newaxis].astype('float32')
    char_mask[char_mask < 95] = 1.0
    char_mask[char_mask == 95] = 0.0
    vis_char_img[:, :, 1] = cv2.resize(char_mask, dsize=(width, height)) * 255
    cv2.imwrite(char_file_name, vis_char_img)


def visualize_point_result(im_fn, point_list, point_pair_list, src_im, gt_dir,
                           result_path):
    """
    Draw ground-truth polygons, center points and their border point pairs on
    a copy of ``src_im`` and save it as
    ``<result_path>/<prefix>_border_points.jpg``.

    :param im_fn: path of the image; its base name (without extension) selects
        the label file ``<gt_dir>/<prefix>.txt`` and names the output file
    :param point_list: 2D points, drawn as circles with color (0, 0, 255)
    :param point_pair_list: pairs of 2D points matched with ``point_list`` by
        position; each pair is joined by a line and both ends are circled
    :param src_im: image to draw on (it is copied, not modified)
    :param gt_dir: directory holding the label files
    :param result_path: output directory, created if missing
    """
    im_prefix = _image_prefix(im_fn)
    vis_det_img = src_im.copy()

    # draw gt bbox on the image.
    text_path = os.path.join(gt_dir, im_prefix + '.txt')
    for line in _read_label_lines(text_path):
        gt_poly = _parse_polygon(line.split('\t')[0])
        cv2.polylines(
            vis_det_img,
            gt_poly,
            isClosed=True,
            color=(255, 255, 255),
            thickness=1)

    for point, point_pair in zip(point_list, point_pair_list):
        # OpenCV drawing functions require integer pixel coordinates.
        center = _to_int_point(point)
        first = _to_int_point(point_pair[0])
        second = _to_int_point(point_pair[1])
        cv2.line(vis_det_img, first, second, (0, 255, 255), thickness=1)
        cv2.circle(vis_det_img, center, 2, (0, 0, 255))
        cv2.circle(vis_det_img, first, 2, (255, 0, 0))
        cv2.circle(vis_det_img, second, 2, (0, 255, 0))

    _ensure_dir(result_path)
    cv2.imwrite("{}/{}_border_points.jpg".format(result_path, im_prefix),
                vis_det_img)


def resize_image(im, max_side_len=512):
    """
    Resize the image so that its longer side is scaled to ``max_side_len``,
    then round both sides up to a multiple of the stride (128).

    :param im: the input image, shape (H, W) or (H, W, C)
    :param max_side_len: target length of the longer side before rounding
    :return: the resized image and the resize ratio ``(ratio_h, ratio_w)``
    """
    h, w = im.shape[:2]
    # Fix the longer side
    ratio = float(max_side_len) / max(h, w)
    return _resize_to_stride(im, ratio)


def resize_image_min(im, max_side_len=512):
    """
    Resize the image so that its shorter side is scaled to ``max_side_len``,
    then round both sides up to a multiple of the stride (128).

    :param im: the input image, shape (H, W) or (H, W, C)
    :param max_side_len: target length of the shorter side before rounding
    :return: the resized image and the resize ratio ``(ratio_h, ratio_w)``
    """
    print('--> Using resize_image_min')
    h, w = im.shape[:2]
    # Fix the shorter side
    ratio = float(max_side_len) / min(h, w)
    return _resize_to_stride(im, ratio)


def resize_image_for_totaltext(im, max_side_len=512):
    """
    Enlarge the image by 1.25x unless that would make its height exceed
    ``max_side_len``, in which case the height is scaled to ``max_side_len``;
    then round both sides up to a multiple of the stride (128).

    :param im: the input image, shape (H, W) or (H, W, C)
    :param max_side_len: upper bound for the scaled height before rounding
    :return: the resized image and the resize ratio ``(ratio_h, ratio_w)``
    """
    h = im.shape[0]
    ratio = 1.25
    if h * ratio > max_side_len:
        ratio = float(max_side_len) / h
    return _resize_to_stride(im, ratio)


def point_pair2poly(point_pair_list):
    """
    Transfer vertical point_pairs into poly point in clockwise.

    The first points of all pairs are emitted in order, followed by the
    second points in reverse order.

    :param point_pair_list: non-empty sequence of point pairs, array-like of
        shape (N, 2, 2)
    :return: the polygon as an array of shape (2 * N, 2) and a tuple
        ``(max, min, mean)`` of the distances between the points of each pair
    :raises ValueError: if ``point_pair_list`` is empty
    """
    pairs = np.asarray(point_pair_list)
    if pairs.size == 0:
        raise ValueError("point_pair_list must contain at least one pair")
    pairs = pairs.reshape(len(pairs), 2, -1)
    first_points = pairs[:, 0]
    second_points = pairs[:, 1]

    pair_lengths = np.linalg.norm(first_points - second_points, axis=1)
    pair_info = (pair_lengths.max(), pair_lengths.min(), pair_lengths.mean())

    # construct poly
    poly = np.concatenate([first_points, second_points[::-1]], axis=0)
    return poly.reshape(-1, 2), pair_info


def shrink_quad_along_width(quad, begin_width_ratio=0., end_width_ratio=1.):
    """
    Generate shrink_quad_along_width.

    Edge ``quad[0] -> quad[1]`` and edge ``quad[3] -> quad[2]`` are both cut
    at the same two fractions. Fractions outside [0, 1] extend the quad
    instead of shrinking it.

    :param quad: array of shape (4, 2)
    :param begin_width_ratio: fraction along both edges where the result starts
    :param end_width_ratio: fraction along both edges where the result ends
    :return: array of shape (4, 2) in the same vertex order as ``quad``
    """
    ratio_pair = np.array(
        [[begin_width_ratio], [end_width_ratio]], dtype=np.float32)
    # Row 0 is the point at begin_width_ratio, row 1 the one at end_width_ratio.
    p0_1 = quad[0] + (quad[1] - quad[0]) * ratio_pair
    p3_2 = quad[3] + (quad[2] - quad[3]) * ratio_pair
    return np.array([p0_1[0], p0_1[1], p3_2[1], p3_2[0]])


def expand_poly_along_width(poly, shrink_ratio_of_width=0.3):
    """
    expand poly along width.

    Both ends of the polygon are pushed outwards by ``shrink_ratio_of_width``
    times the length of the corresponding end edge. NOTE: ``poly`` is modified
    in place and also returned.

    :param poly: array of shape (N, 2); the indexing below treats the first
        N / 2 points as one side and the remaining points as the opposite
        side in reverse order
    :param shrink_ratio_of_width: expansion amount relative to the end edge
    :return: the same ``poly`` object with its four end points moved
    """
    point_num = poly.shape[0]

    # Left end: first segment of one side and last segment of the other.
    left_quad = np.array(
        [poly[0], poly[1], poly[-2], poly[-1]], dtype=np.float32)
    # Express the expansion as a (negative) fraction of the first segment;
    # 1e-6 guards against a zero-length segment.
    left_ratio = -shrink_ratio_of_width * np.linalg.norm(left_quad[0] - left_quad[3]) / \
                 (np.linalg.norm(left_quad[0] - left_quad[1]) + 1e-6)
    left_quad_expand = shrink_quad_along_width(left_quad, left_ratio, 1.0)

    # Right end: the two segments around the middle of the point list.
    right_quad = np.array(
        [
            poly[point_num // 2 - 2], poly[point_num // 2 - 1],
            poly[point_num // 2], poly[point_num // 2 + 1]
        ],
        dtype=np.float32)
    right_ratio = 1.0 + \
                  shrink_ratio_of_width * np.linalg.norm(right_quad[0] - right_quad[3]) / \
                  (np.linalg.norm(right_quad[0] - right_quad[1]) + 1e-6)
    right_quad_expand = shrink_quad_along_width(right_quad, 0.0, right_ratio)

    poly[0] = left_quad_expand[0]
    poly[-1] = left_quad_expand[-1]
    poly[point_num // 2 - 1] = right_quad_expand[1]
    poly[point_num // 2] = right_quad_expand[2]
    return poly


def norm2(x, axis=None):
    """
    Return the Euclidean (L2) norm of ``x``.

    :param x: input array
    :param axis: axis (or axes) to reduce over; ``None`` reduces over all
        elements. ``axis=0`` is honoured as a real axis.
    """
    if axis is not None:
        return np.sqrt(np.sum(x**2, axis=axis))
    return np.sqrt(np.sum(x**2))


def cos(p1, p2):
    """
    Return the cosine of the angle between vectors ``p1`` and ``p2``.

    No guard is applied for zero-length vectors, for which the division
    yields a non-finite value.
    """
    return (p1 * p2).sum() / (norm2(p1) * norm2(p2))


def generate_direction_info(image_fn,
                            H,
                            W,
                            ratio_h,
                            ratio_w,
                            max_length=640,
                            out_scale=4,
                            gt_dir=None):
    """
    Build a per-pixel direction map from the quadrilateral labels of an image.

    Each label line is ``"x1,y1,x2,y2,x3,y3,x4,y4<TAB>text"``. For every quad
    whose text is not ``'###'`` the region is filled with the vector from the
    midpoint of edge (p3, p0) to the midpoint of edge (p1, p2), divided by the
    number of characters in the text.

    :param image_fn: path of the image; its base name (without extension)
        selects the label file ``<gt_dir>/<prefix>.txt``
    :param H: height used for the map, divided by ``out_scale``
    :param W: width used for the map, divided by ``out_scale``
    :param ratio_h: vertical scale applied to the label coordinates
    :param ratio_w: horizontal scale applied to the label coordinates
    :param max_length: unused; kept for interface compatibility
    :param out_scale: down-sampling factor of the map relative to (H, W)
    :param gt_dir: directory holding the label files; ``None`` selects the
        built-in default directory
    :return: array of shape (2, H // out_scale, W // out_scale) holding the
        x and y components of the direction
    """
    im_prefix = _image_prefix(image_fn)
    instance_direction_map = np.zeros(
        shape=[H // out_scale, W // out_scale, 3])

    if gt_dir is None:
        gt_dir = _DEFAULT_DIRECTION_GT_DIR

    # get gt label map
    text_path = os.path.join(gt_dir, im_prefix + '.txt')
    scale = np.array([ratio_w, ratio_h]).reshape(1, 2)
    for line in _read_label_lines(text_path):
        coords, txt = line.split('\t')
        if txt == '###':
            continue
        values = [float(token) for token in coords.strip().split(',')]
        poly = np.array(values).reshape(4, 2) * scale / out_scale

        mid_idx = poly.shape[0] // 2
        direct_vector = (
            (poly[mid_idx] + poly[mid_idx - 1]) - (poly[0] + poly[-1])) / 2.0
        direct_vector /= len(txt)
        # The third channel is a constant placeholder and is dropped below.
        avg_char_distance = 1.0
        direct_label = (float(direct_vector[0]), float(direct_vector[1]),
                        avg_char_distance)
        cv2.fillPoly(instance_direction_map,
                     poly.round().astype(np.int32)[np.newaxis, :, :],
                     direct_label)

    # HWC -> CHW, keeping only the two direction channels.
    instance_direction_map = instance_direction_map.transpose([2, 0, 1])
    return instance_direction_map[:2, ...]