# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import cv2 import numpy as np def decode_image(im_file, im_info): """read rgb image Args: im_file (str|np.ndarray): input can be image path or np.ndarray im_info (dict): info of image Returns: im (np.ndarray): processed image (np.ndarray) im_info (dict): info of processed image """ if isinstance(im_file, str): with open(im_file, 'rb') as f: im_read = f.read() data = np.frombuffer(im_read, dtype='uint8') im = cv2.imdecode(data, 1) # BGR mode, but need RGB mode im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) else: im = im_file im_info['im_shape'] = np.array(im.shape[:2], dtype=np.float32) im_info['scale_factor'] = np.array([1., 1.], dtype=np.float32) return im, im_info class Resize(object): """resize image by target_size and max_size Args: target_size (int): the target size of image keep_ratio (bool): whether keep_ratio or not, default true interp (int): method of resize """ def __init__(self, target_size, keep_ratio=True, interp=cv2.INTER_LINEAR): if isinstance(target_size, int): target_size = [target_size, target_size] self.target_size = target_size self.keep_ratio = keep_ratio self.interp = interp def __call__(self, im, im_info): """ Args: im (np.ndarray): image (np.ndarray) im_info (dict): info of image Returns: im (np.ndarray): processed image (np.ndarray) im_info (dict): info of processed image """ assert len(self.target_size) == 2 assert self.target_size[0] > 0 and self.target_size[1] > 0 im_channel = im.shape[2] im_scale_y, im_scale_x = self.generate_scale(im) im = cv2.resize( im, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=self.interp) im_info['im_shape'] = np.array(im.shape[:2]).astype('float32') im_info['scale_factor'] = np.array( [im_scale_y, im_scale_x]).astype('float32') return im, im_info def generate_scale(self, im): """ Args: im (np.ndarray): image (np.ndarray) Returns: im_scale_x: the resize ratio of X im_scale_y: the resize ratio of Y """ origin_shape = im.shape[:2] im_c = im.shape[2] if self.keep_ratio: im_size_min = np.min(origin_shape) im_size_max = np.max(origin_shape) target_size_min = np.min(self.target_size) target_size_max = np.max(self.target_size) im_scale = float(target_size_min) / float(im_size_min) if np.round(im_scale * im_size_max) > target_size_max: im_scale = float(target_size_max) / float(im_size_max) im_scale_x = im_scale im_scale_y = im_scale else: resize_h, resize_w = self.target_size im_scale_y = resize_h / float(origin_shape[0]) im_scale_x = resize_w / float(origin_shape[1]) return im_scale_y, im_scale_x class NormalizeImage(object): """normalize image Args: mean (list): im - mean std (list): im / std is_scale (bool): whether need im / 255 is_channel_first (bool): if True: image shape is CHW, else: HWC """ def __init__(self, mean, std, is_scale=True): self.mean = mean self.std = std self.is_scale = is_scale def __call__(self, im, im_info): """ Args: im (np.ndarray): image (np.ndarray) im_info (dict): info of image Returns: im (np.ndarray): processed image (np.ndarray) im_info (dict): info of processed image """ im = im.astype(np.float32, copy=False) mean = np.array(self.mean)[np.newaxis, np.newaxis, :] std = np.array(self.std)[np.newaxis, np.newaxis, :] if self.is_scale: im = im / 255.0 im -= mean im /= std return im, im_info class Permute(object): """permute image Args: to_bgr (bool): whether convert RGB to BGR channel_first (bool): whether convert HWC to CHW """ def __init__(self, ): super(Permute, self).__init__() def __call__(self, im, im_info): """ Args: im (np.ndarray): image (np.ndarray) im_info (dict): info of image Returns: im (np.ndarray): processed image (np.ndarray) im_info (dict): info of processed image """ im = im.transpose((2, 0, 1)).copy() return im, im_info class PadStride(object): """ padding image for model with FPN, instead PadBatch(pad_to_stride) in original config Args: stride (bool): model with FPN need image shape % stride == 0 """ def __init__(self, stride=0): self.coarsest_stride = stride def __call__(self, im, im_info): """ Args: im (np.ndarray): image (np.ndarray) im_info (dict): info of image Returns: im (np.ndarray): processed image (np.ndarray) im_info (dict): info of processed image """ coarsest_stride = self.coarsest_stride if coarsest_stride <= 0: return im, im_info im_c, im_h, im_w = im.shape pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride) pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride) padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32) padding_im[:, :im_h, :im_w] = im return padding_im, im_info class LetterBoxResize(object): def __init__(self, target_size): """ Resize image to target size, convert normalized xywh to pixel xyxy format ([x_center, y_center, width, height] -> [x0, y0, x1, y1]). Args: target_size (int|list): image target size. """ super(LetterBoxResize, self).__init__() if isinstance(target_size, int): target_size = [target_size, target_size] self.target_size = target_size def letterbox(self, img, height, width, color=(127.5, 127.5, 127.5)): # letterbox: resize a rectangular image to a padded rectangular shape = img.shape[:2] # [height, width] ratio_h = float(height) / shape[0] ratio_w = float(width) / shape[1] ratio = min(ratio_h, ratio_w) new_shape = (round(shape[1] * ratio), round(shape[0] * ratio)) # [width, height] padw = (width - new_shape[0]) / 2 padh = (height - new_shape[1]) / 2 top, bottom = round(padh - 0.1), round(padh + 0.1) left, right = round(padw - 0.1), round(padw + 0.1) img = cv2.resize( img, new_shape, interpolation=cv2.INTER_AREA) # resized, no border img = cv2.copyMakeBorder( img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # padded rectangular return img, ratio, padw, padh def __call__(self, im, im_info): """ Args: im (np.ndarray): image (np.ndarray) im_info (dict): info of image Returns: im (np.ndarray): processed image (np.ndarray) im_info (dict): info of processed image """ assert len(self.target_size) == 2 assert self.target_size[0] > 0 and self.target_size[1] > 0 height, width = self.target_size h, w = im.shape[:2] im, ratio, padw, padh = self.letterbox(im, height=height, width=width) new_shape = [round(h * ratio), round(w * ratio)] im_info['im_shape'] = np.array(new_shape, dtype=np.float32) im_info['scale_factor'] = np.array([ratio, ratio], dtype=np.float32) return im, im_info class WarpAffine(object): """Warp affine the image """ def __init__(self, keep_res=False, pad=31, input_h=512, input_w=512, scale=0.4, shift=0.1): self.keep_res = keep_res self.pad = pad self.input_h = input_h self.input_w = input_w self.scale = scale self.shift = shift def _get_3rd_point(self, a, b): assert len( a) == 2, 'input of _get_3rd_point should be point with length of 2' assert len( b) == 2, 'input of _get_3rd_point should be point with length of 2' direction = a - b third_pt = b + np.array([-direction[1], direction[0]], dtype=np.float32) return third_pt def rotate_point(self, pt, angle_rad): """Rotate a point by an angle. Args: pt (list[float]): 2 dimensional point to be rotated angle_rad (float): rotation angle by radian Returns: list[float]: Rotated point. """ assert len(pt) == 2 sn, cs = np.sin(angle_rad), np.cos(angle_rad) new_x = pt[0] * cs - pt[1] * sn new_y = pt[0] * sn + pt[1] * cs rotated_pt = [new_x, new_y] return rotated_pt def get_affine_transform(self, center, input_size, rot, output_size, shift=(0., 0.), inv=False): """Get the affine transform matrix, given the center/scale/rot/output_size. Args: center (np.ndarray[2, ]): Center of the bounding box (x, y). input_size (np.ndarray[2, ]): Size of input feature (width, height). rot (float): Rotation angle (degree). output_size (np.ndarray[2, ]): Size of the destination heatmaps. shift (0-100%): Shift translation ratio wrt the width/height. Default (0., 0.). inv (bool): Option to inverse the affine transform direction. (inv=False: src->dst or inv=True: dst->src) Returns: np.ndarray: The transform matrix. """ assert len(center) == 2 assert len(output_size) == 2 assert len(shift) == 2 if not isinstance(input_size, (np.ndarray, list)): input_size = np.array([input_size, input_size], dtype=np.float32) scale_tmp = input_size shift = np.array(shift) src_w = scale_tmp[0] dst_w = output_size[0] dst_h = output_size[1] rot_rad = np.pi * rot / 180 src_dir = self.rotate_point([0., src_w * -0.5], rot_rad) dst_dir = np.array([0., dst_w * -0.5]) src = np.zeros((3, 2), dtype=np.float32) src[0, :] = center + scale_tmp * shift src[1, :] = center + src_dir + scale_tmp * shift src[2, :] = self._get_3rd_point(src[0, :], src[1, :]) dst = np.zeros((3, 2), dtype=np.float32) dst[0, :] = [dst_w * 0.5, dst_h * 0.5] dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir dst[2, :] = self._get_3rd_point(dst[0, :], dst[1, :]) if inv: trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) else: trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) return trans def __call__(self, im, im_info): """ Args: im (np.ndarray): image (np.ndarray) im_info (dict): info of image Returns: im (np.ndarray): processed image (np.ndarray) im_info (dict): info of processed image """ img = cv2.cvtColor(im, cv2.COLOR_RGB2BGR) h, w = img.shape[:2] if self.keep_res: input_h = (h | self.pad) + 1 input_w = (w | self.pad) + 1 s = np.array([input_w, input_h], dtype=np.float32) c = np.array([w // 2, h // 2], dtype=np.float32) else: s = max(h, w) * 1.0 input_h, input_w = self.input_h, self.input_w c = np.array([w / 2., h / 2.], dtype=np.float32) trans_input = self.get_affine_transform(c, s, 0, [input_w, input_h]) img = cv2.resize(img, (w, h)) inp = cv2.warpAffine( img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR) return inp, im_info def preprocess(im, preprocess_ops): # process image by preprocess_ops im_info = { 'scale_factor': np.array( [1., 1.], dtype=np.float32), 'im_shape': None, } im, im_info = decode_image(im, im_info) for operator in preprocess_ops: im, im_info = operator(im, im_info) return im, im_info