# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import cv2
import numpy as np
import imgaug.augmenters as iaa
from keypoint_preprocess import get_affine_transform
from PIL import Image


def decode_image(im_file, im_info):
    """Read an image as RGB and initialise its shape / scale info.

    Args:
        im_file (str|np.ndarray): image path, or an already decoded image
            array (an array is used as-is, without colour conversion)
        im_info (dict): info of image, updated in place
    Returns:
        im (np.ndarray): decoded image
        im_info (dict): info with 'im_shape' and 'scale_factor' set
    """
    if isinstance(im_file, str):
        with open(im_file, 'rb') as f:
            im_read = f.read()
        data = np.frombuffer(im_read, dtype='uint8')
        im = cv2.imdecode(data, 1)  # BGR mode, but need RGB mode
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    else:
        im = im_file
    im_info['im_shape'] = np.array(im.shape[:2], dtype=np.float32)
    im_info['scale_factor'] = np.array([1., 1.], dtype=np.float32)
    return im, im_info


class Resize_Mult32(object):
    """Resize image so that both sides become multiples of 32.

    Args:
        limit_side_len (int): reference length the chosen side is scaled to
        limit_type (str): which side is matched to limit_side_len, one of
            'max', 'min' or 'resize_long'
        interp (int): method of resize
    """

    def __init__(self, limit_side_len, limit_type, interp=cv2.INTER_LINEAR):
        self.limit_side_len = limit_side_len
        self.limit_type = limit_type
        self.interp = interp

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): image (np.ndarray)
            im_info (dict): info of image
        Returns:
            im (np.ndarray): processed image (np.ndarray)
            im_info (dict): info of processed image
        """
        im_scale_y, im_scale_x = self.generate_scale(im)
        im = cv2.resize(
            im,
            None,
            None,
            fx=im_scale_x,
            fy=im_scale_y,
            interpolation=self.interp)
        im_info['im_shape'] = np.array(im.shape[:2]).astype('float32')
        im_info['scale_factor'] = np.array(
            [im_scale_y, im_scale_x]).astype('float32')
        return im, im_info

    def generate_scale(self, img):
        """
        Args:
            img (np.ndarray): image (np.ndarray)
        Returns:
            im_scale_y: the resize ratio of Y
            im_scale_x: the resize ratio of X
        Raises:
            Exception: if limit_type is not one of the supported values
        """
        limit_side_len = self.limit_side_len
        # Only height and width are needed; the channel count is irrelevant.
        h, w = img.shape[:2]

        if self.limit_type in ('max', 'resize_long'):
            # Both types scale the longer side to limit_side_len.
            ratio = float(limit_side_len) / max(h, w)
        elif self.limit_type == 'min':
            # Scale the shorter side to limit_side_len.
            ratio = float(limit_side_len) / min(h, w)
        else:
            raise Exception('not support limit type, image ')

        resize_h = int(h * ratio)
        resize_w = int(w * ratio)

        # Snap to the nearest multiple of 32, never going below 32.
        resize_h = max(int(round(resize_h / 32) * 32), 32)
        resize_w = max(int(round(resize_w / 32) * 32), 32)

        im_scale_y = resize_h / float(h)
        im_scale_x = resize_w / float(w)
        return im_scale_y, im_scale_x


class Resize(object):
    """resize image by target_size and max_size

    Args:
        target_size (int|list): the target size of image; an int is
            expanded to [target_size, target_size]
        keep_ratio (bool): whether keep_ratio or not, default true
        interp (int): method of resize
    """

    def __init__(self, target_size, keep_ratio=True, interp=cv2.INTER_LINEAR):
        if isinstance(target_size, int):
            target_size = [target_size, target_size]
        self.target_size = target_size
        self.keep_ratio = keep_ratio
        self.interp = interp

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): image (np.ndarray)
            im_info (dict): info of image
        Returns:
            im (np.ndarray): processed image (np.ndarray)
            im_info (dict): info of processed image
        """
        assert len(self.target_size) == 2
        assert self.target_size[0] > 0 and self.target_size[1] > 0
        im_scale_y, im_scale_x = self.generate_scale(im)
        im = cv2.resize(
            im,
            None,
            None,
            fx=im_scale_x,
            fy=im_scale_y,
            interpolation=self.interp)
        im_info['im_shape'] = np.array(im.shape[:2]).astype('float32')
        im_info['scale_factor'] = np.array(
            [im_scale_y, im_scale_x]).astype('float32')
        return im, im_info

    def generate_scale(self, im):
        """
        Args:
            im (np.ndarray): image (np.ndarray)
        Returns:
            im_scale_y: the resize ratio of Y
            im_scale_x: the resize ratio of X
        """
        origin_shape = im.shape[:2]
        if self.keep_ratio:
            im_size_min = np.min(origin_shape)
            im_size_max = np.max(origin_shape)
            target_size_min = np.min(self.target_size)
            target_size_max = np.max(self.target_size)
            # Match the short side first; if the long side would then exceed
            # the larger target, match the long side instead.
            im_scale = float(target_size_min) / float(im_size_min)
            if np.round(im_scale * im_size_max) > target_size_max:
                im_scale = float(target_size_max) / float(im_size_max)
            im_scale_x = im_scale
            im_scale_y = im_scale
        else:
            resize_h, resize_w = self.target_size
            im_scale_y = resize_h / float(origin_shape[0])
            im_scale_x = resize_w / float(origin_shape[1])
        return im_scale_y, im_scale_x


class ShortSizeScale(object):
    """
    Scale images by short size.

    Args:
        short_size(float | int): Short size of an image will be scaled to the short_size.
        fixed_ratio(bool): Set whether to zoom according to a fixed ratio. default: True
        keep_ratio(bool | None): Set whether to keep the aspect ratio; cannot
            be true together with fixed_ratio. default: None
        do_round(bool): Whether to round up when calculating the zoom ratio. default: False
        backend(str): Choose pillow or cv2 as the graphics processing backend. default: 'pillow'
    """

    def __init__(self,
                 short_size,
                 fixed_ratio=True,
                 keep_ratio=None,
                 do_round=False,
                 backend='pillow'):
        self.short_size = short_size
        assert (fixed_ratio and not keep_ratio) or (
            not fixed_ratio
        ), "fixed_ratio and keep_ratio cannot be true at the same time"
        self.fixed_ratio = fixed_ratio
        self.keep_ratio = keep_ratio
        self.do_round = do_round

        # The message is formatted explicitly so the offending value is shown.
        assert backend in [
            'pillow', 'cv2'
        ], "Scale's backend must be pillow or cv2, but get {}".format(backend)
        self.backend = backend

    def __call__(self, img):
        """
        Performs resize operations.

        Args:
            img (np.ndarray | PIL.Image): an HWC array or a PIL.Image.
        return:
            resized_img: a PIL.Image after scaling, except for the cv2
                backend with keep_ratio not None, which returns np.ndarray.
        Raises:
            NotImplementedError: if img is neither np.ndarray nor PIL.Image.
        """
        if isinstance(img, np.ndarray):
            h, w, _ = img.shape
        elif isinstance(img, Image.Image):
            w, h = img.size
        else:
            raise NotImplementedError

        if w <= h:
            # Width is the short side.
            ow = self.short_size
            if self.fixed_ratio:  # default is True
                oh = int(self.short_size * 4.0 / 3.0)
            elif not self.keep_ratio:
                oh = self.short_size
            else:
                scale_factor = self.short_size / w
                if self.do_round:
                    oh = int(h * float(scale_factor) + 0.5)
                    ow = int(w * float(scale_factor) + 0.5)
                else:
                    oh = int(h * self.short_size / w)
                    # The short side maps exactly onto short_size.
                    ow = int(self.short_size)
        else:
            # Height is the short side.
            oh = self.short_size
            if self.fixed_ratio:
                ow = int(self.short_size * 4.0 / 3.0)
            elif not self.keep_ratio:
                ow = self.short_size
            else:
                scale_factor = self.short_size / h
                if self.do_round:
                    oh = int(h * float(scale_factor) + 0.5)
                    ow = int(w * float(scale_factor) + 0.5)
                else:
                    # The short side maps exactly onto short_size.
                    oh = int(self.short_size)
                    ow = int(w * self.short_size / h)

        if isinstance(img, np.ndarray):
            img = Image.fromarray(img, mode='RGB')

        if self.backend == 'pillow':
            result_img = img.resize((ow, oh), Image.BILINEAR)
        elif self.backend == 'cv2' and (self.keep_ratio is not None):
            # img is a PIL.Image at this point; cv2 needs an ndarray.
            result_img = cv2.resize(
                np.asarray(img), (ow, oh), interpolation=cv2.INTER_LINEAR)
        else:
            result_img = Image.fromarray(
                cv2.resize(
                    np.asarray(img), (ow, oh),
                    interpolation=cv2.INTER_LINEAR))
        return result_img


class NormalizeImage(object):
    """normalize image

    Args:
        mean (list): im - mean
        std (list): im / std
        is_scale (bool): whether need im / 255
        norm_type (str): type in ['mean_std', 'none']
    """

    def __init__(self, mean, std, is_scale=True, norm_type='mean_std'):
        self.mean = mean
        self.std = std
        self.is_scale = is_scale
        self.norm_type = norm_type

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): image (np.ndarray)
            im_info (dict): info of image
        Returns:
            im (np.ndarray): processed image (np.ndarray)
            im_info (dict): info of processed image
        """
        # copy=False: an input that is already float32 is modified in place.
        im = im.astype(np.float32, copy=False)
        if self.is_scale:
            scale = 1.0 / 255.0
            im *= scale

        if self.norm_type == 'mean_std':
            # Add two leading axes so mean/std broadcast over the first two
            # image axes and apply along the last one.
            mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
            std = np.array(self.std)[np.newaxis, np.newaxis, :]
            im -= mean
            im /= std
        return im, im_info


class Permute(object):
    """permute image by moving the last axis to the front
    (transpose with axes (2, 0, 1))
    """

    def __init__(self, ):
        super(Permute, self).__init__()

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): image (np.ndarray)
            im_info (dict): info of image
        Returns:
            im (np.ndarray): processed image (np.ndarray)
            im_info (dict): info of processed image
        """
        im = im.transpose((2, 0, 1)).copy()
        return im, im_info


class PadStride(object):
    """ padding image for model with FPN, instead PadBatch(pad_to_stride) in original config

    Args:
        stride (int): model with FPN need image shape % stride == 0;
            a value <= 0 disables padding
    """

    def __init__(self, stride=0):
        self.coarsest_stride = stride

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): image (np.ndarray), unpacked as (c, h, w)
            im_info (dict): info of image
        Returns:
            im (np.ndarray): processed image (np.ndarray)
            im_info (dict): info of processed image
        """
        coarsest_stride = self.coarsest_stride
        if coarsest_stride <= 0:
            return im, im_info
        im_c, im_h, im_w = im.shape
        # Round height and width up to the next multiple of the stride.
        pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride)
        pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride)
        # The image sits in the top-left corner; the padded area is zero.
        padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32)
        padding_im[:, :im_h, :im_w] = im
        return padding_im, im_info


class LetterBoxResize(object):
    def __init__(self, target_size):
        """
        Resize image to target size, convert normalized xywh to pixel xyxy
        format ([x_center, y_center, width, height] -> [x0, y0, x1, y1]).

        Args:
            target_size (int|list): image target size.
        """
        super(LetterBoxResize, self).__init__()
        if isinstance(target_size, int):
            target_size = [target_size, target_size]
        self.target_size = target_size

    def letterbox(self, img, height, width, color=(127.5, 127.5, 127.5)):
        """Resize a rectangular image and pad it to (height, width).

        Args:
            img (np.ndarray): image (np.ndarray)
            height (int): padded output height
            width (int): padded output width
            color (tuple): border fill value
        Returns:
            img (np.ndarray): resized and padded image
            ratio (float): resize ratio applied to both axes
            padw (float): horizontal padding on each side
            padh (float): vertical padding on each side
        """
        shape = img.shape[:2]  # [height, width]
        ratio_h = float(height) / shape[0]
        ratio_w = float(width) / shape[1]
        # Use the smaller ratio so the resized image fits inside the target.
        ratio = min(ratio_h, ratio_w)
        new_shape = (round(shape[1] * ratio),
                     round(shape[0] * ratio))  # [width, height]
        padw = (width - new_shape[0]) / 2
        padh = (height - new_shape[1]) / 2
        # The -0.1 / +0.1 offsets split a half-pixel pad between the sides.
        top, bottom = round(padh - 0.1), round(padh + 0.1)
        left, right = round(padw - 0.1), round(padw + 0.1)

        img = cv2.resize(
            img, new_shape, interpolation=cv2.INTER_AREA)  # resized, no border
        img = cv2.copyMakeBorder(
            img, top, bottom, left, right, cv2.BORDER_CONSTANT,
            value=color)  # padded rectangular
        return img, ratio, padw, padh

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): image (np.ndarray)
            im_info (dict): info of image
        Returns:
            im (np.ndarray): processed image (np.ndarray)
            im_info (dict): info of processed image
        """
        assert len(self.target_size) == 2
        assert self.target_size[0] > 0 and self.target_size[1] > 0
        height, width = self.target_size
        h, w = im.shape[:2]
        im, ratio, padw, padh = self.letterbox(im, height=height, width=width)

        # 'im_shape' records the resized size without the padding border.
        new_shape = [round(h * ratio), round(w * ratio)]
        im_info['im_shape'] = np.array(new_shape, dtype=np.float32)
        im_info['scale_factor'] = np.array([ratio, ratio], dtype=np.float32)
        return im, im_info


class Pad(object):
    def __init__(self, size, fill_value=[114.0, 114.0, 114.0]):
        """
        Pad image to a specified size.

        Args:
            size (int|list[int]): image target size; an int is expanded to
                [size, size]
            fill_value (list[float]): rgb value of pad area, default (114.0, 114.0, 114.0)
        """
        super(Pad, self).__init__()
        if isinstance(size, int):
            size = [size, size]
        self.size = size
        self.fill_value = fill_value

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): image (np.ndarray)
            im_info (dict): info of image
        Returns:
            im (np.ndarray): float32 image placed in the top-left corner of
                a canvas filled with fill_value
            im_info (dict): info of image, returned unchanged
        """
        im_h, im_w = im.shape[:2]
        h, w = self.size
        if h == im_h and w == im_w:
            # Already the target size: only the dtype is converted.
            im = im.astype(np.float32)
            return im, im_info

        canvas = np.ones((h, w, 3), dtype=np.float32)
        canvas *= np.array(self.fill_value, dtype=np.float32)
        canvas[0:im_h, 0:im_w, :] = im.astype(np.float32)
        im = canvas
        return im, im_info


class WarpAffine(object):
    """Warp affine the image

    Args:
        keep_res (bool): if True, the input size is derived from the image
            size as (side | pad) + 1; otherwise input_h / input_w are used
        pad (int): bit mask used to derive the input size when keep_res
        input_h (int): warped image height when keep_res is False
        input_w (int): warped image width when keep_res is False
        scale (float): stored on the instance; not used by __call__
        shift (float): stored on the instance; not used by __call__
        down_ratio (int): divisor of the input size giving the output size
    """

    def __init__(self,
                 keep_res=False,
                 pad=31,
                 input_h=512,
                 input_w=512,
                 scale=0.4,
                 shift=0.1,
                 down_ratio=4):
        self.keep_res = keep_res
        self.pad = pad
        self.input_h = input_h
        self.input_w = input_w
        self.scale = scale
        self.shift = shift
        self.down_ratio = down_ratio

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): image (np.ndarray)
            im_info (dict): info of image
        Returns:
            im (np.ndarray): processed image (np.ndarray)
            im_info (dict): info of processed image; extended with the
                transform details only when keep_res is False
        """
        img = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)

        h, w = img.shape[:2]

        if self.keep_res:
            # True in detection eval/infer
            input_h = (h | self.pad) + 1
            input_w = (w | self.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
            c = np.array([w // 2, h // 2], dtype=np.float32)
        else:
            # False in centertrack eval_mot/eval_mot
            s = max(h, w) * 1.0
            input_h, input_w = self.input_h, self.input_w
            c = np.array([w / 2., h / 2.], dtype=np.float32)

        # NOTE(review): get_affine_transform comes from keypoint_preprocess;
        # presumably it builds the 2x3 matrix mapping (center, scale) onto
        # the given output size - confirm against that module.
        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        img = cv2.resize(img, (w, h))
        inp = cv2.warpAffine(
            img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)

        if not self.keep_res:
            out_h = input_h // self.down_ratio
            out_w = input_w // self.down_ratio
            trans_output = get_affine_transform(c, s, 0, [out_w, out_h])

            # These values only exist on this path, so the update lives here.
            im_info.update({
                'center': c,
                'scale': s,
                'out_height': out_h,
                'out_width': out_w,
                'inp_height': input_h,
                'inp_width': input_w,
                'trans_input': trans_input,
                'trans_output': trans_output,
            })
        return inp, im_info


class CULaneResize(object):
    """Cut the top rows of an image, then resize it with probability prob.

    Args:
        img_h (int): resize target height
        img_w (int): resize target width
        cut_height (int): number of rows removed from the top of the image
        prob (float): probability passed to iaa.Sometimes for the resize
    """

    def __init__(self, img_h, img_w, cut_height, prob=0.5):
        super(CULaneResize, self).__init__()
        self.img_h = img_h
        self.img_w = img_w
        self.cut_height = cut_height
        self.prob = prob

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): image (np.ndarray)
            im_info (dict): info of image
        Returns:
            im (np.ndarray): float32 image divided by 255, with the last
                axis moved to the front
            im_info (dict): info of image, returned unchanged
        """
        # cut
        im = im[self.cut_height:, :, :]
        # resize
        transform = iaa.Sometimes(self.prob,
                                  iaa.Resize({
                                      "height": self.img_h,
                                      "width": self.img_w
                                  }))
        im = transform(image=im.copy().astype(np.uint8))

        im = im.astype(np.float32) / 255.
        # check transpose is need whether the func decode_image is equal to CULaneDataSet cv.imread
        im = im.transpose(2, 0, 1)

        return im, im_info


def preprocess(im, preprocess_ops):
    """Decode an image and run it through a sequence of preprocess ops.

    Args:
        im (str|np.ndarray): image path or image array, see decode_image
        preprocess_ops (list): callables taking and returning (im, im_info)
    Returns:
        im (np.ndarray): processed image
        im_info (dict): info of processed image
    """
    # process image by preprocess_ops
    im_info = {
        'scale_factor': np.array(
            [1., 1.], dtype=np.float32),
        'im_shape': None,
    }
    im, im_info = decode_image(im, im_info)
    for operator in preprocess_ops:
        im, im_info = operator(im, im_info)
    return im, im_info