import numpy as np from PIL import Image import random def read_image(path, dtype=np.float32, color=True): """Read an image from a file. This function reads an image from given file. The image is CHW format and the range of its value is :math:`[0, 255]`. If :obj:`color = True`, the order of the channels is RGB. Args: path (str): A path of image file. dtype: The type of array. The default value is :obj:`~numpy.float32`. color (bool): This option determines the number of channels. If :obj:`True`, the number of channels is three. In this case, the order of the channels is RGB. This is the default behaviour. If :obj:`False`, this function returns a grayscale image. Returns: ~numpy.ndarray: An image. """ f = Image.open(path) try: if color: img = f.convert('RGB') else: img = f.convert('P') img = np.asarray(img, dtype=dtype) finally: if hasattr(f, 'close'): f.close() if img.ndim == 2: # reshape (H, W) -> (1, H, W) return img[np.newaxis] else: # transpose (H, W, C) -> (C, H, W) return img.transpose((2, 0, 1)) def resize_bbox(bbox, in_size, out_size): """Resize bounding boxes according to image resize. The bounding boxes are expected to be packed into a two dimensional tensor of shape :math:`(R, 4)`, where :math:`R` is the number of bounding boxes in the image. The second axis represents attributes of the bounding box. They are :math:`(y_{min}, x_{min}, y_{max}, x_{max})`, where the four attributes are coordinates of the top left and the bottom right vertices. Args: bbox (~numpy.ndarray): An array whose shape is :math:`(R, 4)`. :math:`R` is the number of bounding boxes. in_size (tuple): A tuple of length 2. The height and the width of the image before resized. out_size (tuple): A tuple of length 2. The height and the width of the image after resized. Returns: ~numpy.ndarray: Bounding boxes rescaled according to the given image shapes. """ bbox = bbox.copy() y_scale = float(out_size[0]) / in_size[0] x_scale = float(out_size[1]) / in_size[1] bbox[:, 0] = y_scale * bbox[:, 0] bbox[:, 2] = y_scale * bbox[:, 2] bbox[:, 1] = x_scale * bbox[:, 1] bbox[:, 3] = x_scale * bbox[:, 3] return bbox def flip_bbox(bbox, size, y_flip=False, x_flip=False): """Flip bounding boxes accordingly. The bounding boxes are expected to be packed into a two dimensional tensor of shape :math:`(R, 4)`, where :math:`R` is the number of bounding boxes in the image. The second axis represents attributes of the bounding box. They are :math:`(y_{min}, x_{min}, y_{max}, x_{max})`, where the four attributes are coordinates of the top left and the bottom right vertices. Args: bbox (~numpy.ndarray): An array whose shape is :math:`(R, 4)`. :math:`R` is the number of bounding boxes. size (tuple): A tuple of length 2. The height and the width of the image before resized. y_flip (bool): Flip bounding box according to a vertical flip of an image. x_flip (bool): Flip bounding box according to a horizontal flip of an image. Returns: ~numpy.ndarray: Bounding boxes flipped according to the given flips. """ H, W = size bbox = bbox.copy() if y_flip: y_max = H - bbox[:, 0] y_min = H - bbox[:, 2] bbox[:, 0] = y_min bbox[:, 2] = y_max if x_flip: x_max = W - bbox[:, 1] x_min = W - bbox[:, 3] bbox[:, 1] = x_min bbox[:, 3] = x_max return bbox def crop_bbox( bbox, y_slice=None, x_slice=None, allow_outside_center=True, return_param=False): """Translate bounding boxes to fit within the cropped area of an image. This method is mainly used together with image cropping. This method translates the coordinates of bounding boxes like :func:`data.util.translate_bbox`. In addition, this function truncates the bounding boxes to fit within the cropped area. If a bounding box does not overlap with the cropped area, this bounding box will be removed. The bounding boxes are expected to be packed into a two dimensional tensor of shape :math:`(R, 4)`, where :math:`R` is the number of bounding boxes in the image. The second axis represents attributes of the bounding box. They are :math:`(y_{min}, x_{min}, y_{max}, x_{max})`, where the four attributes are coordinates of the top left and the bottom right vertices. Args: bbox (~numpy.ndarray): Bounding boxes to be transformed. The shape is :math:`(R, 4)`. :math:`R` is the number of bounding boxes. y_slice (slice): The slice of y axis. x_slice (slice): The slice of x axis. allow_outside_center (bool): If this argument is :obj:`False`, bounding boxes whose centers are outside of the cropped area are removed. The default value is :obj:`True`. return_param (bool): If :obj:`True`, this function returns indices of kept bounding boxes. Returns: ~numpy.ndarray or (~numpy.ndarray, dict): If :obj:`return_param = False`, returns an array :obj:`bbox`. If :obj:`return_param = True`, returns a tuple whose elements are :obj:`bbox, param`. :obj:`param` is a dictionary of intermediate parameters whose contents are listed below with key, value-type and the description of the value. * **index** (*numpy.ndarray*): An array holding indices of used \ bounding boxes. """ t, b = _slice_to_bounds(y_slice) l, r = _slice_to_bounds(x_slice) crop_bb = np.array((t, l, b, r)) if allow_outside_center: mask = np.ones(bbox.shape[0], dtype=bool) else: center = (bbox[:, :2] + bbox[:, 2:]) / 2 mask = np.logical_and(crop_bb[:2] <= center, center < crop_bb[2:]) \ .all(axis=1) bbox = bbox.copy() bbox[:, :2] = np.maximum(bbox[:, :2], crop_bb[:2]) bbox[:, 2:] = np.minimum(bbox[:, 2:], crop_bb[2:]) bbox[:, :2] -= crop_bb[:2] bbox[:, 2:] -= crop_bb[:2] mask = np.logical_and(mask, (bbox[:, :2] < bbox[:, 2:]).all(axis=1)) bbox = bbox[mask] if return_param: return bbox, {'index': np.flatnonzero(mask)} else: return bbox def _slice_to_bounds(slice_): if slice_ is None: return 0, np.inf if slice_.start is None: l = 0 else: l = slice_.start if slice_.stop is None: u = np.inf else: u = slice_.stop return l, u def translate_bbox(bbox, y_offset=0, x_offset=0): """Translate bounding boxes. This method is mainly used together with image transforms, such as padding and cropping, which translates the left top point of the image from coordinate :math:`(0, 0)` to coordinate :math:`(y, x) = (y_{offset}, x_{offset})`. The bounding boxes are expected to be packed into a two dimensional tensor of shape :math:`(R, 4)`, where :math:`R` is the number of bounding boxes in the image. The second axis represents attributes of the bounding box. They are :math:`(y_{min}, x_{min}, y_{max}, x_{max})`, where the four attributes are coordinates of the top left and the bottom right vertices. Args: bbox (~numpy.ndarray): Bounding boxes to be transformed. The shape is :math:`(R, 4)`. :math:`R` is the number of bounding boxes. y_offset (int or float): The offset along y axis. x_offset (int or float): The offset along x axis. Returns: ~numpy.ndarray: Bounding boxes translated according to the given offsets. """ out_bbox = bbox.copy() out_bbox[:, :2] += (y_offset, x_offset) out_bbox[:, 2:] += (y_offset, x_offset) return out_bbox def random_flip(img, y_random=False, x_random=False, return_param=False, copy=False): """Randomly flip an image in vertical or horizontal direction. Args: img (~numpy.ndarray): An array that gets flipped. This is in CHW format. y_random (bool): Randomly flip in vertical direction. x_random (bool): Randomly flip in horizontal direction. return_param (bool): Returns information of flip. copy (bool): If False, a view of :obj:`img` will be returned. Returns: ~numpy.ndarray or (~numpy.ndarray, dict): If :obj:`return_param = False`, returns an array :obj:`out_img` that is the result of flipping. If :obj:`return_param = True`, returns a tuple whose elements are :obj:`out_img, param`. :obj:`param` is a dictionary of intermediate parameters whose contents are listed below with key, value-type and the description of the value. * **y_flip** (*bool*): Whether the image was flipped in the\ vertical direction or not. * **x_flip** (*bool*): Whether the image was flipped in the\ horizontal direction or not. """ y_flip, x_flip = False, False if y_random: y_flip = random.choice([True, False]) if x_random: x_flip = random.choice([True, False]) if y_flip: img = img[:, ::-1, :] if x_flip: img = img[:, :, ::-1] if copy: img = img.copy() if return_param: return img, {'y_flip': y_flip, 'x_flip': x_flip} else: return img