# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from PIL import Image import cv2 import numpy as np def decode_image(im_file, im_info): """read rgb image Args: im_file (str/np.ndarray): path of image/ np.ndarray read by cv2 im_info (dict): info of image Returns: im (np.ndarray): processed image (np.ndarray) im_info (dict): info of processed image """ if isinstance(im_file, str): with open(im_file, 'rb') as f: im_read = f.read() data = np.frombuffer(im_read, dtype='uint8') im = cv2.imdecode(data, 1) # BGR mode, but need RGB mode im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) im_info['origin_shape'] = im.shape[:2] im_info['resize_shape'] = im.shape[:2] else: im = im_file #im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) im_info['origin_shape'] = im.shape[:2] im_info['resize_shape'] = im.shape[:2] return im, im_info class Resize(object): """resize image by target_size and max_size Args: arch (str): model type target_size (int): the target size of image max_size (int): the max size of image use_cv2 (bool): whether us cv2 image_shape (list): input shape of model interp (int): method of resize """ def __init__(self, target_size=800, max_size=1333, use_cv2=True, image_shape=None, interp=cv2.INTER_LINEAR, resize_box=False): self.target_size = target_size self.max_size = max_size self.image_shape = image_shape self.use_cv2 = use_cv2 self.interp = interp def __call__(self, im, im_info): """ Args: im (np.ndarray): image (np.ndarray) im_info (dict): info of image Returns: im (np.ndarray): processed image (np.ndarray) im_info (dict): info of processed image """ im_channel = im.shape[2] im_scale_x, im_scale_y = self.generate_scale(im) if self.use_cv2: im = cv2.resize(im, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=self.interp) else: resize_w = int(im_scale_x * float(im.shape[1])) resize_h = int(im_scale_y * float(im.shape[0])) if self.max_size != 0: raise TypeError('If you set max_size to cap the maximum size of image,' 'please set use_cv2 to True to resize the image.') im = im.astype('uint8') im = Image.fromarray(im) im = im.resize((int(resize_w), int(resize_h)), self.interp) im = np.array(im) # padding im when image_shape fixed by infer_cfg.yml if self.max_size != 0 and self.image_shape is not None: padding_im = np.zeros((self.max_size, self.max_size, im_channel), dtype=np.float32) im_h, im_w = im.shape[:2] padding_im[:im_h, :im_w, :] = im im = padding_im im_info['scale'] = [im_scale_x, im_scale_y] im_info['resize_shape'] = im.shape[:2] return im, im_info def generate_scale(self, im): """ Args: im (np.ndarray): image (np.ndarray) Returns: im_scale_x: the resize ratio of X im_scale_y: the resize ratio of Y """ origin_shape = im.shape[:2] im_c = im.shape[2] if self.max_size != 0: im_size_min = np.min(origin_shape[0:2]) im_size_max = np.max(origin_shape[0:2]) im_scale = float(self.target_size) / float(im_size_min) if np.round(im_scale * im_size_max) > self.max_size: im_scale = float(self.max_size) / float(im_size_max) im_scale_x = im_scale im_scale_y = im_scale else: im_scale_x = float(self.target_size) / float(origin_shape[1]) im_scale_y = float(self.target_size) / float(origin_shape[0]) return im_scale_x, im_scale_y class Normalize(object): """normalize image Args: mean (list): im - mean std (list): im / std is_scale (bool): whether need im / 255 is_channel_first (bool): if True: image shape is CHW, else: HWC """ def __init__(self, mean, std, is_scale=True, is_channel_first=False): self.mean = mean self.std = std self.is_scale = is_scale self.is_channel_first = is_channel_first def __call__(self, im, im_info): """ Args: im (np.ndarray): image (np.ndarray) im_info (dict): info of image Returns: im (np.ndarray): processed image (np.ndarray) im_info (dict): info of processed image """ im = im.astype(np.float32, copy=False) if self.is_channel_first: mean = np.array(self.mean)[:, np.newaxis, np.newaxis] std = np.array(self.std)[:, np.newaxis, np.newaxis] else: mean = np.array(self.mean)[np.newaxis, np.newaxis, :] std = np.array(self.std)[np.newaxis, np.newaxis, :] if self.is_scale: im = im / 255.0 im -= mean im /= std return im, im_info class Permute(object): """permute image Args: to_bgr (bool): whether convert RGB to BGR channel_first (bool): whether convert HWC to CHW """ def __init__(self, to_bgr=False, channel_first=True): self.to_bgr = to_bgr self.channel_first = channel_first def __call__(self, im, im_info): """ Args: im (np.ndarray): image (np.ndarray) im_info (dict): info of image Returns: im (np.ndarray): processed image (np.ndarray) im_info (dict): info of processed image """ if self.channel_first: im = im.transpose((2, 0, 1)).copy() if self.to_bgr: im = im[[2, 1, 0], :, :] return im, im_info class PadStride(object): """ padding image for model with FPN Args: stride (bool): model with FPN need image shape % stride == 0 """ def __init__(self, stride=0): self.coarsest_stride = stride def __call__(self, im, im_info): """ Args: im (np.ndarray): image (np.ndarray) im_info (dict): info of image Returns: im (np.ndarray): processed image (np.ndarray) im_info (dict): info of processed image """ coarsest_stride = self.coarsest_stride if coarsest_stride == 0: return im im_c, im_h, im_w = im.shape pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride) pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride) padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32) padding_im[:, :im_h, :im_w] = im im_info['pad_shape'] = padding_im.shape[1:] return padding_im, im_info def preprocess(im, preprocess_ops): # process image by preprocess_ops im_info = { 'scale': [1., 1.], 'origin_shape': None, 'resize_shape': None, 'pad_shape': None, } im, im_info = decode_image(im, im_info) for operator in preprocess_ops: im, im_info = operator(im, im_info) im = np.array((im, )).astype('float32') return im, im_info