transforms.py

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import random
import traceback
import numpy as np
from PIL import Image

import logging
logger = logging.getLogger(__name__)

__all__ = ['GroupScale', 'GroupMultiScaleCrop', 'GroupRandomCrop',
           'GroupRandomFlip', 'GroupCenterCrop', 'NormalizeImage',
           'Compose']


class Compose(object):
    def __init__(self, transforms=[]):
        self.transforms = transforms

    def __call__(self, *data):
        for f in self.transforms:
            try:
                data = f(*data)
            except Exception as e:
                stack_info = traceback.format_exc()
                logger.info("fail to perform transform [{}] with error: "
                        "{} and stack:\n{}".format(f, e, str(stack_info)))
                raise e
        return data


class GroupScale(object):
    """
    Group scale image

    Args:
        target_size (int): image resize target size
    """
    def __init__(self, target_size=224):
        self.target_size = target_size

    def __call__(self, imgs, label):
        resized_imgs = []
        for i in range(len(imgs)):
            img = imgs[i]
            w, h = img.size
            if (w <= h and w == self.target_size) or \
                    (h <= w and h == self.target_size):
                resized_imgs.append(img)
                continue

            if w < h:
                ow = self.target_size
                oh = int(self.target_size * 4.0 / 3.0)
                resized_imgs.append(img.resize((ow, oh), Image.BILINEAR))
            else:
                oh = self.target_size
                ow = int(self.target_size * 4.0 / 3.0)
                resized_imgs.append(img.resize((ow, oh), Image.BILINEAR))
        
        return resized_imgs, label


class GroupMultiScaleCrop(object):
    """
    FIXME: add comments
    """
    def __init__(self,
                 short_size=256,
                 scales=None,
                 max_distort=1,
                 fix_crop=True,
                 more_fix_crop=True):
        self.short_size = short_size
        self.scales = scales if scales is not None \
                        else [1, .875, .75, .66]
        self.max_distort = max_distort
        self.fix_crop = fix_crop
        self.more_fix_crop = more_fix_crop

    def __call__(self, imgs, label):
        input_size = [self.short_size, self.short_size]
        im_size = imgs[0].size
        
        # get random crop offset
        def _sample_crop_size(im_size):
            image_w, image_h = im_size[0], im_size[1]
            
            base_size = min(image_w, image_h)
            crop_sizes = [int(base_size * x) for x in self.scales]
            crop_h = [
                input_size[1] if abs(x - input_size[1]) < 3 else x
                for x in crop_sizes
            ]
            crop_w = [
                input_size[0] if abs(x - input_size[0]) < 3 else x
                for x in crop_sizes
            ]
            
            pairs = []
            for i, h in enumerate(crop_h):
                for j, w in enumerate(crop_w):
                    if abs(i - j) <= self.max_distort:
                        pairs.append((w, h))
            crop_pair = random.choice(pairs)
            if not self.fix_crop:
                w_offset = np.random.randint(0, image_w - crop_pair[0])
                h_offset = np.random.randint(0, image_h - crop_pair[1])
            else:
                w_step = (image_w - crop_pair[0]) / 4
                h_step = (image_h - crop_pair[1]) / 4
                
                ret = list()
                ret.append((0, 0))  # upper left
                if w_step != 0:
                    ret.append((4 * w_step, 0))  # upper right
                if h_step != 0:
                    ret.append((0, 4 * h_step))  # lower left
                if h_step != 0 and w_step != 0:
                    ret.append((4 * w_step, 4 * h_step))  # lower right
                if h_step != 0 or w_step != 0:
                    ret.append((2 * w_step, 2 * h_step))  # center
                
                if self.more_fix_crop:
                    ret.append((0, 2 * h_step))  # center left
                    ret.append((4 * w_step, 2 * h_step))  # center right
                    ret.append((2 * w_step, 4 * h_step))  # lower center
                    ret.append((2 * w_step, 0 * h_step))  # upper center
                    
                    ret.append((1 * w_step, 1 * h_step))  # upper left quarter
                    ret.append((3 * w_step, 1 * h_step))  # upper right quarter
                    ret.append((1 * w_step, 3 * h_step))  # lower left quarter
                    ret.append((3 * w_step, 3 * h_step))  # lower righ quarter
                
                w_offset, h_offset = random.choice(ret)
            
            return crop_pair[0], crop_pair[1], w_offset, h_offset
        
        crop_w, crop_h, offset_w, offset_h = _sample_crop_size(im_size)
        crop_imgs = [
            img.crop((offset_w, offset_h, offset_w + crop_w, offset_h + crop_h))
            for img in imgs
        ]
        ret_imgs = [
            img.resize((input_size[0], input_size[1]), Image.BILINEAR)
            for img in crop_imgs
        ]

        return ret_imgs, label


class GroupRandomCrop(object):
    def __init__(self, target_size=224):
        self.target_size = target_size

    def __call__(self, imgs, label):
        w, h = imgs[0].size
        th, tw = self.target_size, self.target_size
        
        assert (w >= self.target_size) and (h >= self.target_size), \
            "image width({}) and height({}) should be larger than " \
            "crop size".format(w, h, self.target_size)
        
        out_images = []
        x1 = np.random.randint(0, w - tw)
        y1 = np.random.randint(0, h - th)
        
        for img in imgs:
            if w == tw and h == th:
                out_images.append(img)
            else:
                out_images.append(img.crop((x1, y1, x1 + tw, y1 + th)))
        
        return out_images, label


class GroupRandomFlip(object):
    def __call__(self, imgs, label):
        v = np.random.random()
        if v < 0.5:
            ret = [img.transpose(Image.FLIP_LEFT_RIGHT) for img in imgs]
            return ret, label
        else:
            return imgs, label 


class GroupCenterCrop(object):
    def __init__(self, target_size=224):
        self.target_size = target_size

    def __call__(self, imgs, label):
        crop_imgs = []
        for img in imgs:
            w, h = img.size
            th, tw = self.target_size, self.target_size
            assert (w >= self.target_size) and (h >= self.target_size), \
                "image width({}) and height({}) should be larger " \
                "than crop size".format(w, h, self.target_size)
            x1 = int(round((w - tw) / 2.))
            y1 = int(round((h - th) / 2.))
            crop_imgs.append(img.crop((x1, y1, x1 + tw, y1 + th)))
        
        return crop_imgs, label 


class NormalizeImage(object):
    def __init__(self,
                 target_size=224,
                 img_mean=[0.485, 0.456, 0.406],
                 img_std=[0.229, 0.224, 0.225],
                 seg_num=8,
                 seg_len=1):
        self.target_size = target_size
        self.img_mean = np.array(img_mean).reshape((3, 1, 1)).astype('float32')
        self.img_std = np.array(img_std).reshape((3, 1, 1)).astype('float32')
        self.seg_num = seg_num
        self.seg_len = seg_len

    def __call__(self, imgs, label):
        np_imgs = (np.array(imgs[0]).astype('float32').transpose(
            (2, 0, 1))).reshape(1, 3, self.target_size,
            self.target_size) / 255
        for i in range(len(imgs) - 1):
            img = (np.array(imgs[i + 1]).astype('float32').transpose(
                (2, 0, 1))).reshape(1, 3, self.target_size,
                self.target_size) / 255
            np_imgs = np.concatenate((np_imgs, img))
        
        np_imgs -= self.img_mean
        np_imgs /= self.img_std
        np_imgs = np.reshape(np_imgs, (self.seg_num, self.seg_len * 3,
                             self.target_size, self.target_size))
        
        return np_imgs, label