# utils.py

import math
import numpy as np
from PIL import Image

import paddle

# Seed NumPy's global random number generator once, at import time, so that
# NumPy-based randomness in this module (e.g. the noise drawn by
# gaussian_noise) is reproducible from run to run.
np.random.seed(0)


def is_image_file(filename):
    """Tell whether ``filename`` ends with a recognised image extension.

    The check is case-sensitive: only the all-lowercase and all-uppercase
    spellings of ``png``, ``jpg`` and ``jpeg`` are accepted.

    Args:
        filename (str): File name or path to inspect.

    Returns:
        bool: True if the name ends with one of the accepted extensions.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.PNG', '.JPG', '.JPEG')
    # str.endswith accepts a tuple and is True when any suffix matches.
    return filename.endswith(image_suffixes)


def calculate_valid_crop_size(crop_size, upscale_factor):
    """Shrink ``crop_size`` to a multiple of ``upscale_factor``.

    The remainder of ``crop_size`` modulo ``upscale_factor`` is subtracted,
    so for a positive factor the result is the largest multiple of the
    factor that does not exceed ``crop_size``.

    Args:
        crop_size (int): Requested crop size.
        upscale_factor (int): Factor the crop size must be divisible by.

    Returns:
        int: ``crop_size`` minus its remainder modulo ``upscale_factor``.
    """
    _, leftover = divmod(crop_size, upscale_factor)
    return crop_size - leftover


def gaussian_noise(image, std_dev):
    """Return a copy of ``image`` corrupted by rounded Gaussian noise.

    One zero-mean normal sample with standard deviation ``std_dev`` is drawn
    per element of ``image`` from NumPy's global RNG, rounded to the nearest
    integer, added to the image, clipped to ``[0, 255]`` and cast to uint8.

    Args:
        image: Array-like image data (anything ``np.shape`` and NumPy
            addition accept).
        std_dev (float): Standard deviation of the noise.

    Returns:
        PIL.Image.Image: The noisy image built with ``Image.fromarray``.
    """
    raw_noise = np.random.normal(
        loc=0.0, scale=std_dev, size=np.shape(image))
    # Round so the perturbation is a whole number of intensity levels.
    integer_noise = np.rint(raw_noise)
    noisy = np.clip(image + integer_noise, 0, 255)
    return Image.fromarray(noisy.astype(np.uint8))


#################################################################################
# MATLAB imresize taken from ESRGAN (https://github.com/xinntao/BasicSR)
#################################################################################


def cubic(x):
    """Evaluate the piecewise-cubic interpolation kernel element-wise.

    With ``a = |x|`` the kernel is::

        1.5*a^3 - 2.5*a^2 + 1            for a <= 1
        -0.5*a^3 + 2.5*a^2 - 4*a + 2     for 1 < a <= 2
        0                                otherwise

    Args:
        x (paddle.Tensor): Distances at which to evaluate the kernel.

    Returns:
        paddle.Tensor: Kernel values, same shape as ``x``.
    """
    magnitude = paddle.abs(x)
    squared = magnitude**2
    cubed = magnitude**3
    value_dtype = magnitude.dtype

    # 0/1 masks (in the input dtype) selecting each piece of the kernel.
    inner_mask = paddle.cast((magnitude <= 1), value_dtype)
    outer_mask = paddle.cast((magnitude > 1), value_dtype) * paddle.cast(
        (magnitude <= 2), value_dtype)

    inner_poly = 1.5 * cubed - 2.5 * squared + 1
    outer_poly = -0.5 * cubed + 2.5 * squared - 4 * magnitude + 2
    return inner_poly * inner_mask + outer_poly * outer_mask


def calculate_weights_indices(in_length, out_length, scale, kernel,
                              kernel_width, antialiasing):
    """Compute per-output-pixel cubic weights and source indices for one axis.

    For every output position along an axis of length ``out_length`` this
    builds the window of input positions that contribute to it and the
    normalized cubic-kernel weight of each of them.

    Args:
        in_length (int): Length of the axis in the input image.
        out_length (int): Length of the axis in the output image.
        scale (float): Resize factor; values below 1 mean downscaling.
        kernel: Not referenced in the body; the cubic kernel is always used.
        kernel_width (float): Support width of the kernel. It is divided by
            ``scale`` when downscaling with ``antialiasing`` enabled.
        antialiasing (bool): Whether to widen the kernel when ``scale < 1``.

    Returns:
        tuple: ``(weights, indices, sym_len_s, sym_len_e)`` where

        * ``weights`` has one row per output pixel, each row normalized to
          sum to 1;
        * ``indices`` has the same shape and is shifted so that its minimum
          is 0, i.e. it addresses an input that has been extended by
          ``sym_len_s`` entries at the start;
        * ``sym_len_s`` / ``sym_len_e`` (int) are how far the windows reach
          before the first / past the last input entry.
    """
    if (scale < 1) and (antialiasing):
        # Use a modified kernel to simultaneously interpolate and antialias:
        # this needs a larger kernel width.
        kernel_width = kernel_width / scale

    # Output-space coordinates (1-based: 1, 2, ..., out_length).
    x = paddle.linspace(1, out_length, out_length)

    # Input-space coordinates. Calculate the inverse mapping such that 0.5
    # in output space maps to 0.5 in input space, and 0.5+scale in output
    # space maps to 1.5 in input space.
    u = x / scale + 0.5 * (1 - 1 / scale)

    # What is the left-most pixel that can be involved in the computation?
    left = paddle.floor(u - kernel_width / 2)

    # What is the maximum number of pixels that can be involved in the
    # computation?  Note: it's OK to use an extra pixel here; if the
    # corresponding weights are all zero, it will be eliminated at the end
    # of this function.
    P = math.ceil(kernel_width) + 2

    # The indices of the input pixels involved in computing the k-th output
    # pixel are in row k of the indices matrix: left[k] + (0 .. P-1).
    indices = left.reshape([out_length, 1]).expand([
        out_length, P
    ]) + paddle.linspace(0, P - 1, P).reshape([1, P]).expand([out_length, P])

    # The weights used to compute the k-th output pixel are in row k of the
    # weights matrix.
    distance_to_center = u.reshape([out_length, 1]).expand([out_length, P
                                                            ]) - indices
    # Apply the cubic kernel; when antialiasing a downscale, the kernel is
    # stretched by 1/scale and its values scaled by ``scale``.
    if (scale < 1) and (antialiasing):
        weights = scale * cubic(distance_to_center * scale)
    else:
        weights = cubic(distance_to_center)
    # Normalize the weights matrix so that each row sums to 1.
    weights_sum = paddle.sum(weights, 1).reshape([out_length, 1])
    weights = weights / weights_sum.expand([out_length, P])

    # If a column in weights is all zero, get rid of it. Only the first and
    # last columns are considered.
    # weights_zero_tmp[j] is the number of exactly-zero entries in column j.
    # NOTE(review): with a reference value of 0 and the default abs_tol,
    # math.isclose(count, 0, rel_tol=...) is true only when count == 0, so
    # each test below fires when the column contains at least one zero, not
    # only when the whole column is zero -- confirm this is intended.
    weights_zero_tmp = np.sum((weights.numpy() == 0), 0)
    if not math.isclose(weights_zero_tmp[0], 0, rel_tol=1e-6):
        # Drop the first column, keeping P - 2 columns.
        indices = indices[:, 1:1 + P - 2]
        weights = weights[:, 1:1 + P - 2]

    if not math.isclose(weights_zero_tmp[-1], 0, rel_tol=1e-6):
        # Keep the first P - 2 columns (a no-op if the first column was
        # already dropped above, since only P - 2 columns remain then).
        indices = indices[:, 0:P - 2]
        weights = weights[:, 0:P - 2]

    # Overhang of the windows before the start / past the end of the input,
    # measured on the still-unshifted 1-based indices.
    sym_len_s = -indices.min() + 1
    sym_len_e = indices.max() - in_length
    # Shift so the smallest index becomes 0.
    indices = indices + sym_len_s - 1
    return weights, indices, int(sym_len_s), int(sym_len_e)


def imresize(img, scale, antialiasing=True):
    """Resize a CHW image tensor with MATLAB-style bicubic interpolation.

    The same ``scale`` is applied to height and width. The image is resized
    along H first and then along W; before each pass it is extended at both
    ends of the axis with mirrored copies of its border so that the kernel
    windows never leave the data.

    Args:
        img (paddle.Tensor): Image of shape ``[C, H, W]``; the original code
            documents it as RGB with values in ``[0, 1]``. Any number of
            channels is accepted.
        scale (float): Resize factor applied to both H and W.
        antialiasing (bool): Forwarded to ``calculate_weights_indices``;
            widens the kernel when downscaling.

    Returns:
        paddle.Tensor: Tensor of shape
        ``[C, ceil(H * scale), ceil(W * scale)]``, clipped to ``[0, 1]`` and
        not rounded.
    """
    in_C, in_H, in_W = img.shape
    out_H, out_W = math.ceil(in_H * scale), math.ceil(in_W * scale)
    kernel_width = 4
    kernel = 'cubic'

    # Resizing first along the dimension with the smallest scale factor (as
    # MATLAB does) is not supported; H is always processed before W.

    # get weights and indices
    weights_H, indices_H, sym_len_Hs, sym_len_He = calculate_weights_indices(
        in_H, out_H, scale, kernel, kernel_width, antialiasing)
    weights_W, indices_W, sym_len_Ws, sym_len_We = calculate_weights_indices(
        in_W, out_W, scale, kernel, kernel_width, antialiasing)

    # ---- process H dimension ----
    # symmetric copying: original rows in the middle, mirrored rows around.
    img_aug = paddle.zeros([in_C, in_H + sym_len_Hs + sym_len_He, in_W])
    img_aug[:, sym_len_Hs:sym_len_Hs + in_H, :] = img

    if sym_len_Hs > 0:
        sym_patch = img[:, :sym_len_Hs, :]
        inv_idx = paddle.arange(sym_patch.shape[1] - 1, -1, -1)
        img_aug[:, :sym_len_Hs, :] = paddle.index_select(
            sym_patch, inv_idx, 1)

    # Guard the zero-length case: ``img[:, -0:, :]`` is the whole image, not
    # an empty slice, and would not fit the empty destination.
    if sym_len_He > 0:
        sym_patch = img[:, -sym_len_He:, :]
        inv_idx = paddle.arange(sym_patch.shape[1] - 1, -1, -1)
        img_aug[:, sym_len_Hs + in_H:sym_len_Hs + in_H +
                sym_len_He, :] = paddle.index_select(sym_patch, inv_idx, 1)

    out_1 = paddle.zeros([in_C, out_H, in_W])
    kernel_width = weights_H.shape[1]
    for i in range(out_H):
        idx = int(indices_H[i][0])
        row_weights = weights_H[i]
        # Every channel uses the same window and weights for output row i.
        for c in range(in_C):
            out_1[c, i, :] = paddle.mv(
                img_aug[c, idx:idx + kernel_width, :].transpose([1, 0]),
                row_weights)

    # ---- process W dimension ----
    # symmetric copying, this time along the last axis.
    out_1_aug = paddle.zeros([in_C, out_H, in_W + sym_len_Ws + sym_len_We])
    out_1_aug[:, :, sym_len_Ws:sym_len_Ws + in_W] = out_1

    if sym_len_Ws > 0:
        sym_patch = out_1[:, :, :sym_len_Ws]
        inv_idx = paddle.arange(sym_patch.shape[2] - 1, -1, -1)
        out_1_aug[:, :, 0:sym_len_Ws] = paddle.index_select(
            sym_patch, inv_idx, 2)

    if sym_len_We > 0:
        sym_patch = out_1[:, :, -sym_len_We:]
        inv_idx = paddle.arange(sym_patch.shape[2] - 1, -1, -1)
        out_1_aug[:, :, sym_len_Ws + in_W:sym_len_Ws + in_W +
                  sym_len_We] = paddle.index_select(sym_patch, inv_idx, 2)

    out_2 = paddle.zeros([in_C, out_H, out_W])
    kernel_width = weights_W.shape[1]
    for i in range(out_W):
        idx = int(indices_W[i][0])
        col_weights = weights_W[i]
        for c in range(in_C):
            out_2[c, :, i] = paddle.mv(
                out_1_aug[c, :, idx:idx + kernel_width], col_weights)

    return paddle.clip(out_2, 0, 1)


def to_pil_image(pic, mode=None):
    """Convert a tensor or an ndarray to PIL Image.

    Tensors are read as CHW (a 2D tensor gets a leading channel axis) and
    are converted to an HWC array. float32 tensors are multiplied by 255 and
    cast to uint8 unless ``mode == 'F'``; tensors of other dtypes are
    converted without scaling.

    ndarrays are read as HWC (a 2D array gets a trailing channel axis),
    except that float32 arrays keep their historical handling: they are
    multiplied by 255, cast to uint8 and transposed with ``(1, 2, 0)``.

    Args:
        pic (paddle.Tensor or numpy.ndarray): Image to be converted to PIL Image.
        mode (`PIL.Image mode`_): color space and pixel depth of input data (optional).
            When omitted it is inferred from the channel count and dtype.

    .. _PIL.Image mode: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#concept-modes

    Returns:
        PIL Image: Image converted to PIL Image.

    Raises:
        TypeError: If ``pic`` is neither a Tensor nor an ndarray, or if no
            mode can be inferred for its dtype.
        ValueError: If ``pic`` is not 2- or 3-dimensional, or if ``mode`` is
            not valid for the number of channels / dtype.
    """
    if isinstance(pic, paddle.Tensor):
        if len(pic.shape) not in {2, 3}:
            raise ValueError(
                'pic should be 2/3 dimensional. Got {} dimensions.'.format(
                    len(pic.shape)))
        if len(pic.shape) == 2:
            # if 2D image, add channel dimension (CHW)
            pic = pic.unsqueeze(0)

        chw = pic.numpy()
        if chw.dtype == np.float32 and mode != 'F':
            chw = (chw * 255.).astype('uint8')
        # Previously only float32 tensors with mode != 'F' reached this
        # transpose; every other tensor ended in a TypeError below.
        npimg = np.transpose(chw, (1, 2, 0))

    elif isinstance(pic, np.ndarray):
        if pic.ndim not in {2, 3}:
            raise ValueError(
                'pic should be 2/3 dimensional. Got {} dimensions.'.format(
                    pic.ndim))
        if pic.ndim == 2:
            # if 2D image, add channel dimension (HWC)
            pic = np.expand_dims(pic, 2)

        if pic.dtype == 'float32':
            # NOTE(review): float32 arrays are scaled and transposed as if
            # they were CHW, unlike other ndarrays. Kept as-is because
            # callers may rely on it -- confirm the intended layout.
            npimg = np.transpose((pic * 255.).astype('uint8'), (1, 2, 0))
        else:
            npimg = pic

    else:
        raise TypeError('pic should be Tensor or ndarray. Got {}.'.format(
            type(pic)))

    if npimg.shape[2] == 1:
        expected_mode = None
        npimg = npimg[:, :, 0]
        if npimg.dtype == np.uint8:
            expected_mode = 'L'
        elif npimg.dtype == np.int16:
            expected_mode = 'I;16'
        elif npimg.dtype == np.int32:
            expected_mode = 'I'
        elif npimg.dtype == np.float32:
            expected_mode = 'F'
        if mode is not None and mode != expected_mode:
            # Report the array's dtype (the original formatted the
            # ``np.dtype`` class itself).
            raise ValueError(
                "Incorrect mode ({}) supplied for input type {}. Should be {}".
                format(mode, npimg.dtype, expected_mode))
        mode = expected_mode

    elif npimg.shape[2] == 2:
        permitted_2_channel_modes = ['LA']
        if mode is not None and mode not in permitted_2_channel_modes:
            raise ValueError("Only modes {} are supported for 2D inputs".format(
                permitted_2_channel_modes))

        if mode is None and npimg.dtype == np.uint8:
            mode = 'LA'

    elif npimg.shape[2] == 4:
        permitted_4_channel_modes = ['RGBA', 'CMYK', 'RGBX']
        if mode is not None and mode not in permitted_4_channel_modes:
            raise ValueError("Only modes {} are supported for 4D inputs".format(
                permitted_4_channel_modes))

        if mode is None and npimg.dtype == np.uint8:
            mode = 'RGBA'
    else:
        permitted_3_channel_modes = ['RGB', 'YCbCr', 'HSV']
        if mode is not None and mode not in permitted_3_channel_modes:
            raise ValueError("Only modes {} are supported for 3D inputs".format(
                permitted_3_channel_modes))
        if mode is None and npimg.dtype == np.uint8:
            mode = 'RGB'

    if mode is None:
        raise TypeError('Input type {} is not supported'.format(npimg.dtype))

    return Image.fromarray(npimg, mode=mode)