# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math
import numbers

import numpy as np
from PIL import Image

import paddle

from . import functional_cv2 as F_cv2
from . import functional_pil as F_pil
from . import functional_tensor as F_t

__all__ = []
30 31
def _is_pil_image(img):
    """Return True when ``img`` is a ``PIL.Image.Image`` instance."""
    is_pil = isinstance(img, Image.Image)
    return is_pil
L
LielinJiang 已提交
32 33


34 35
def _is_tensor_image(img):
    """Return True when ``img`` is a ``paddle.Tensor``."""
    is_tensor = isinstance(img, paddle.Tensor)
    return is_tensor
L
LielinJiang 已提交
36

37 38 39 40 41 42 43 44 45

def _is_numpy_image(img):
    return isinstance(img, np.ndarray) and (img.ndim in {2, 3})


def to_tensor(pic, data_format='CHW'):
    """Converts a ``PIL.Image`` or ``numpy.ndarray`` to paddle.Tensor.

    See ``ToTensor`` for more details.

    Args:
        pic (PIL.Image|np.ndarray): Image to be converted to tensor.
        data_format (str, optional): Data format of output tensor, should be 'HWC' or
            'CHW'. Default: 'CHW'.

    Returns:
        Tensor: Converted image. Data type is same as input img.

    Examples:
        .. code-block:: python

            import numpy as np
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)

            tensor = F.to_tensor(fake_img)
            print(tensor.shape)

    """
    supported = (
        _is_pil_image(pic) or _is_numpy_image(pic) or _is_tensor_image(pic)
    )
    if not supported:
        raise TypeError(
            f'pic should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {type(pic)}'
        )

    if _is_pil_image(pic):
        return F_pil.to_tensor(pic, data_format)
    if _is_numpy_image(pic):
        return F_cv2.to_tensor(pic, data_format)
    # Already a paddle.Tensor (assumed CHW): pass through, or permute to HWC.
    if data_format.lower() == 'chw':
        return pic
    return pic.transpose((1, 2, 0))
L
LielinJiang 已提交
85 86


87
def resize(img, size, interpolation='bilinear'):
    """
    Resizes the image to given size

    Args:
        input (PIL.Image|np.ndarray): Image to be resized.
        size (int|list|tuple): Target size of input data, with (height, width) shape.
        interpolation (int|str, optional): Interpolation method. when use pil backend,
            support method are as following:
            - "nearest": Image.NEAREST,
            - "bilinear": Image.BILINEAR,
            - "bicubic": Image.BICUBIC,
            - "box": Image.BOX,
            - "lanczos": Image.LANCZOS,
            - "hamming": Image.HAMMING
            when use cv2 backend, support method are as following:
            - "nearest": cv2.INTER_NEAREST,
            - "bilinear": cv2.INTER_LINEAR,
            - "area": cv2.INTER_AREA,
            - "bicubic": cv2.INTER_CUBIC,
            - "lanczos": cv2.INTER_LANCZOS4

    Returns:
        PIL.Image or np.array: Resized image.

    Examples:
        .. code-block:: python

            import numpy as np
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)

            converted_img = F.resize(fake_img, 224)
            print(converted_img.size)
            # (262, 224)

            converted_img = F.resize(fake_img, (200, 150))
            print(converted_img.size)
            # (150, 200)

    """
    supported = (
        _is_pil_image(img) or _is_numpy_image(img) or _is_tensor_image(img)
    )
    if not supported:
        raise TypeError(
            f'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {type(img)}'
        )

    # Dispatch to the backend matching the input type.
    if _is_pil_image(img):
        return F_pil.resize(img, size, interpolation)
    if _is_tensor_image(img):
        return F_t.resize(img, size, interpolation)
    return F_cv2.resize(img, size, interpolation)
L
LielinJiang 已提交
146 147


148 149 150
def pad(img, padding, fill=0, padding_mode='constant'):
    """
    Pads the given PIL.Image or numpy.array on all sides with specified padding mode and fill value.

    Args:
        img (PIL.Image|np.array): Image to be padded.
        padding (int|list|tuple): Padding on each border. If a single int is provided this
            is used to pad all borders. If list/tuple of length 2 is provided this is the padding
            on left/right and top/bottom respectively. If a list/tuple of length 4 is provided
            this is the padding for the left, top, right and bottom borders
            respectively.
        fill (float, optional): Pixel fill value for constant fill. If a tuple of
            length 3, it is used to fill R, G, B channels respectively.
            This value is only used when the padding_mode is constant. Default: 0.
        padding_mode: Type of padding. Should be: constant, edge, reflect or symmetric. Default: 'constant'.

            - constant: pads with a constant value, this value is specified with fill

            - edge: pads with the last value on the edge of the image

            - reflect: pads with reflection of image (without repeating the last value on the edge)

                       padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode
                       will result in [3, 2, 1, 2, 3, 4, 3, 2]

            - symmetric: pads with reflection of image (repeating the last value on the edge)

                         padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode
                         will result in [2, 1, 1, 2, 3, 4, 4, 3]

    Returns:
        PIL.Image or np.array: Padded image.

    Examples:
        .. code-block:: python

            import numpy as np
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)

            padded_img = F.pad(fake_img, padding=1)
            print(padded_img.size)

            padded_img = F.pad(fake_img, padding=(2, 1))
            print(padded_img.size)
    """
    supported = (
        _is_pil_image(img) or _is_numpy_image(img) or _is_tensor_image(img)
    )
    if not supported:
        raise TypeError(
            f'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {type(img)}'
        )

    # Dispatch to the backend matching the input type.
    if _is_pil_image(img):
        return F_pil.pad(img, padding, fill, padding_mode)
    if _is_tensor_image(img):
        return F_t.pad(img, padding, fill, padding_mode)
    return F_cv2.pad(img, padding, fill, padding_mode)


def crop(img, top, left, height, width):
    """Crops the given Image.

    Args:
        img (PIL.Image|np.array): Image to be cropped. (0,0) denotes the top left
            corner of the image.
        top (int): Vertical component of the top left corner of the crop box.
        left (int): Horizontal component of the top left corner of the crop box.
        height (int): Height of the crop box.
        width (int): Width of the crop box.

    Returns:
        PIL.Image or np.array: Cropped image.

    Examples:
        .. code-block:: python

            import numpy as np
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)

            cropped_img = F.crop(fake_img, 56, 150, 200, 100)
            print(cropped_img.size)

    """
    supported = (
        _is_pil_image(img) or _is_numpy_image(img) or _is_tensor_image(img)
    )
    if not supported:
        raise TypeError(
            f'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {type(img)}'
        )

    # Dispatch to the backend matching the input type.
    if _is_pil_image(img):
        return F_pil.crop(img, top, left, height, width)
    if _is_tensor_image(img):
        return F_t.crop(img, top, left, height, width)
    return F_cv2.crop(img, top, left, height, width)


def center_crop(img, output_size):
    """Crops the given Image and resize it to desired size.

    Args:
        img (PIL.Image|np.array): Image to be cropped. (0,0) denotes the top left corner of the image.
        output_size (sequence or int): (height, width) of the crop box. If int,
            it is used for both directions

    Returns:
        PIL.Image or np.array: Cropped image.

    Examples:
        .. code-block:: python

            import numpy as np
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)

            cropped_img = F.center_crop(fake_img, (150, 100))
            print(cropped_img.size)
    """
    supported = (
        _is_pil_image(img) or _is_numpy_image(img) or _is_tensor_image(img)
    )
    if not supported:
        raise TypeError(
            f'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {type(img)}'
        )

    # Dispatch to the backend matching the input type.
    if _is_pil_image(img):
        return F_pil.center_crop(img, output_size)
    if _is_tensor_image(img):
        return F_t.center_crop(img, output_size)
    return F_cv2.center_crop(img, output_size)


L
LielinJiang 已提交
303
def hflip(img):
    """Horizontally flips the given Image or np.array.

    Args:
        img (PIL.Image|np.array): Image to be flipped.

    Returns:
        PIL.Image or np.array: Horizontally flipped image.

    Examples:
        .. code-block:: python

            import numpy as np
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)

            flipped_img = F.hflip(fake_img)
            print(flipped_img.size)

    """
    supported = (
        _is_pil_image(img) or _is_numpy_image(img) or _is_tensor_image(img)
    )
    if not supported:
        raise TypeError(
            f'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {type(img)}'
        )

    # Dispatch to the backend matching the input type.
    if _is_pil_image(img):
        return F_pil.hflip(img)
    if _is_tensor_image(img):
        return F_t.hflip(img)
    return F_cv2.hflip(img)


def vflip(img):
    """Vertically flips the given Image or np.array.

    Args:
        img (PIL.Image|np.array): Image to be flipped.

    Returns:
        PIL.Image or np.array: Vertically flipped image.

    Examples:
        .. code-block:: python

            import numpy as np
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)

            flipped_img = F.vflip(fake_img)
            print(flipped_img.size)

    """
    supported = (
        _is_pil_image(img) or _is_numpy_image(img) or _is_tensor_image(img)
    )
    if not supported:
        raise TypeError(
            f'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {type(img)}'
        )

    # Dispatch to the backend matching the input type.
    if _is_pil_image(img):
        return F_pil.vflip(img)
    if _is_tensor_image(img):
        return F_t.vflip(img)
    return F_cv2.vflip(img)


def adjust_brightness(img, brightness_factor):
    """Adjusts brightness of an Image.

    Args:
        img (PIL.Image|np.array|paddle.Tensor): Image to be adjusted.
        brightness_factor (float): How much to adjust the brightness. Can be
            any non negative number. 0 gives a black image, 1 gives the
            original image while 2 increases the brightness by a factor of 2.

    Returns:
        PIL.Image|np.array|paddle.Tensor: Brightness adjusted image.

    Examples:
        .. code-block:: python
           :name: code-example1

            import numpy as np
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)
            print(fake_img.size) # (300, 256)
            print(fake_img.load()[1,1]) # (95, 127, 202)
            converted_img = F.adjust_brightness(fake_img, 0.5)
            print(converted_img.size) # (300, 256)
            print(converted_img.load()[1,1]) # (47, 63, 101)

    """
    supported = (
        _is_pil_image(img) or _is_numpy_image(img) or _is_tensor_image(img)
    )
    if not supported:
        raise TypeError(
            f'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {type(img)}'
        )

    # Dispatch to the backend matching the input type (tensor is the fallback).
    if _is_pil_image(img):
        return F_pil.adjust_brightness(img, brightness_factor)
    if _is_numpy_image(img):
        return F_cv2.adjust_brightness(img, brightness_factor)
    return F_t.adjust_brightness(img, brightness_factor)
431 432 433 434 435 436


def adjust_contrast(img, contrast_factor):
    """Adjusts contrast of an Image.

    Args:
        img (PIL.Image|np.array|paddle.Tensor): Image to be adjusted.
        contrast_factor (float): How much to adjust the contrast. Can be any
            non negative number. 0 gives a solid gray image, 1 gives the
            original image while 2 increases the contrast by a factor of 2.

    Returns:
        PIL.Image|np.array|paddle.Tensor: Contrast adjusted image.

    Examples:
        .. code-block:: python

            import numpy as np
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)

            converted_img = F.adjust_contrast(fake_img, 0.4)
            print(converted_img.size)
    """
    supported = (
        _is_pil_image(img) or _is_numpy_image(img) or _is_tensor_image(img)
    )
    if not supported:
        raise TypeError(
            f'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {type(img)}'
        )

    # Dispatch to the backend matching the input type (tensor is the fallback).
    if _is_pil_image(img):
        return F_pil.adjust_contrast(img, contrast_factor)
    if _is_numpy_image(img):
        return F_cv2.adjust_contrast(img, contrast_factor)
    return F_t.adjust_contrast(img, contrast_factor)
474 475 476 477 478 479


def adjust_saturation(img, saturation_factor):
    """Adjusts color saturation of an image.

    Args:
        img (PIL.Image|np.array|paddle.Tensor): Image to be adjusted.
        saturation_factor (float):  How much to adjust the saturation. 0 will
            give a black and white image, 1 will give the original image while
            2 will enhance the saturation by a factor of 2.

    Returns:
        PIL.Image|np.array|paddle.Tensor: Saturation adjusted image.

    Examples:
        .. code-block:: python

            import numpy as np
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)

            converted_img = F.adjust_saturation(fake_img, 0.4)
            print(converted_img.size)

    """
    supported = (
        _is_pil_image(img) or _is_numpy_image(img) or _is_tensor_image(img)
    )
    if not supported:
        raise TypeError(
            f'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {type(img)}'
        )

    # Dispatch to the backend matching the input type (tensor is the fallback).
    if _is_pil_image(img):
        return F_pil.adjust_saturation(img, saturation_factor)
    if _is_numpy_image(img):
        return F_cv2.adjust_saturation(img, saturation_factor)
    return F_t.adjust_saturation(img, saturation_factor)
518 519 520 521 522 523 524 525 526 527 528 529 530


def adjust_hue(img, hue_factor):
    """Adjusts hue of an image.

    The image hue is adjusted by converting the image to HSV and
    cyclically shifting the intensities in the hue channel (H).
    The image is then converted back to original image mode.

    `hue_factor` is the amount of shift in H channel and must be in the
    interval `[-0.5, 0.5]`.

    Args:
        img (PIL.Image|np.array|paddle.Tensor): Image to be adjusted.
        hue_factor (float):  How much to shift the hue channel. Should be in
            [-0.5, 0.5]. 0.5 and -0.5 give complete reversal of hue channel in
            HSV space in positive and negative direction respectively.
            0 means no shift. Therefore, both -0.5 and 0.5 will give an image
            with complementary colors while 0 gives the original image.

    Returns:
        PIL.Image|np.array|paddle.Tensor: Hue adjusted image.

    Examples:
        .. code-block:: python

            import numpy as np
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)

            converted_img = F.adjust_hue(fake_img, 0.4)
            print(converted_img.size)

    """
    supported = (
        _is_pil_image(img) or _is_numpy_image(img) or _is_tensor_image(img)
    )
    if not supported:
        raise TypeError(
            f'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {type(img)}'
        )

    # Dispatch to the backend matching the input type (tensor is the fallback).
    if _is_pil_image(img):
        return F_pil.adjust_hue(img, hue_factor)
    if _is_numpy_image(img):
        return F_cv2.adjust_hue(img, hue_factor)
    return F_t.adjust_hue(img, hue_factor)
571 572


573 574 575 576 577 578 579
def _get_affine_matrix(center, angle, translate, scale, shear):
    # Affine matrix is : M = T * C * RotateScaleShear * C^-1
    # Ihe inverse one is : M^-1 = C * RotateScaleShear^-1 * C^-1 * T^-1
    rot = math.radians(angle)
    sx = math.radians(shear[0])
    sy = math.radians(shear[1])

580
    # Rotate and Shear without scaling
581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603
    a = math.cos(rot - sy) / math.cos(sy)
    b = -math.cos(rot - sy) * math.tan(sx) / math.cos(sy) - math.sin(rot)
    c = math.sin(rot - sy) / math.cos(sy)
    d = -math.sin(rot - sy) * math.tan(sx) / math.cos(sy) + math.cos(rot)

    # Center Translation
    cx, cy = center
    tx, ty = translate

    # Inverted rotation matrix with scale and shear
    # det([[a, b], [c, d]]) == 1, since det(rotation) = 1 and det(shear) = 1
    matrix = [d, -b, 0.0, -c, a, 0.0]
    matrix = [x / scale for x in matrix]
    # Apply inverse of translation and of center translation: RSS^-1 * C^-1 * T^-1
    matrix[2] += matrix[0] * (-cx - tx) + matrix[1] * (-cy - ty)
    matrix[5] += matrix[3] * (-cx - tx) + matrix[4] * (-cy - ty)
    # Apply center translation: C * RSS^-1 * C^-1 * T^-1
    matrix[2] += cx
    matrix[5] += cy

    return matrix


604 605 606 607 608 609 610 611 612 613
def affine(
    img,
    angle,
    translate,
    scale,
    shear,
    interpolation="nearest",
    fill=0,
    center=None,
):
    """Apply affine transformation on the image.

    Args:
        img (PIL.Image|np.array|paddle.Tensor): Image to be affined.
        angle (int|float): The angle of the random rotation in clockwise order.
        translate (list[float]): Maximum absolute fraction for horizontal and vertical translations.
        scale (float): Scale factor for the image, scale should be positive.
        shear (list[float]): Shear angle values which are parallel to the x-axis and y-axis in clockwise order.
        interpolation (str, optional): Interpolation method. If omitted, or if the
            image has only one channel, it is set to PIL.Image.NEAREST or cv2.INTER_NEAREST
            according the backend.
            When use pil backend, support method are as following:
            - "nearest": Image.NEAREST,
            - "bilinear": Image.BILINEAR,
            - "bicubic": Image.BICUBIC
            When use cv2 backend, support method are as following:
            - "nearest": cv2.INTER_NEAREST,
            - "bilinear": cv2.INTER_LINEAR,
            - "bicubic": cv2.INTER_CUBIC
        fill (int|list|tuple, optional): Pixel fill value for the area outside the transformed
            image. If given a number, the value is used for all bands respectively.
        center (2-tuple, optional): Optional center of rotation, (x, y).
            Origin is the upper left corner.
            Default is the center of the image.

    Returns:
        PIL.Image|np.array|paddle.Tensor: Affine Transformed image.

    Examples:
        .. code-block:: python

            import paddle
            from paddle.vision.transforms import functional as F

            fake_img = paddle.randn((3, 256, 300)).astype(paddle.float32)

            affined_img = F.affine(fake_img, 45, translate=[0.2, 0.2], scale=0.5, shear=[-10, 10])
            print(affined_img.shape)
    """

    # Reject anything that is not a PIL image, numpy image, or paddle tensor.
    if not (
        _is_pil_image(img) or _is_numpy_image(img) or _is_tensor_image(img)
    ):
        raise TypeError(
            'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'.format(
                type(img)
            )
        )

    # --- Argument validation -------------------------------------------------
    if not isinstance(angle, (int, float)):
        raise TypeError("Argument angle should be int or float")

    if not isinstance(translate, (list, tuple)):
        raise TypeError("Argument translate should be a sequence")

    if len(translate) != 2:
        raise ValueError("Argument translate should be a sequence of length 2")

    if scale <= 0.0:
        raise ValueError("Argument scale should be positive")

    if not isinstance(shear, (numbers.Number, (list, tuple))):
        raise TypeError(
            "Shear should be either a single value or a sequence of two values"
        )

    if not isinstance(interpolation, str):
        raise TypeError("Argument interpolation should be a string")

    # --- Argument normalization ---------------------------------------------
    if isinstance(angle, int):
        angle = float(angle)

    if isinstance(translate, tuple):
        translate = list(translate)

    # A scalar shear means shear along x only.
    if isinstance(shear, numbers.Number):
        shear = [shear, 0.0]

    if isinstance(shear, tuple):
        shear = list(shear)

    # A single-element shear is applied to both axes.
    if len(shear) == 1:
        shear = [shear[0], shear[0]]

    if len(shear) != 2:
        raise ValueError(
            f"Shear should be a sequence containing two values. Got {shear}"
        )

    if center is not None and not isinstance(center, (list, tuple)):
        raise TypeError("Argument center should be a sequence")

    if _is_pil_image(img):
        # PIL's Image.size is (width, height).
        width, height = img.size
        # center = (width * 0.5 + 0.5, height * 0.5 + 0.5)
        # it is visually better to estimate the center without 0.5 offset
        # otherwise image rotated by 90 degrees is shifted vs output image of F_t.affine
        if center is None:
            center = [width * 0.5, height * 0.5]
        matrix = _get_affine_matrix(center, angle, translate, scale, shear)
        return F_pil.affine(img, matrix, interpolation, fill)

    if _is_numpy_image(img):
        # get affine_matrix in F_cv2.affine() using cv2's functions
        # NOTE(review): numpy images are (H, W[, C]), so shape[0:2] is
        # (height, width); this unpacking looks swapped for non-square
        # images — confirm against F_cv2.affine's expectations.
        width, height = img.shape[0:2]
        # center = (width * 0.5 + 0.5, height * 0.5 + 0.5)
        # it is visually better to estimate the center without 0.5 offset
        # otherwise image rotated by 90 degrees is shifted vs output image of F_t.affine
        if center is None:
            center = (width * 0.5, height * 0.5)
        return F_cv2.affine(
            img, angle, translate, scale, shear, interpolation, fill, center
        )

    if _is_tensor_image(img):
        center_f = [0.0, 0.0]
        if center is not None:
            # NOTE(review): for a CHW tensor shape[-1] is width and
            # shape[-2] is height; this unpacking looks swapped for
            # non-square images — verify intended layout.
            height, width = img.shape[-1], img.shape[-2]
            # Center values should be in pixel coordinates but translated such that (0, 0) corresponds to image center.
            center_f = [
                1.0 * (c - s * 0.5) for c, s in zip(center, [width, height])
            ]
        translate_f = [1.0 * t for t in translate]
        matrix = _get_affine_matrix(center_f, angle, translate_f, scale, shear)
        return F_t.affine(img, matrix, interpolation, fill)


741 742 743
def rotate(
    img, angle, interpolation="nearest", expand=False, center=None, fill=0
):
    """Rotates the image by angle.

    Args:
        img (PIL.Image|np.array): Image to be rotated.
        angle (float or int): In degrees degrees counter clockwise order.
        interpolation (str, optional): Interpolation method. If omitted, or if the
            image has only one channel, it is set to PIL.Image.NEAREST or cv2.INTER_NEAREST
            according the backend. when use pil backend, support method are as following:
            - "nearest": Image.NEAREST,
            - "bilinear": Image.BILINEAR,
            - "bicubic": Image.BICUBIC
            when use cv2 backend, support method are as following:
            - "nearest": cv2.INTER_NEAREST,
            - "bilinear": cv2.INTER_LINEAR,
            - "bicubic": cv2.INTER_CUBIC
        expand (bool, optional): Optional expansion flag.
            If true, expands the output image to make it large enough to hold the entire rotated image.
            If false or omitted, make the output image the same size as the input image.
            Note that the expand flag assumes rotation around the center and no translation.
        center (2-list|2-tuple, optional): Optional center of rotation.
            Origin is the upper left corner.
            Default is the center of the image.
        fill (3-list|3-tuple or int): RGB pixel fill value for area outside the rotated image.
            If int, it is used for all channels respectively. Default value is 0.

    Returns:
        PIL.Image or np.array: Rotated image.

    Examples:
        .. code-block:: python

            import numpy as np
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)

            rotated_img = F.rotate(fake_img, 90)
            print(rotated_img.size)

    """
    supported = (
        _is_pil_image(img) or _is_numpy_image(img) or _is_tensor_image(img)
    )
    if not supported:
        raise TypeError(
            f'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {type(img)}'
        )

    # Normalize sequence arguments to tuples, which every backend accepts.
    if isinstance(center, list):
        center = tuple(center)
    if isinstance(fill, list):
        fill = tuple(fill)

    # Dispatch to the backend matching the input type.
    if _is_pil_image(img):
        return F_pil.rotate(img, angle, interpolation, expand, center, fill)
    if _is_tensor_image(img):
        return F_t.rotate(img, angle, interpolation, expand, center, fill)
    return F_cv2.rotate(img, angle, interpolation, expand, center, fill)
L
LielinJiang 已提交
809 810


811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828
def _get_perspective_coeffs(startpoints, endpoints):
    """
    get coefficients (a, b, c, d, e, f, g, h) of the perspective transforms.

    In Perspective Transform each pixel (x, y) in the original image gets transformed as,
     (x, y) -> ( (ax + by + c) / (gx + hy + 1), (dx + ey + f) / (gx + hy + 1) )

    Args:
        startpoints (list[list[int]]): [top-left, top-right, bottom-right, bottom-left] of the original image,
        endpoints (list[list[int]]): [top-left, top-right, bottom-right, bottom-left] of the transformed image.

    Returns:
        output (list): octuple (a, b, c, d, e, f, g, h) for transforming each pixel.
    """
    a_matrix = np.zeros((2 * len(startpoints), 8))

    for i, (p1, p2) in enumerate(zip(endpoints, startpoints)):
        a_matrix[2 * i, :] = [
829 830 831 832 833 834 835 836
            p1[0],
            p1[1],
            1,
            0,
            0,
            0,
            -p2[0] * p1[0],
            -p2[0] * p1[1],
837 838
        ]
        a_matrix[2 * i + 1, :] = [
839 840 841 842 843 844 845 846
            0,
            0,
            0,
            p1[0],
            p1[1],
            1,
            -p2[1] * p1[0],
            -p2[1] * p1[1],
847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864
        ]

    b_matrix = np.array(startpoints).reshape([8])
    res = np.linalg.lstsq(a_matrix, b_matrix)[0]

    output = list(res)
    return output


def perspective(img, startpoints, endpoints, interpolation='nearest', fill=0):
    """Perform perspective transform of the given image.

    Args:
        img (PIL.Image|np.array|paddle.Tensor): Image to be transformed.
        startpoints (list of list of ints): List containing four lists of two integers corresponding to four corners
            ``[top-left, top-right, bottom-right, bottom-left]`` of the original image.
        endpoints (list of list of ints): List containing four lists of two integers corresponding to four corners
            ``[top-left, top-right, bottom-right, bottom-left]`` of the transformed image.
        interpolation (str, optional): Interpolation method. If omitted, or if the
            image has only one channel, it is set to PIL.Image.NEAREST or cv2.INTER_NEAREST
            according the backend.
            When use pil backend, support method are as following:
            - "nearest": Image.NEAREST,
            - "bilinear": Image.BILINEAR,
            - "bicubic": Image.BICUBIC
            When use cv2 backend, support method are as following:
            - "nearest": cv2.INTER_NEAREST,
            - "bilinear": cv2.INTER_LINEAR,
            - "bicubic": cv2.INTER_CUBIC
        fill (int|list|tuple, optional): Pixel fill value for the area outside the transformed
            image. If given a number, the value is used for all bands respectively.

    Returns:
        PIL.Image|np.array|paddle.Tensor: transformed Image.

    Examples:
        .. code-block:: python

            import paddle
            from paddle.vision.transforms import functional as F

            fake_img = paddle.randn((3, 256, 300)).astype(paddle.float32)

            startpoints = [[0, 0], [33, 0], [33, 25], [0, 25]]
            endpoints = [[3, 2], [32, 3], [30, 24], [2, 25]]

            perspectived_img = F.perspective(fake_img, startpoints, endpoints)
            print(perspectived_img.shape)

    """
    is_pil = _is_pil_image(img)
    is_tensor = _is_tensor_image(img)

    # Reject anything that is not a PIL image, a paddle Tensor, or a
    # 2-/3-dim numpy array before dispatching to a backend.
    if not (is_pil or is_tensor or _is_numpy_image(img)):
        raise TypeError(
            'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'.format(
                type(img)
            )
        )

    # PIL and tensor backends take the 8 perspective coefficients; the
    # cv2 backend works directly from the corner-point lists.
    if is_pil or is_tensor:
        coeffs = _get_perspective_coeffs(startpoints, endpoints)
        backend = F_pil if is_pil else F_t
        return backend.perspective(img, coeffs, interpolation, fill)

    return F_cv2.perspective(img, startpoints, endpoints, interpolation, fill)


def to_grayscale(img, num_output_channels=1):
    """Converts image to grayscale version of image.

    Args:
        img (PIL.Image|np.array): Image to be converted to grayscale.
        num_output_channels (int, optional): The number of channels for the output
            image. Single channel. Default: 1.

    Returns:
        PIL.Image or np.array: Grayscale version of the image.
            if num_output_channels = 1 : returned image is single channel

            if num_output_channels = 3 : returned image is 3 channel with r = g = b

    Examples:
        .. code-block:: python

            import numpy as np
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)

            gray_img = F.to_grayscale(fake_img)
            print(gray_img.size)

    """
    # Pick the backend that matches the input type; anything else is an error.
    if _is_pil_image(img):
        backend = F_pil
    elif _is_tensor_image(img):
        backend = F_t
    elif _is_numpy_image(img):
        backend = F_cv2
    else:
        raise TypeError(
            'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'.format(
                type(img)
            )
        )

    return backend.to_grayscale(img, num_output_channels)


def normalize(img, mean, std, data_format='CHW', to_rgb=False):
    """Normalizes a tensor or image with mean and standard deviation.

    Args:
        img (PIL.Image|np.array|paddle.Tensor): input data to be normalized.
        mean (list|tuple): Sequence of means for each channel.
        std (list|tuple): Sequence of standard deviations for each channel.
        data_format (str, optional): Data format of input img, should be 'HWC' or
            'CHW'. Default: 'CHW'.
        to_rgb (bool, optional): Whether to convert to rgb. If input is tensor,
            this option will be ignored. Default: False.

    Returns:
        np.ndarray or Tensor: Normalized image. Data format is same as input img.

    Examples:
        .. code-block:: python

            import numpy as np
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)

            mean = [127.5, 127.5, 127.5]
            std = [127.5, 127.5, 127.5]

            normalized_img = F.normalize(fake_img, mean, std, data_format='HWC')
            print(normalized_img.max(), normalized_img.min())

    """

    if _is_tensor_image(img):
        # Tensor backend does not support to_rgb; that option only
        # applies to the cv2 (ndarray) path below.
        return F_t.normalize(img, mean, std, data_format)
    else:
        # PIL images are converted to a float32 ndarray first so the
        # cv2 backend can handle both PIL and numpy inputs uniformly.
        if _is_pil_image(img):
            img = np.array(img).astype(np.float32)

        return F_cv2.normalize(img, mean, std, data_format, to_rgb)


def erase(img, i, j, h, w, v, inplace=False):
    """Erase the pixels of selected area in input image with given value.

    Args:
        img (paddle.Tensor | np.array | PIL.Image): input Tensor image.
             For Tensor input, the shape should be (C, H, W). For np.array input,
             the shape should be (H, W, C).
        i (int): y coordinate of the top-left point of erased region.
        j (int): x coordinate of the top-left point of erased region.
        h (int): Height of the erased region.
        w (int): Width of the erased region.
        v (paddle.Tensor | np.array): value used to replace the pixels in erased region. It
            should be np.array when img is np.array or PIL.Image.
        inplace (bool, optional): Whether this transform is inplace. Default: False.

    Returns:
        paddle.Tensor | np.array | PIL.Image: Erased image. The type is same with input image.

    Examples:
        .. code-block:: python

            import paddle

            fake_img = paddle.randn((3, 2, 4)).astype(paddle.float32)
            print(fake_img)

            #Tensor(shape=[3, 2, 4], dtype=float32, place=Place(gpu:0), stop_gradient=True,
            #       [[[ 0.02169025, -0.97859967, -1.39175487, -1.07478464],
            #         [ 0.20654772,  1.74624777,  0.32268861, -0.13857445]],
            #
            #        [[-0.14993843,  1.10793507, -0.40056887, -1.94395220],
            #         [ 0.41686651,  0.44551995, -0.09356714, -0.60898107]],
            #
            #        [[-0.24998808, -1.47699273, -0.88838995,  0.42629015],
            #         [ 0.56948012, -0.96200180,  0.53355658,  3.20450878]]])

            values = paddle.zeros((1,1,1), dtype=paddle.float32)
            result = paddle.vision.transforms.erase(fake_img, 0, 1, 1, 2, values)

            print(result)

            #Tensor(shape=[3, 2, 4], dtype=float32, place=Place(gpu:0), stop_gradient=True,
            #       [[[ 0.02169025,  0.        ,  0.        , -1.07478464],
            #         [ 0.20654772,  1.74624777,  0.32268861, -0.13857445]],
            #
            #         [[-0.14993843,  0.        ,  0.        , -1.94395220],
            #           [ 0.41686651,  0.44551995, -0.09356714, -0.60898107]],
            #
            #         [[-0.24998808,  0.        ,  0.        ,  0.42629015],
            #          [ 0.56948012, -0.96200180,  0.53355658,  3.20450878]]])

    """
    # Route to the backend matching the input type; numpy arrays (and any
    # other input) fall through to the cv2 implementation, matching the
    # behavior of the other dispatchers in this module.
    if _is_tensor_image(img):
        backend = F_t
    elif _is_pil_image(img):
        backend = F_pil
    else:
        backend = F_cv2
    return backend.erase(img, i, j, h, w, v, inplace=inplace)