functional.py 35.8 KB
Newer Older
L
LielinJiang 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

L
LielinJiang 已提交
15 16
import math
import numbers
L
LielinJiang 已提交
17

18 19 20
import numpy as np
from PIL import Image
import paddle
L
LielinJiang 已提交
21

22 23 24
from . import functional_pil as F_pil
from . import functional_cv2 as F_cv2
from . import functional_tensor as F_t
L
LielinJiang 已提交
25

26
__all__ = []
L
LielinJiang 已提交
27

L
LielinJiang 已提交
28

29 30
def _is_pil_image(img):
    return isinstance(img, Image.Image)
L
LielinJiang 已提交
31 32


33 34
def _is_tensor_image(img):
    return isinstance(img, paddle.Tensor)
L
LielinJiang 已提交
35

36 37 38 39 40 41 42 43 44

def _is_numpy_image(img):
    return isinstance(img, np.ndarray) and (img.ndim in {2, 3})


def to_tensor(pic, data_format='CHW'):
    """Converts a ``PIL.Image`` or ``numpy.ndarray`` to paddle.Tensor.

    See ``ToTensor`` for more details.
L
LielinJiang 已提交
45 46

    Args:
47
        pic (PIL.Image|np.ndarray): Image to be converted to tensor.
48
        data_format (str, optional): Data format of output tensor, should be 'HWC' or
49 50 51
            'CHW'. Default: 'CHW'.

    Returns:
L
LielinJiang 已提交
52
        Tensor: Converted image. Data type is same as input img.
L
LielinJiang 已提交
53 54 55 56 57

    Examples:
        .. code-block:: python

            import numpy as np
58
            from PIL import Image
59
            from paddle.vision.transforms import functional as F
L
LielinJiang 已提交
60

61
            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')
L
LielinJiang 已提交
62

63
            fake_img = Image.fromarray(fake_img)
L
LielinJiang 已提交
64

65 66
            tensor = F.to_tensor(fake_img)
            print(tensor.shape)
L
LielinJiang 已提交
67 68

    """
69 70 71
    if not (
        _is_pil_image(pic) or _is_numpy_image(pic) or _is_tensor_image(pic)
    ):
72
        raise TypeError(
73 74 75 76
            'pic should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'.format(
                type(pic)
            )
        )
77 78 79

    if _is_pil_image(pic):
        return F_pil.to_tensor(pic, data_format)
80
    elif _is_numpy_image(pic):
81
        return F_cv2.to_tensor(pic, data_format)
82 83
    else:
        return pic if data_format.lower() == 'chw' else pic.transpose((1, 2, 0))
L
LielinJiang 已提交
84 85


86
def resize(img, size, interpolation='bilinear'):
L
LielinJiang 已提交
87
    """
88
    Resizes the image to given size
L
LielinJiang 已提交
89 90

    Args:
91
        input (PIL.Image|np.ndarray): Image to be resized.
L
LielinJiang 已提交
92
        size (int|list|tuple): Target size of input data, with (height, width) shape.
93 94 95 96 97 98 99
        interpolation (int|str, optional): Interpolation method. when use pil backend,
            support method are as following:
            - "nearest": Image.NEAREST,
            - "bilinear": Image.BILINEAR,
            - "bicubic": Image.BICUBIC,
            - "box": Image.BOX,
            - "lanczos": Image.LANCZOS,
100
            - "hamming": Image.HAMMING
101 102 103 104 105
            when use cv2 backend, support method are as following:
            - "nearest": cv2.INTER_NEAREST,
            - "bilinear": cv2.INTER_LINEAR,
            - "area": cv2.INTER_AREA,
            - "bicubic": cv2.INTER_CUBIC,
106 107 108 109
            - "lanczos": cv2.INTER_LANCZOS4

    Returns:
        PIL.Image or np.array: Resized image.
L
LielinJiang 已提交
110 111 112 113 114

    Examples:
        .. code-block:: python

            import numpy as np
115
            from PIL import Image
116
            from paddle.vision.transforms import functional as F
L
LielinJiang 已提交
117

118
            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')
L
LielinJiang 已提交
119

120
            fake_img = Image.fromarray(fake_img)
L
LielinJiang 已提交
121

122 123
            converted_img = F.resize(fake_img, 224)
            print(converted_img.size)
124
            # (262, 224)
125 126 127

            converted_img = F.resize(fake_img, (200, 150))
            print(converted_img.size)
128
            # (150, 200)
L
LielinJiang 已提交
129
    """
130 131 132
    if not (
        _is_pil_image(img) or _is_numpy_image(img) or _is_tensor_image(img)
    ):
133
        raise TypeError(
134 135 136 137
            'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'.format(
                type(img)
            )
        )
138 139 140

    if _is_pil_image(img):
        return F_pil.resize(img, size, interpolation)
141 142
    elif _is_tensor_image(img):
        return F_t.resize(img, size, interpolation)
L
LielinJiang 已提交
143
    else:
144
        return F_cv2.resize(img, size, interpolation)
L
LielinJiang 已提交
145 146


147 148 149
def pad(img, padding, fill=0, padding_mode='constant'):
    """
    Pads the given PIL.Image or numpy.array on all sides with specified padding mode and fill value.
L
LielinJiang 已提交
150 151

    Args:
152 153
        img (PIL.Image|np.array): Image to be padded.
        padding (int|list|tuple): Padding on each border. If a single int is provided this
154 155
            is used to pad all borders. If list/tuple of length 2 is provided this is the padding
            on left/right and top/bottom respectively. If a list/tuple of length 4 is provided
L
LielinJiang 已提交
156 157
            this is the padding for the left, top, right and bottom borders
            respectively.
158
        fill (float, optional): Pixel fill value for constant fill. If a tuple of
L
LielinJiang 已提交
159
            length 3, it is used to fill R, G, B channels respectively.
160
            This value is only used when the padding_mode is constant. Default: 0.
161 162 163 164 165 166 167 168 169 170 171 172 173 174 175
        padding_mode: Type of padding. Should be: constant, edge, reflect or symmetric. Default: 'constant'.

            - constant: pads with a constant value, this value is specified with fill

            - edge: pads with the last value on the edge of the image

            - reflect: pads with reflection of image (without repeating the last value on the edge)

                       padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode
                       will result in [3, 2, 1, 2, 3, 4, 3, 2]

            - symmetric: pads with reflection of image (repeating the last value on the edge)

                         padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode
                         will result in [2, 1, 1, 2, 3, 4, 4, 3]
L
LielinJiang 已提交
176 177

    Returns:
178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196
        PIL.Image or np.array: Padded image.

    Examples:
        .. code-block:: python

            import numpy as np
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)

            padded_img = F.pad(fake_img, padding=1)
            print(padded_img.size)

            padded_img = F.pad(fake_img, padding=(2, 1))
            print(padded_img.size)
    """
197 198 199
    if not (
        _is_pil_image(img) or _is_numpy_image(img) or _is_tensor_image(img)
    ):
200
        raise TypeError(
201 202 203 204
            'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'.format(
                type(img)
            )
        )
205 206 207

    if _is_pil_image(img):
        return F_pil.pad(img, padding, fill, padding_mode)
208 209
    elif _is_tensor_image(img):
        return F_t.pad(img, padding, fill, padding_mode)
210 211 212 213 214 215 216 217
    else:
        return F_cv2.pad(img, padding, fill, padding_mode)


def crop(img, top, left, height, width):
    """Crops the given Image.

    Args:
218
        img (PIL.Image|np.array): Image to be cropped. (0,0) denotes the top left
219 220 221 222 223 224 225 226
            corner of the image.
        top (int): Vertical component of the top left corner of the crop box.
        left (int): Horizontal component of the top left corner of the crop box.
        height (int): Height of the crop box.
        width (int): Width of the crop box.

    Returns:
        PIL.Image or np.array: Cropped image.
L
LielinJiang 已提交
227 228 229 230 231

    Examples:
        .. code-block:: python

            import numpy as np
232 233
            from PIL import Image
            from paddle.vision.transforms import functional as F
L
LielinJiang 已提交
234

235
            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')
L
LielinJiang 已提交
236

237
            fake_img = Image.fromarray(fake_img)
L
LielinJiang 已提交
238

239 240
            cropped_img = F.crop(fake_img, 56, 150, 200, 100)
            print(cropped_img.size)
L
LielinJiang 已提交
241 242

    """
243 244 245
    if not (
        _is_pil_image(img) or _is_numpy_image(img) or _is_tensor_image(img)
    ):
246
        raise TypeError(
247 248 249 250
            'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'.format(
                type(img)
            )
        )
251 252 253

    if _is_pil_image(img):
        return F_pil.crop(img, top, left, height, width)
254 255
    elif _is_tensor_image(img):
        return F_t.crop(img, top, left, height, width)
256 257 258 259 260 261 262
    else:
        return F_cv2.crop(img, top, left, height, width)


def center_crop(img, output_size):
    """Crops the given Image and resize it to desired size.

263 264 265 266
    Args:
        img (PIL.Image|np.array): Image to be cropped. (0,0) denotes the top left corner of the image.
        output_size (sequence or int): (height, width) of the crop box. If int,
            it is used for both directions
267

268 269
    Returns:
        PIL.Image or np.array: Cropped image.
L
LielinJiang 已提交
270

271 272
    Examples:
    .. code-block:: python
273

274 275 276
        import numpy as np
        from PIL import Image
        from paddle.vision.transforms import functional as F
277

278
        fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')
279

280
        fake_img = Image.fromarray(fake_img)
281

282 283 284 285 286 287
        cropped_img = F.center_crop(fake_img, (150, 100))
        print(cropped_img.size)
    """
    if not (
        _is_pil_image(img) or _is_numpy_image(img) or _is_tensor_image(img)
    ):
288
        raise TypeError(
289 290 291 292
            'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'.format(
                type(img)
            )
        )
293 294 295

    if _is_pil_image(img):
        return F_pil.center_crop(img, output_size)
296 297
    elif _is_tensor_image(img):
        return F_t.center_crop(img, output_size)
298 299 300 301
    else:
        return F_cv2.center_crop(img, output_size)


L
LielinJiang 已提交
302
def hflip(img):
303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325
    """Horizontally flips the given Image or np.array.

    Args:
        img (PIL.Image|np.array): Image to be flipped.

    Returns:
        PIL.Image or np.array:  Horizontall flipped image.

    Examples:
        .. code-block:: python

            import numpy as np
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)

            flpped_img = F.hflip(fake_img)
            print(flpped_img.size)

    """
326 327 328
    if not (
        _is_pil_image(img) or _is_numpy_image(img) or _is_tensor_image(img)
    ):
329
        raise TypeError(
330 331 332 333
            'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'.format(
                type(img)
            )
        )
334 335 336

    if _is_pil_image(img):
        return F_pil.hflip(img)
337 338
    elif _is_tensor_image(img):
        return F_t.hflip(img)
339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366
    else:
        return F_cv2.hflip(img)


def vflip(img):
    """Vertically flips the given Image or np.array.

    Args:
        img (PIL.Image|np.array): Image to be flipped.

    Returns:
        PIL.Image or np.array:  Vertically flipped image.

    Examples:
        .. code-block:: python

            import numpy as np
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)

            flpped_img = F.vflip(fake_img)
            print(flpped_img.size)

    """
367 368 369
    if not (
        _is_pil_image(img) or _is_numpy_image(img) or _is_tensor_image(img)
    ):
370
        raise TypeError(
371 372 373 374
            'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'.format(
                type(img)
            )
        )
375 376 377

    if _is_pil_image(img):
        return F_pil.vflip(img)
378 379
    elif _is_tensor_image(img):
        return F_t.vflip(img)
380 381 382 383 384 385 386 387
    else:
        return F_cv2.vflip(img)


def adjust_brightness(img, brightness_factor):
    """Adjusts brightness of an Image.

    Args:
J
JYChen 已提交
388
        img (PIL.Image|np.array|paddle.Tensor): Image to be adjusted.
389 390 391 392 393
        brightness_factor (float): How much to adjust the brightness. Can be
            any non negative number. 0 gives a black image, 1 gives the
            original image while 2 increases the brightness by a factor of 2.

    Returns:
J
JYChen 已提交
394
        PIL.Image|np.array|paddle.Tensor: Brightness adjusted image.
395 396 397

    Examples:
        .. code-block:: python
398
           :name: code-example1
399 400 401 402 403 404 405 406

            import numpy as np
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)
407 408 409 410 411 412
            print(fake_img.size) # (300, 256)
            print(fake_img.load()[1,1]) # (95, 127, 202)
            converted_img = F.adjust_brightness(fake_img, 0.5)
            print(converted_img.size) # (300, 256)
            print(converted_img.load()[1,1]) # (47, 63, 101)

413 414

    """
415 416 417
    if not (
        _is_pil_image(img) or _is_numpy_image(img) or _is_tensor_image(img)
    ):
418
        raise TypeError(
419 420 421 422
            'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'.format(
                type(img)
            )
        )
423 424 425

    if _is_pil_image(img):
        return F_pil.adjust_brightness(img, brightness_factor)
J
JYChen 已提交
426
    elif _is_numpy_image(img):
427
        return F_cv2.adjust_brightness(img, brightness_factor)
J
JYChen 已提交
428 429
    else:
        return F_t.adjust_brightness(img, brightness_factor)
430 431 432 433 434 435


def adjust_contrast(img, contrast_factor):
    """Adjusts contrast of an Image.

    Args:
J
JYChen 已提交
436
        img (PIL.Image|np.array|paddle.Tensor): Image to be adjusted.
437 438 439 440 441
        contrast_factor (float): How much to adjust the contrast. Can be any
            non negative number. 0 gives a solid gray image, 1 gives the
            original image while 2 increases the contrast by a factor of 2.

    Returns:
J
JYChen 已提交
442
        PIL.Image|np.array|paddle.Tensor: Contrast adjusted image.
443 444 445 446 447 448 449 450 451 452 453 454 455 456 457

    Examples:
        .. code-block:: python

            import numpy as np
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)

            converted_img = F.adjust_contrast(fake_img, 0.4)
            print(converted_img.size)
    """
458 459 460
    if not (
        _is_pil_image(img) or _is_numpy_image(img) or _is_tensor_image(img)
    ):
461
        raise TypeError(
462 463 464 465
            'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'.format(
                type(img)
            )
        )
466 467 468

    if _is_pil_image(img):
        return F_pil.adjust_contrast(img, contrast_factor)
J
JYChen 已提交
469
    elif _is_numpy_image(img):
470
        return F_cv2.adjust_contrast(img, contrast_factor)
J
JYChen 已提交
471 472
    else:
        return F_t.adjust_contrast(img, contrast_factor)
473 474 475 476 477 478


def adjust_saturation(img, saturation_factor):
    """Adjusts color saturation of an image.

    Args:
J
JYChen 已提交
479
        img (PIL.Image|np.array|paddle.Tensor): Image to be adjusted.
480 481 482 483 484
        saturation_factor (float):  How much to adjust the saturation. 0 will
            give a black and white image, 1 will give the original image while
            2 will enhance the saturation by a factor of 2.

    Returns:
J
JYChen 已提交
485
        PIL.Image|np.array|paddle.Tensor: Saturation adjusted image.
486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501

    Examples:
        .. code-block:: python

            import numpy as np
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)

            converted_img = F.adjust_saturation(fake_img, 0.4)
            print(converted_img.size)

    """
502 503 504
    if not (
        _is_pil_image(img) or _is_numpy_image(img) or _is_tensor_image(img)
    ):
505
        raise TypeError(
506 507 508 509
            'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'.format(
                type(img)
            )
        )
510 511 512

    if _is_pil_image(img):
        return F_pil.adjust_saturation(img, saturation_factor)
J
JYChen 已提交
513
    elif _is_numpy_image(img):
514
        return F_cv2.adjust_saturation(img, saturation_factor)
J
JYChen 已提交
515 516
    else:
        return F_t.adjust_saturation(img, saturation_factor)
517 518 519 520 521 522 523 524 525 526 527 528 529


def adjust_hue(img, hue_factor):
    """Adjusts hue of an image.

    The image hue is adjusted by converting the image to HSV and
    cyclically shifting the intensities in the hue channel (H).
    The image is then converted back to original image mode.

    `hue_factor` is the amount of shift in H channel and must be in the
    interval `[-0.5, 0.5]`.

    Args:
J
JYChen 已提交
530
        img (PIL.Image|np.array|paddle.Tensor): Image to be adjusted.
531 532 533 534 535 536 537
        hue_factor (float):  How much to shift the hue channel. Should be in
            [-0.5, 0.5]. 0.5 and -0.5 give complete reversal of hue channel in
            HSV space in positive and negative direction respectively.
            0 means no shift. Therefore, both -0.5 and 0.5 will give an image
            with complementary colors while 0 gives the original image.

    Returns:
J
JYChen 已提交
538
        PIL.Image|np.array|paddle.Tensor: Hue adjusted image.
539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554

    Examples:
        .. code-block:: python

            import numpy as np
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)

            converted_img = F.adjust_hue(fake_img, 0.4)
            print(converted_img.size)

    """
555 556 557
    if not (
        _is_pil_image(img) or _is_numpy_image(img) or _is_tensor_image(img)
    ):
558
        raise TypeError(
559 560 561 562
            'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'.format(
                type(img)
            )
        )
563 564 565

    if _is_pil_image(img):
        return F_pil.adjust_hue(img, hue_factor)
J
JYChen 已提交
566
    elif _is_numpy_image(img):
567
        return F_cv2.adjust_hue(img, hue_factor)
J
JYChen 已提交
568 569
    else:
        return F_t.adjust_hue(img, hue_factor)
570 571


572 573 574 575 576 577 578
def _get_affine_matrix(center, angle, translate, scale, shear):
    # Affine matrix is : M = T * C * RotateScaleShear * C^-1
    # Ihe inverse one is : M^-1 = C * RotateScaleShear^-1 * C^-1 * T^-1
    rot = math.radians(angle)
    sx = math.radians(shear[0])
    sy = math.radians(shear[1])

579
    # Rotate and Shear without scaling
580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602
    a = math.cos(rot - sy) / math.cos(sy)
    b = -math.cos(rot - sy) * math.tan(sx) / math.cos(sy) - math.sin(rot)
    c = math.sin(rot - sy) / math.cos(sy)
    d = -math.sin(rot - sy) * math.tan(sx) / math.cos(sy) + math.cos(rot)

    # Center Translation
    cx, cy = center
    tx, ty = translate

    # Inverted rotation matrix with scale and shear
    # det([[a, b], [c, d]]) == 1, since det(rotation) = 1 and det(shear) = 1
    matrix = [d, -b, 0.0, -c, a, 0.0]
    matrix = [x / scale for x in matrix]
    # Apply inverse of translation and of center translation: RSS^-1 * C^-1 * T^-1
    matrix[2] += matrix[0] * (-cx - tx) + matrix[1] * (-cy - ty)
    matrix[5] += matrix[3] * (-cx - tx) + matrix[4] * (-cy - ty)
    # Apply center translation: C * RSS^-1 * C^-1 * T^-1
    matrix[2] += cx
    matrix[5] += cy

    return matrix


603 604 605 606 607 608 609 610 611 612
def affine(
    img,
    angle,
    translate,
    scale,
    shear,
    interpolation="nearest",
    fill=0,
    center=None,
):
613 614 615 616 617 618 619 620
    """Apply affine transformation on the image.

    Args:
        img (PIL.Image|np.array|paddle.Tensor): Image to be affined.
        angle (int|float): The angle of the random rotation in clockwise order.
        translate (list[float]): Maximum absolute fraction for horizontal and vertical translations.
        scale (float): Scale factor for the image, scale should be positive.
        shear (list[float]): Shear angle values which are parallel to the x-axis and y-axis in clockwise order.
621 622 623 624 625 626
        interpolation (str, optional): Interpolation method. If omitted, or if the
            image has only one channel, it is set to PIL.Image.NEAREST or cv2.INTER_NEAREST
            according the backend.
            When use pil backend, support method are as following:
            - "nearest": Image.NEAREST,
            - "bilinear": Image.BILINEAR,
627
            - "bicubic": Image.BICUBIC
628 629 630
            When use cv2 backend, support method are as following:
            - "nearest": cv2.INTER_NEAREST,
            - "bilinear": cv2.INTER_LINEAR,
631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652
            - "bicubic": cv2.INTER_CUBIC
        fill (int|list|tuple, optional): Pixel fill value for the area outside the transformed
            image. If given a number, the value is used for all bands respectively.
        center (2-tuple, optional): Optional center of rotation, (x, y).
            Origin is the upper left corner.
            Default is the center of the image.

    Returns:
        PIL.Image|np.array|paddle.Tensor: Affine Transformed image.

    Examples:
        .. code-block:: python

            import paddle
            from paddle.vision.transforms import functional as F

            fake_img = paddle.randn((3, 256, 300)).astype(paddle.float32)

            affined_img = F.affine(fake_img, 45, translate=[0.2, 0.2], scale=0.5, shear=[-10, 10])
            print(affined_img.shape)
    """

653 654 655
    if not (
        _is_pil_image(img) or _is_numpy_image(img) or _is_tensor_image(img)
    ):
656
        raise TypeError(
657 658 659 660
            'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'.format(
                type(img)
            )
        )
661 662 663 664 665 666 667 668 669 670 671 672 673 674 675

    if not isinstance(angle, (int, float)):
        raise TypeError("Argument angle should be int or float")

    if not isinstance(translate, (list, tuple)):
        raise TypeError("Argument translate should be a sequence")

    if len(translate) != 2:
        raise ValueError("Argument translate should be a sequence of length 2")

    if scale <= 0.0:
        raise ValueError("Argument scale should be positive")

    if not isinstance(shear, (numbers.Number, (list, tuple))):
        raise TypeError(
676 677
            "Shear should be either a single value or a sequence of two values"
        )
678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698

    if not isinstance(interpolation, str):
        raise TypeError("Argument interpolation should be a string")

    if isinstance(angle, int):
        angle = float(angle)

    if isinstance(translate, tuple):
        translate = list(translate)

    if isinstance(shear, numbers.Number):
        shear = [shear, 0.0]

    if isinstance(shear, tuple):
        shear = list(shear)

    if len(shear) == 1:
        shear = [shear[0], shear[0]]

    if len(shear) != 2:
        raise ValueError(
699 700
            f"Shear should be a sequence containing two values. Got {shear}"
        )
701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722

    if center is not None and not isinstance(center, (list, tuple)):
        raise TypeError("Argument center should be a sequence")

    if _is_pil_image(img):
        width, height = img.size
        # center = (width * 0.5 + 0.5, height * 0.5 + 0.5)
        # it is visually better to estimate the center without 0.5 offset
        # otherwise image rotated by 90 degrees is shifted vs output image of F_t.affine
        if center is None:
            center = [width * 0.5, height * 0.5]
        matrix = _get_affine_matrix(center, angle, translate, scale, shear)
        return F_pil.affine(img, matrix, interpolation, fill)

    if _is_numpy_image(img):
        # get affine_matrix in F_cv2.affine() using cv2's functions
        width, height = img.shape[0:2]
        # center = (width * 0.5 + 0.5, height * 0.5 + 0.5)
        # it is visually better to estimate the center without 0.5 offset
        # otherwise image rotated by 90 degrees is shifted vs output image of F_t.affine
        if center is None:
            center = (width * 0.5, height * 0.5)
723 724 725
        return F_cv2.affine(
            img, angle, translate, scale, shear, interpolation, fill, center
        )
726 727 728 729 730 731 732 733 734 735 736 737 738 739

    if _is_tensor_image(img):
        center_f = [0.0, 0.0]
        if center is not None:
            height, width = img.shape[-1], img.shape[-2]
            # Center values should be in pixel coordinates but translated such that (0, 0) corresponds to image center.
            center_f = [
                1.0 * (c - s * 0.5) for c, s in zip(center, [width, height])
            ]
        translate_f = [1.0 * t for t in translate]
        matrix = _get_affine_matrix(center_f, angle, translate_f, scale, shear)
        return F_t.affine(img, matrix, interpolation, fill)


740 741 742
def rotate(
    img, angle, interpolation="nearest", expand=False, center=None, fill=0
):
L
LielinJiang 已提交
743 744
    """Rotates the image by angle.

745

L
LielinJiang 已提交
746
    Args:
747 748
        img (PIL.Image|np.array): Image to be rotated.
        angle (float or int): In degrees degrees counter clockwise order.
749 750 751 752 753
        interpolation (str, optional): Interpolation method. If omitted, or if the
            image has only one channel, it is set to PIL.Image.NEAREST or cv2.INTER_NEAREST
            according the backend. when use pil backend, support method are as following:
            - "nearest": Image.NEAREST,
            - "bilinear": Image.BILINEAR,
754
            - "bicubic": Image.BICUBIC
755 756 757
            when use cv2 backend, support method are as following:
            - "nearest": cv2.INTER_NEAREST,
            - "bilinear": cv2.INTER_LINEAR,
758 759
            - "bicubic": cv2.INTER_CUBIC
        expand (bool, optional): Optional expansion flag.
L
LielinJiang 已提交
760 761 762
            If true, expands the output image to make it large enough to hold the entire rotated image.
            If false or omitted, make the output image the same size as the input image.
            Note that the expand flag assumes rotation around the center and no translation.
763
        center (2-list|2-tuple, optional): Optional center of rotation.
L
LielinJiang 已提交
764 765
            Origin is the upper left corner.
            Default is the center of the image.
766
        fill (3-list|3-tuple or int): RGB pixel fill value for area outside the rotated image.
767 768
            If int, it is used for all channels respectively.

L
LielinJiang 已提交
769 770

    Returns:
771
        PIL.Image or np.array: Rotated image.
L
LielinJiang 已提交
772 773 774 775 776

    Examples:
        .. code-block:: python

            import numpy as np
777 778 779 780
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')
L
LielinJiang 已提交
781

782
            fake_img = Image.fromarray(fake_img)
L
LielinJiang 已提交
783

784 785
            rotated_img = F.rotate(fake_img, 90)
            print(rotated_img.size)
L
LielinJiang 已提交
786 787

    """
788 789 790
    if not (
        _is_pil_image(img) or _is_numpy_image(img) or _is_tensor_image(img)
    ):
791
        raise TypeError(
792 793 794 795
            'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'.format(
                type(img)
            )
        )
796

797 798 799 800 801
    if isinstance(center, list):
        center = tuple(center)
    if isinstance(fill, list):
        fill = tuple(fill)

802
    if _is_pil_image(img):
803
        return F_pil.rotate(img, angle, interpolation, expand, center, fill)
804 805
    elif _is_tensor_image(img):
        return F_t.rotate(img, angle, interpolation, expand, center, fill)
L
LielinJiang 已提交
806
    else:
807
        return F_cv2.rotate(img, angle, interpolation, expand, center, fill)
L
LielinJiang 已提交
808 809


810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827
def _get_perspective_coeffs(startpoints, endpoints):
    """
    get coefficients (a, b, c, d, e, f, g, h) of the perspective transforms.

    In Perspective Transform each pixel (x, y) in the original image gets transformed as,
     (x, y) -> ( (ax + by + c) / (gx + hy + 1), (dx + ey + f) / (gx + hy + 1) )

    Args:
        startpoints (list[list[int]]): [top-left, top-right, bottom-right, bottom-left] of the original image,
        endpoints (list[list[int]]): [top-left, top-right, bottom-right, bottom-left] of the transformed image.

    Returns:
        output (list): octuple (a, b, c, d, e, f, g, h) for transforming each pixel.
    """
    a_matrix = np.zeros((2 * len(startpoints), 8))

    for i, (p1, p2) in enumerate(zip(endpoints, startpoints)):
        a_matrix[2 * i, :] = [
828 829 830 831 832 833 834 835
            p1[0],
            p1[1],
            1,
            0,
            0,
            0,
            -p2[0] * p1[0],
            -p2[0] * p1[1],
836 837
        ]
        a_matrix[2 * i + 1, :] = [
838 839 840 841 842 843 844 845
            0,
            0,
            0,
            p1[0],
            p1[1],
            1,
            -p2[1] * p1[0],
            -p2[1] * p1[1],
846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863
        ]

    b_matrix = np.array(startpoints).reshape([8])
    res = np.linalg.lstsq(a_matrix, b_matrix)[0]

    output = list(res)
    return output


def perspective(img, startpoints, endpoints, interpolation='nearest', fill=0):
    """Perform perspective transform of the given image.

    Args:
        img (PIL.Image|np.array|paddle.Tensor): Image to be transformed.
        startpoints (list of list of ints): List containing four lists of two integers corresponding to four corners
            ``[top-left, top-right, bottom-right, bottom-left]`` of the original image.
        endpoints (list of list of ints): List containing four lists of two integers corresponding to four corners
            ``[top-left, top-right, bottom-right, bottom-left]`` of the transformed image.
864 865 866 867 868 869
        interpolation (str, optional): Interpolation method. If omitted, or if the
            image has only one channel, it is set to PIL.Image.NEAREST or cv2.INTER_NEAREST
            according the backend.
            When use pil backend, support method are as following:
            - "nearest": Image.NEAREST,
            - "bilinear": Image.BILINEAR,
870
            - "bicubic": Image.BICUBIC
871 872 873
            When use cv2 backend, support method are as following:
            - "nearest": cv2.INTER_NEAREST,
            - "bilinear": cv2.INTER_LINEAR,
874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895
            - "bicubic": cv2.INTER_CUBIC
        fill (int|list|tuple, optional): Pixel fill value for the area outside the transformed
            image. If given a number, the value is used for all bands respectively.

    Returns:
        PIL.Image|np.array|paddle.Tensor: transformed Image.

    Examples:
        .. code-block:: python

            import paddle
            from paddle.vision.transforms import functional as F

            fake_img = paddle.randn((3, 256, 300)).astype(paddle.float32)

            startpoints = [[0, 0], [33, 0], [33, 25], [0, 25]]
            endpoints = [[3, 2], [32, 3], [30, 24], [2, 25]]

            perspectived_img = F.perspective(fake_img, startpoints, endpoints)
            print(perspectived_img.shape)

    """
896 897 898
    if not (
        _is_pil_image(img) or _is_numpy_image(img) or _is_tensor_image(img)
    ):
899
        raise TypeError(
900 901 902 903
            'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'.format(
                type(img)
            )
        )
904 905 906 907 908 909 910 911

    if _is_pil_image(img):
        coeffs = _get_perspective_coeffs(startpoints, endpoints)
        return F_pil.perspective(img, coeffs, interpolation, fill)
    elif _is_tensor_image(img):
        coeffs = _get_perspective_coeffs(startpoints, endpoints)
        return F_t.perspective(img, coeffs, interpolation, fill)
    else:
912 913 914
        return F_cv2.perspective(
            img, startpoints, endpoints, interpolation, fill
        )
915 916


L
LielinJiang 已提交
917 918 919 920
def to_grayscale(img, num_output_channels=1):
    """Converts image to grayscale version of image.

    Args:
921
        img (PIL.Image|np.array): Image to be converted to grayscale.
L
LielinJiang 已提交
922 923

    Returns:
924 925 926 927
        PIL.Image or np.array: Grayscale version of the image.
            if num_output_channels = 1 : returned image is single channel

            if num_output_channels = 3 : returned image is 3 channel with r = g = b
928

L
LielinJiang 已提交
929
    Examples:
930 931 932 933 934 935 936 937 938 939 940 941 942 943
        .. code-block:: python

            import numpy as np
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)

            gray_img = F.to_grayscale(fake_img)
            print(gray_img.size)

    """
944 945 946
    if not (
        _is_pil_image(img) or _is_numpy_image(img) or _is_tensor_image(img)
    ):
947
        raise TypeError(
948 949 950 951
            'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'.format(
                type(img)
            )
        )
952 953 954

    if _is_pil_image(img):
        return F_pil.to_grayscale(img, num_output_channels)
955 956
    elif _is_tensor_image(img):
        return F_t.to_grayscale(img, num_output_channels)
957 958 959 960 961 962 963 964 965 966 967
    else:
        return F_cv2.to_grayscale(img, num_output_channels)


def normalize(img, mean, std, data_format='CHW', to_rgb=False):
    """Normalizes a tensor or image with mean and standard deviation.

    Args:
        img (PIL.Image|np.array|paddle.Tensor): input data to be normalized.
        mean (list|tuple): Sequence of means for each channel.
        std (list|tuple): Sequence of standard deviations for each channel.
968
        data_format (str, optional): Data format of input img, should be 'HWC' or
969
            'CHW'. Default: 'CHW'.
970
        to_rgb (bool, optional): Whether to convert to rgb. If input is tensor,
971 972 973
            this option will be igored. Default: False.

    Returns:
L
LielinJiang 已提交
974
        np.ndarray or Tensor: Normalized mage. Data format is same as input img.
975

976
    Examples:
L
LielinJiang 已提交
977 978 979
        .. code-block:: python

            import numpy as np
980 981 982 983 984 985
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)
L
LielinJiang 已提交
986

987 988
            mean = [127.5, 127.5, 127.5]
            std = [127.5, 127.5, 127.5]
L
LielinJiang 已提交
989

990 991
            normalized_img = F.normalize(fake_img, mean, std, data_format='HWC')
            print(normalized_img.max(), normalized_img.min())
L
LielinJiang 已提交
992 993 994

    """

995 996
    if _is_tensor_image(img):
        return F_t.normalize(img, mean, std, data_format)
L
LielinJiang 已提交
997
    else:
998 999
        if _is_pil_image(img):
            img = np.array(img).astype(np.float32)
L
LielinJiang 已提交
1000

1001
        return F_cv2.normalize(img, mean, std, data_format, to_rgb)
1002 1003 1004 1005


def erase(img, i, j, h, w, v, inplace=False):
    """Erase the pixels of selected area in input image with given value.
1006

1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053
    Args:
        img (paddle.Tensor | np.array | PIL.Image): input Tensor image.
             For Tensor input, the shape should be (C, H, W). For np.array input,
             the shape should be (H, W, C).
        i (int): y coordinate of the top-left point of erased region.
        j (int): x coordinate of the top-left point of erased region.
        h (int): Height of the erased region.
        w (int): Width of the erased region.
        v (paddle.Tensor | np.array): value used to replace the pixels in erased region. It
            should be np.array when img is np.array or PIL.Image.
        inplace (bool, optional): Whether this transform is inplace. Default: False.

    Returns:
        paddle.Tensor | np.array | PIL.Image: Erased image. The type is same with input image.

    Examples:
        .. code-block:: python

            import paddle

            fake_img = paddle.randn((3, 2, 4)).astype(paddle.float32)
            print(fake_img)

            #Tensor(shape=[3, 2, 4], dtype=float32, place=Place(gpu:0), stop_gradient=True,
            #       [[[ 0.02169025, -0.97859967, -1.39175487, -1.07478464],
            #         [ 0.20654772,  1.74624777,  0.32268861, -0.13857445]],
            #
            #        [[-0.14993843,  1.10793507, -0.40056887, -1.94395220],
            #         [ 0.41686651,  0.44551995, -0.09356714, -0.60898107]],
            #
            #        [[-0.24998808, -1.47699273, -0.88838995,  0.42629015],
            #         [ 0.56948012, -0.96200180,  0.53355658,  3.20450878]]])

            values = paddle.zeros((1,1,1), dtype=paddle.float32)
            result = paddle.vision.transforms.erase(fake_img, 0, 1, 1, 2, values)

            print(result)

            #Tensor(shape=[3, 2, 4], dtype=float32, place=Place(gpu:0), stop_gradient=True,
            #       [[[ 0.02169025,  0.        ,  0.        , -1.07478464],
            #         [ 0.20654772,  1.74624777,  0.32268861, -0.13857445]],
            #
            #         [[-0.14993843,  0.        ,  0.        , -1.94395220],
            #           [ 0.41686651,  0.44551995, -0.09356714, -0.60898107]],
            #
            #         [[-0.24998808,  0.        ,  0.        ,  0.42629015],
            #          [ 0.56948012, -0.96200180,  0.53355658,  3.20450878]]])
1054 1055 1056 1057 1058 1059 1060 1061

    """
    if _is_tensor_image(img):
        return F_t.erase(img, i, j, h, w, v, inplace=inplace)
    elif _is_pil_image(img):
        return F_pil.erase(img, i, j, h, w, v, inplace=inplace)
    else:
        return F_cv2.erase(img, i, j, h, w, v, inplace=inplace)