functional.py 37.0 KB
Newer Older
L
LielinJiang 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

L
LielinJiang 已提交
15 16
import math
import numbers
L
LielinJiang 已提交
17

18 19
import numpy as np
from PIL import Image
20

21
import paddle
L
LielinJiang 已提交
22

23
from ...fluid.framework import Variable
24
from . import functional_cv2 as F_cv2
25
from . import functional_pil as F_pil
26
from . import functional_tensor as F_t
L
LielinJiang 已提交
27

28
__all__ = []
L
LielinJiang 已提交
29

L
LielinJiang 已提交
30

31 32
def _is_pil_image(img):
    # Every PIL image type derives from Image.Image, so one isinstance
    # check covers all modes (L, RGB, RGBA, ...).
    return isinstance(img, Image.Image)
L
LielinJiang 已提交
33 34


35
def _is_tensor_image(img):
    """Return True if ``img`` is a paddle.Tensor (dynamic mode) or a
    Variable (static graph mode)."""
    # Both dynamic-graph tensors and static-graph variables are accepted.
    return isinstance(img, (paddle.Tensor, Variable))
L
LielinJiang 已提交
40

41 42 43 44 45 46 47 48

def _is_numpy_image(img):
    return isinstance(img, np.ndarray) and (img.ndim in {2, 3})


def to_tensor(pic, data_format='CHW'):
    """Converts a ``PIL.Image`` or ``numpy.ndarray`` to paddle.Tensor.

    Converts a PIL.Image or numpy.ndarray (H x W x C) to a paddle.Tensor of
    shape (C x H x W). A grayscale input (H x W) is treated as (H x W x 1),
    producing an output of shape (1 x H x W). Pass ``data_format='HWC'`` to
    keep the channel-last layout.

    Pixel values in the range [0, 255] are scaled to [0.0, 1.0] when the PIL
    Image mode is one of (L, LA, P, I, F, RGB, YCbCr, RGBA, CMYK, 1) or the
    ndarray dtype is np.uint8; otherwise values are returned unscaled.

    Args:
        pic (PIL.Image|np.ndarray): Image to be converted to tensor.
        data_format (str, optional): Data format of output tensor, should be
            'HWC' or 'CHW'. Default: 'CHW'.

    Returns:
        Tensor: Converted image. Data type is same as input img.

    Examples:
        .. code-block:: python

            import numpy as np
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)

            tensor = F.to_tensor(fake_img)
            print(tensor.shape)

    """
    # Dispatch on input type; the final raise covers everything unsupported.
    if _is_pil_image(pic):
        return F_pil.to_tensor(pic, data_format)

    if _is_numpy_image(pic):
        return F_cv2.to_tensor(pic, data_format)

    if _is_tensor_image(pic):
        # Already a tensor: only a layout change may be needed.
        if data_format.lower() == 'chw':
            return pic
        return pic.transpose((1, 2, 0))

    raise TypeError(
        'pic should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'.format(
            type(pic)
        )
    )
L
LielinJiang 已提交
100 101


102
def resize(img, size, interpolation='bilinear'):
    """
    Resizes the image to given size

    Args:
        input (PIL.Image|np.ndarray|paddle.Tensor): Image to be resized.
        size (int|list|tuple): Target size of input data, with (height, width) shape.
        interpolation (int|str, optional): Interpolation method. when use pil backend,
            support method are as following:
            - "nearest": Image.NEAREST,
            - "bilinear": Image.BILINEAR,
            - "bicubic": Image.BICUBIC,
            - "box": Image.BOX,
            - "lanczos": Image.LANCZOS,
            - "hamming": Image.HAMMING
            when use cv2 backend, support method are as following:
            - "nearest": cv2.INTER_NEAREST,
            - "bilinear": cv2.INTER_LINEAR,
            - "area": cv2.INTER_AREA,
            - "bicubic": cv2.INTER_CUBIC,
            - "lanczos": cv2.INTER_LANCZOS4

    Returns:
        PIL.Image|np.array|paddle.Tensor: Resized image.

    Examples:
        .. code-block:: python

            import numpy as np
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)

            converted_img = F.resize(fake_img, 224)
            print(converted_img.size)
            # (262, 224)

            converted_img = F.resize(fake_img, (200, 150))
            print(converted_img.size)
            # (150, 200)
    """
    # Route to the backend matching the input type; reject anything else.
    if _is_pil_image(img):
        return F_pil.resize(img, size, interpolation)

    if _is_tensor_image(img):
        return F_t.resize(img, size, interpolation)

    if _is_numpy_image(img):
        return F_cv2.resize(img, size, interpolation)

    raise TypeError(
        f'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {type(img)}'
    )
L
LielinJiang 已提交
161 162


163 164
def pad(img, padding, fill=0, padding_mode='constant'):
    """
    Pads the given PIL.Image or numpy.array or paddle.Tensor on all sides with specified padding mode and fill value.

    Args:
        img (PIL.Image|np.array|paddle.Tensor): Image to be padded.
        padding (int|list|tuple): Padding on each border. If a single int is provided this
            is used to pad all borders. If list/tuple of length 2 is provided this is the padding
            on left/right and top/bottom respectively. If a list/tuple of length 4 is provided
            this is the padding for the left, top, right and bottom borders
            respectively.
        fill (float, optional): Pixel fill value for constant fill. If a tuple of
            length 3, it is used to fill R, G, B channels respectively.
            This value is only used when the padding_mode is constant. Default: 0.
        padding_mode: Type of padding. Should be: constant, edge, reflect or symmetric. Default: 'constant'.

            - constant: pads with a constant value, this value is specified with fill

            - edge: pads with the last value on the edge of the image

            - reflect: pads with reflection of image (without repeating the last value on the edge)

                       padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode
                       will result in [3, 2, 1, 2, 3, 4, 3, 2]

            - symmetric: pads with reflection of image (repeating the last value on the edge)

                         padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode
                         will result in [2, 1, 1, 2, 3, 4, 4, 3]

    Returns:
        PIL.Image|np.array|paddle.Tensor: Padded image.

    Examples:
        .. code-block:: python

            import numpy as np
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)

            padded_img = F.pad(fake_img, padding=1)
            print(padded_img.size)

            padded_img = F.pad(fake_img, padding=(2, 1))
            print(padded_img.size)
    """
    # Backend dispatch: PIL, tensor, then ndarray; unsupported types raise.
    if _is_pil_image(img):
        return F_pil.pad(img, padding, fill, padding_mode)

    if _is_tensor_image(img):
        return F_t.pad(img, padding, fill, padding_mode)

    if _is_numpy_image(img):
        return F_cv2.pad(img, padding, fill, padding_mode)

    raise TypeError(
        f'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {type(img)}'
    )


def crop(img, top, left, height, width):
    """Crops the given Image.

    Args:
        img (PIL.Image|np.array|paddle.Tensor): Image to be cropped. (0,0) denotes the top left
            corner of the image.
        top (int): Vertical component of the top left corner of the crop box.
        left (int): Horizontal component of the top left corner of the crop box.
        height (int): Height of the crop box.
        width (int): Width of the crop box.

    Returns:
        PIL.Image|np.array|paddle.Tensor: Cropped image.

    Examples:
        .. code-block:: python

            import numpy as np
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)

            cropped_img = F.crop(fake_img, 56, 150, 200, 100)
            print(cropped_img.size)

    """
    # Pick the backend implementation based on the input type.
    if _is_pil_image(img):
        return F_pil.crop(img, top, left, height, width)

    if _is_tensor_image(img):
        return F_t.crop(img, top, left, height, width)

    if _is_numpy_image(img):
        return F_cv2.crop(img, top, left, height, width)

    raise TypeError(
        f'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {type(img)}'
    )


def center_crop(img, output_size):
    """Crops the given Image and resize it to desired size.

    Args:
        img (PIL.Image|np.array|paddle.Tensor): Image to be cropped. (0,0) denotes the top left corner of the image.
        output_size (sequence or int): (height, width) of the crop box. If int,
            it is used for both directions

    Returns:
        PIL.Image|np.array|paddle.Tensor: Cropped image.

    Examples:

        .. code-block:: python

            import numpy as np
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)

            cropped_img = F.center_crop(fake_img, (150, 100))
            print(cropped_img.size)
    """
    # Dispatch by input type; the trailing raise rejects unsupported inputs.
    if _is_pil_image(img):
        return F_pil.center_crop(img, output_size)

    if _is_tensor_image(img):
        return F_t.center_crop(img, output_size)

    if _is_numpy_image(img):
        return F_cv2.center_crop(img, output_size)

    raise TypeError(
        f'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {type(img)}'
    )


L
LielinJiang 已提交
318
def hflip(img):
    """Horizontally flips the given Image or np.array or paddle.Tensor.

    Args:
        img (PIL.Image|np.array|Tensor): Image to be flipped.

    Returns:
        PIL.Image|np.array|paddle.Tensor:  Horizontall flipped image.

    Examples:
        .. code-block:: python

            import numpy as np
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)

            flpped_img = F.hflip(fake_img)
            print(flpped_img.size)

    """
    # One backend per supported input type.
    if _is_pil_image(img):
        return F_pil.hflip(img)

    if _is_tensor_image(img):
        return F_t.hflip(img)

    if _is_numpy_image(img):
        return F_cv2.hflip(img)

    raise TypeError(
        f'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {type(img)}'
    )


def vflip(img):
    """Vertically flips the given Image or np.array or paddle.Tensor.

    Args:
        img (PIL.Image|np.array|paddle.Tensor): Image to be flipped.

    Returns:
        PIL.Image|np.array|paddle.Tensor:  Vertically flipped image.

    Examples:
        .. code-block:: python

            import numpy as np
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)

            flpped_img = F.vflip(fake_img)
            print(flpped_img.size)

    """
    # One backend per supported input type.
    if _is_pil_image(img):
        return F_pil.vflip(img)

    if _is_tensor_image(img):
        return F_t.vflip(img)

    if _is_numpy_image(img):
        return F_cv2.vflip(img)

    raise TypeError(
        f'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {type(img)}'
    )


def adjust_brightness(img, brightness_factor):
    """Adjusts brightness of an Image.

    Args:
        img (PIL.Image|np.array|paddle.Tensor): Image to be adjusted.
        brightness_factor (float): How much to adjust the brightness. Can be
            any non negative number. 0 gives a black image, 1 gives the
            original image while 2 increases the brightness by a factor of 2.

    Returns:
        PIL.Image|np.array|paddle.Tensor: Brightness adjusted image.

    Examples:
        .. code-block:: python
           :name: code-example1

            import numpy as np
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)
            print(fake_img.size) # (300, 256)
            print(fake_img.load()[1,1]) # (95, 127, 202)
            converted_img = F.adjust_brightness(fake_img, 0.5)
            print(converted_img.size) # (300, 256)
            print(converted_img.load()[1,1]) # (47, 63, 101)


    """
    if _is_pil_image(img):
        return F_pil.adjust_brightness(img, brightness_factor)

    if _is_numpy_image(img):
        # The cv2 backend expects uint8 pixel data, so cast before adjusting.
        return F_cv2.adjust_brightness(img.astype(np.uint8), brightness_factor)

    if _is_tensor_image(img):
        return F_t.adjust_brightness(img, brightness_factor)

    raise TypeError(
        f'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {type(img)}'
    )
446 447 448 449 450 451


def adjust_contrast(img, contrast_factor):
    """Adjusts contrast of an Image.

    Args:
        img (PIL.Image|np.array|paddle.Tensor): Image to be adjusted.
        contrast_factor (float): How much to adjust the contrast. Can be any
            non negative number. 0 gives a solid gray image, 1 gives the
            original image while 2 increases the contrast by a factor of 2.

    Returns:
        PIL.Image|np.array|paddle.Tensor: Contrast adjusted image.

    Examples:
        .. code-block:: python

            import numpy as np
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)

            converted_img = F.adjust_contrast(fake_img, 0.4)
            print(converted_img.size)
    """
    # Backend dispatch: PIL, ndarray, then tensor; unsupported types raise.
    if _is_pil_image(img):
        return F_pil.adjust_contrast(img, contrast_factor)

    if _is_numpy_image(img):
        return F_cv2.adjust_contrast(img, contrast_factor)

    if _is_tensor_image(img):
        return F_t.adjust_contrast(img, contrast_factor)

    raise TypeError(
        f'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {type(img)}'
    )
489 490 491 492 493 494


def adjust_saturation(img, saturation_factor):
    """Adjusts color saturation of an image.

    Args:
        img (PIL.Image|np.array|paddle.Tensor): Image to be adjusted.
        saturation_factor (float):  How much to adjust the saturation. 0 will
            give a black and white image, 1 will give the original image while
            2 will enhance the saturation by a factor of 2.

    Returns:
        PIL.Image|np.array|paddle.Tensor: Saturation adjusted image.

    Examples:
        .. code-block:: python

            import numpy as np
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)

            converted_img = F.adjust_saturation(fake_img, 0.4)
            print(converted_img.size)

    """
    # Backend dispatch: PIL, ndarray, then tensor; unsupported types raise.
    if _is_pil_image(img):
        return F_pil.adjust_saturation(img, saturation_factor)

    if _is_numpy_image(img):
        return F_cv2.adjust_saturation(img, saturation_factor)

    if _is_tensor_image(img):
        return F_t.adjust_saturation(img, saturation_factor)

    raise TypeError(
        f'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {type(img)}'
    )
533 534 535 536 537 538 539 540 541 542 543 544 545


def adjust_hue(img, hue_factor):
    """Adjusts hue of an image.

    The image hue is adjusted by converting the image to HSV and
    cyclically shifting the intensities in the hue channel (H).
    The image is then converted back to original image mode.

    `hue_factor` is the amount of shift in H channel and must be in the
    interval `[-0.5, 0.5]`.

    Args:
        img (PIL.Image|np.array|paddle.Tensor): Image to be adjusted.
        hue_factor (float):  How much to shift the hue channel. Should be in
            [-0.5, 0.5]. 0.5 and -0.5 give complete reversal of hue channel in
            HSV space in positive and negative direction respectively.
            0 means no shift. Therefore, both -0.5 and 0.5 will give an image
            with complementary colors while 0 gives the original image.

    Returns:
        PIL.Image|np.array|paddle.Tensor: Hue adjusted image.

    Examples:
        .. code-block:: python

            import numpy as np
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)

            converted_img = F.adjust_hue(fake_img, 0.4)
            print(converted_img.size)

    """
    # Backend dispatch: PIL, ndarray, then tensor; unsupported types raise.
    if _is_pil_image(img):
        return F_pil.adjust_hue(img, hue_factor)

    if _is_numpy_image(img):
        return F_cv2.adjust_hue(img, hue_factor)

    if _is_tensor_image(img):
        return F_t.adjust_hue(img, hue_factor)

    raise TypeError(
        f'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {type(img)}'
    )
586 587


588 589 590 591 592 593 594
def _get_affine_matrix(center, angle, translate, scale, shear):
    # Affine matrix is : M = T * C * RotateScaleShear * C^-1
    # Ihe inverse one is : M^-1 = C * RotateScaleShear^-1 * C^-1 * T^-1
    rot = math.radians(angle)
    sx = math.radians(shear[0])
    sy = math.radians(shear[1])

595
    # Rotate and Shear without scaling
596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618
    a = math.cos(rot - sy) / math.cos(sy)
    b = -math.cos(rot - sy) * math.tan(sx) / math.cos(sy) - math.sin(rot)
    c = math.sin(rot - sy) / math.cos(sy)
    d = -math.sin(rot - sy) * math.tan(sx) / math.cos(sy) + math.cos(rot)

    # Center Translation
    cx, cy = center
    tx, ty = translate

    # Inverted rotation matrix with scale and shear
    # det([[a, b], [c, d]]) == 1, since det(rotation) = 1 and det(shear) = 1
    matrix = [d, -b, 0.0, -c, a, 0.0]
    matrix = [x / scale for x in matrix]
    # Apply inverse of translation and of center translation: RSS^-1 * C^-1 * T^-1
    matrix[2] += matrix[0] * (-cx - tx) + matrix[1] * (-cy - ty)
    matrix[5] += matrix[3] * (-cx - tx) + matrix[4] * (-cy - ty)
    # Apply center translation: C * RSS^-1 * C^-1 * T^-1
    matrix[2] += cx
    matrix[5] += cy

    return matrix


619 620 621 622 623 624 625 626 627 628
def affine(
    img,
    angle,
    translate,
    scale,
    shear,
    interpolation="nearest",
    fill=0,
    center=None,
):
    """Apply affine transformation on the image.

    Args:
        img (PIL.Image|np.array|paddle.Tensor): Image to be affined.
        angle (int|float): The angle of the random rotation in clockwise order.
        translate (list[float]): Maximum absolute fraction for horizontal and vertical translations.
        scale (float): Scale factor for the image, scale should be positive.
        shear (list[float]): Shear angle values which are parallel to the x-axis and y-axis in clockwise order.
        interpolation (str, optional): Interpolation method. If omitted, or if the
            image has only one channel, it is set to PIL.Image.NEAREST or cv2.INTER_NEAREST
            according the backend.
            When use pil backend, support method are as following:
            - "nearest": Image.NEAREST,
            - "bilinear": Image.BILINEAR,
            - "bicubic": Image.BICUBIC
            When use cv2 backend, support method are as following:
            - "nearest": cv2.INTER_NEAREST,
            - "bilinear": cv2.INTER_LINEAR,
            - "bicubic": cv2.INTER_CUBIC
        fill (int|list|tuple, optional): Pixel fill value for the area outside the transformed
            image. If given a number, the value is used for all bands respectively.
        center (2-tuple, optional): Optional center of rotation, (x, y).
            Origin is the upper left corner.
            Default is the center of the image.

    Returns:
        PIL.Image|np.array|paddle.Tensor: Affine Transformed image.

    Examples:
        .. code-block:: python

            import paddle
            from paddle.vision.transforms import functional as F

            fake_img = paddle.randn((3, 256, 300)).astype(paddle.float32)

            affined_img = F.affine(fake_img, 45, translate=[0.2, 0.2], scale=0.5, shear=[-10, 10])
            print(affined_img.shape)
    """

    # --- Input validation: reject unsupported image types first. ---
    if not (
        _is_pil_image(img) or _is_numpy_image(img) or _is_tensor_image(img)
    ):
        raise TypeError(
            'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'.format(
                type(img)
            )
        )

    if not isinstance(angle, (int, float)):
        raise TypeError("Argument angle should be int or float")

    if not isinstance(translate, (list, tuple)):
        raise TypeError("Argument translate should be a sequence")

    if len(translate) != 2:
        raise ValueError("Argument translate should be a sequence of length 2")

    if scale <= 0.0:
        raise ValueError("Argument scale should be positive")

    # A scalar shear is allowed here and expanded to [shear, 0.0] below.
    if not isinstance(shear, (numbers.Number, (list, tuple))):
        raise TypeError(
            "Shear should be either a single value or a sequence of two values"
        )

    if not isinstance(interpolation, str):
        raise TypeError("Argument interpolation should be a string")

    # --- Normalize argument forms so the backends see consistent types. ---
    if isinstance(angle, int):
        angle = float(angle)

    if isinstance(translate, tuple):
        translate = list(translate)

    if isinstance(shear, numbers.Number):
        shear = [shear, 0.0]

    if isinstance(shear, tuple):
        shear = list(shear)

    if len(shear) == 1:
        shear = [shear[0], shear[0]]

    if len(shear) != 2:
        raise ValueError(
            f"Shear should be a sequence containing two values. Got {shear}"
        )

    if center is not None and not isinstance(center, (list, tuple)):
        raise TypeError("Argument center should be a sequence")

    # --- Backend dispatch. ---
    if _is_pil_image(img):
        width, height = img.size
        # center = (width * 0.5 + 0.5, height * 0.5 + 0.5)
        # it is visually better to estimate the center without 0.5 offset
        # otherwise image rotated by 90 degrees is shifted vs output image of F_t.affine
        if center is None:
            center = [width * 0.5, height * 0.5]
        matrix = _get_affine_matrix(center, angle, translate, scale, shear)
        return F_pil.affine(img, matrix, interpolation, fill)

    if _is_numpy_image(img):
        # get affine_matrix in F_cv2.affine() using cv2's functions
        # NOTE(review): ndarray shape is (H, W, ...), so this binds
        # width=H and height=W — looks swapped vs the PIL branch; verify
        # against F_cv2.affine's expected center convention.
        width, height = img.shape[0:2]
        # center = (width * 0.5 + 0.5, height * 0.5 + 0.5)
        # it is visually better to estimate the center without 0.5 offset
        # otherwise image rotated by 90 degrees is shifted vs output image of F_t.affine
        if center is None:
            center = (width * 0.5, height * 0.5)
        return F_cv2.affine(
            img, angle, translate, scale, shear, interpolation, fill, center
        )

    if _is_tensor_image(img):
        center_f = [0.0, 0.0]
        if center is not None:
            # NOTE(review): for a (..., H, W) tensor this binds
            # height=shape[-1] (W) and width=shape[-2] (H) — the reverse
            # of the usual convention; confirm against F_t.affine.
            height, width = img.shape[-1], img.shape[-2]
            # Center values should be in pixel coordinates but translated such that (0, 0) corresponds to image center.
            center_f = [
                1.0 * (c - s * 0.5) for c, s in zip(center, [width, height])
            ]
        translate_f = [1.0 * t for t in translate]
        matrix = _get_affine_matrix(center_f, angle, translate_f, scale, shear)
        return F_t.affine(img, matrix, interpolation, fill)


756 757 758
def rotate(
    img, angle, interpolation="nearest", expand=False, center=None, fill=0
):
    """Rotates the image by angle.

    Args:
        img (PIL.Image|np.array|paddle.Tensor): Image to be rotated.
        angle (float or int): In degrees degrees counter clockwise order.
        interpolation (str, optional): Interpolation method. If omitted, or if the
            image has only one channel, it is set to PIL.Image.NEAREST or cv2.INTER_NEAREST
            according the backend. when use pil backend, support method are as following:
            - "nearest": Image.NEAREST,
            - "bilinear": Image.BILINEAR,
            - "bicubic": Image.BICUBIC
            when use cv2 backend, support method are as following:
            - "nearest": cv2.INTER_NEAREST,
            - "bilinear": cv2.INTER_LINEAR,
            - "bicubic": cv2.INTER_CUBIC
        expand (bool, optional): Optional expansion flag.
            If true, expands the output image to make it large enough to hold the entire rotated image.
            If false or omitted, make the output image the same size as the input image.
            Note that the expand flag assumes rotation around the center and no translation.
        center (2-list|2-tuple, optional): Optional center of rotation.
            Origin is the upper left corner.
            Default is the center of the image.
        fill (3-list|3-tuple or int, optional): RGB pixel fill value for area outside the rotated image.
            If int, it is used for all channels respectively. Default value is 0.

    Returns:
        PIL.Image|np.array|paddle.Tensor: Rotated image.

    Examples:
        .. code-block:: python

            import numpy as np
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)

            rotated_img = F.rotate(fake_img, 90)
            print(rotated_img.size)

    """
    if not (
        _is_pil_image(img) or _is_numpy_image(img) or _is_tensor_image(img)
    ):
        raise TypeError(
            f'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {type(img)}'
        )

    # The backends expect tuples for center/fill; accept lists for convenience.
    center = tuple(center) if isinstance(center, list) else center
    fill = tuple(fill) if isinstance(fill, list) else fill

    if _is_pil_image(img):
        backend_rotate = F_pil.rotate
    elif _is_tensor_image(img):
        backend_rotate = F_t.rotate
    else:
        backend_rotate = F_cv2.rotate
    return backend_rotate(img, angle, interpolation, expand, center, fill)
L
LielinJiang 已提交
824 825


826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843
def _get_perspective_coeffs(startpoints, endpoints):
    """
    get coefficients (a, b, c, d, e, f, g, h) of the perspective transforms.

    In Perspective Transform each pixel (x, y) in the original image gets transformed as,
     (x, y) -> ( (ax + by + c) / (gx + hy + 1), (dx + ey + f) / (gx + hy + 1) )

    Args:
        startpoints (list[list[int]]): [top-left, top-right, bottom-right, bottom-left] of the original image,
        endpoints (list[list[int]]): [top-left, top-right, bottom-right, bottom-left] of the transformed image.

    Returns:
        output (list): octuple (a, b, c, d, e, f, g, h) for transforming each pixel.
    """
    a_matrix = np.zeros((2 * len(startpoints), 8))

    for i, (p1, p2) in enumerate(zip(endpoints, startpoints)):
        a_matrix[2 * i, :] = [
844 845 846 847 848 849 850 851
            p1[0],
            p1[1],
            1,
            0,
            0,
            0,
            -p2[0] * p1[0],
            -p2[0] * p1[1],
852 853
        ]
        a_matrix[2 * i + 1, :] = [
854 855 856 857 858 859 860 861
            0,
            0,
            0,
            p1[0],
            p1[1],
            1,
            -p2[1] * p1[0],
            -p2[1] * p1[1],
862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879
        ]

    b_matrix = np.array(startpoints).reshape([8])
    res = np.linalg.lstsq(a_matrix, b_matrix)[0]

    output = list(res)
    return output


def perspective(img, startpoints, endpoints, interpolation='nearest', fill=0):
    """Perform perspective transform of the given image.

    Args:
        img (PIL.Image|np.array|paddle.Tensor): Image to be transformed.
        startpoints (list of list of ints): Four corner points
            ``[top-left, top-right, bottom-right, bottom-left]`` of the original image.
        endpoints (list of list of ints): Four corner points
            ``[top-left, top-right, bottom-right, bottom-left]`` of the transformed image.
        interpolation (str, optional): Interpolation method. Defaults to "nearest"
            (PIL.Image.NEAREST or cv2.INTER_NEAREST depending on the backend).
            PIL backend supports: "nearest", "bilinear", "bicubic".
            cv2 backend supports: "nearest", "bilinear", "bicubic".
        fill (int|list|tuple, optional): Pixel fill value for the area outside the
            transformed image. A single number is used for all bands.

    Returns:
        PIL.Image|np.array|paddle.Tensor: transformed Image.

    Examples:
        .. code-block:: python

            import paddle
            from paddle.vision.transforms import functional as F

            fake_img = paddle.randn((3, 256, 300)).astype(paddle.float32)

            startpoints = [[0, 0], [33, 0], [33, 25], [0, 25]]
            endpoints = [[3, 2], [32, 3], [30, 24], [2, 25]]

            perspectived_img = F.perspective(fake_img, startpoints, endpoints)
            print(perspectived_img.shape)

    """
    supported = (
        _is_pil_image(img) or _is_numpy_image(img) or _is_tensor_image(img)
    )
    if not supported:
        raise TypeError(
            'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'.format(
                type(img)
            )
        )

    # PIL and tensor backends take the 8 projective coefficients; the cv2
    # backend computes its own mapping from the raw corner points.
    if _is_pil_image(img):
        coeffs = _get_perspective_coeffs(startpoints, endpoints)
        return F_pil.perspective(img, coeffs, interpolation, fill)
    if _is_tensor_image(img):
        coeffs = _get_perspective_coeffs(startpoints, endpoints)
        return F_t.perspective(img, coeffs, interpolation, fill)
    return F_cv2.perspective(img, startpoints, endpoints, interpolation, fill)
931 932


L
LielinJiang 已提交
933 934 935 936
def to_grayscale(img, num_output_channels=1):
    """Converts image to grayscale version of image.

    Args:
        img (PIL.Image|np.array|paddle.Tensor): Image to be converted to grayscale.
        num_output_channels (int, optional): The number of channels for the output
            image. Single channel. Default: 1.

    Returns:
        PIL.Image|np.array|paddle.Tensor: Grayscale version of the image.
            if num_output_channels = 1 : returned image is single channel

            if num_output_channels = 3 : returned image is 3 channel with r = g = b

    Examples:
        .. code-block:: python

            import numpy as np
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)

            gray_img = F.to_grayscale(fake_img)
            print(gray_img.size)

    """
    # Dispatch on the input type; anything else is rejected up front.
    if _is_pil_image(img):
        return F_pil.to_grayscale(img, num_output_channels)
    if _is_tensor_image(img):
        return F_t.to_grayscale(img, num_output_channels)
    if _is_numpy_image(img):
        return F_cv2.to_grayscale(img, num_output_channels)

    raise TypeError(
        'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'.format(
            type(img)
        )
    )


def normalize(img, mean, std, data_format='CHW', to_rgb=False):
    """Normalizes a tensor or image with mean and standard deviation.

    Args:
        img (PIL.Image|np.array|paddle.Tensor): input data to be normalized.
        mean (list|tuple): Sequence of means for each channel.
        std (list|tuple): Sequence of standard deviations for each channel.
        data_format (str, optional): Data format of input img, should be 'HWC' or
            'CHW'. Default: 'CHW'.
        to_rgb (bool, optional): Whether to convert to rgb. If input is tensor,
            this option will be igored. Default: False.

    Returns:
        PIL.Image|np.array|paddle.Tensor: Normalized mage. Data format is same as input img.

    Examples:
        .. code-block:: python

            import numpy as np
            from PIL import Image
            from paddle.vision.transforms import functional as F

            fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8')

            fake_img = Image.fromarray(fake_img)

            mean = [127.5, 127.5, 127.5]
            std = [127.5, 127.5, 127.5]

            normalized_img = F.normalize(fake_img, mean, std, data_format='HWC')
            print(normalized_img.max(), normalized_img.min())

    """
    # Tensor inputs go through the tensor backend; to_rgb is ignored there.
    if _is_tensor_image(img):
        return F_t.normalize(img, mean, std, data_format)

    # PIL images are converted to float32 ndarrays first, then normalized
    # together with plain ndarrays via the cv2 backend.
    if _is_pil_image(img):
        img = np.array(img).astype(np.float32)
    return F_cv2.normalize(img, mean, std, data_format, to_rgb)
1019 1020 1021 1022


def erase(img, i, j, h, w, v, inplace=False):
    """Erase the pixels of selected area in input image with given value.

    Args:
        img (paddle.Tensor | np.array | PIL.Image): input Tensor image.
             For Tensor input, the shape should be (C, H, W). For np.array input,
             the shape should be (H, W, C).
        i (int): y coordinate of the top-left point of erased region.
        j (int): x coordinate of the top-left point of erased region.
        h (int): Height of the erased region.
        w (int): Width of the erased region.
        v (paddle.Tensor | np.array): value used to replace the pixels in erased region. It
            should be np.array when img is np.array or PIL.Image.
        inplace (bool, optional): Whether this transform is inplace. Default: False.

    Returns:
        paddle.Tensor | np.array | PIL.Image: Erased image. The type is same with input image.

    Examples:
        .. code-block:: python

            import paddle

            fake_img = paddle.randn((3, 2, 4)).astype(paddle.float32)
            print(fake_img)

            #Tensor(shape=[3, 2, 4], dtype=float32, place=Place(gpu:0), stop_gradient=True,
            #       [[[ 0.02169025, -0.97859967, -1.39175487, -1.07478464],
            #         [ 0.20654772,  1.74624777,  0.32268861, -0.13857445]],
            #
            #        [[-0.14993843,  1.10793507, -0.40056887, -1.94395220],
            #         [ 0.41686651,  0.44551995, -0.09356714, -0.60898107]],
            #
            #        [[-0.24998808, -1.47699273, -0.88838995,  0.42629015],
            #         [ 0.56948012, -0.96200180,  0.53355658,  3.20450878]]])

            values = paddle.zeros((1,1,1), dtype=paddle.float32)
            result = paddle.vision.transforms.erase(fake_img, 0, 1, 1, 2, values)

            print(result)

            #Tensor(shape=[3, 2, 4], dtype=float32, place=Place(gpu:0), stop_gradient=True,
            #       [[[ 0.02169025,  0.        ,  0.        , -1.07478464],
            #         [ 0.20654772,  1.74624777,  0.32268861, -0.13857445]],
            #
            #         [[-0.14993843,  0.        ,  0.        , -1.94395220],
            #           [ 0.41686651,  0.44551995, -0.09356714, -0.60898107]],
            #
            #         [[-0.24998808,  0.        ,  0.        ,  0.42629015],
            #          [ 0.56948012, -0.96200180,  0.53355658,  3.20450878]]])

    """
    # Pick the backend by input type: tensor -> F_t, PIL -> F_pil,
    # anything else (ndarray) -> F_cv2.
    if _is_pil_image(img):
        return F_pil.erase(img, i, j, h, w, v, inplace=inplace)
    if _is_tensor_image(img):
        return F_t.erase(img, i, j, h, w, v, inplace=inplace)
    return F_cv2.erase(img, i, j, h, w, v, inplace=inplace)