Add pil backend for vision transforms (#28035)

* add pil backend

Add pil backend for vision transforms (#28035)
* add pil backend
74c8a811 · LielinJiang · GitHub · 135b62a4 · 74c8a811 · 74c8a811
10 changed files
--- a/python/paddle/tests/test_callbacks.py
+++ b/python/paddle/tests/test_callbacks.py
@@ -105,7 +105,7 @@ class TestCallbacks(unittest.TestCase):
        self.run_callback()

    def test_visualdl_callback(self):
-        # visualdl not support python3
+        # visualdl does not support python2
        if sys.version_info < (3, ):
            return


--- a/python/paddle/tests/test_transforms.py
+++ b/python/paddle/tests/test_transforms.py
@@ -18,14 +18,19 @@ import tempfile
 import cv2
 import shutil
 import numpy as np
+from PIL import Image

+import paddle
+from paddle.vision import get_image_backend, set_image_backend, image_load
 from paddle.vision.datasets import DatasetFolder
 from paddle.vision.transforms import transforms
 import paddle.vision.transforms.functional as F


-class TestTransforms(unittest.TestCase):
+class TestTransformsCV2(unittest.TestCase):
    def setUp(self):
+        self.backend = self.get_backend()
+        set_image_backend(self.backend)
        self.data_dir = tempfile.mkdtemp()
        for i in range(2):
            sub_dir = os.path.join(self.data_dir, 'class_' + str(i))
@@ -40,6 +45,22 @@ class TestTransforms(unittest.TestCase):
                        (400, 300, 3)) * 255).astype('uint8')
                cv2.imwrite(os.path.join(sub_dir, str(j) + '.jpg'), fake_img)

+    def get_backend(self):
+        # Backend name that setUp passes to set_image_backend for this
+        # test case; TestTransformsPIL overrides it to return 'pil'.
+        return 'cv2'
+
+    def create_image(self, shape):
+        """Build a random uint8 test image in the current backend's format."""
+        backend = self.backend
+        if backend == 'cv2':
+            return (np.random.rand(*shape) * 255).astype('uint8')
+        if backend == 'pil':
+            pixels = (np.random.rand(*shape) * 255).astype('uint8')
+            return Image.fromarray(pixels)
+
+    def get_shape(self, img):
+        """Return the array shape of ``img``, converting PIL images first."""
+        array = np.array(img) if self.backend == 'pil' else img
+        return array.shape
+
    def tearDown(self):
        shutil.rmtree(self.data_dir)

@@ -51,27 +72,29 @@ class TestTransforms(unittest.TestCase):

    def test_trans_all(self):
        normalize = transforms.Normalize(
-            mean=[123.675, 116.28, 103.53], std=[58.395, 57.120, 57.375])
+            mean=[123.675, 116.28, 103.53],
+            std=[58.395, 57.120, 57.375], )
        trans = transforms.Compose([
-            transforms.RandomResizedCrop(224), transforms.GaussianNoise(),
+            transforms.RandomResizedCrop(224),
            transforms.ColorJitter(
-                brightness=0.4, contrast=0.4, saturation=0.4,
-                hue=0.4), transforms.RandomHorizontalFlip(),
-            transforms.Permute(mode='CHW'), normalize
+                brightness=0.4, contrast=0.4, saturation=0.4, hue=0.4),
+            transforms.RandomHorizontalFlip(),
+            transforms.Transpose(),
+            normalize,
        ])

        self.do_transform(trans)

    def test_normalize(self):
        normalize = transforms.Normalize(mean=0.5, std=0.5)
-        trans = transforms.Compose([transforms.Permute(mode='CHW'), normalize])
+        trans = transforms.Compose([transforms.Transpose(), normalize])
        self.do_transform(trans)

    def test_trans_resize(self):
        trans = transforms.Compose([
-            transforms.Resize(300, [0, 1]),
+            transforms.Resize(300),
            transforms.RandomResizedCrop((280, 280)),
-            transforms.Resize(280, [0, 1]),
+            transforms.Resize(280),
            transforms.Resize((256, 200)),
            transforms.Resize((180, 160)),
            transforms.CenterCrop(128),
@@ -79,13 +102,6 @@ class TestTransforms(unittest.TestCase):
        ])
        self.do_transform(trans)

-    def test_trans_centerCrop(self):
-        trans = transforms.Compose([
-            transforms.CenterCropResize(224),
-            transforms.CenterCropResize(128, 160),
-        ])
-        self.do_transform(trans)
-
    def test_flip(self):
        trans = transforms.Compose([
            transforms.RandomHorizontalFlip(1.0),
@@ -96,7 +112,7 @@ class TestTransforms(unittest.TestCase):
        self.do_transform(trans)

    def test_color_jitter(self):
-        trans = transforms.BatchCompose([
+        trans = transforms.Compose([
            transforms.BrightnessTransform(0.0),
            transforms.HueTransform(0.0),
            transforms.SaturationTransform(0.0),
@@ -106,11 +122,11 @@ class TestTransforms(unittest.TestCase):

    def test_rotate(self):
        trans = transforms.Compose([
-            transforms.RandomRotate(90),
-            transforms.RandomRotate([-10, 10]),
-            transforms.RandomRotate(
+            transforms.RandomRotation(90),
+            transforms.RandomRotation([-10, 10]),
+            transforms.RandomRotation(
                45, expand=True),
-            transforms.RandomRotate(
+            transforms.RandomRotation(
                10, expand=True, center=(60, 80)),
        ])
        self.do_transform(trans)
@@ -119,20 +135,15 @@ class TestTransforms(unittest.TestCase):
        trans = transforms.Compose([transforms.Pad(2)])
        self.do_transform(trans)

-        fake_img = np.random.rand(200, 150, 3).astype('float32')
+        fake_img = self.create_image((200, 150, 3))
        trans_pad = transforms.Pad(10)
        fake_img_padded = trans_pad(fake_img)
-        np.testing.assert_equal(fake_img_padded.shape, (220, 170, 3))
+        np.testing.assert_equal(self.get_shape(fake_img_padded), (220, 170, 3))
        trans_pad1 = transforms.Pad([1, 2])
        trans_pad2 = transforms.Pad([1, 2, 3, 4])
        img = trans_pad1(fake_img)
        img = trans_pad2(img)

-    def test_erase(self):
-        trans = transforms.Compose(
-            [transforms.RandomErasing(), transforms.RandomErasing(value=0.0)])
-        self.do_transform(trans)
-
    def test_random_crop(self):
        trans = transforms.Compose([
            transforms.RandomCrop(200),
@@ -143,18 +154,19 @@ class TestTransforms(unittest.TestCase):
        trans_random_crop1 = transforms.RandomCrop(224)
        trans_random_crop2 = transforms.RandomCrop((140, 160))

-        fake_img = np.random.rand(500, 400, 3).astype('float32')
+        fake_img = self.create_image((500, 400, 3))
        fake_img_crop1 = trans_random_crop1(fake_img)
        fake_img_crop2 = trans_random_crop2(fake_img_crop1)

-        np.testing.assert_equal(fake_img_crop1.shape, (224, 224, 3))
+        np.testing.assert_equal(self.get_shape(fake_img_crop1), (224, 224, 3))

-        np.testing.assert_equal(fake_img_crop2.shape, (140, 160, 3))
+        np.testing.assert_equal(self.get_shape(fake_img_crop2), (140, 160, 3))

        trans_random_crop_same = transforms.RandomCrop((140, 160))
        img = trans_random_crop_same(fake_img_crop2)

-        trans_random_crop_bigger = transforms.RandomCrop((180, 200))
+        trans_random_crop_bigger = transforms.RandomCrop(
+            (180, 200), pad_if_needed=True)
        img = trans_random_crop_bigger(img)

        trans_random_crop_pad = transforms.RandomCrop((224, 256), 2, True)
@@ -165,21 +177,38 @@ class TestTransforms(unittest.TestCase):
        self.do_transform(trans)

        trans_gray = transforms.Grayscale()
-        fake_img = np.random.rand(500, 400, 3).astype('float32')
+        fake_img = self.create_image((500, 400, 3))
        fake_img_gray = trans_gray(fake_img)

-        np.testing.assert_equal(len(fake_img_gray.shape), 3)
-        np.testing.assert_equal(fake_img_gray.shape[0], 500)
-        np.testing.assert_equal(fake_img_gray.shape[1], 400)
+        np.testing.assert_equal(self.get_shape(fake_img_gray)[0], 500)
+        np.testing.assert_equal(self.get_shape(fake_img_gray)[1], 400)

        trans_gray3 = transforms.Grayscale(3)
-        fake_img = np.random.rand(500, 400, 3).astype('float32')
+        fake_img = self.create_image((500, 400, 3))
        fake_img_gray = trans_gray3(fake_img)

+    def test_transpose(self):
+        """Transpose should turn an HWC image into a CHW array."""
+        trans = transforms.Compose([transforms.Transpose()])
+        self.do_transform(trans)
+
+        fake_img = self.create_image((50, 100, 3))
+        converted_img = trans(fake_img)
+
+        np.testing.assert_equal(self.get_shape(converted_img), (3, 50, 100))
+
+    def test_to_tensor(self):
+        """ToTensor should yield a CHW paddle.Tensor from an HWC image."""
+        to_tensor = transforms.Compose([transforms.ToTensor()])
+        result = to_tensor(self.create_image((50, 100, 3)))
+
+        assert isinstance(result, paddle.Tensor)
+        np.testing.assert_equal(result.shape, (3, 50, 100))
+
    def test_exception(self):
        trans = transforms.Compose([transforms.Resize(-1)])

-        trans_batch = transforms.BatchCompose([transforms.Resize(-1)])
+        trans_batch = transforms.Compose([transforms.Resize(-1)])

        with self.assertRaises(Exception):
            self.do_transform(trans)
@@ -203,35 +232,211 @@ class TestTransforms(unittest.TestCase):
            transforms.Pad([1.0, 2.0, 3.0])

        with self.assertRaises(TypeError):
-            fake_img = np.random.rand(100, 120, 3).astype('float32')
+            fake_img = self.create_image((100, 120, 3))
            F.pad(fake_img, '1')

        with self.assertRaises(TypeError):
-            fake_img = np.random.rand(100, 120, 3).astype('float32')
+            fake_img = self.create_image((100, 120, 3))
            F.pad(fake_img, 1, {})

        with self.assertRaises(TypeError):
-            fake_img = np.random.rand(100, 120, 3).astype('float32')
+            fake_img = self.create_image((100, 120, 3))
            F.pad(fake_img, 1, padding_mode=-1)

        with self.assertRaises(ValueError):
-            fake_img = np.random.rand(100, 120, 3).astype('float32')
+            fake_img = self.create_image((100, 120, 3))
            F.pad(fake_img, [1.0, 2.0, 3.0])

        with self.assertRaises(ValueError):
-            transforms.RandomRotate(-2)
+            transforms.RandomRotation(-2)

        with self.assertRaises(ValueError):
-            transforms.RandomRotate([1, 2, 3])
+            transforms.RandomRotation([1, 2, 3])

        with self.assertRaises(ValueError):
            trans_gray = transforms.Grayscale(5)
-            fake_img = np.random.rand(100, 120, 3).astype('float32')
+            fake_img = self.create_image((100, 120, 3))
            trans_gray(fake_img)

+        with self.assertRaises(TypeError):
+            transform = transforms.RandomResizedCrop(64)
+            transform(1)
+
+        with self.assertRaises(ValueError):
+            transform = transforms.BrightnessTransform([-0.1, -0.2])
+
+        with self.assertRaises(TypeError):
+            transform = transforms.BrightnessTransform('0.1')
+
+        with self.assertRaises(ValueError):
+            transform = transforms.BrightnessTransform('0.1', keys=1)
+
+        with self.assertRaises(NotImplementedError):
+            transform = transforms.BrightnessTransform('0.1', keys='a')
+
    def test_info(self):
        str(transforms.Compose([transforms.Resize((224, 224))]))
-        str(transforms.BatchCompose([transforms.Resize((224, 224))]))
+        str(transforms.Compose([transforms.Resize((224, 224))]))
+
+
+class TestTransformsPIL(TestTransformsCV2):
+    # Inherits every test from TestTransformsCV2; only the backend name that
+    # setUp hands to set_image_backend differs.
+    def get_backend(self):
+        return 'pil'
+
+
+class TestFunctional(unittest.TestCase):
+    """Checks paddle.vision.transforms.functional directly, comparing the
+    numpy (cv2) and PIL code paths where both exist."""
+
+    def test_errors(self):
+        with self.assertRaises(TypeError):
+            F.to_tensor(1)
+
+        with self.assertRaises(ValueError):
+            fake_img = Image.fromarray((np.random.rand(28, 28, 3) * 255).astype(
+                'uint8'))
+            F.to_tensor(fake_img, data_format=1)
+
+        with self.assertRaises(TypeError):
+            fake_img = Image.fromarray((np.random.rand(28, 28, 3) * 255).astype(
+                'uint8'))
+            F.resize(fake_img, '1')
+
+        with self.assertRaises(TypeError):
+            F.resize(1, 1)
+
+        with self.assertRaises(TypeError):
+            F.pad(1, 1)
+
+        with self.assertRaises(TypeError):
+            F.crop(1, 1, 1, 1, 1)
+
+        with self.assertRaises(TypeError):
+            F.hflip(1)
+
+        with self.assertRaises(TypeError):
+            F.vflip(1)
+
+        with self.assertRaises(TypeError):
+            F.adjust_brightness(1, 0.1)
+
+        with self.assertRaises(TypeError):
+            F.adjust_contrast(1, 0.1)
+
+        with self.assertRaises(TypeError):
+            F.adjust_hue(1, 0.1)
+
+        with self.assertRaises(TypeError):
+            F.adjust_saturation(1, 0.1)
+
+        with self.assertRaises(TypeError):
+            F.rotate(1, 0.1)
+
+        with self.assertRaises(TypeError):
+            F.to_grayscale(1)
+
+        with self.assertRaises(ValueError):
+            set_image_backend(1)
+
+        with self.assertRaises(ValueError):
+            image_load('tmp.jpg', backend=1)
+
+    def test_normalize(self):
+        # Scale to [0, 255] before the uint8 cast; casting rand() output
+        # directly truncates every pixel to 0.
+        np_img = (np.random.rand(28, 24, 3) * 255).astype('uint8')
+        pil_img = Image.fromarray(np_img)
+        tensor_img = F.to_tensor(pil_img)
+        tensor_img_hwc = F.to_tensor(pil_img, data_format='HWC')
+
+        mean = [0.5, 0.5, 0.5]
+        std = [0.5, 0.5, 0.5]
+
+        normalized_img = F.normalize(tensor_img, mean, std)
+        normalized_img = F.normalize(
+            tensor_img_hwc, mean, std, data_format='HWC')
+
+        normalized_img = F.normalize(pil_img, mean, std, data_format='HWC')
+        normalized_img = F.normalize(
+            np_img, mean, std, data_format='HWC', to_rgb=True)
+
+    def test_center_crop(self):
+        # Non-constant pixels are required for the comparison below to be
+        # able to detect a wrong crop offset.
+        np_img = (np.random.rand(28, 24, 3) * 255).astype('uint8')
+        pil_img = Image.fromarray(np_img)
+
+        np_cropped_img = F.center_crop(np_img, 4)
+        pil_cropped_img = F.center_crop(pil_img, 4)
+
+        np.testing.assert_almost_equal(np_cropped_img,
+                                       np.array(pil_cropped_img))
+
+    def test_pad(self):
+        # Non-constant pixels are required for the reflect-padding comparison
+        # to be meaningful.
+        np_img = (np.random.rand(28, 24, 3) * 255).astype('uint8')
+        pil_img = Image.fromarray(np_img)
+
+        np_padded_img = F.pad(np_img, [1, 2], padding_mode='reflect')
+        pil_padded_img = F.pad(pil_img, [1, 2], padding_mode='reflect')
+
+        np.testing.assert_almost_equal(np_padded_img, np.array(pil_padded_img))
+
+        pil_p_img = pil_img.convert('P')
+        pil_padded_img = F.pad(pil_p_img, [1, 2])
+        pil_padded_img = F.pad(pil_p_img, [1, 2], padding_mode='reflect')
+
+    def test_resize(self):
+        # An all-zero image is used here on purpose: the two backends only
+        # need to agree on the output shape/values for constant input.
+        np_img = (np.zeros([28, 24, 3])).astype('uint8')
+        pil_img = Image.fromarray(np_img)
+
+        np_resized_img = F.resize(np_img, 40)
+        pil_resized_img = F.resize(pil_img, 40)
+
+        np.testing.assert_almost_equal(np_resized_img,
+                                       np.array(pil_resized_img))
+
+        gray_img = (np.zeros([28, 32])).astype('uint8')
+        gray_resize_img = F.resize(gray_img, 40)
+
+    def test_to_tensor(self):
+        np_img = (np.random.rand(28, 28) * 255).astype('uint8')
+        pil_img = Image.fromarray(np_img)
+
+        np_tensor = F.to_tensor(np_img, data_format='HWC')
+        pil_tensor = F.to_tensor(pil_img, data_format='HWC')
+
+        np.testing.assert_allclose(np_tensor.numpy(), pil_tensor.numpy())
+
+        # test float dtype
+        float_img = np.random.rand(28, 28)
+        float_tensor = F.to_tensor(float_img)
+
+        pil_img = Image.fromarray(np_img).convert('I')
+        pil_tensor = F.to_tensor(pil_img)
+
+        pil_img = Image.fromarray(np_img).convert('I;16')
+        pil_tensor = F.to_tensor(pil_img)
+
+        pil_img = Image.fromarray(np_img).convert('F')
+        pil_tensor = F.to_tensor(pil_img)
+
+        pil_img = Image.fromarray(np_img).convert('1')
+        pil_tensor = F.to_tensor(pil_img)
+
+        pil_img = Image.fromarray(np_img).convert('YCbCr')
+        pil_tensor = F.to_tensor(pil_img)
+
+    def test_image_load(self):
+        fake_img = Image.fromarray((np.random.random((32, 32, 3)) * 255).astype(
+            'uint8'))
+
+        # Work inside a private temp dir and restore the global backend in a
+        # finally block so a failure here cannot leak a stray file or a
+        # changed backend into other tests.
+        temp_dir = tempfile.mkdtemp()
+        path = os.path.join(temp_dir, 'temp.jpg')
+        previous_backend = get_image_backend()
+        try:
+            fake_img.save(path)
+
+            set_image_backend('pil')
+            pil_img = image_load(path).convert('RGB')
+            self.assertIsInstance(pil_img, Image.Image)
+
+            set_image_backend('cv2')
+            np_img = image_load(path)
+            self.assertIsInstance(np_img, np.ndarray)
+        finally:
+            set_image_backend(previous_backend)
+            shutil.rmtree(temp_dir)


 if __name__ == '__main__':

--- a/python/paddle/vision/__init__.py
+++ b/python/paddle/vision/__init__.py
@@ -21,6 +21,10 @@ from .transforms import *
 from . import datasets
 from .datasets import *

+from . import image
+from .image import *
+
 __all__ = models.__all__ \
        + transforms.__all__ \
-        + datasets.__all__
+        + datasets.__all__ \
+        + image.__all__
--- a/python/paddle/vision/datasets/folder.py
+++ b/python/paddle/vision/datasets/folder.py
@@ -14,6 +14,7 @@

 import os
 import sys
+from PIL import Image

 import paddle
 from paddle.io import Dataset
@@ -136,7 +137,7 @@ class DatasetFolder(Dataset):
                "Found 0 files in subfolders of: " + self.root + "\n"
                "Supported extensions are: " + ",".join(extensions)))

-        self.loader = cv2_loader if loader is None else loader
+        self.loader = default_loader if loader is None else loader
        self.extensions = extensions

        self.classes = classes
@@ -193,9 +194,23 @@ IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif',
                  '.tiff', '.webp')


+def pil_loader(path):
+    """Open the image at ``path`` with PIL and return it converted to RGB."""
+    with open(path, 'rb') as stream:
+        # convert() is called inside the ``with`` so the pixel data is read
+        # while the file handle is still open.
+        return Image.open(stream).convert('RGB')
+
+
 def cv2_loader(path):
    cv2 = try_import('cv2')
-    return cv2.imread(path)
+    return cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB)
+
+
+def default_loader(path):
+    """Load ``path`` with the loader matching the current image backend.
+
+    ``cv2_loader`` is used when ``get_image_backend()`` returns 'cv2';
+    any other value falls back to ``pil_loader``.
+    """
+    from paddle.vision import get_image_backend
+    loader = cv2_loader if get_image_backend() == 'cv2' else pil_loader
+    return loader(path)


 class ImageFolder(Dataset):
@@ -280,7 +295,7 @@ class ImageFolder(Dataset):
                "Found 0 files in subfolders of: " + self.root + "\n"
                "Supported extensions are: " + ",".join(extensions)))

-        self.loader = cv2_loader if loader is None else loader
+        self.loader = default_loader if loader is None else loader
        self.extensions = extensions
        self.samples = samples
        self.transform = transform

--- a/python/paddle/vision/image.py
+++ b/python/paddle/vision/image.py
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from PIL import Image
+from paddle.utils import try_import
+
+__all__ = ['set_image_backend', 'get_image_backend', 'image_load']
+
+_image_backend = 'pil'
+
+
+def set_image_backend(backend):
+    """
+    Selects the library used to load images in
+    ``paddle.vision.datasets.ImageFolder`` and
+    ``paddle.vision.datasets.DatasetFolder``. The supported backends are
+    Pillow ('pil') and OpenCV ('cv2'). If no backend has been set, 'pil' is
+    used.
+
+    Args:
+        backend (str): Name of the image load backend, should be one of {'pil', 'cv2'}.
+
+    Raises:
+        ValueError: If ``backend`` is neither 'pil' nor 'cv2'.
+
+    Examples:
+
+        .. code-block:: python
+
+            import os
+            import shutil
+            import tempfile
+            import numpy as np
+            from PIL import Image
+
+            from paddle.vision import DatasetFolder
+            from paddle.vision import set_image_backend
+
+            set_image_backend('pil')
+
+            def make_fake_dir():
+                data_dir = tempfile.mkdtemp()
+
+                for i in range(2):
+                    sub_dir = os.path.join(data_dir, 'class_' + str(i))
+                    if not os.path.exists(sub_dir):
+                        os.makedirs(sub_dir)
+                    for j in range(2):
+                        fake_img = Image.fromarray((np.random.random((32, 32, 3)) * 255).astype('uint8'))
+                        fake_img.save(os.path.join(sub_dir, str(j) + '.png'))
+                return data_dir
+
+            temp_dir = make_fake_dir()
+
+            pil_data_folder = DatasetFolder(temp_dir)
+
+            for items in pil_data_folder:
+                break
+
+            # should get PIL.Image.Image
+            print(type(items[0]))
+
+            # use opencv as backend
+            # set_image_backend('cv2')
+
+            # cv2_data_folder = DatasetFolder(temp_dir)
+
+            # for items in cv2_data_folder:
+            #     break
+
+            # should get numpy.ndarray
+            # print(type(items[0]))
+
+            shutil.rmtree(temp_dir)
+    """
+    global _image_backend
+    is_supported = backend in ['pil', 'cv2']
+    if not is_supported:
+        message = "Expected backend are one of ['pil', 'cv2'], but got {}"
+        raise ValueError(message.format(backend))
+    _image_backend = backend
+
+
+def get_image_backend():
+    """
+    Gets the name of the package currently used to load images.
+
+    Returns:
+        str: The backend name stored by ``set_image_backend`` ('pil' if it
+        has never been called).
+
+    Examples:
+    
+        .. code-block:: python
+
+            from paddle.vision import get_image_backend
+
+            backend = get_image_backend()
+            print(backend)
+
+    """
+    return _image_backend
+
+
+def image_load(path, backend=None):
+    """Load an image.
+
+    Args:
+        path (str): Path of the image.
+        backend (str, optional): The image decoding backend type. Options are
+            `cv2`, `pil`, `None`. If backend is None, the global backend
+            specified by ``paddle.vision.set_image_backend`` will be used.
+            Default: None.
+
+    Returns:
+        PIL.Image or np.array: Loaded image. With the 'pil' backend this is
+        the result of ``Image.open``; with 'cv2' it is the result of
+        ``cv2.imread`` with no colour conversion applied.
+
+    Raises:
+        ValueError: If the resolved backend is neither 'pil' nor 'cv2'.
+
+    Examples:
+
+        .. code-block:: python
+
+            import numpy as np
+            from PIL import Image
+            from paddle.vision import image_load, set_image_backend
+
+            fake_img = Image.fromarray((np.random.random((32, 32, 3)) * 255).astype('uint8'))
+
+            path = 'temp.png'
+            fake_img.save(path)
+
+            set_image_backend('pil')
+
+            pil_img = image_load(path).convert('RGB')
+
+            # should be PIL.Image.Image
+            print(type(pil_img))
+
+            # use opencv as backend
+            # set_image_backend('cv2')
+
+            # np_img = image_load(path)
+            # # should get numpy.ndarray
+            # print(type(np_img))
+
+    """
+
+    chosen = _image_backend if backend is None else backend
+    if chosen not in ['pil', 'cv2']:
+        message = "Expected backend are one of ['pil', 'cv2'], but got {}"
+        raise ValueError(message.format(chosen))
+
+    if chosen != 'pil':
+        cv2 = try_import('cv2')
+        return cv2.imread(path)
+    return Image.open(path)
--- a/python/paddle/vision/transforms/functional.py
+++ b/python/paddle/vision/transforms/functional.py
--- a/python/paddle/vision/transforms/functional_cv2.py
+++ b/python/paddle/vision/transforms/functional_cv2.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import division
+
+import sys
+import numbers
+import warnings
+import collections
+
+import numpy as np
+from numpy import sin, cos, tan
+
+import paddle
+from paddle.utils import try_import
+
+# Pick the abstract container base classes from wherever this Python
+# version exposes them.
+if sys.version_info < (3, 3):
+    Sequence = collections.Sequence
+    Iterable = collections.Iterable
+else:
+    # ``import collections`` does not by itself load the ``abc`` submodule,
+    # so import it explicitly instead of relying on some other module having
+    # done so already.
+    import collections.abc
+    Sequence = collections.abc.Sequence
+    Iterable = collections.abc.Iterable
+
+
+def to_tensor(pic, data_format='CHW'):
+    """Converts a ``numpy.ndarray`` to paddle.Tensor.
+
+    See ``ToTensor`` for more details.
+
+    Args:
+        pic (np.ndarray): Image to be converted to tensor. A 2-D array gets
+            a trailing channel axis added before conversion.
+        data_format (str, optional): Data format of the output tensor, should
+            be 'HWC' or 'CHW'. Default: 'CHW'.
+
+    Returns:
+        Tensor: Converted image. uint8 input is cast to float32 and divided
+        by 255; other dtypes are returned unchanged.
+
+    Raises:
+        ValueError: If ``data_format`` is neither 'CHW' nor 'HWC'.
+
+    """
+
+    if data_format not in ['CHW', 'HWC']:
+        raise ValueError('data_format should be CHW or HWC. Got {}'.format(
+            data_format))
+
+    array = pic[:, :, None] if pic.ndim == 2 else pic
+    if data_format == 'CHW':
+        array = array.transpose((2, 0, 1))
+    tensor = paddle.to_tensor(array)
+
+    if paddle.fluid.data_feeder.convert_dtype(tensor.dtype) != 'uint8':
+        return tensor
+    return paddle.cast(tensor, np.float32) / 255.
+
+
+def resize(img, size, interpolation='bilinear'):
+    """
+    Resizes the image to given size
+
+    Args:
+        img (np.ndarray): Image to be resized.
+        size (int|list|tuple): Target size. A 2-element sequence is read as
+            (height, width). An int is the target length of the shorter
+            side; the other side is scaled to keep the aspect ratio, and the
+            image is returned unchanged if the shorter side already equals
+            ``size``.
+        interpolation (str, optional): Interpolation method. Supported
+            methods are:
+            - "nearest": cv2.INTER_NEAREST,
+            - "bilinear": cv2.INTER_LINEAR,
+            - "area": cv2.INTER_AREA,
+            - "bicubic": cv2.INTER_CUBIC,
+            - "lanczos": cv2.INTER_LANCZOS4
+
+    Returns:
+        np.array: Resized image.
+
+    Raises:
+        TypeError: If ``size`` is neither an int nor a 2-element iterable.
+
+    """
+    cv2 = try_import('cv2')
+    interp_table = {
+        'nearest': cv2.INTER_NEAREST,
+        'bilinear': cv2.INTER_LINEAR,
+        'area': cv2.INTER_AREA,
+        'bicubic': cv2.INTER_CUBIC,
+        'lanczos': cv2.INTER_LANCZOS4
+    }
+
+    size_is_int = isinstance(size, int)
+    if not size_is_int and not (isinstance(size, Iterable) and
+                                len(size) == 2):
+        raise TypeError('Got inappropriate size arg: {}'.format(size))
+
+    height, width = img.shape[:2]
+
+    # cv2 expects dsize as (width, height).
+    if size_is_int:
+        if (width <= height and width == size) or (height <= width and
+                                                   height == size):
+            return img
+        if width < height:
+            target = (size, int(size * height / width))
+        else:
+            target = (int(size * width / height), size)
+    else:
+        target = (size[1], size[0])
+
+    resized = cv2.resize(
+        img, dsize=target, interpolation=interp_table[interpolation])
+
+    # Re-add the channel axis for (H, W, 1) input.
+    if len(img.shape) == 3 and img.shape[2] == 1:
+        return resized[:, :, np.newaxis]
+    return resized
+
+
+def pad(img, padding, fill=0, padding_mode='constant'):
+    """
+    Pads the given numpy.array on all sides with specified padding mode and fill value.
+
+    Args:
+        img (np.array): Image to be padded.
+        padding (int|list|tuple): Padding on each border. If a single int is provided this
+            is used to pad all borders. If tuple of length 2 is provided this is the padding
+            on left/right and top/bottom respectively. If a tuple of length 4 is provided
+            this is the padding for the left, top, right and bottom borders
+            respectively.
+        fill (float, optional): Pixel fill value for constant fill. If a tuple of
+            length 3, it is used to fill R, G, B channels respectively.
+            This value is only used when the padding_mode is constant. Default: 0. 
+        padding_mode: Type of padding. Should be: constant, edge, reflect or symmetric. Default: 'constant'.
+
+            - constant: pads with a constant value, this value is specified with fill
+
+            - edge: pads with the last value on the edge of the image
+
+            - reflect: pads with reflection of image (without repeating the last value on the edge)
+
+                       padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode
+                       will result in [3, 2, 1, 2, 3, 4, 3, 2]
+
+            - symmetric: pads with reflection of image (repeating the last value on the edge)
+
+                         padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode
+                         will result in [2, 1, 1, 2, 3, 4, 4, 3]
+
+    Returns:
+        np.array: Padded image.
+
+    Raises:
+        TypeError: If ``padding``, ``fill`` or ``padding_mode`` has an
+            unsupported type, or a scalar ``padding`` is not an integer.
+        ValueError: If a sequence ``padding`` does not have 2 or 4 elements.
+        AssertionError: If ``padding_mode`` is not one of the four modes.
+
+    """
+    cv2 = try_import('cv2')
+    _cv2_pad_from_str = {
+        'constant': cv2.BORDER_CONSTANT,
+        'edge': cv2.BORDER_REPLICATE,
+        'reflect': cv2.BORDER_REFLECT_101,
+        'symmetric': cv2.BORDER_REFLECT
+    }
+
+    if not isinstance(padding, (numbers.Number, list, tuple)):
+        raise TypeError('Got inappropriate padding arg')
+    if not isinstance(fill, (numbers.Number, str, list, tuple)):
+        raise TypeError('Got inappropriate fill arg')
+    if not isinstance(padding_mode, str):
+        raise TypeError('Got inappropriate padding_mode arg')
+
+    if isinstance(padding, Sequence) and len(padding) not in [2, 4]:
+        raise ValueError(
+            "Padding must be an int or a 2, or 4 element tuple, not a " +
+            "{} element tuple".format(len(padding)))
+
+    # Kept as an assert so callers that catch AssertionError keep working.
+    assert padding_mode in ['constant', 'edge', 'reflect', 'symmetric'], \
+        'Padding mode should be either constant, edge, reflect or symmetric'
+
+    if isinstance(padding, Sequence):
+        # Length was validated above, so it is either 2 or 4 here.
+        if len(padding) == 2:
+            pad_left = pad_right = padding[0]
+            pad_top = pad_bottom = padding[1]
+        else:
+            pad_left, pad_top, pad_right, pad_bottom = padding
+    elif isinstance(padding, numbers.Integral):
+        # Accept any integral scalar, not only the builtin int.
+        pad_left = pad_right = pad_top = pad_bottom = int(padding)
+    else:
+        # A non-integral scalar used to pass the type check above and then
+        # fail later with an UnboundLocalError; reject it explicitly.
+        raise TypeError(
+            'Scalar padding must be an integer, got {}'.format(padding))
+
+    padded = cv2.copyMakeBorder(
+        img,
+        top=pad_top,
+        bottom=pad_bottom,
+        left=pad_left,
+        right=pad_right,
+        borderType=_cv2_pad_from_str[padding_mode],
+        value=fill)
+
+    # Re-add the channel axis for (H, W, 1) input.
+    if len(img.shape) == 3 and img.shape[2] == 1:
+        return padded[:, :, np.newaxis]
+    return padded
+
+
+def crop(img, top, left, height, width):
+    """Crops the given image.
+
+    Args:
+        img (np.array): Image to be cropped, either (H, W) or (H, W, C).
+            (0,0) denotes the top left corner of the image.
+        top (int): Vertical component of the top left corner of the crop box.
+        left (int): Horizontal component of the top left corner of the crop box.
+        height (int): Height of the crop box.
+        width (int): Width of the crop box.
+
+    Returns:
+        np.array: Cropped image.
+
+    """
+
+    # Only the two spatial axes are sliced, so a 2-D grayscale array works as
+    # well as an (H, W, C) one; any trailing axes are kept in full.
+    return img[top:top + height, left:left + width]
+
+
+def center_crop(img, output_size):
+    """Crops the central region of the given image.
+
+    Args:
+        img (np.array): Image to be cropped. (0,0) denotes the top left
+            corner of the image.
+        output_size (sequence or int): (height, width) of the crop box. If
+            a number, it is converted to int and used for both directions.
+
+    Returns:
+        np.array: Cropped image.
+
+    """
+
+    is_scalar = isinstance(output_size, numbers.Number)
+    if is_scalar:
+        side = int(output_size)
+
+    img_h, img_w = img.shape[:2]
+    crop_h, crop_w = (side, side) if is_scalar else output_size
+
+    # Offsets that centre the crop box inside the image.
+    top = int(round((img_h - crop_h) / 2.))
+    left = int(round((img_w - crop_w) / 2.))
+    return crop(img, top, left, crop_h, crop_w)
+
+
+def hflip(img):
+    """Horizontally flips the given image.
+
+    Args:
+        img (np.array): Image to be flipped.
+
+    Returns:
+        np.array:  Horizontally flipped image. A 3-D input keeps a channel
+            axis in the output.
+
+    """
+    cv2 = try_import('cv2')
+
+    flipped = cv2.flip(img, 1)
+    # vflip and the colour ops in this module re-append the channel axis for
+    # single-channel input; do the same here, but only when the axis was
+    # actually dropped by the flip.
+    if len(img.shape) == 3 and len(flipped.shape) == 2:
+        return flipped[:, :, np.newaxis]
+    return flipped
+
+
+def vflip(img):
+    """Flips the given image upside down.
+
+    Args:
+        img (np.array): Image to be flipped.
+
+    Returns:
+        np.array:  Vertically flipped image.
+
+    """
+    cv2 = try_import('cv2')
+
+    single_channel = len(img.shape) == 3 and img.shape[2] == 1
+    flipped = cv2.flip(img, 0)
+    if single_channel:
+        # An (H, W, 1) input gets its trailing channel axis appended again
+        # (presumably because cv2 returns a 2-D array here).
+        return flipped[:, :, np.newaxis]
+    return flipped
+
+
+def adjust_brightness(img, brightness_factor):
+    """Scales the brightness of an image through a 256-entry lookup table.
+
+    Args:
+        img (np.array): Image to be adjusted.
+        brightness_factor (float):  Multiplier applied to every intensity
+            level. 0 gives a black image, 1 gives the original image while
+            2 increases the brightness by a factor of 2.
+
+    Returns:
+        np.array: Brightness adjusted image.
+
+    """
+    cv2 = try_import('cv2')
+
+    # One output value per possible input level, saturated to [0, 255].
+    scaled_levels = [level * brightness_factor for level in range(0, 256)]
+    lut = np.array(scaled_levels).clip(0, 255).astype('uint8')
+
+    single_channel = len(img.shape) == 3 and img.shape[2] == 1
+    adjusted = cv2.LUT(img, lut)
+    if single_channel:
+        # Re-append the channel axis for (H, W, 1) input.
+        return adjusted[:, :, np.newaxis]
+    return adjusted
+
+
+def adjust_contrast(img, contrast_factor):
+    """Scales the contrast of an image through a 256-entry lookup table.
+
+    Args:
+        img (np.array): Image to be adjusted.
+        contrast_factor (float): How much to adjust the contrast. Can be any
+            non negative number. 0 gives a solid gray image, 1 gives the
+            original image while 2 increases the contrast by a factor of 2.
+
+    Returns:
+        np.array: Contrast adjusted image.
+
+    """
+    cv2 = try_import('cv2')
+
+    # Intensities are stretched around the fixed pivot level 74 and then
+    # saturated to [0, 255].
+    stretched_levels = [(level - 74) * contrast_factor + 74
+                        for level in range(0, 256)]
+    lut = np.array(stretched_levels).clip(0, 255).astype('uint8')
+
+    single_channel = len(img.shape) == 3 and img.shape[2] == 1
+    adjusted = cv2.LUT(img, lut)
+    if single_channel:
+        # Re-append the channel axis for (H, W, 1) input.
+        return adjusted[:, :, np.newaxis]
+    return adjusted
+
+
+def adjust_saturation(img, saturation_factor):
+    """Adjusts color saturation of an image.
+
+    The result is a blend of the image and its grayscale version, weighted
+    by ``saturation_factor``.
+
+    Args:
+        img (np.array): Image to be adjusted.
+        saturation_factor (float):  How much to adjust the saturation. 0 will
+            give a black and white image, 1 will give the original image while
+            2 will enhance the saturation by a factor of 2.
+
+    Returns:
+        np.array: Saturation adjusted image, cast back to the input dtype.
+
+    """
+    cv2 = try_import('cv2')
+
+    dtype = img.dtype
+    img = img.astype(np.float32)
+    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+    gray_img = gray_img[..., np.newaxis]
+    # Use the requested factor itself as the blend weight. Drawing a random
+    # weight here would make the documented 0 / 1 / 2 behaviour impossible
+    # and would disagree with the PIL backend, which is deterministic.
+    img = img * saturation_factor + gray_img * (1 - saturation_factor)
+    return img.clip(0, 255).astype(dtype)
+
+
+def adjust_hue(img, hue_factor):
+    """Adjusts hue of an image.
+
+    The image hue is adjusted by converting the image to HSV and
+    cyclically shifting the intensities in the hue channel (H).
+    The image is then converted back to original image mode.
+
+    `hue_factor` is the amount of shift in H channel and must be in the
+    interval `[-0.5, 0.5]`.
+
+    Args:
+        img (np.array): Image to be adjusted.
+        hue_factor (float):  How much to shift the hue channel. Should be in
+            [-0.5, 0.5]. 0.5 and -0.5 give complete reversal of hue channel in
+            HSV space in positive and negative direction respectively.
+            0 means no shift. Therefore, both -0.5 and 0.5 will give an image
+            with complementary colors while 0 gives the original image.
+
+    Returns:
+        np.array: Hue adjusted image, cast back to the input dtype.
+
+    Raises:
+        ValueError: If ``hue_factor`` is outside [-0.5, 0.5].
+
+    """
+    cv2 = try_import('cv2')
+
+    if not (-0.5 <= hue_factor <= 0.5):
+        raise ValueError('hue_factor ({}) is not in [-0.5, 0.5].'.format(
+            hue_factor))
+
+    dtype = img.dtype
+    img = img.astype(np.uint8)
+    hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV_FULL)
+    h, s, v = cv2.split(hsv_img)
+
+    # Reduce the shift modulo 256 in Python integers first: converting a
+    # negative float straight to uint8 is not a well-defined cast.
+    shift = np.uint8(int(hue_factor * 255) % 256)
+    h = h.astype(np.uint8)
+    # uint8 addition take cares of rotation across boundaries
+    with np.errstate(over="ignore"):
+        h += shift
+    hsv_img = cv2.merge([h, s, v])
+    return cv2.cvtColor(hsv_img, cv2.COLOR_HSV2BGR_FULL).astype(dtype)
+
+
+def rotate(img, angle, resample=False, expand=False, center=None, fill=0):
+    """Rotates the image by angle.
+
+    Args:
+        img (np.array): Image to be rotated.
+        angle (float or int): In degrees degrees counter clockwise order.
+        resample (int|str, optional): Accepted for interface parity with the
+            other backends. NOTE: this implementation does not use it yet;
+            the default interpolation of ``cv2.warpAffine`` is applied.
+        expand (bool, optional): Accepted for interface parity with the
+            other backends. NOTE: this implementation does not use it yet;
+            the output always has the same size as the input.
+        center (2-tuple, optional): Optional center of rotation.
+            Origin is the upper left corner.
+            Default is the center of the image.
+        fill (3-tuple or int): Pixel fill value for area outside the rotated image.
+            If int, it is used for all channels respectively.
+
+    Returns:
+        np.array: Rotated image.
+
+    """
+    cv2 = try_import('cv2')
+
+    rows, cols = img.shape[0:2]
+    if center is None:
+        center = (cols / 2, rows / 2)
+    M = cv2.getRotationMatrix2D(center, angle, 1)
+    # Pass ``fill`` through as the border value so the documented argument
+    # takes effect; the default of 0 matches cv2's own default border value.
+    rotated = cv2.warpAffine(img, M, (cols, rows), borderValue=fill)
+    if len(img.shape) == 3 and img.shape[2] == 1:
+        # Re-append the channel axis for (H, W, 1) input.
+        return rotated[:, :, np.newaxis]
+    return rotated
+
+
+def to_grayscale(img, num_output_channels=1):
+    """Produces the grayscale version of an image.
+
+    Args:
+        img (np.array): Image to be converted to grayscale.
+        num_output_channels (int, optional): Number of channels of the
+            result, either 1 or 3. Default: 1.
+
+    Returns:
+        np.array: Grayscale version of the image.
+            if num_output_channels = 1 : returned image is single channel
+
+            if num_output_channels = 3 : returned image is 3 channel with r = g = b
+
+    Raises:
+        ValueError: If ``num_output_channels`` is neither 1 nor 3.
+
+    """
+    cv2 = try_import('cv2')
+
+    if num_output_channels not in (1, 3):
+        raise ValueError('num_output_channels should be either 1 or 3')
+
+    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)[:, :, np.newaxis]
+    if num_output_channels == 1:
+        return gray
+    # much faster than doing cvtColor to go back to gray
+    return np.broadcast_to(gray, img.shape)
+
+
+def normalize(img, mean, std, data_format='CHW', to_rgb=False):
+    """Normalizes a ndarray image with mean and standard deviation.
+
+    Args:
+        img (np.array): input data to be normalized.
+        mean (list|tuple): Sequence of means for each channel.
+        std (list|tuple): Sequence of standard deviations for each channel.
+        data_format (str, optional): Data format of img, should be 'HWC' or 
+            'CHW'. Default: 'CHW'.
+        to_rgb (bool, optional): Whether to reverse the channel order
+            (BGR to RGB) before normalizing. Default: False.
+
+    Returns:
+        np.array: Normalized image. The input array is left unmodified.
+
+    """
+
+    channel_first = data_format == 'CHW'
+    if channel_first:
+        mean = np.float32(np.array(mean).reshape(-1, 1, 1))
+        std = np.float32(np.array(std).reshape(-1, 1, 1))
+    else:
+        mean = np.float32(np.array(mean).reshape(1, 1, -1))
+        std = np.float32(np.array(std).reshape(1, 1, -1))
+    if to_rgb:
+        # BGR -> RGB is a reversal of the channel axis. Doing it as a view
+        # along the axis selected by data_format avoids overwriting the
+        # caller's array and also works for channel-first input.
+        img = img[::-1] if channel_first else img[..., ::-1]
+
+    img = (img - mean) / std
+    return img
--- a/python/paddle/vision/transforms/functional_pil.py
+++ b/python/paddle/vision/transforms/functional_pil.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import division
+
+import sys
+import math
+import numbers
+import warnings
+import collections
+from PIL import Image, ImageOps, ImageEnhance
+
+import numpy as np
+from numpy import sin, cos, tan
+import paddle
+
+# Alias the abstract container types from wherever this interpreter
+# version exposes them.
+if sys.version_info >= (3, 3):
+    Sequence = collections.abc.Sequence
+    Iterable = collections.abc.Iterable
+else:
+    Sequence = collections.Sequence
+    Iterable = collections.Iterable
+
+# Maps the interpolation names accepted by this backend to PIL filters.
+_pil_interp_from_str = dict(
+    nearest=Image.NEAREST,
+    bilinear=Image.BILINEAR,
+    bicubic=Image.BICUBIC,
+    box=Image.BOX,
+    lanczos=Image.LANCZOS,
+    hamming=Image.HAMMING)
+
+
+def to_tensor(pic, data_format='CHW'):
+    """Converts a ``PIL.Image`` to paddle.Tensor.
+
+    See ``ToTensor`` for more details.
+
+    Args:
+        pic (PIL.Image): Image to be converted to tensor.
+        data_format (str, optional): Data format of img, should be 'HWC' or 
+            'CHW'. Default: 'CHW'.
+
+    Returns:
+        Tensor: Converted image. uint8 data is cast to float32 and divided
+            by 255.
+
+    Raises:
+        ValueError: If ``data_format`` is neither 'CHW' nor 'HWC'.
+
+    """
+
+    if data_format not in ['CHW', 'HWC']:
+        raise ValueError('data_format should be CHW or HWC. Got {}'.format(
+            data_format))
+
+    # PIL Image
+    # np.asarray copies only when it has to. ``np.array(..., copy=False)``
+    # raises on NumPy >= 2 whenever a copy cannot be avoided.
+    if pic.mode == 'I':
+        img = paddle.to_tensor(np.asarray(pic, np.int32))
+    elif pic.mode == 'I;16':
+        # cast and reshape not support int16
+        img = paddle.to_tensor(np.asarray(pic, np.int32))
+    elif pic.mode == 'F':
+        img = paddle.to_tensor(np.asarray(pic, np.float32))
+    elif pic.mode == '1':
+        # Scale the 0/1 values of bilevel images to 0/255.
+        img = 255 * paddle.to_tensor(np.asarray(pic, np.uint8))
+    else:
+        img = paddle.to_tensor(np.asarray(pic))
+
+    # The mode string has one character per channel except for these two.
+    if pic.mode == 'YCbCr':
+        nchannel = 3
+    elif pic.mode == 'I;16':
+        nchannel = 1
+    else:
+        nchannel = len(pic.mode)
+
+    dtype = paddle.fluid.data_feeder.convert_dtype(img.dtype)
+    if dtype == 'uint8':
+        img = paddle.cast(img, np.float32) / 255.
+
+    # pic.size is (width, height); the tensor is laid out as HWC first.
+    img = img.reshape([pic.size[1], pic.size[0], nchannel])
+
+    if data_format == 'CHW':
+        img = img.transpose([2, 0, 1])
+
+    return img
+
+
+def resize(img, size, interpolation='bilinear'):
+    """
+    Resizes the image to the given size.
+
+    Args:
+        img (PIL.Image): Image to be resized.
+        size (int|list|tuple): Target size. A pair is read as
+            (height, width). A single int is the target length of the
+            shorter side; the other side is scaled to keep the aspect ratio.
+        interpolation (str, optional): Interpolation method, looked up by
+            name. Supported names are: 
+            - "nearest": Image.NEAREST, 
+            - "bilinear": Image.BILINEAR, 
+            - "bicubic": Image.BICUBIC, 
+            - "box": Image.BOX, 
+            - "lanczos": Image.LANCZOS, 
+            - "hamming": Image.HAMMING
+
+    Returns:
+        PIL.Image: Resized image. The input image itself is returned when
+            an int ``size`` already equals its shorter side.
+
+    Raises:
+        TypeError: If ``size`` is neither an int nor an iterable of length 2.
+
+    """
+
+    size_is_int = isinstance(size, int)
+    if not size_is_int and not (isinstance(size, Iterable) and
+                                len(size) == 2):
+        raise TypeError('Got inappropriate size arg: {}'.format(size))
+
+    if not size_is_int:
+        # PIL expects (width, height), the reverse of our (height, width).
+        return img.resize(size[::-1], _pil_interp_from_str[interpolation])
+
+    w, h = img.size
+    if min(w, h) == size:
+        return img
+    if w < h:
+        target = (size, int(size * h / w))
+    else:
+        target = (int(size * w / h), size)
+    return img.resize(target, _pil_interp_from_str[interpolation])
+
+
+def pad(img, padding, fill=0, padding_mode='constant'):
+    """
+    Adds a border around the given PIL.Image using the chosen padding mode.
+
+    Args:
+        img (PIL.Image): Image to be padded.
+        padding (int|list|tuple): Border width. A single int pads all four
+            borders equally. A pair is (left/right, top/bottom). Four
+            values are (left, top, right, bottom).
+        fill (float, optional): Pixel fill value for constant fill. If a tuple of
+            length 3, it is used to fill R, G, B channels respectively.
+            This value is only used when the padding_mode is constant. Default: 0. 
+        padding_mode: Type of padding. Should be: constant, edge, reflect or symmetric. Default: 'constant'.
+
+            - constant: pads with a constant value, this value is specified with fill
+
+            - edge: pads with the last value on the edge of the image
+
+            - reflect: pads with reflection of image (without repeating the last value on the edge)
+
+                       padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode
+                       will result in [3, 2, 1, 2, 3, 4, 3, 2]
+
+            - symmetric: pads with reflection of image (repeating the last value on the edge)
+
+                         padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode
+                         will result in [2, 1, 1, 2, 3, 4, 4, 3]
+
+    Returns:
+        PIL.Image: Padded image.
+
+    Raises:
+        TypeError: If ``padding``, ``fill`` or ``padding_mode`` has an
+            unsupported type.
+        ValueError: If ``padding`` is a sequence whose length is not 2 or 4.
+
+    """
+
+    if not isinstance(padding, (numbers.Number, list, tuple)):
+        raise TypeError('Got inappropriate padding arg')
+    if not isinstance(fill, (numbers.Number, str, list, tuple)):
+        raise TypeError('Got inappropriate fill arg')
+    if not isinstance(padding_mode, str):
+        raise TypeError('Got inappropriate padding_mode arg')
+
+    if isinstance(padding, Sequence) and len(padding) not in [2, 4]:
+        raise ValueError(
+            "Padding must be an int or a 2, or 4 element tuple, not a " +
+            "{} element tuple".format(len(padding)))
+
+    assert padding_mode in ['constant', 'edge', 'reflect', 'symmetric'], \
+        'Padding mode should be either constant, edge, reflect or symmetric'
+
+    if isinstance(padding, list):
+        padding = tuple(padding)
+
+    # Resolve the per-border widths.
+    if isinstance(padding, int):
+        pad_left = pad_right = pad_top = pad_bottom = padding
+    elif isinstance(padding, Sequence) and len(padding) == 2:
+        pad_left, pad_top = padding[0], padding[1]
+        pad_right, pad_bottom = pad_left, pad_top
+    elif isinstance(padding, Sequence) and len(padding) == 4:
+        pad_left, pad_top = padding[0], padding[1]
+        pad_right, pad_bottom = padding[2], padding[3]
+
+    has_palette = img.mode == 'P'
+
+    if padding_mode == 'constant':
+        # Palette images get their palette re-attached after expansion.
+        if has_palette:
+            palette = img.getpalette()
+        expanded = ImageOps.expand(img, border=padding, fill=fill)
+        if has_palette:
+            expanded.putpalette(palette)
+        return expanded
+
+    # The remaining modes are delegated to np.pad on the pixel array.
+    if has_palette:
+        palette = img.getpalette()
+    pixels = np.asarray(img)
+    spatial = ((pad_top, pad_bottom), (pad_left, pad_right))
+
+    if has_palette:
+        padded = Image.fromarray(np.pad(pixels, spatial, padding_mode))
+        padded.putpalette(palette)
+        return padded
+
+    if len(pixels.shape) == 3:
+        # Multi-channel image: the channel axis is not padded.
+        pixels = np.pad(pixels, spatial + ((0, 0), ), padding_mode)
+    elif len(pixels.shape) == 2:
+        # Grayscale image
+        pixels = np.pad(pixels, spatial, padding_mode)
+
+    return Image.fromarray(pixels)
+
+
+def crop(img, top, left, height, width):
+    """Cuts a rectangular box out of the given PIL Image.
+
+    Args:
+        img (PIL.Image): Image to be cropped. (0,0) denotes the top left 
+            corner of the image.
+        top (int): Vertical component of the top left corner of the crop box.
+        left (int): Horizontal component of the top left corner of the crop box.
+        height (int): Height of the crop box.
+        width (int): Width of the crop box.
+
+    Returns:
+        PIL.Image: Cropped image.
+
+    """
+    # The box is handed to img.crop as (left, upper, right, lower).
+    right = left + width
+    bottom = top + height
+    return img.crop((left, top, right, bottom))
+
+
+def center_crop(img, output_size):
+    """Crops the central region of the given PIL Image.
+
+    Args:
+        img (PIL.Image): Image to be cropped. (0,0) denotes the top left
+            corner of the image.
+        output_size (sequence or int): (height, width) of the crop box. If
+            a single number is given, it is used for both directions.
+
+    Returns:
+        PIL.Image: Cropped image.
+
+    """
+
+    if isinstance(output_size, numbers.Number):
+        side = int(output_size)
+        output_size = (side, side)
+
+    # img.size is (width, height) while output_size is (height, width).
+    img_w, img_h = img.size
+    box_h, box_w = output_size
+    top = int(round((img_h - box_h) / 2.))
+    left = int(round((img_w - box_w) / 2.))
+    return crop(img, top, left, box_h, box_w)
+
+
+def hflip(img):
+    """Mirrors the given PIL Image left to right.
+
+    Args:
+        img (PIL.Image): Image to be flipped.
+
+    Returns:
+        PIL.Image:  Horizontally flipped image.
+
+    """
+
+    method = Image.FLIP_LEFT_RIGHT
+    return img.transpose(method)
+
+
+def vflip(img):
+    """Mirrors the given PIL Image top to bottom.
+
+    Args:
+        img (PIL.Image): Image to be flipped.
+
+    Returns:
+        PIL.Image:  Vertically flipped image.
+
+    """
+
+    method = Image.FLIP_TOP_BOTTOM
+    return img.transpose(method)
+
+
+def adjust_brightness(img, brightness_factor):
+    """Changes the brightness of a PIL Image via ``ImageEnhance.Brightness``.
+
+    Args:
+        img (PIL.Image): PIL Image to be adjusted.
+        brightness_factor (float):  How much to adjust the brightness. Can be
+            any non negative number. 0 gives a black image, 1 gives the
+            original image while 2 increases the brightness by a factor of 2.
+
+    Returns:
+        PIL.Image: Brightness adjusted image.
+
+    """
+
+    return ImageEnhance.Brightness(img).enhance(brightness_factor)
+
+
+def adjust_contrast(img, contrast_factor):
+    """Changes the contrast of a PIL Image via ``ImageEnhance.Contrast``.
+
+    Args:
+        img (PIL.Image): PIL Image to be adjusted.
+        contrast_factor (float): How much to adjust the contrast. Can be any
+            non negative number. 0 gives a solid gray image, 1 gives the
+            original image while 2 increases the contrast by a factor of 2.
+
+    Returns:
+        PIL.Image: Contrast adjusted image.
+
+    """
+
+    return ImageEnhance.Contrast(img).enhance(contrast_factor)
+
+
+def adjust_saturation(img, saturation_factor):
+    """Changes the color saturation of a PIL Image via ``ImageEnhance.Color``.
+
+    Args:
+        img (PIL.Image): PIL Image to be adjusted.
+        saturation_factor (float):  How much to adjust the saturation. 0 will
+            give a black and white image, 1 will give the original image while
+            2 will enhance the saturation by a factor of 2.
+
+    Returns:
+        PIL.Image: Saturation adjusted image.
+
+    """
+
+    return ImageEnhance.Color(img).enhance(saturation_factor)
+
+
+def adjust_hue(img, hue_factor):
+    """Adjusts hue of an image.
+
+    The image hue is adjusted by converting the image to HSV and
+    cyclically shifting the intensities in the hue channel (H).
+    The image is then converted back to original image mode.
+
+    `hue_factor` is the amount of shift in H channel and must be in the
+    interval `[-0.5, 0.5]`.
+
+    Args:
+        img (PIL.Image): PIL Image to be adjusted.
+        hue_factor (float):  How much to shift the hue channel. Should be in
+            [-0.5, 0.5]. 0.5 and -0.5 give complete reversal of hue channel in
+            HSV space in positive and negative direction respectively.
+            0 means no shift. Therefore, both -0.5 and 0.5 will give an image
+            with complementary colors while 0 gives the original image.
+
+    Returns:
+        PIL.Image: Hue adjusted image. Images in mode 'L', '1', 'I' or 'F'
+            are returned unchanged.
+
+    Raises:
+        ValueError: If ``hue_factor`` is outside [-0.5, 0.5].
+
+    """
+    if not (-0.5 <= hue_factor <= 0.5):
+        raise ValueError('hue_factor ({}) is not in [-0.5, 0.5].'.format(
+            hue_factor))
+
+    input_mode = img.mode
+    if input_mode in {'L', '1', 'I', 'F'}:
+        return img
+
+    h, s, v = img.convert('HSV').split()
+
+    np_h = np.array(h, dtype=np.uint8)
+    # Reduce the shift modulo 256 in Python integers first: converting a
+    # negative float straight to uint8 is not a well-defined cast.
+    shift = np.uint8(int(hue_factor * 255) % 256)
+    # uint8 addition take cares of rotation across boundaries
+    with np.errstate(over='ignore'):
+        np_h += shift
+    h = Image.fromarray(np_h, 'L')
+
+    img = Image.merge('HSV', (h, s, v)).convert(input_mode)
+    return img
+
+
+def rotate(img, angle, resample=False, expand=False, center=None, fill=0):
+    """Rotates the image by angle.
+
+    Args:
+        img (PIL.Image): Image to be rotated.
+        angle (float or int): In degrees degrees counter clockwise order.
+        resample (int|str, optional): An optional resampling filter, given
+            either as a PIL filter constant or by name. If omitted it is
+            PIL.Image.NEAREST. Names are looked up in the same table as for
+            ``resize``, e.g.: 
+            - "nearest": Image.NEAREST, 
+            - "bilinear": Image.BILINEAR, 
+            - "bicubic": Image.BICUBIC
+        expand (bool, optional): Optional expansion flag.
+            If true, expands the output image to make it large enough to hold the entire rotated image.
+            If false or omitted, make the output image the same size as the input image.
+            Note that the expand flag assumes rotation around the center and no translation.
+        center (2-tuple, optional): Optional center of rotation.
+            Origin is the upper left corner.
+            Default is the center of the image.
+        fill (3-tuple or int): Pixel fill value for area outside the rotated image.
+            If int, it is used for all channels respectively.
+
+    Returns:
+        PIL.Image: Rotated image.
+
+    """
+
+    # Translate the documented filter names; constants pass through as-is.
+    if isinstance(resample, str):
+        resample = _pil_interp_from_str[resample]
+
+    # Widen an int fill to one value per colour channel, but keep it a
+    # scalar for single-band images, which do not accept a 3-tuple colour.
+    if isinstance(fill, int) and len(img.getbands()) > 1:
+        fill = tuple([fill] * 3)
+
+    return img.rotate(angle, resample, expand, center, fillcolor=fill)
+
+
+def to_grayscale(img, num_output_channels=1):
+    """Produces the grayscale version of a PIL Image.
+
+    Args:
+        img (PIL.Image): Image to be converted to grayscale.
+        num_output_channels (int, optional): Number of channels of the
+            result, either 1 or 3. Default: 1.
+
+    Returns:
+        PIL.Image: Grayscale version of the image.
+            if num_output_channels = 1 : returned image is single channel
+
+            if num_output_channels = 3 : returned image is 3 channel with r = g = b
+
+    Raises:
+        ValueError: If ``num_output_channels`` is neither 1 nor 3.
+
+    """
+
+    if num_output_channels not in (1, 3):
+        raise ValueError('num_output_channels should be either 1 or 3')
+
+    gray = img.convert('L')
+    if num_output_channels == 1:
+        return gray
+
+    # Replicate the single luminance plane into three identical channels.
+    plane = np.array(gray, dtype=np.uint8)
+    stacked = np.dstack([plane, plane, plane])
+    return Image.fromarray(stacked, 'RGB')
--- a/python/paddle/vision/transforms/functional_tensor.py
+++ b/python/paddle/vision/transforms/functional_tensor.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import division
+
+import paddle
+
+
+def normalize(img, mean, std, data_format='CHW'):
+    """Subtracts the per-channel mean from a tensor image and divides by std.
+
+    Args:
+        img (paddle.Tensor): input data to be normalized.
+        mean (list|tuple): Sequence of means for each channel.
+        std (list|tuple): Sequence of standard deviations for each channel.
+        data_format (str, optional): Data format of img, should be 'HWC' or 
+            'CHW'. Default: 'CHW'.
+
+    Returns:
+        Tensor: Normalized image.
+
+    """
+    channel_first = data_format == 'CHW'
+
+    # For 'CHW' the statistics are reshaped to [-1, 1, 1] so they line up
+    # with the leading axis; otherwise they are used as flat tensors.
+    mean_t = paddle.to_tensor(mean)
+    if channel_first:
+        mean_t = mean_t.reshape([-1, 1, 1])
+    std_t = paddle.to_tensor(std)
+    if channel_first:
+        std_t = std_t.reshape([-1, 1, 1])
+
+    return (img - mean_t) / std_t
--- a/python/paddle/vision/transforms/transforms.py
+++ b/python/paddle/vision/transforms/transforms.py