diff --git a/docs/source/index.rst b/docs/source/index.rst index 05366a032b3a548263d1a1fa22b7226d8acbceab..8b4ab8f0b68697820f6ffab30e59a5921bf9cca2 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -20,6 +20,8 @@ OneFlow API Reference linalg image optim + utils + Indices and tables diff --git a/docs/source/utils.rst b/docs/source/utils.rst new file mode 100644 index 0000000000000000000000000000000000000000..d9452aaa99a1a048ac0b7437c66ebb4013ab3e8a --- /dev/null +++ b/docs/source/utils.rst @@ -0,0 +1,62 @@ +oneflow.utils +=================================== +Utils +---------------------------------- +.. currentmodule:: oneflow.utils +.. automodule:: oneflow.utils.data + :members: DataLoader, + Dataset, + IterableDataset, + TensorDataset, + ConcatDataset, + Subset, + random_split, + Sampler, + SequentialSampler, + RandomSampler, + SubsetRandomSampler, + BatchSampler + +.. currentmodule:: oneflow.utils +.. automodule:: oneflow.utils.data.distributed + :members: DistributedSampler + +.. currentmodule:: oneflow.utils +.. automodule:: oneflow.utils.vision.datasets + :members: MNIST, + FashionMNIST, + CIFAR10, + CIFAR100, + ImageNet, + CocoCaptions, + CocoDetection, + VOCDetection, + VOCSegmentation, + DatasetFolder, + ImageFolder + +.. currentmodule:: oneflow.utils +.. automodule:: oneflow.utils.vision.transforms + :members: Compose, + ToTensor, + PILToTensor, + ConvertImageDtype, + ToPILImage, + Normalize, + Resize, + Scale, + CenterCrop, + Pad, + Lambda, + RandomTransforms, + RandomApply, + RandomOrder, + RandomChoice, + RandomCrop, + RandomHorizontalFlip, + RandomVerticalFlip, + RandomResizedCrop, + RandomSizedCrop, + FiveCrop, + TenCrop, + InterpolationMode diff --git a/python/oneflow/test/dataloader/data_utils.py b/python/oneflow/test/dataloader/data_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..e747db5063820223c93bb2c2f2eb0fdb1d9e9b40 --- /dev/null +++ b/python/oneflow/test/dataloader/data_utils.py @@ -0,0 +1,148 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +import os +import oneflow as flow +import oneflow.utils.vision.transforms as transforms + + +def load_data_cifar10( + batch_size, + data_dir="./data-test/cifar10", + download=True, + transform=None, + source_url=None, + num_workers=0, +): + cifar10_train = flow.utils.vision.datasets.CIFAR10( + root=data_dir, + train=True, + download=download, + transform=transform, + source_url=source_url, + ) + cifar10_test = flow.utils.vision.datasets.CIFAR10( + root=data_dir, + train=False, + download=download, + transform=transform, + source_url=source_url, + ) + + train_iter = flow.utils.data.DataLoader( + cifar10_train, batch_size=batch_size, shuffle=True, num_workers=num_workers + ) + test_iter = flow.utils.data.DataLoader( + cifar10_test, batch_size=batch_size, shuffle=False, num_workers=num_workers + ) + return train_iter, test_iter + + +def load_data_mnist( + batch_size, resize=None, root="./data/mnist", download=True, source_url=None +): + """Download the MNIST dataset and then load into memory.""" + root = os.path.expanduser(root) + transformer = [] + if resize: + transformer += [transforms.Resize(resize)] + transformer += [transforms.ToTensor()] + transformer = transforms.Compose(transformer) + + mnist_train = flow.utils.vision.datasets.MNIST( + root=root, + train=True, + transform=transformer, + download=download, + source_url=source_url, + ) + mnist_test = flow.utils.vision.datasets.MNIST( + root=root, + train=False, + transform=transformer, + download=download, + source_url=source_url, + ) + train_iter = flow.utils.data.DataLoader(mnist_train, batch_size, shuffle=True) + test_iter = flow.utils.data.DataLoader(mnist_test, batch_size, shuffle=False) + return train_iter, test_iter + + +def get_fashion_mnist_dataset( + resize=None, root="./data-test/fashion-mnist", download=True, source_url=None, +): + root = os.path.expanduser(root) + trans = [] + if resize: + trans.append(transforms.Resize(resize)) + trans.append(transforms.ToTensor()) + transform = transforms.Compose(trans) + + mnist_train = flow.utils.vision.datasets.FashionMNIST( + root=root, + train=True, + transform=transform, + download=download, + source_url=source_url, + ) + mnist_test = flow.utils.vision.datasets.FashionMNIST( + root=root, + train=False, + transform=transform, + download=download, + source_url=source_url, + ) + return mnist_train, mnist_test + + +# reference: http://tangshusen.me/Dive-into-DL-PyTorch/#/chapter03_DL-basics/3.10_mlp-pytorch +def load_data_fashion_mnist( + batch_size, + resize=None, + root="./data-test/fashion-mnist", + download=True, + source_url=None, + num_workers=0, +): + """Download the Fashion-MNIST dataset and then load into memory.""" + root = os.path.expanduser(root) + trans = [] + if resize: + trans.append(transforms.Resize(resize)) + trans.append(transforms.ToTensor()) + transform = transforms.Compose(trans) + + mnist_train = flow.utils.vision.datasets.FashionMNIST( + root=root, + train=True, + transform=transform, + download=download, + source_url=source_url, + ) + mnist_test = flow.utils.vision.datasets.FashionMNIST( + root=root, + train=False, + transform=transform, + download=download, + source_url=source_url, + ) + + train_iter = flow.utils.data.DataLoader( + mnist_train, batch_size, shuffle=True, num_workers=num_workers + ) + test_iter = flow.utils.data.DataLoader( + mnist_test, batch_size, shuffle=False, num_workers=num_workers + ) + return train_iter, test_iter diff --git a/python/oneflow/test/dataloader/test_cifar_dataset.py 
b/python/oneflow/test/dataloader/test_cifar_dataset.py index dd057a587a440d41cfb20fbc2d05bb717596600f..f92a1f4f95b259b33b0d69ac14bdec8412c23802 100644 --- a/python/oneflow/test/dataloader/test_cifar_dataset.py +++ b/python/oneflow/test/dataloader/test_cifar_dataset.py @@ -20,6 +20,7 @@ import oneflow.unittest import oneflow as flow import oneflow.nn as nn import oneflow.optim as optim +from data_utils import load_data_cifar10 classes = ( @@ -81,21 +82,19 @@ def test(test_case): os.getenv("ONEFLOW_TEST_CACHE_DIR", "./data-test"), "cifar10" ) - trainset = flow.utils.vision.datasets.CIFAR10( - root=data_dir, - train=True, + train_iter, test_iter = load_data_cifar10( + batch_size=batch_size, + data_dir=data_dir, download=True, transform=transform, source_url="https://oneflow-public.oss-cn-beijing.aliyuncs.com/datasets/cifar/cifar-10-python.tar.gz", - ) - trainloader = flow.utils.data.DataLoader( - trainset, batch_size=batch_size, shuffle=False, num_workers=0 + num_workers=0, ) final_loss = 0 for epoch in range(1, train_epoch + 1): # loop over the dataset multiple times running_loss = 0.0 - for i, data in enumerate(trainloader, 1): + for i, data in enumerate(train_iter, 1): # get the inputs; data is a list of [inputs, labels] inputs, labels = data inputs = inputs.to(dtype=flow.float32, device=device) @@ -130,10 +129,3 @@ class TestCifarDataset(flow.unittest.TestCase): if __name__ == "__main__": unittest.main() - # 1 epoch training log - # epoch: 1 step: 2000 loss: 2.107 - # epoch: 1 step: 4000 loss: 1.838 - # epoch: 1 step: 6000 loss: 1.644 - # epoch: 1 step: 8000 loss: 1.535 - # epoch: 1 step: 10000 loss: 1.528 - # epoch: 1 step: 12000 loss: 1.476 diff --git a/python/oneflow/test/dataloader/test_fashion_mnist_dataset.py b/python/oneflow/test/dataloader/test_fashion_mnist_dataset.py index 83dbc194b48a7d37dcb68f25b17004ad50029884..257391aa02e32ce41060a2e32acb451cf30b6401 100644 --- a/python/oneflow/test/dataloader/test_fashion_mnist_dataset.py +++ b/python/oneflow/test/dataloader/test_fashion_mnist_dataset.py @@ -20,42 +20,7 @@ import time import oneflow.unittest import oneflow as flow import oneflow.nn as nn - - -# reference: http://tangshusen.me/Dive-into-DL-PyTorch/#/chapter03_DL-basics/3.10_mlp-pytorch -def load_data_fashion_mnist( - batch_size, resize=None, root="./data/fashion-mnist", download=True, source_url=None -): - """Download the Fashion-MNIST dataset and then load into memory.""" - root = os.path.expanduser(root) - transformer = [] - if resize: - transformer += [flow.utils.vision.transforms.Resize(resize)] - transformer += [flow.utils.vision.transforms.ToTensor()] - transformer = flow.utils.vision.transforms.Compose(transformer) - - mnist_train = flow.utils.vision.datasets.FashionMNIST( - root=root, - train=True, - transform=transformer, - download=download, - source_url=source_url, - ) - mnist_test = flow.utils.vision.datasets.FashionMNIST( - root=root, - train=False, - transform=transformer, - download=download, - source_url=source_url, - ) - num_workers = 0 - train_iter = flow.utils.data.DataLoader( - mnist_train, batch_size, shuffle=True, num_workers=num_workers - ) - test_iter = flow.utils.data.DataLoader( - mnist_test, batch_size, shuffle=False, num_workers=num_workers - ) - return train_iter, test_iter +from data_utils import load_data_fashion_mnist def get_fashion_mnist_labels(labels): @@ -124,7 +89,7 @@ def test(test_case): ) source_url = "https://oneflow-public.oss-cn-beijing.aliyuncs.com/datasets/mnist/Fashion-MNIST/" train_iter, test_iter = load_data_fashion_mnist( - 
batch_size, root=data_dir, download=True, source_url=source_url + batch_size, resize=None, root=data_dir, download=True, source_url=source_url ) loss = nn.CrossEntropyLoss() loss.to(device) @@ -174,6 +139,3 @@ class TestFashionMnistDataset(flow.unittest.TestCase): if __name__ == "__main__": unittest.main() - # 1 epoch training log - # epoch 1, loss 0.0034, train acc 0.718, test acc 0.771, cost >>>>>>> 158.32699990272522(s) - # epoch 2, loss 0.0022, train acc 0.807, test acc 0.726, cost >>>>>>> 159.64465260505676(s) diff --git a/python/oneflow/test/dataloader/test_lenet.py b/python/oneflow/test/dataloader/test_lenet.py index 51a929a518feafdf9435fe40dc95eb437441ca8f..1e831aab413ac2543e11f3d3e514a3cde4d214c5 100644 --- a/python/oneflow/test/dataloader/test_lenet.py +++ b/python/oneflow/test/dataloader/test_lenet.py @@ -20,6 +20,7 @@ import unittest import oneflow as flow import oneflow.nn as nn import oneflow.unittest +from data_utils import load_data_fashion_mnist # reference: http://tangshusen.me/Dive-into-DL-PyTorch/#/chapter05_CNN/5.5_lenet @@ -49,46 +50,6 @@ class LeNet(nn.Module): return output -def load_data_fashion_mnist( - batch_size, - resize=None, - root="./data-test/fashion-mnist", - download=True, - source_url=None, - num_workers=0, -): - """Download the Fashion-MNIST dataset and then load into memory.""" - root = os.path.expanduser(root) - trans = [] - if resize: - trans.append(flow.utils.vision.transforms.Resize(resize)) - trans.append(flow.utils.vision.transforms.ToTensor()) - transform = flow.utils.vision.transforms.Compose(trans) - - mnist_train = flow.utils.vision.datasets.FashionMNIST( - root=root, - train=True, - transform=transform, - download=download, - source_url=source_url, - ) - mnist_test = flow.utils.vision.datasets.FashionMNIST( - root=root, - train=False, - transform=transform, - download=download, - source_url=source_url, - ) - - train_iter = flow.utils.data.DataLoader( - mnist_train, batch_size, shuffle=True, num_workers=num_workers - ) - test_iter = flow.utils.data.DataLoader( - mnist_test, batch_size, shuffle=False, num_workers=num_workers - ) - return train_iter, test_iter - - def evaluate_accuracy(data_iter, net, device=None): if device is None and isinstance(net, nn.Module): device = list(net.parameters())[0].device @@ -176,8 +137,3 @@ class TestLenet(flow.unittest.TestCase): if __name__ == "__main__": unittest.main() - # 1 epoch training log - # epoch 1, loss 1.1473, train acc 0.569, test acc 0.742, time 162.4 sec - # epoch 2, loss 0.5736, train acc 0.784, test acc 0.796, time 158.1 sec - # epoch 3, loss 0.4761, train acc 0.826, test acc 0.821, time 154.0 sec - # epoch 4, loss 0.4215, train acc 0.848, test acc 0.855, time 160.3 sec diff --git a/python/oneflow/test/dataloader/test_mnist_dataset.py b/python/oneflow/test/dataloader/test_mnist_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..30cb54713df736185ab4cc4e3b3d60558e3dcc0c --- /dev/null +++ b/python/oneflow/test/dataloader/test_mnist_dataset.py @@ -0,0 +1,119 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +""" +import os +import unittest + +import oneflow.unittest +import oneflow as flow +import oneflow.nn as nn +import oneflow.utils.vision.transforms as transforms +from data_utils import load_data_mnist + + +data_dir = os.path.join( + os.getenv("ONEFLOW_TEST_CACHE_DIR", "./data-test"), "mnist-dataset" +) +train_iter, test_iter = load_data_mnist( + batch_size=128, + download=True, + root=data_dir, + source_url="https://oneflow-public.oss-cn-beijing.aliyuncs.com/datasets/mnist/MNIST/", +) + + +def evaluate_accuracy(data_iter, net, device=None): + n_correct, n_samples = 0.0, 0 + net.to(device) + net.eval() + with flow.no_grad(): + for images, labels in data_iter: + images = images.reshape(-1, 28 * 28) + images = images.to(device=device) + labels = labels.to(device=device) + n_correct += (net(images).argmax(dim=1).numpy() == labels.numpy()).sum() + n_samples += images.shape[0] + net.train() + return n_correct / n_samples + + +class Net(nn.Module): + def __init__( + self, input_size=784, hidden_size1=128, hidden_size2=64, num_classes=10 + ): + super(Net, self).__init__() + self.l1 = nn.Linear(input_size, hidden_size1) + self.relu1 = nn.ReLU() + self.l2 = nn.Linear(hidden_size1, hidden_size2) + self.relu2 = nn.ReLU() + self.l3 = nn.Linear(hidden_size2, num_classes) + + def forward(self, x): + out = self.l1(x) + out = self.relu1(out) + out = self.l2(out) + out = self.relu2(out) + out = self.l3(out) + return out + + +def test_train_and_eval(test_case): + if os.getenv("ONEFLOW_TEST_CPU_ONLY"): + device = flow.device("cpu") + else: + device = flow.device("cuda") + + model = Net() + model.to(device) + + loss = nn.CrossEntropyLoss().to(device) + optimizer = flow.optim.SGD(model.parameters(), lr=0.10) + + num_epochs = 1 + for epoch in range(num_epochs): + train_loss, n_correct, n_samples = 0.0, 0.0, 0 + for images, labels in train_iter: + images = images.reshape(-1, 28 * 28) + images = images.to(device=device) + labels = labels.to(device=device) + features = model(images) + l = loss(features, labels).sum() + optimizer.zero_grad() + l.backward() + optimizer.step() + + train_loss += l.numpy() + n_correct += (features.argmax(dim=1).numpy() == labels.numpy()).sum() + n_samples += images.shape[0] + if n_samples > 2000: + break + + test_acc = evaluate_accuracy(test_iter, model, device) + train_acc = n_correct / n_samples + print( + "epoch %d, train loss %.4f, train acc %.3f, test acc %.3f" + % (epoch + 1, train_loss / n_samples, train_acc, test_acc) + ) + # test_case.assertLess(0.8, test_acc) + + +@flow.unittest.skip_unless_1n1d() +class TestMnistDataset(flow.unittest.TestCase): + def test_mnist_dataset(test_case): + test_train_and_eval(test_case) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/oneflow/test/dataloader/test_transforms.py b/python/oneflow/test/dataloader/test_transforms.py new file mode 100644 index 0000000000000000000000000000000000000000..5d0737b00f5b414dcf2c318a79dd1575060bec94 --- /dev/null +++ b/python/oneflow/test/dataloader/test_transforms.py @@ -0,0 +1,123 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +import os +import unittest + +import oneflow as flow +import oneflow.nn as nn +import oneflow.optim as optim +import oneflow.utils.vision.transforms as transforms +import oneflow.unittest +from data_utils import load_data_cifar10 + + +class Net(nn.Module): + def __init__(self): + super().__init__() + self.conv1 = nn.Conv2d(3, 6, 5) + self.pool = nn.MaxPool2d(2, 2) + self.conv2 = nn.Conv2d(6, 16, 5) + self.fc1 = nn.Linear(16 * 5 * 5, 120) + self.fc2 = nn.Linear(120, 84) + self.fc3 = nn.Linear(84, 10) + + def forward(self, x): + x = self.pool(flow.F.relu(self.conv1(x))) + x = self.pool(flow.F.relu(self.conv2(x))) + x = flow.flatten(x, 1) # flatten all dimensions except batch + x = flow.F.relu(self.fc1(x)) + x = flow.F.relu(self.fc2(x)) + x = self.fc3(x) + return x + + +def test(test_case): + if os.getenv("ONEFLOW_TEST_CPU_ONLY"): + device = flow.device("cpu") + else: + device = flow.device("cuda") + net = Net() + net.to(device) + + optimizer = optim.SGD(net.parameters(), lr=0.002, momentum=0.9) + criterion = nn.CrossEntropyLoss() + criterion.to(device) + + transform = flow.utils.vision.transforms.Compose( + [ + transforms.Pad(10), + transforms.RandomHorizontalFlip(p=0.5), + transforms.RandomVerticalFlip(p=0.5), + transforms.CenterCrop(32), + transforms.Resize([32, 32]), + transforms.ToTensor(), + transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), + ] + ) + + train_epoch = 1 + batch_size = 4 + data_dir = os.path.join( + os.getenv("ONEFLOW_TEST_CACHE_DIR", "./data-test"), "cifar10" + ) + + train_iter, test_iter = load_data_cifar10( + batch_size=batch_size, + data_dir=data_dir, + download=True, + transform=transform, + source_url="https://oneflow-public.oss-cn-beijing.aliyuncs.com/datasets/cifar/cifar-10-python.tar.gz", + num_workers=0, + ) + + final_loss = 0 + for epoch in range(1, train_epoch + 1): # loop over the dataset multiple times + running_loss = 0.0 + for i, data in enumerate(train_iter, 1): + # get the inputs; data is a list of [inputs, labels] + inputs, labels = data + inputs = inputs.to(dtype=flow.float32, device=device) + labels = labels.to(dtype=flow.int64, device=device) + + # zero the parameter gradients + optimizer.zero_grad() + + # forward + backward + optimize + outputs = net(inputs) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + # print statistics + running_loss += loss.numpy() + # print every 2000 mini-batches + if i % 2000 == 0: + final_loss = running_loss / 2000 + print("epoch: %d step: %5d loss: %.3f " % (epoch, i, final_loss)) + running_loss = 0.0 + + print("final loss : ", final_loss) + # test_case.assertLess(final_loss, 1.79) + + +@flow.unittest.skip_unless_1n1d() +class TestCifarDataset(flow.unittest.TestCase): + def test_cifar_dataset(test_case): + test(test_case) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/oneflow/utils/data/__init__.py b/python/oneflow/utils/data/__init__.py index f8ff04ba35c444f98f3eeaf0dd31fcdec9df3e9d..fb8219c4846dad972d0fa707e4f14b135ddbc670 100644 --- a/python/oneflow/utils/data/__init__.py +++ b/python/oneflow/utils/data/__init__.py @@ -35,6 +35,7 @@ from 
oneflow.utils.data.decorator import ( guaranteed_datapipes_determinism, non_deterministic, ) +from oneflow.utils.data.distributed import DistributedSampler __all__ = [ @@ -55,4 +56,5 @@ __all__ = [ "functional_datapipe", "guaranteed_datapipes_determinism", "non_deterministic", + "DistributedSampler", ] diff --git a/python/oneflow/utils/data/dataloader.py b/python/oneflow/utils/data/dataloader.py index 6d2051c6e47c62c3d4c18758f004196509a05f7e..003353b4727054eec89d21f79427a721d5702e3d 100644 --- a/python/oneflow/utils/data/dataloader.py +++ b/python/oneflow/utils/data/dataloader.py @@ -163,9 +163,7 @@ class DataLoader(Generic[T_co]): .. warning:: If the ``spawn`` start method is used, :attr:`worker_init_fn` - cannot be an unpicklable object, e.g., a lambda function. See - :ref:`multiprocessing-best-practices` on more details related - to multiprocessing in OneFlow. + cannot be an unpicklable object, e.g., a lambda function. .. warning:: ``len(dataloader)`` heuristic is based on the length of the sampler used. When :attr:`dataset` is an :class:`~flow.utils.data.IterableDataset`, @@ -181,12 +179,6 @@ class DataLoader(Generic[T_co]): dropped when :attr:`drop_last` is set. Unfortunately, OneFlow can not detect such cases in general. - See `Dataset Types`_ for more details on these two types of datasets and how - :class:`~flow.utils.data.IterableDataset` interacts with - `Multi-process data loading`_. - - .. warning:: See :ref:`reproducibility`, and :ref:`dataloader-workers-random-seed`, and - :ref:`data-loading-randomness` notes for random seed related questions. """ dataset: Dataset[T_co] batch_size: Optional[int] diff --git a/python/oneflow/utils/data/dataset.py b/python/oneflow/utils/data/dataset.py index 3573fdf8a81a47cd05c777541ef2eeb0ff34f9b3..db792720aa26c806a483ea4f45b1b1d3f67546dd 100644 --- a/python/oneflow/utils/data/dataset.py +++ b/python/oneflow/utils/data/dataset.py @@ -195,7 +195,6 @@ class TensorDataset(Dataset[Tuple[Tensor, ...]]): Args: *tensors (Tensor): tensors that have the same size of the first dimension. """ - tensors: Tuple[Tensor, ...] def __init__(self, *tensors: Tensor) -> None: assert all( diff --git a/python/oneflow/utils/data/distributed.py b/python/oneflow/utils/data/distributed.py new file mode 100644 index 0000000000000000000000000000000000000000..832c928f1d86c24c3515f7a2c9128e20db22af11 --- /dev/null +++ b/python/oneflow/utils/data/distributed.py @@ -0,0 +1,169 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +import math +import numpy as np +from typing import TypeVar, Optional, Iterator + +import oneflow as flow +import oneflow.distributed as dist +from oneflow.utils.data import Sampler, Dataset + + +T_co = TypeVar("T_co", covariant=True) + + +class DistributedSampler(Sampler[T_co]): + r"""Sampler that restricts data loading to a subset of the dataset. + + It is especially useful in conjunction with + :class:`flow.nn.parallel.DistributedDataParallel`. 
In such a case, each + process can pass a :class:`~flow.utils.data.DistributedSampler` instance as a + :class:`~flow.utils.data.DataLoader` sampler, and load a subset of the + original dataset that is exclusive to it. + + .. note:: + Dataset is assumed to be of constant size. + + Args: + dataset: Dataset used for sampling. + num_replicas (int, optional): Number of processes participating in + distributed training. By default, :attr:`world_size` is retrieved from the + current distributed group. + rank (int, optional): Rank of the current process within :attr:`num_replicas`. + By default, :attr:`rank` is retrieved from the current distributed + group. + shuffle (bool, optional): If ``True`` (default), sampler will shuffle the + indices. + seed (int, optional): random seed used to shuffle the sampler if + :attr:`shuffle=True`. This number should be identical across all + processes in the distributed group. Default: ``0``. + drop_last (bool, optional): if ``True``, then the sampler will drop the + tail of the data to make it evenly divisible across the number of + replicas. If ``False``, the sampler will add extra indices to make + the data evenly divisible across the replicas. Default: ``False``. + + .. warning:: + In distributed mode, calling the :meth:`set_epoch` method at + the beginning of each epoch **before** creating the :class:`DataLoader` iterator + is necessary to make shuffling work properly across multiple epochs. Otherwise, + the same ordering will always be used. + + For example: + + .. code-block:: python + + >>> sampler = DistributedSampler(dataset) if is_distributed else None + >>> loader = DataLoader(dataset, shuffle=(sampler is None), sampler=sampler) + >>> for epoch in range(start_epoch, n_epochs): + ... if is_distributed: + ... sampler.set_epoch(epoch) + ... train(loader) + """ + + def __init__( + self, + dataset: Dataset, + num_replicas: Optional[int] = None, + rank: Optional[int] = None, + shuffle: bool = True, + seed: int = 0, + drop_last: bool = False, + ) -> None: + if not dist.is_multi_client(): + raise RuntimeError("Requires multi-client env to be available") + + if num_replicas is None: + num_replicas = dist.get_world_size() + if rank is None: + rank = dist.get_rank() + if rank >= num_replicas or rank < 0: + raise ValueError( + "Invalid rank {}, rank should be in the interval" + " [0, {}]".format(rank, num_replicas - 1) + ) + self.dataset = dataset + self.num_replicas = num_replicas + self.rank = rank + self.epoch = 0 + self.drop_last = drop_last + # If the dataset length is evenly divisible by # of replicas, then there + # is no need to drop any data, since the dataset will be split equally. + if self.drop_last and len(self.dataset) % self.num_replicas != 0: + # Split to nearest available length that is evenly divisible. + # This is to ensure each rank receives the same amount of data when + # using this Sampler.
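+            # Illustrative example (hypothetical sizes, not from the original change):
+            # with len(dataset) = 10 and num_replicas = 4, drop_last=True keeps
+            # ceil((10 - 4) / 4) = 2 samples per rank (8 indices used, 2 dropped),
+            # whereas drop_last=False pads the index list in __iter__ to
+            # ceil(10 / 4) = 3 samples per rank (12 indices, 2 of them repeated).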
+ self.num_samples = math.ceil( + # `type:ignore` is required because Dataset cannot provide a default __len__ + (len(self.dataset) - self.num_replicas) + / self.num_replicas + ) + else: + self.num_samples = math.ceil(len(self.dataset) / self.num_replicas) + self.total_size = self.num_samples * self.num_replicas + self.shuffle = shuffle + self.seed = seed + + def __iter__(self) -> Iterator[T_co]: + if self.shuffle: + # deterministically shuffle based on epoch and seed + # TODO: replace with flow.randperm + g = flow.Generator() + g.manual_seed(self.seed + self.epoch) + # indices = flow.randperm(len(self.dataset), generator=g).tolist() + # NOTE: seed numpy with (seed + epoch) as well, so that every rank draws the same permutation + rng = np.random.default_rng(self.seed + self.epoch) + indices = rng.permutation(len(self.dataset)).tolist() + + else: + indices = list(range(len(self.dataset))) + + if not self.drop_last: + # add extra samples to make it evenly divisible + padding_size = self.total_size - len(indices) + if padding_size <= len(indices): + indices += indices[:padding_size] + else: + indices += (indices * math.ceil(padding_size / len(indices)))[ + :padding_size + ] + else: + # remove tail of data to make it evenly divisible. + indices = indices[: self.total_size] + assert len(indices) == self.total_size + + # subsample + indices = indices[self.rank : self.total_size : self.num_replicas] + assert len(indices) == self.num_samples + + return iter(indices) + + def __len__(self) -> int: + return self.num_samples + + def set_epoch(self, epoch: int) -> None: + """Sets the epoch for this sampler. + When :attr:`shuffle=True`, this ensures all replicas use a different random + ordering for each epoch. Otherwise, the next iteration of this sampler + will yield the same ordering. + + Args: + epoch (int): Epoch number. + """ + self.epoch = epoch diff --git a/python/oneflow/utils/vision/__init__.py b/python/oneflow/utils/vision/__init__.py index cae7c6a8dc7703b09d568b427e7e61b23065107e..dec08e4b05b41dd81d160d7e41ed3c728b977af9 100644 --- a/python/oneflow/utils/vision/__init__.py +++ b/python/oneflow/utils/vision/__init__.py @@ -15,3 +15,28 @@ limitations under the License. """ from oneflow.utils.vision import datasets from oneflow.utils.vision import transforms + +_image_backend = "PIL" + + +def set_image_backend(backend): + """ + Specifies the package used to load images. + Args: + backend (string): Name of the image backend. One of {'PIL', 'accimage'}. + The :mod:`accimage` package uses the Intel IPP library. It is + generally faster than PIL, but does not support as many operations. + """ + global _image_backend + if backend not in ["PIL", "accimage"]: + raise ValueError( + "Invalid backend '{}'. Options are 'PIL' and 'accimage'".format(backend) + ) + _image_backend = backend + + +def get_image_backend(): + """ + Gets the name of the package used to load images. + """ + return _image_backend diff --git a/python/oneflow/utils/vision/datasets/__init__.py b/python/oneflow/utils/vision/datasets/__init__.py index a7227ea8bad6296d7bcb8e4cd04ad5a7bf7d272a..abfacb0ee26b4cd338d8631e527e842de8ce0e4b 100644 --- a/python/oneflow/utils/vision/datasets/__init__.py +++ b/python/oneflow/utils/vision/datasets/__init__.py @@ -15,5 +15,21 @@ limitations under the License.
""" from .mnist import MNIST, FashionMNIST from .cifar import CIFAR10, CIFAR100 +from .coco import CocoCaptions, CocoDetection +from .imagenet import ImageNet +from .voc import VOCDetection, VOCSegmentation +from .folder import DatasetFolder, ImageFolder -__all__ = ["MNIST", "FashionMNIST", "CIFAR10", "CIFAR100"] +__all__ = [ + "MNIST", + "FashionMNIST", + "CIFAR10", + "CIFAR100", + "CocoCaptions", + "CocoDetection", + "ImageNet", + "VOCDetection", + "VOCSegmentation", + "DatasetFolder", + "ImageFolder", +] diff --git a/python/oneflow/utils/vision/datasets/cifar.py b/python/oneflow/utils/vision/datasets/cifar.py index a9aa715c2df0f19ad3ac5cf1dca441173389aad9..5f1670693a19154c0b3c14d7bccf4ddeaf54c7bb 100644 --- a/python/oneflow/utils/vision/datasets/cifar.py +++ b/python/oneflow/utils/vision/datasets/cifar.py @@ -25,8 +25,10 @@ from .utils import check_integrity, download_and_extract_archive class CIFAR10(VisionDataset): - """`CIFAR10 `_ Dataset. + r""" `CIFAR10 `_ Dataset. + Args: + root (string): Root directory of dataset where directory ``cifar-10-batches-py`` exists or will be saved to if download is set to True. train (bool, optional): If True, creates dataset from training set, otherwise @@ -39,7 +41,6 @@ class CIFAR10(VisionDataset): puts it in root directory. If dataset is already downloaded, it is not downloaded again. """ - base_folder = "cifar-10-batches-py" url = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz" filename = "cifar-10-python.tar.gz" @@ -128,6 +129,7 @@ class CIFAR10(VisionDataset): """ Args: index (int): Index + Returns: tuple: (image, target) where target is index of the target class. """ @@ -170,10 +172,10 @@ class CIFAR10(VisionDataset): class CIFAR100(CIFAR10): - """`CIFAR100 `_ Dataset. + r""" `CIFAR100 `_ Dataset. + This is a subclass of the `CIFAR10` Dataset. """ - base_folder = "cifar-100-python" url = "https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz" filename = "cifar-100-python.tar.gz" diff --git a/python/oneflow/utils/vision/datasets/coco.py b/python/oneflow/utils/vision/datasets/coco.py new file mode 100644 index 0000000000000000000000000000000000000000..c468d2924ae918ef88b56131a72c6332b2df0394 --- /dev/null +++ b/python/oneflow/utils/vision/datasets/coco.py @@ -0,0 +1,112 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from PIL import Image +import os +import os.path +from typing import Any, Callable, Optional, Tuple, List + +from .vision import VisionDataset + + +class CocoDetection(VisionDataset): + r"""`MS Coco Detection `_ Dataset. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.ToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. 
+ transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. + """ + + def __init__( + self, + root: str, + annFile: str, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + transforms: Optional[Callable] = None, + ): + super().__init__(root, transforms, transform, target_transform) + from pycocotools.coco import COCO + + self.coco = COCO(annFile) + self.ids = list(sorted(self.coco.imgs.keys())) + + def _load_image(self, id: int) -> Image.Image: + path = self.coco.loadImgs(id)[0]["file_name"] + return Image.open(os.path.join(self.root, path)).convert("RGB") + + def _load_target(self, id) -> List[Any]: + return self.coco.loadAnns(self.coco.getAnnIds(id)) + + def __getitem__(self, index: int) -> Tuple[Any, Any]: + id = self.ids[index] + image = self._load_image(id) + target = self._load_target(id) + + if self.transforms is not None: + image, target = self.transforms(image, target) + + return image, target + + def __len__(self) -> int: + return len(self.ids) + + +class CocoCaptions(CocoDetection): + r"""`MS Coco Captions `_ Dataset. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.ToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. + + Example: + + .. code:: python + + import oneflow.utils.vision.datasets as dset + import oneflow.utils.vision.transforms as transforms + cap = dset.CocoCaptions(root = 'dir where images are', + annFile = 'json annotation file', + transform=transforms.ToTensor()) + print('Number of samples: ', len(cap)) + img, target = cap[3] # load 4th sample + print("Image Size: ", img.size()) + print(target) + + Output: :: + + Number of samples: 82783 + Image Size: (3L, 427L, 640L) + [u'A plane emitting smoke stream flying over a mountain.', + u'A plane darts across a bright blue sky behind a mountain covered in snow', + u'A plane leaves a contrail above the snowy mountain top.', + u'A mountain that has a plane flying overheard in the distance.', + u'A mountain view with a plume of smoke in the background'] + """ + + def _load_target(self, id) -> List[str]: + return [ann["caption"] for ann in super()._load_target(id)] diff --git a/python/oneflow/utils/vision/datasets/folder.py b/python/oneflow/utils/vision/datasets/folder.py new file mode 100644 index 0000000000000000000000000000000000000000..d23722b8428ec2054d426d69e708ae497b04d788 --- /dev/null +++ b/python/oneflow/utils/vision/datasets/folder.py @@ -0,0 +1,346 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +import os +import os.path + +from PIL import Image +from typing import Any, Callable, cast, Dict, List, Optional, Tuple + +from .vision import VisionDataset + + +def has_file_allowed_extension(filename: str, extensions: Tuple[str, ...]) -> bool: + """Checks if a file is an allowed extension. + Args: + filename (string): path to a file + extensions (tuple of strings): extensions to consider (lowercase) + Returns: + bool: True if the filename ends with one of given extensions + """ + return filename.lower().endswith(extensions) + + +def is_image_file(filename: str) -> bool: + """Checks if a file is an allowed image extension. + Args: + filename (string): path to a file + Returns: + bool: True if the filename ends with a known image extension + """ + return has_file_allowed_extension(filename, IMG_EXTENSIONS) + + +def find_classes(directory: str) -> Tuple[List[str], Dict[str, int]]: + """Finds the class folders in a dataset. + + See :class:`DatasetFolder` for details. + """ + classes = sorted(entry.name for entry in os.scandir(directory) if entry.is_dir()) + if not classes: + raise FileNotFoundError(f"Couldn't find any class folder in {directory}.") + + class_to_idx = {cls_name: i for i, cls_name in enumerate(classes)} + return classes, class_to_idx + + +def make_dataset( + directory: str, + class_to_idx: Optional[Dict[str, int]] = None, + extensions: Optional[Tuple[str, ...]] = None, + is_valid_file: Optional[Callable[[str], bool]] = None, +) -> List[Tuple[str, int]]: + """Generates a list of samples of a form (path_to_sample, class). + + See :class:`DatasetFolder` for details. + Note: The class_to_idx parameter is here optional and will use the logic of the ``find_classes`` function + by default. + """ + directory = os.path.expanduser(directory) + + if class_to_idx is None: + _, class_to_idx = find_classes(directory) + elif not class_to_idx: + raise ValueError( + "'class_to_index' must have at least one entry to collect any samples." + ) + + both_none = extensions is None and is_valid_file is None + both_something = extensions is not None and is_valid_file is not None + if both_none or both_something: + raise ValueError( + "Both extensions and is_valid_file cannot be None or not None at the same time" + ) + + if extensions is not None: + + def is_valid_file(x: str) -> bool: + return has_file_allowed_extension(x, cast(Tuple[str, ...], extensions)) + + is_valid_file = cast(Callable[[str], bool], is_valid_file) + + instances = [] + available_classes = set() + for target_class in sorted(class_to_idx.keys()): + class_index = class_to_idx[target_class] + target_dir = os.path.join(directory, target_class) + if not os.path.isdir(target_dir): + continue + for root, _, fnames in sorted(os.walk(target_dir, followlinks=True)): + for fname in sorted(fnames): + if is_valid_file(fname): + path = os.path.join(root, fname) + item = path, class_index + instances.append(item) + + if target_class not in available_classes: + available_classes.add(target_class) + + empty_classes = set(class_to_idx.keys()) - available_classes + if empty_classes: + msg = ( + f"Found no valid file for the classes {', '.join(sorted(empty_classes))}. " + ) + if extensions is not None: + msg += f"Supported extensions are: {', '.join(extensions)}" + raise FileNotFoundError(msg) + + return instances + + +class DatasetFolder(VisionDataset): + r"""A generic data loader. + This default directory structure can be customized by overriding the + :meth:`find_classes` method. + + Args: + root (string): Root directory path. 
+ loader (callable): A function to load a sample given its path. + extensions (tuple[string]): A list of allowed extensions. + both extensions and is_valid_file should not be passed. + transform (callable, optional): A function/transform that takes in + a sample and returns a transformed version. + E.g, ``transforms.RandomCrop`` for images. + target_transform (callable, optional): A function/transform that takes + in the target and transforms it. + is_valid_file (callable, optional): A function that takes path of a file + and check if the file is a valid file (used to check of corrupt files) + both extensions and is_valid_file should not be passed. + + Attributes: + classes (list): List of the class names sorted alphabetically. + class_to_idx (dict): Dict with items (class_name, class_index). + samples (list): List of (sample path, class_index) tuples + targets (list): The class_index value for each image in the dataset + """ + + def __init__( + self, + root: str, + loader: Callable[[str], Any], + extensions: Optional[Tuple[str, ...]] = None, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + is_valid_file: Optional[Callable[[str], bool]] = None, + ) -> None: + super(DatasetFolder, self).__init__( + root, transform=transform, target_transform=target_transform + ) + classes, class_to_idx = self.find_classes(self.root) + samples = self.make_dataset(self.root, class_to_idx, extensions, is_valid_file) + + self.loader = loader + self.extensions = extensions + + self.classes = classes + self.class_to_idx = class_to_idx + self.samples = samples + self.targets = [s[1] for s in samples] + + @staticmethod + def make_dataset( + directory: str, + class_to_idx: Dict[str, int], + extensions: Optional[Tuple[str, ...]] = None, + is_valid_file: Optional[Callable[[str], bool]] = None, + ) -> List[Tuple[str, int]]: + """Generates a list of samples of a form (path_to_sample, class). + This can be overridden to e.g. read files from a compressed zip file instead of from the disk. + + Args: + directory (str): root dataset directory, corresponding to ``self.root``. + class_to_idx (Dict[str, int]): Dictionary mapping class name to class index. + extensions (optional): A list of allowed extensions. + Either extensions or is_valid_file should be passed. Defaults to None. + is_valid_file (optional): A function that takes path of a file + and checks if the file is a valid file + (used to check of corrupt files) both extensions and + is_valid_file should not be passed. Defaults to None. + + Raises: + ValueError: In case ``class_to_idx`` is empty. + ValueError: In case ``extensions`` and ``is_valid_file`` are None or both are not None. + FileNotFoundError: In case no valid file was found for any class. + + Returns: + List[Tuple[str, int]]: samples of a form (path_to_sample, class) + """ + if class_to_idx is None: + # prevent potential bug since make_dataset() would use the class_to_idx logic of the + # find_classes() function, instead of using that of the find_classes() method, which + # is potentially overridden and thus could have a different logic. + raise ValueError("The class_to_idx parameter cannot be None.") + return make_dataset( + directory, class_to_idx, extensions=extensions, is_valid_file=is_valid_file + ) + + def find_classes(self, directory: str) -> Tuple[List[str], Dict[str, int]]: + """Find the class folders in a dataset structured as follows: + + .. code-block:: shell + + directory/ + ├── class_x + │ ├── xxx.ext + │ ├── xxy.ext + │ └── ... 
+ │ └── xxz.ext + └── class_y + ├── 123.ext + ├── nsdf3.ext + └── ... + └── asd932_.ext + + This method can be overridden to only consider + a subset of classes, or to adapt to a different dataset directory structure. + + Args: + directory(str): Root directory path, corresponding to ``self.root`` + + Raises: + FileNotFoundError: If ``dir`` has no class folders. + + Returns: + (Tuple[List[str], Dict[str, int]]): List of all classes and dictionary mapping each class to an index. + """ + return find_classes(directory) + + def __getitem__(self, index: int) -> Tuple[Any, Any]: + """ + Args: + index (int): Index + Returns: + tuple: (sample, target) where target is class_index of the target class. + """ + path, target = self.samples[index] + sample = self.loader(path) + if self.transform is not None: + sample = self.transform(sample) + if self.target_transform is not None: + target = self.target_transform(target) + + return sample, target + + def __len__(self) -> int: + return len(self.samples) + + +IMG_EXTENSIONS = ( + ".jpg", + ".jpeg", + ".png", + ".ppm", + ".bmp", + ".pgm", + ".tif", + ".tiff", + ".webp", +) + + +def pil_loader(path: str) -> Image.Image: + # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835) + with open(path, "rb") as f: + img = Image.open(f) + return img.convert("RGB") + + +# TODO: specify the return type +def accimage_loader(path: str) -> Any: + import accimage + + try: + return accimage.Image(path) + except IOError: + # Potentially a decoding problem, fall back to PIL.Image + return pil_loader(path) + + +def default_loader(path: str) -> Any: + from oneflow.utils.vision import get_image_backend + + if get_image_backend() == "accimage": + return accimage_loader(path) + else: + return pil_loader(path) + + +class ImageFolder(DatasetFolder): + r"""A generic data loader where the images are arranged in this way by default: + + .. code-block:: shell + + root/dog/xxx.png + root/dog/xxy.png + root/dog/[...]/xxz.png + root/cat/123.png + root/cat/nsdf3.png + root/cat/[...]/asd932_.png + + This class inherits from :class:`~vision.datasets.DatasetFolder` so + the same methods can be overridden to customize the dataset. + + Args: + root (string): Root directory path. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + loader (callable, optional): A function to load an image given its path. + is_valid_file (callable, optional): A function that takes path of an Image file + and check if the file is a valid file (used to check of corrupt files) + Attributes: + classes (list): List of the class names sorted alphabetically. + class_to_idx (dict): Dict with items (class_name, class_index). 
+ imgs (list): List of (image path, class_index) tuples + """ + + def __init__( + self, + root: str, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + loader: Callable[[str], Any] = default_loader, + is_valid_file: Optional[Callable[[str], bool]] = None, + ): + super(ImageFolder, self).__init__( + root, + loader, + IMG_EXTENSIONS if is_valid_file is None else None, + transform=transform, + target_transform=target_transform, + is_valid_file=is_valid_file, + ) + self.imgs = self.samples diff --git a/python/oneflow/utils/vision/datasets/imagenet.py b/python/oneflow/utils/vision/datasets/imagenet.py new file mode 100644 index 0000000000000000000000000000000000000000..a89b0925fb6680278d3f5383e20380570228fa6b --- /dev/null +++ b/python/oneflow/utils/vision/datasets/imagenet.py @@ -0,0 +1,259 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +import warnings +from contextlib import contextmanager +import os +import shutil +import tempfile +from typing import Any, Dict, List, Iterator, Optional, Tuple + +import oneflow as flow +from .folder import ImageFolder +from .utils import check_integrity, extract_archive, verify_str_arg + +ARCHIVE_META = { + "train": ("ILSVRC2012_img_train.tar", "1d675b47d978889d74fa0da5fadfb00e"), + "val": ("ILSVRC2012_img_val.tar", "29b22e2961454d5413ddabcf34fc5622"), + "devkit": ("ILSVRC2012_devkit_t12.tar.gz", "fa75699e90414af021442c21a62c3abf"), +} + +META_FILE = "meta.bin" + + +class ImageNet(ImageFolder): + r""" `ImageNet `_ 2012 Classification Dataset. + + Args: + root (string): Root directory of the ImageNet Dataset. + split (string, optional): The dataset split, supports ``train``, or ``val``. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + loader (callable, optional): A function to load an image given its path. + + Attributes: + classes (list): List of the class name tuples. + class_to_idx (dict): Dict with items (class_name, class_index). + wnids (list): List of the WordNet IDs. + wnid_to_idx (dict): Dict with items (wordnet_id, class_index). + imgs (list): List of (image path, class_index) tuples + targets (list): The class_index value for each image in the dataset + """ + + def __init__( + self, + root: str, + split: str = "train", + download: Optional[str] = None, + **kwargs: Any + ) -> None: + if download is True: + msg = ( + "The dataset is no longer publicly accessible. You need to " + "download the archives externally and place them in the root " + "directory." + ) + raise RuntimeError(msg) + elif download is False: + msg = ( + "The use of the download flag is deprecated, since the dataset " + "is no longer publicly accessible." 
+ ) + warnings.warn(msg, RuntimeWarning) + + root = self.root = os.path.expanduser(root) + self.split = verify_str_arg(split, "split", ("train", "val")) + + self.parse_archives() + wnid_to_classes = load_meta_file(self.root)[0] + + super(ImageNet, self).__init__(self.split_folder, **kwargs) + self.root = root + + self.wnids = self.classes + self.wnid_to_idx = self.class_to_idx + self.classes = [wnid_to_classes[wnid] for wnid in self.wnids] + self.class_to_idx = { + cls: idx for idx, clss in enumerate(self.classes) for cls in clss + } + + def parse_archives(self) -> None: + if not check_integrity(os.path.join(self.root, META_FILE)): + parse_devkit_archive(self.root) + + if not os.path.isdir(self.split_folder): + if self.split == "train": + parse_train_archive(self.root) + elif self.split == "val": + parse_val_archive(self.root) + + @property + def split_folder(self) -> str: + return os.path.join(self.root, self.split) + + def extra_repr(self) -> str: + return "Split: {split}".format(**self.__dict__) + + +def load_meta_file( + root: str, file: Optional[str] = None +) -> Tuple[Dict[str, str], List[str]]: + if file is None: + file = META_FILE + file = os.path.join(root, file) + + if check_integrity(file): + return flow.load(file) + else: + msg = ( + "The meta file {} is not present in the root directory or is corrupted. " + "This file is automatically created by the ImageNet dataset." + ) + raise RuntimeError(msg.format(file, root)) + + +def _verify_archive(root: str, file: str, md5: str) -> None: + if not check_integrity(os.path.join(root, file), md5): + msg = ( + "The archive {} is not present in the root directory or is corrupted. " + "You need to download it externally and place it in {}." + ) + raise RuntimeError(msg.format(file, root)) + + +def parse_devkit_archive(root: str, file: Optional[str] = None) -> None: + """Parse the devkit archive of the ImageNet2012 classification dataset and save + the meta information in a binary file. + Args: + root (str): Root directory containing the devkit archive + file (str, optional): Name of devkit archive. 
Defaults to + 'ILSVRC2012_devkit_t12.tar.gz' + """ + import scipy.io as sio + + def parse_meta_mat(devkit_root: str) -> Tuple[Dict[int, str], Dict[str, str]]: + metafile = os.path.join(devkit_root, "data", "meta.mat") + meta = sio.loadmat(metafile, squeeze_me=True)["synsets"] + nums_children = list(zip(*meta))[4] + meta = [ + meta[idx] + for idx, num_children in enumerate(nums_children) + if num_children == 0 + ] + idcs, wnids, classes = list(zip(*meta))[:3] + classes = [tuple(clss.split(", ")) for clss in classes] + idx_to_wnid = {idx: wnid for idx, wnid in zip(idcs, wnids)} + wnid_to_classes = {wnid: clss for wnid, clss in zip(wnids, classes)} + return idx_to_wnid, wnid_to_classes + + def parse_val_groundtruth_txt(devkit_root: str) -> List[int]: + file = os.path.join( + devkit_root, "data", "ILSVRC2012_validation_ground_truth.txt" + ) + with open(file, "r") as txtfh: + val_idcs = txtfh.readlines() + return [int(val_idx) for val_idx in val_idcs] + + @contextmanager + def get_tmp_dir() -> Iterator[str]: + tmp_dir = tempfile.mkdtemp() + try: + yield tmp_dir + finally: + shutil.rmtree(tmp_dir) + + archive_meta = ARCHIVE_META["devkit"] + if file is None: + file = archive_meta[0] + md5 = archive_meta[1] + + _verify_archive(root, file, md5) + + with get_tmp_dir() as tmp_dir: + extract_archive(os.path.join(root, file), tmp_dir) + + devkit_root = os.path.join(tmp_dir, "ILSVRC2012_devkit_t12") + idx_to_wnid, wnid_to_classes = parse_meta_mat(devkit_root) + val_idcs = parse_val_groundtruth_txt(devkit_root) + val_wnids = [idx_to_wnid[idx] for idx in val_idcs] + + flow.save((wnid_to_classes, val_wnids), os.path.join(root, META_FILE)) + + +def parse_train_archive( + root: str, file: Optional[str] = None, folder: str = "train" +) -> None: + """Parse the train images archive of the ImageNet2012 classification dataset and + prepare it for usage with the ImageNet dataset. + Args: + root (str): Root directory containing the train images archive + file (str, optional): Name of train images archive. Defaults to + 'ILSVRC2012_img_train.tar' + folder (str, optional): Optional name for train images folder. Defaults to + 'train' + """ + archive_meta = ARCHIVE_META["train"] + if file is None: + file = archive_meta[0] + md5 = archive_meta[1] + + _verify_archive(root, file, md5) + + train_root = os.path.join(root, folder) + extract_archive(os.path.join(root, file), train_root) + + archives = [os.path.join(train_root, archive) for archive in os.listdir(train_root)] + for archive in archives: + extract_archive(archive, os.path.splitext(archive)[0], remove_finished=True) + + +def parse_val_archive( + root: str, + file: Optional[str] = None, + wnids: Optional[List[str]] = None, + folder: str = "val", +) -> None: + """Parse the validation images archive of the ImageNet2012 classification dataset + and prepare it for usage with the ImageNet dataset. + Args: + root (str): Root directory containing the validation images archive + file (str, optional): Name of validation images archive. Defaults to + 'ILSVRC2012_img_val.tar' + wnids (list, optional): List of WordNet IDs of the validation images. If None + is given, the IDs are loaded from the meta file in the root directory + folder (str, optional): Optional name for validation images folder. 
Defaults to + 'val' + """ + archive_meta = ARCHIVE_META["val"] + if file is None: + file = archive_meta[0] + md5 = archive_meta[1] + if wnids is None: + wnids = load_meta_file(root)[1] + + _verify_archive(root, file, md5) + + val_root = os.path.join(root, folder) + extract_archive(os.path.join(root, file), val_root) + + images = sorted([os.path.join(val_root, image) for image in os.listdir(val_root)]) + + for wnid in set(wnids): + os.mkdir(os.path.join(val_root, wnid)) + + for wnid, img_file in zip(wnids, images): + shutil.move(img_file, os.path.join(val_root, wnid, os.path.basename(img_file))) diff --git a/python/oneflow/utils/vision/datasets/mnist.py b/python/oneflow/utils/vision/datasets/mnist.py index 76f1eb1a9300a3d64d3a801da3e70b3419248146..9ac513c23e6ee58d847a2549f54ca42f29d5ac63 100644 --- a/python/oneflow/utils/vision/datasets/mnist.py +++ b/python/oneflow/utils/vision/datasets/mnist.py @@ -29,7 +29,8 @@ from oneflow.framework.tensor import Tensor class MNIST(VisionDataset): - """`MNIST `_ Dataset. + r""" `MNIST `_ Dataset. + Args: root (string): Root directory of dataset where ``MNIST/processed/training.pt`` and ``MNIST/processed/test.pt`` exist. @@ -43,7 +44,6 @@ class MNIST(VisionDataset): target_transform (callable, optional): A function/transform that takes in the target and transforms it. """ - mirrors = [ "http://yann.lecun.com/exdb/mnist/", "https://ossci-datasets.s3.amazonaws.com/mnist/", @@ -222,7 +222,8 @@ class MNIST(VisionDataset): class FashionMNIST(MNIST): - """`Fashion-MNIST `_ Dataset. + r""" `Fashion-MNIST `_ Dataset. + Args: root (string): Root directory of dataset where ``FashionMNIST/processed/training.pt`` and ``FashionMNIST/processed/test.pt`` exist. @@ -236,7 +237,6 @@ class FashionMNIST(MNIST): target_transform (callable, optional): A function/transform that takes in the target and transforms it. """ - mirrors = ["http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/"] resources = [ diff --git a/python/oneflow/utils/vision/datasets/voc.py b/python/oneflow/utils/vision/datasets/voc.py new file mode 100644 index 0000000000000000000000000000000000000000..ba7d11eb477d52a170cd8339205517e4ac4b2c9d --- /dev/null +++ b/python/oneflow/utils/vision/datasets/voc.py @@ -0,0 +1,256 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +import os +import warnings +import collections + +from xml.etree.ElementTree import Element as ET_Element + +try: + from defusedxml.ElementTree import parse as ET_parse +except ImportError: + from xml.etree.ElementTree import parse as ET_parse +from PIL import Image +from typing import Any, Callable, Dict, Optional, Tuple, List + +from .utils import download_and_extract_archive, verify_str_arg +from .vision import VisionDataset + + +DATASET_YEAR_DICT = { + "2012": { + "url": "http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar", + "filename": "VOCtrainval_11-May-2012.tar", + "md5": "6cd6e144f989b92b3379bac3b3de84fd", + "base_dir": os.path.join("VOCdevkit", "VOC2012"), + }, + "2011": { + "url": "http://host.robots.ox.ac.uk/pascal/VOC/voc2011/VOCtrainval_25-May-2011.tar", + "filename": "VOCtrainval_25-May-2011.tar", + "md5": "6c3384ef61512963050cb5d687e5bf1e", + "base_dir": os.path.join("TrainVal", "VOCdevkit", "VOC2011"), + }, + "2010": { + "url": "http://host.robots.ox.ac.uk/pascal/VOC/voc2010/VOCtrainval_03-May-2010.tar", + "filename": "VOCtrainval_03-May-2010.tar", + "md5": "da459979d0c395079b5c75ee67908abb", + "base_dir": os.path.join("VOCdevkit", "VOC2010"), + }, + "2009": { + "url": "http://host.robots.ox.ac.uk/pascal/VOC/voc2009/VOCtrainval_11-May-2009.tar", + "filename": "VOCtrainval_11-May-2009.tar", + "md5": "59065e4b188729180974ef6572f6a212", + "base_dir": os.path.join("VOCdevkit", "VOC2009"), + }, + "2008": { + "url": "http://host.robots.ox.ac.uk/pascal/VOC/voc2008/VOCtrainval_14-Jul-2008.tar", + "filename": "VOCtrainval_11-May-2012.tar", + "md5": "2629fa636546599198acfcfbfcf1904a", + "base_dir": os.path.join("VOCdevkit", "VOC2008"), + }, + "2007": { + "url": "http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar", + "filename": "VOCtrainval_06-Nov-2007.tar", + "md5": "c52e279531787c972589f7e41ab4ae64", + "base_dir": os.path.join("VOCdevkit", "VOC2007"), + }, + "2007-test": { + "url": "http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar", + "filename": "VOCtest_06-Nov-2007.tar", + "md5": "b6e924de25625d8de591ea690078ad9f", + "base_dir": os.path.join("VOCdevkit", "VOC2007"), + }, +} + + +class _VOCBase(VisionDataset): + _SPLITS_DIR: str + _TARGET_DIR: str + _TARGET_FILE_EXT: str + + def __init__( + self, + root: str, + year: str = "2012", + image_set: str = "train", + download: bool = False, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + transforms: Optional[Callable] = None, + ): + super().__init__(root, transforms, transform, target_transform) + if year == "2007-test": + if image_set == "test": + warnings.warn( + "Acessing the test image set of the year 2007 with year='2007-test' is deprecated. " + "Please use the combination year='2007' and image_set='test' instead." + ) + year = "2007" + else: + raise ValueError( + "In the test image set of the year 2007 only image_set='test' is allowed. " + "For all other image sets use year='2007' instead." 
+ ) + self.year = year + + valid_image_sets = ["train", "trainval", "val"] + if year == "2007": + valid_image_sets.append("test") + self.image_set = verify_str_arg(image_set, "image_set", valid_image_sets) + + key = "2007-test" if year == "2007" and image_set == "test" else year + dataset_year_dict = DATASET_YEAR_DICT[key] + + self.url = dataset_year_dict["url"] + self.filename = dataset_year_dict["filename"] + self.md5 = dataset_year_dict["md5"] + + base_dir = dataset_year_dict["base_dir"] + voc_root = os.path.join(self.root, base_dir) + + if download: + download_and_extract_archive( + self.url, self.root, filename=self.filename, md5=self.md5 + ) + + if not os.path.isdir(voc_root): + raise RuntimeError( + "Dataset not found or corrupted. You can use download=True to download it" + ) + + splits_dir = os.path.join(voc_root, "ImageSets", self._SPLITS_DIR) + split_f = os.path.join(splits_dir, image_set.rstrip("\n") + ".txt") + with open(os.path.join(split_f), "r") as f: + file_names = [x.strip() for x in f.readlines()] + + image_dir = os.path.join(voc_root, "JPEGImages") + self.images = [os.path.join(image_dir, x + ".jpg") for x in file_names] + + target_dir = os.path.join(voc_root, self._TARGET_DIR) + self.targets = [ + os.path.join(target_dir, x + self._TARGET_FILE_EXT) for x in file_names + ] + + assert len(self.images) == len(self.targets) + + def __len__(self) -> int: + return len(self.images) + + +class VOCSegmentation(_VOCBase): + r""" `Pascal VOC `_ Segmentation Dataset. + + Args: + root (string): Root directory of the VOC Dataset. + year (string, optional): The dataset year, supports years ``"2007"`` to ``"2012"``. + image_set (string, optional): Select the image_set to use, ``"train"``, ``"trainval"`` or ``"val"``. If + ``year=="2007"``, can also be ``"test"``. + download (bool, optional): If true, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. + """ + _SPLITS_DIR = "Segmentation" + _TARGET_DIR = "SegmentationClass" + _TARGET_FILE_EXT = ".png" + + @property + def masks(self) -> List[str]: + return self.targets + + def __getitem__(self, index: int) -> Tuple[Any, Any]: + """ + Args: + index (int): Index + Returns: + tuple: (image, target) where target is the image segmentation. + """ + img = Image.open(self.images[index]).convert("RGB") + target = Image.open(self.masks[index]) + + if self.transforms is not None: + img, target = self.transforms(img, target) + + return img, target + + +class VOCDetection(_VOCBase): + r""" `Pascal VOC `_ Detection Dataset. + + Args: + root (string): Root directory of the VOC Dataset. + year (string, optional): The dataset year, supports years ``"2007"`` to ``"2012"``. + image_set (string, optional): Select the image_set to use, ``"train"``, ``"trainval"`` or ``"val"``. If + ``year=="2007"``, can also be ``"test"``. + download (bool, optional): If true, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. + (default: alphabetic indexing of VOC's 20 classes). 
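(Editorial usage sketch, not part of the patch.) A minimal example for the VOCSegmentation class defined above; it assumes network access for download=True and that the VisionDataset base combines transform/target_transform in the usual torchvision way:

import oneflow as flow
import oneflow.utils.vision.transforms as transforms

# Downloads VOCtrainval_11-May-2012.tar into root on first use, then reads
# ImageSets/Segmentation/train.txt to build the (image, mask) file lists.
voc_train = flow.utils.vision.datasets.VOCSegmentation(
    root="./data/voc",
    year="2012",
    image_set="train",
    download=True,
    transform=transforms.ToTensor(),            # applied to the JPEG image
    target_transform=transforms.PILToTensor(),  # applied to the PNG mask
)
img, mask = voc_train[0]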
+ transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, required): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. + """ + _SPLITS_DIR = "Main" + _TARGET_DIR = "Annotations" + _TARGET_FILE_EXT = ".xml" + + @property + def annotations(self) -> List[str]: + return self.targets + + def __getitem__(self, index: int) -> Tuple[Any, Any]: + """ + Args: + index (int): Index + Returns: + tuple: (image, target) where target is a dictionary of the XML tree. + """ + img = Image.open(self.images[index]).convert("RGB") + target = self.parse_voc_xml(ET_parse(self.annotations[index]).getroot()) + + if self.transforms is not None: + img, target = self.transforms(img, target) + + return img, target + + def parse_voc_xml(self, node: ET_Element) -> Dict[str, Any]: + voc_dict: Dict[str, Any] = {} + children = list(node) + if children: + def_dic: Dict[str, Any] = collections.defaultdict(list) + for dc in map(self.parse_voc_xml, children): + for ind, v in dc.items(): + def_dic[ind].append(v) + if node.tag == "annotation": + def_dic["object"] = [def_dic["object"]] + voc_dict = { + node.tag: { + ind: v[0] if len(v) == 1 else v for ind, v in def_dic.items() + } + } + if node.text: + text = node.text.strip() + if not children: + voc_dict[node.tag] = text + return voc_dict diff --git a/python/oneflow/utils/vision/transforms/__init__.py b/python/oneflow/utils/vision/transforms/__init__.py index aa8fae46e6fe9a684c597cd410e480e90ff3fb3a..1424e5744d5200c44a313a0f6336230020a10449 100644 --- a/python/oneflow/utils/vision/transforms/__init__.py +++ b/python/oneflow/utils/vision/transforms/__init__.py @@ -13,6 +13,54 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ -from .transforms import Normalize, Compose, ToTensor, Resize +from .transforms import ( + Compose, + ToTensor, + PILToTensor, + ConvertImageDtype, + ToPILImage, + Normalize, + Resize, + Scale, + CenterCrop, + Pad, + Lambda, + RandomTransforms, + RandomApply, + RandomOrder, + RandomChoice, + RandomCrop, + RandomHorizontalFlip, + RandomVerticalFlip, + RandomResizedCrop, + RandomSizedCrop, + FiveCrop, + TenCrop, + InterpolationMode, +) -__all__ = ["Normalize", "Compose", "ToTensor", "Resize"] +__all__ = [ + "Compose", + "ToTensor", + "PILToTensor", + "ConvertImageDtype", + "ToPILImage", + "Normalize", + "Resize", + "Scale", + "CenterCrop", + "Pad", + "Lambda", + "RandomTransforms", + "RandomApply", + "RandomOrder", + "RandomChoice", + "RandomCrop", + "RandomHorizontalFlip", + "RandomVerticalFlip", + "RandomResizedCrop", + "RandomSizedCrop", + "FiveCrop", + "TenCrop", + "InterpolationMode", +] diff --git a/python/oneflow/utils/vision/transforms/functional.py b/python/oneflow/utils/vision/transforms/functional.py index d3092113dbd7ca8dd03100fbb6c311a0975849b7..b77e564f01912cedaa7b35edeafdfb0ff59fb34c 100644 --- a/python/oneflow/utils/vision/transforms/functional.py +++ b/python/oneflow/utils/vision/transforms/functional.py @@ -14,11 +14,12 @@ See the License for the specific language governing permissions and limitations under the License. 
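(Editorial sketch, not part of the patch.) VOCDetection above returns the annotation as a nested dict built by parse_voc_xml; leaf text stays as strings and the object entries are always wrapped in a list. Values below are illustrative only:

import oneflow as flow

voc_det = flow.utils.vision.datasets.VOCDetection(
    root="./data/voc", year="2007", image_set="trainval", download=True,
)
img, target = voc_det[0]
# e.g. (illustrative values):
# target["annotation"]["object"][0]["name"]            -> "dog"
# target["annotation"]["object"][0]["bndbox"]["xmin"]  -> "48"   (kept as a string)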
""" import warnings +import numbers from enum import Enum - +from typing import List, Any, Tuple, Optional import numpy as np from PIL import Image -from typing import List, Any +import math try: import accimage @@ -27,13 +28,12 @@ except ImportError: import oneflow as flow from oneflow.framework.tensor import Tensor - from . import functional_pil as F_pil from . import functional_tensor as F_t class InterpolationMode(Enum): - """Interpolation modes + r"""Interpolation modes """ NEAREST = "nearest" @@ -67,6 +67,24 @@ pil_modes_mapping = { } +def _get_image_size(img: Tensor) -> List[int]: + """Returns image size as [w, h] + """ + if isinstance(img, flow.Tensor): + return F_t._get_image_size(img) + + return F_pil._get_image_size(img) + + +def _get_image_num_channels(img: Tensor) -> int: + """Returns number of image channels + """ + if isinstance(img, flow.Tensor): + return F_t._get_image_num_channels(img) + + return F_pil._get_image_num_channels(img) + + def _is_pil_image(img: Any) -> bool: if accimage is not None: return isinstance(img, (Image.Image, accimage.Image)) @@ -140,6 +158,64 @@ def to_tensor(pic): return res +def pil_to_tensor(pic): + """Convert a ``PIL Image`` to a tensor of the same type. + + See :class:`~vision.transforms.PILToTensor` for more details. + + Args: + pic (PIL Image): Image to be converted to tensor. + + Returns: + Tensor: Converted image. + """ + if not F_pil._is_pil_image(pic): + raise TypeError("pic should be PIL Image. Got {}".format(type(pic))) + + if accimage is not None and isinstance(pic, accimage.Image): + # accimage format is always uint8 internally, so always return uint8 here + nppic = np.zeros([pic.channels, pic.height, pic.width], dtype=np.uint8) + pic.copyto(nppic) + return flow.tensor(nppic) + + # handle PIL Image + img = flow.tensor(np.asarray(pic)) + img = img.view(pic.size[1], pic.size[0], len(pic.getbands())) + # put it from HWC to CHW format + img = img.permute((2, 0, 1)) + return img + + +def convert_image_dtype( + image: flow.Tensor, dtype: flow.dtype = flow.float +) -> flow.Tensor: + """Convert a tensor image to the given ``dtype`` and scale the values accordingly + This function does not support PIL Image. + + Args: + image (flow.Tensor): Image to be converted + dtype (flow.dtype): Desired data type of the output + + Returns: + Tensor: Converted image + + .. note:: + + When converting from a smaller to a larger integer ``dtype`` the maximum values are **not** mapped exactly. + If converted back and forth, this mismatch has no effect. + + Raises: + RuntimeError: When trying to cast :class:`flow.float32` to :class:`flow.int32` or :class:`flow.int64` as + well as for trying to cast :class:`flow.float64` to :class:`flow.int64`. These conversions might lead to + overflow errors since the floating point ``dtype`` cannot store consecutive integers over the whole range + of the integer ``dtype``. + """ + if not isinstance(image, flow.Tensor): + raise TypeError("Input img should be Tensor Image") + + return F_t.convert_image_dtype(image, dtype) + + def normalize( tensor: Tensor, mean: List[float], std: List[float], inplace: bool = False ) -> Tensor: @@ -156,9 +232,7 @@ def normalize( Returns: Tensor: Normalized Tensor image. """ - if not isinstance(tensor, flow.Tensor) and not isinstance( - tensor, flow._oneflow_internal.Tensor - ): + if not isinstance(tensor, flow.Tensor): raise TypeError( "Input tensor should be a oneflow tensor. 
Got {}.".format(type(tensor)) ) @@ -238,3 +312,423 @@ def resize( return F_pil.resize(img, size=size, interpolation=pil_interpolation) return F_t.resize(img, size=size, interpolation=interpolation.value) + + +def scale(*args, **kwargs): + warnings.warn( + "The use of the transforms.Scale transform is deprecated, " + + "please use transforms.Resize instead." + ) + return resize(*args, **kwargs) + + +def pad( + img: Tensor, padding: List[int], fill: int = 0, padding_mode: str = "constant" +) -> Tensor: + r"""Pad the given image on all sides with the given "pad" value. + If the image is oneflow Tensor, it is expected + to have [..., H, W] shape, where ... means at most 2 leading dimensions for mode reflect and symmetric, + at most 3 leading dimensions for mode edge, + and an arbitrary number of leading dimensions for mode constant + + Args: + img (PIL Image or Tensor): Image to be padded. + padding (int or sequence): Padding on each border. If a single int is provided this + is used to pad all borders. If sequence of length 2 is provided this is the padding + on left/right and top/bottom respectively. If a sequence of length 4 is provided + this is the padding for the left, top, right and bottom borders respectively. + + fill (number or str or tuple): Pixel fill value for constant fill. Default is 0. + If a tuple of length 3, it is used to fill R, G, B channels respectively. + This value is only used when the padding_mode is constant. + Only number is supported for oneflow Tensor. + Only int or str or tuple value is supported for PIL Image. + padding_mode (str): Type of padding. Should be: constant, edge, reflect or symmetric. + Default is constant. + + - constant: pads with a constant value, this value is specified with fill + + - edge: pads with the last value at the edge of the image. + If input a 5D oneflow Tensor, the last 3 dimensions will be padded instead of the last 2 + + - reflect: pads with reflection of image without repeating the last value on the edge. + For example, padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode + will result in [3, 2, 1, 2, 3, 4, 3, 2] + + - symmetric: pads with reflection of image repeating the last value on the edge. + For example, padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode + will result in [2, 1, 1, 2, 3, 4, 4, 3] + + Returns: + PIL Image or Tensor: Padded image. + """ + if not isinstance(img, flow.Tensor): + return F_pil.pad(img, padding=padding, fill=fill, padding_mode=padding_mode) + + return F_t.pad(img, padding=padding, fill=fill, padding_mode=padding_mode) + + +def crop(img: Tensor, top: int, left: int, height: int, width: int) -> Tensor: + """Crop the given image at specified location and output size. + If the image is oneflow Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. + If image size is smaller than output size along any edge, image is padded with 0 and then cropped. + + Args: + img (PIL Image or Tensor): Image to be cropped. (0,0) denotes the top left corner of the image. + top (int): Vertical component of the top left corner of the crop box. + left (int): Horizontal component of the top left corner of the crop box. + height (int): Height of the crop box. + width (int): Width of the crop box. + + Returns: + PIL Image or Tensor: Cropped image. 
+ """ + + if not isinstance(img, flow.Tensor): + return F_pil.crop(img, top, left, height, width) + + return F_t.crop(img, top, left, height, width) + + +def center_crop(img: Tensor, output_size: List[int]) -> Tensor: + """Crops the given image at the center. + If the image is oneflow Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. + If image size is smaller than output size along any edge, image is padded with 0 and then center cropped. + + Args: + img (PIL Image or Tensor): Image to be cropped. + output_size (sequence or int): (height, width) of the crop box. If int or sequence with single int, + it is used for both directions. + + Returns: + PIL Image or Tensor: Cropped image. + """ + if isinstance(output_size, numbers.Number): + output_size = (int(output_size), int(output_size)) + elif isinstance(output_size, (tuple, list)) and len(output_size) == 1: + output_size = (output_size[0], output_size[0]) + + image_width, image_height = _get_image_size(img) + crop_height, crop_width = output_size + + if crop_width > image_width or crop_height > image_height: + padding_ltrb = [ + (crop_width - image_width) // 2 if crop_width > image_width else 0, + (crop_height - image_height) // 2 if crop_height > image_height else 0, + (crop_width - image_width + 1) // 2 if crop_width > image_width else 0, + (crop_height - image_height + 1) // 2 if crop_height > image_height else 0, + ] + img = pad(img, padding_ltrb, fill=0) # PIL uses fill value 0 + image_width, image_height = _get_image_size(img) + if crop_width == image_width and crop_height == image_height: + return img + + crop_top = int(round((image_height - crop_height) / 2.0)) + crop_left = int(round((image_width - crop_width) / 2.0)) + return crop(img, crop_top, crop_left, crop_height, crop_width) + + +def resized_crop( + img: Tensor, + top: int, + left: int, + height: int, + width: int, + size: List[int], + interpolation: InterpolationMode = InterpolationMode.BILINEAR, +) -> Tensor: + """Crop the given image and resize it to desired size. + If the image is oneflow Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions + + Notably used in :class:`~vision.transforms.RandomResizedCrop`. + + Args: + img (PIL Image or Tensor): Image to be cropped. (0,0) denotes the top left corner of the image. + top (int): Vertical component of the top left corner of the crop box. + left (int): Horizontal component of the top left corner of the crop box. + height (int): Height of the crop box. + width (int): Width of the crop box. + size (sequence or int): Desired output size. Same semantics as ``resize``. + interpolation (InterpolationMode): Desired interpolation enum defined by + :class:`vision.transforms.InterpolationMode`. + Default is ``InterpolationMode.BILINEAR``. If input is Tensor, only ``InterpolationMode.NEAREST``, + ``InterpolationMode.BILINEAR`` and ``InterpolationMode.BICUBIC`` are supported. + For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable. + + Returns: + PIL Image or Tensor: Cropped image. + """ + img = crop(img, top, left, height, width) + img = resize(img, size, interpolation) + return img + + +def hflip(img: Tensor) -> Tensor: + """Horizontally flip the given image. + + Args: + img (PIL Image or Tensor): Image to be flipped. If img + is a Tensor, it is expected to be in [..., H, W] format, + where ... means it can have an arbitrary number of leading + dimensions. 
+ + Returns: + PIL Image or Tensor: Horizontally flipped image. + """ + if not isinstance(img, flow.Tensor): + return F_pil.hflip(img) + + return F_t.hflip(img) + + +def vflip(img: Tensor) -> Tensor: + """Vertically flip the given image. + + Args: + img (PIL Image or Tensor): Image to be flipped. If img + is a Tensor, it is expected to be in [..., H, W] format, + where ... means it can have an arbitrary number of leading + dimensions. + + Returns: + PIL Image or Tensor: Vertically flipped image. + """ + if not isinstance(img, flow.Tensor): + return F_pil.vflip(img) + + return F_t.vflip(img) + + +def five_crop( + img: Tensor, size: List[int] +) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]: + """Crop the given image into four corners and the central crop. + If the image is oneflow Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions + + .. Note:: + This transform returns a tuple of images and there may be a + mismatch in the number of inputs and targets your ``Dataset`` returns. + + Args: + img (PIL Image or Tensor): Image to be cropped. + size (sequence or int): Desired output size of the crop. If size is an + int instead of sequence like (h, w), a square crop (size, size) is + made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]). + + Returns: + tuple: tuple (tl, tr, bl, br, center) + Corresponding top left, top right, bottom left, bottom right and center crop. + """ + if isinstance(size, numbers.Number): + size = (int(size), int(size)) + elif isinstance(size, (tuple, list)) and len(size) == 1: + size = (size[0], size[0]) + + if len(size) != 2: + raise ValueError("Please provide only two dimensions (h, w) for size.") + + image_width, image_height = _get_image_size(img) + crop_height, crop_width = size + if crop_width > image_width or crop_height > image_height: + msg = "Requested crop size {} is bigger than input size {}" + raise ValueError(msg.format(size, (image_height, image_width))) + + tl = crop(img, 0, 0, crop_height, crop_width) + tr = crop(img, 0, image_width - crop_width, crop_height, crop_width) + bl = crop(img, image_height - crop_height, 0, crop_height, crop_width) + br = crop( + img, + image_height - crop_height, + image_width - crop_width, + crop_height, + crop_width, + ) + + center = center_crop(img, [crop_height, crop_width]) + + return tl, tr, bl, br, center + + +def ten_crop(img: Tensor, size: List[int], vertical_flip: bool = False) -> List[Tensor]: + """Generate ten cropped images from the given image. + Crop the given image into four corners and the central crop plus the + flipped version of these (horizontal flipping is used by default). + If the image is oneflow Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions + + .. Note:: + This transform returns a tuple of images and there may be a + mismatch in the number of inputs and targets your ``Dataset`` returns. + + Args: + img (PIL Image or Tensor): Image to be cropped. + size (sequence or int): Desired output size of the crop. If size is an + int instead of sequence like (h, w), a square crop (size, size) is + made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]). 
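(Editorial sketch, not part of the patch.) five_crop above returns a plain 5-tuple rather than a tensor, so callers usually stack the crops themselves; ten_crop (continued below) simply appends the flipped five:

import oneflow as flow
import oneflow.utils.vision.transforms.functional as F

img = flow.zeros(3, 64, 64)
tl, tr, bl, br, center = F.five_crop(img, size=[48, 48])
batch = flow.stack([tl, tr, bl, br, center], dim=0)  # shape: (5, 3, 48, 48)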
+ vertical_flip (bool): Use vertical flipping instead of horizontal + + Returns: + tuple: tuple (tl, tr, bl, br, center, tl_flip, tr_flip, bl_flip, br_flip, center_flip) + Corresponding top left, top right, bottom left, bottom right and + center crop and same for the flipped image. + """ + if isinstance(size, numbers.Number): + size = (int(size), int(size)) + elif isinstance(size, (tuple, list)) and len(size) == 1: + size = (size[0], size[0]) + + if len(size) != 2: + raise ValueError("Please provide only two dimensions (h, w) for size.") + + first_five = five_crop(img, size) + + if vertical_flip: + img = vflip(img) + else: + img = hflip(img) + + second_five = five_crop(img, size) + return first_five + second_five + + +def _get_inverse_affine_matrix( + center: List[float], + angle: float, + translate: List[float], + scale: float, + shear: List[float], +) -> List[float]: + # Helper method to compute inverse matrix for affine transformation + + # As it is explained in PIL.Image.rotate + # We need compute INVERSE of affine transformation matrix: M = T * C * RSS * C^-1 + # where T is translation matrix: [1, 0, tx | 0, 1, ty | 0, 0, 1] + # C is translation matrix to keep center: [1, 0, cx | 0, 1, cy | 0, 0, 1] + # RSS is rotation with scale and shear matrix + # RSS(a, s, (sx, sy)) = + # = R(a) * S(s) * SHy(sy) * SHx(sx) + # = [ s*cos(a - sy)/cos(sy), s*(-cos(a - sy)*tan(x)/cos(y) - sin(a)), 0 ] + # [ s*sin(a + sy)/cos(sy), s*(-sin(a - sy)*tan(x)/cos(y) + cos(a)), 0 ] + # [ 0 , 0 , 1 ] + # + # where R is a rotation matrix, S is a scaling matrix, and SHx and SHy are the shears: + # SHx(s) = [1, -tan(s)] and SHy(s) = [1 , 0] + # [0, 1 ] [-tan(s), 1] + # + # Thus, the inverse is M^-1 = C * RSS^-1 * C^-1 * T^-1 + + rot = math.radians(angle) + sx, sy = [math.radians(s) for s in shear] + + cx, cy = center + tx, ty = translate + + # RSS without scaling + a = math.cos(rot - sy) / math.cos(sy) + b = -math.cos(rot - sy) * math.tan(sx) / math.cos(sy) - math.sin(rot) + c = math.sin(rot - sy) / math.cos(sy) + d = -math.sin(rot - sy) * math.tan(sx) / math.cos(sy) + math.cos(rot) + + # Inverted rotation matrix with scale and shear + # det([[a, b], [c, d]]) == 1, since det(rotation) = 1 and det(shear) = 1 + matrix = [d, -b, 0.0, -c, a, 0.0] + matrix = [x / scale for x in matrix] + + # Apply inverse of translation and of center translation: RSS^-1 * C^-1 * T^-1 + matrix[2] += matrix[0] * (-cx - tx) + matrix[1] * (-cy - ty) + matrix[5] += matrix[3] * (-cx - tx) + matrix[4] * (-cy - ty) + + # Apply center translation: C * RSS^-1 * C^-1 * T^-1 + matrix[2] += cx + matrix[5] += cy + + return matrix + + +def rotate( + img: Tensor, + angle: float, + interpolation: InterpolationMode = InterpolationMode.NEAREST, + expand: bool = False, + center: Optional[List[int]] = None, + fill: Optional[List[float]] = None, + resample: Optional[int] = None, +) -> Tensor: + """Rotate the image by angle. + If the image is oneflow Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. + + Args: + img (PIL Image or Tensor): image to be rotated. + angle (number): rotation angle value in degrees, counter-clockwise. + interpolation (InterpolationMode): Desired interpolation enum defined by + :class:`flow.utils.vision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``. + If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported. + For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable. 
+ expand (bool, optional): Optional expansion flag. + If true, expands the output image to make it large enough to hold the entire rotated image. + If false or omitted, make the output image the same size as the input image. + Note that the expand flag assumes rotation around the center and no translation. + center (sequence, optional): Optional center of rotation. Origin is the upper left corner. + Default is the center of the image. + fill (sequence or number, optional): Pixel fill value for the area outside the transformed + image. If given a number, the value is used for all bands respectively. + + + Returns: + PIL Image or Tensor: Rotated image. + + .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters + + """ + if resample is not None: + warnings.warn( + "Argument resample is deprecated and will be removed since v0.10.0. Please, use interpolation instead" + ) + interpolation = _interpolation_modes_from_int(resample) + + # Backward compatibility with integer value + if isinstance(interpolation, int): + warnings.warn( + "Argument interpolation should be of type InterpolationMode instead of int. " + "Please, use InterpolationMode enum." + ) + interpolation = _interpolation_modes_from_int(interpolation) + + if not isinstance(angle, (int, float)): + raise TypeError("Argument angle should be int or float") + + if center is not None and not isinstance(center, (list, tuple)): + raise TypeError("Argument center should be a sequence") + + if not isinstance(interpolation, InterpolationMode): + raise TypeError("Argument interpolation should be a InterpolationMode") + + if not isinstance(img, flow.Tensor): + pil_interpolation = pil_modes_mapping[interpolation] + return F_pil.rotate( + img, + angle=angle, + interpolation=pil_interpolation, + expand=expand, + center=center, + fill=fill, + ) + + center_f = [0.0, 0.0] + if center is not None: + img_size = _get_image_size(img) + # Center values should be in pixel coordinates but translated such that (0, 0) corresponds to image center. + center_f = [1.0 * (c - s * 0.5) for c, s in zip(center, img_size)] + + # due to current incoherence of rotation angle direction between affine and rotate implementations + # we need to set -angle. + matrix = _get_inverse_affine_matrix(center_f, -angle, [0.0, 0.0], 1.0, [0.0, 0.0]) + raise NotImplementedError("Tensor rotate is not implemented yet!") + return F_t.rotate( + img, matrix=matrix, interpolation=interpolation.value, expand=expand, fill=fill + ) diff --git a/python/oneflow/utils/vision/transforms/functional_pil.py b/python/oneflow/utils/vision/transforms/functional_pil.py index 19e12ea1ec3d661f75e2c50adb76579c22cd0b6c..532d1fe36b2522d27e9a4da3e4e02162375f0695 100644 --- a/python/oneflow/utils/vision/transforms/functional_pil.py +++ b/python/oneflow/utils/vision/transforms/functional_pil.py @@ -13,9 +13,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
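(Editorial note, not part of the patch.) As written above, rotate only completes for PIL inputs; the tensor branch builds the inverse affine matrix and then raises NotImplementedError before reaching F_t.rotate. A PIL-only sketch (input file name hypothetical):

from PIL import Image
import oneflow.utils.vision.transforms.functional as F
from oneflow.utils.vision.transforms.functional import InterpolationMode

img = Image.open("example.jpg")  # hypothetical input file
rotated = F.rotate(img, angle=30.0, interpolation=InterpolationMode.BILINEAR, expand=True)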
""" +import numbers from typing import Any, List, Sequence +import numpy as np +from PIL import Image, ImageOps -from PIL import Image +import oneflow as flow try: import accimage @@ -30,6 +33,126 @@ def _is_pil_image(img: Any) -> bool: return isinstance(img, Image.Image) +def _get_image_size(img: Any) -> List[int]: + if _is_pil_image(img): + return img.size + raise TypeError("Unexpected type {}".format(type(img))) + + +def _get_image_num_channels(img: Any) -> int: + if _is_pil_image(img): + return 1 if img.mode == "L" else 3 + + +def hflip(img): + if not _is_pil_image(img): + raise TypeError("img should be PIL Image. Got {}".format(type(img))) + + return img.transpose(Image.FLIP_LEFT_RIGHT) + + +def vflip(img): + if not _is_pil_image(img): + raise TypeError("img should be PIL Image. Got {}".format(type(img))) + + return img.transpose(Image.FLIP_TOP_BOTTOM) + + +def pad(img, padding, fill=0, padding_mode="constant"): + if not _is_pil_image(img): + raise TypeError("img should be PIL Image. Got {}".format(type(img))) + + if not isinstance(padding, (numbers.Number, tuple, list)): + raise TypeError("Got inappropriate padding arg") + if not isinstance(fill, (numbers.Number, str, tuple)): + raise TypeError("Got inappropriate fill arg") + if not isinstance(padding_mode, str): + raise TypeError("Got inappropriate padding_mode arg") + + if isinstance(padding, list): + padding = tuple(padding) + + if isinstance(padding, tuple) and len(padding) not in [1, 2, 4]: + raise ValueError( + "Padding must be an int or a 1, 2, or 4 element tuple, not a " + + "{} element tuple".format(len(padding)) + ) + + if isinstance(padding, tuple) and len(padding) == 1: + # Compatibility with `functional_tensor.pad` + padding = padding[0] + + if padding_mode not in ["constant", "edge", "reflect", "symmetric"]: + raise ValueError( + "Padding mode should be either constant, edge, reflect or symmetric" + ) + + if padding_mode == "constant": + opts = _parse_fill(fill, img, name="fill") + if img.mode == "P": + palette = img.getpalette() + image = ImageOps.expand(img, border=padding, **opts) + image.putpalette(palette) + return image + + return ImageOps.expand(img, border=padding, **opts) + else: + if isinstance(padding, int): + pad_left = pad_right = pad_top = pad_bottom = padding + if isinstance(padding, tuple) and len(padding) == 2: + pad_left = pad_right = padding[0] + pad_top = pad_bottom = padding[1] + if isinstance(padding, tuple) and len(padding) == 4: + pad_left = padding[0] + pad_top = padding[1] + pad_right = padding[2] + pad_bottom = padding[3] + + p = [pad_left, pad_top, pad_right, pad_bottom] + cropping = -np.minimum(p, 0) + + if cropping.any(): + crop_left, crop_top, crop_right, crop_bottom = cropping + img = img.crop( + (crop_left, crop_top, img.width - crop_right, img.height - crop_bottom) + ) + + pad_left, pad_top, pad_right, pad_bottom = np.maximum(p, 0) + + if img.mode == "P": + palette = img.getpalette() + img = np.asarray(img) + img = np.pad( + img, ((pad_top, pad_bottom), (pad_left, pad_right)), padding_mode + ) + img = Image.fromarray(img) + img.putpalette(palette) + return img + + img = np.asarray(img) + # RGB image + if len(img.shape) == 3: + img = np.pad( + img, + ((pad_top, pad_bottom), (pad_left, pad_right), (0, 0)), + padding_mode, + ) + # Grayscale image + if len(img.shape) == 2: + img = np.pad( + img, ((pad_top, pad_bottom), (pad_left, pad_right)), padding_mode + ) + + return Image.fromarray(img) + + +def crop(img: Image.Image, top: int, left: int, height: int, width: int) -> Image.Image: + if 
not _is_pil_image(img): + raise TypeError("img should be PIL Image. Got {}".format(type(img))) + + return img.crop((left, top, left + width, top + height)) + + def resize(img, size, interpolation=Image.BILINEAR): if not _is_pil_image(img): raise TypeError("img should be PIL Image. Got {}".format(type(img))) @@ -54,3 +177,31 @@ def resize(img, size, interpolation=Image.BILINEAR): return img.resize((ow, oh), interpolation) else: return img.resize(size[::-1], interpolation) + + +def _parse_fill(fill, img, name="fillcolor"): + # Process fill color for affine transforms + num_bands = len(img.getbands()) + if fill is None: + fill = 0 + if isinstance(fill, (int, float)) and num_bands > 1: + fill = tuple([fill] * num_bands) + if isinstance(fill, (list, tuple)): + if len(fill) != num_bands: + msg = ( + "The number of elements in 'fill' does not match the number of " + "bands of the image ({} != {})" + ) + raise ValueError(msg.format(len(fill), num_bands)) + + fill = tuple(fill) + + return {name: fill} + + +def rotate(img, angle, interpolation=0, expand=False, center=None, fill=None): + if not _is_pil_image(img): + raise TypeError("img should be PIL Image. Got {}".format(type(img))) + + opts = _parse_fill(fill, img) + return img.rotate(angle, interpolation, expand, center, **opts) diff --git a/python/oneflow/utils/vision/transforms/functional_tensor.py b/python/oneflow/utils/vision/transforms/functional_tensor.py index 3c53907c87138c901344ee82f7bbde8141cf6352..01f3fd3bc13550e421526a516eba3c0f291abab7 100644 --- a/python/oneflow/utils/vision/transforms/functional_tensor.py +++ b/python/oneflow/utils/vision/transforms/functional_tensor.py @@ -13,7 +13,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ -from typing import Tuple, List +import warnings +from typing import Optional, Tuple, List from oneflow.framework.tensor import Tensor import oneflow as flow @@ -43,6 +44,24 @@ def _get_image_num_channels(img: Tensor) -> int: raise TypeError("Input ndim should be 2 or more. Got {}".format(img.ndim)) +def _max_value(dtype: flow.dtype) -> float: + + a = flow.tensor(2, dtype=dtype) + # TODO:Tensor.is_signed() + # signed = 1 if flow.tensor(0, dtype=dtype).is_signed() else 0 + signed = 1 + bits = 1 + max_value = flow.tensor(-signed, dtype=flow.long) + while True: + next_value = a.pow(bits - signed).sub(1) + if next_value > max_value: + max_value = next_value + bits *= 2 + else: + break + return max_value.item() + + def _cast_squeeze_in( img: Tensor, req_dtypes: List[flow.dtype] ) -> Tuple[Tensor, bool, bool, flow.dtype]: @@ -76,6 +95,191 @@ def _cast_squeeze_out( return img +def convert_image_dtype( + image: flow.Tensor, dtype: flow.dtype = flow.float +) -> flow.Tensor: + if image.dtype == dtype: + return image + + if image.is_floating_point(): + # TODO:Tensor.is_floating_point() + if flow.tensor(0, dtype=dtype).is_floating_point(): + return image.to(dtype) + + # float to int + if (image.dtype == flow.float32 and dtype in (flow.int32, flow.int64)) or ( + image.dtype == flow.float64 and dtype == flow.int64 + ): + msg = f"The cast from {image.dtype} to {dtype} cannot be performed safely." + raise RuntimeError(msg) + + # https://github.com/pytorch/vision/pull/2078#issuecomment-612045321 + # For data in the range 0-1, (float * 255).to(uint) is only 255 + # when float is exactly 1.0. + # `max + 1 - epsilon` provides more evenly distributed mapping of + # ranges of floats to ints. 
+ eps = 1e-3 + max_val = _max_value(dtype) + result = image.mul(max_val + 1.0 - eps) + return result.to(dtype) + else: + input_max = _max_value(image.dtype) + + # int to float + if flow.tensor(0, dtype=dtype).is_floating_point(): + image = image.to(dtype) + return image / input_max + + output_max = _max_value(dtype) + + # int to int + if input_max > output_max: + factor = int((input_max + 1) // (output_max + 1)) + image = flow.div(image, factor, rounding_mode="floor") + return image.to(dtype) + else: + factor = int((output_max + 1) // (input_max + 1)) + image = image.to(dtype) + return image * factor + + +def vflip(img: Tensor) -> Tensor: + _assert_image_tensor(img) + + return img.flip(-2) + + +def hflip(img: Tensor) -> Tensor: + _assert_image_tensor(img) + + return img.flip(-1) + + +def crop(img: Tensor, top: int, left: int, height: int, width: int) -> Tensor: + _assert_image_tensor(img) + + w, h = _get_image_size(img) + right = left + width + bottom = top + height + + if left < 0 or top < 0 or right > w or bottom > h: + padding_ltrb = [ + max(-left, 0), + max(-top, 0), + max(right - w, 0), + max(bottom - h, 0), + ] + return pad( + img[..., max(top, 0) : bottom, max(left, 0) : right], padding_ltrb, fill=0 + ) + return img[..., top:bottom, left:right] + + +def _pad_symmetric(img: Tensor, padding: List[int]) -> Tensor: + # padding is left, right, top, bottom + + # crop if needed + if padding[0] < 0 or padding[1] < 0 or padding[2] < 0 or padding[3] < 0: + crop_left, crop_right, crop_top, crop_bottom = [-min(x, 0) for x in padding] + img = img[ + ..., + crop_top : img.shape[-2] - crop_bottom, + crop_left : img.shape[-1] - crop_right, + ] + padding = [max(x, 0) for x in padding] + + in_sizes = img.size() + + x_indices = [i for i in range(in_sizes[-1])] # [0, 1, 2, 3, ...] + left_indices = [i for i in range(padding[0] - 1, -1, -1)] # e.g. [3, 2, 1, 0] + right_indices = [-(i + 1) for i in range(padding[1])] # e.g. 
[-1, -2, -3] + x_indices = flow.tensor(left_indices + x_indices + right_indices, device=img.device) + + y_indices = [i for i in range(in_sizes[-2])] + top_indices = [i for i in range(padding[2] - 1, -1, -1)] + bottom_indices = [-(i + 1) for i in range(padding[3])] + y_indices = flow.tensor(top_indices + y_indices + bottom_indices, device=img.device) + + ndim = img.ndim + if ndim == 3: + return img[:, y_indices[:, None], x_indices[None, :]] + elif ndim == 4: + return img[:, :, y_indices[:, None], x_indices[None, :]] + else: + raise RuntimeError("Symmetric padding of N-D tensors are not supported yet") + + +def pad( + img: Tensor, padding: List[int], fill: int = 0, padding_mode: str = "constant" +) -> Tensor: + _assert_image_tensor(img) + + if not isinstance(padding, (int, tuple, list)): + raise TypeError("Got inappropriate padding arg") + if not isinstance(fill, (int, float)): + raise TypeError("Got inappropriate fill arg") + if not isinstance(padding_mode, str): + raise TypeError("Got inappropriate padding_mode arg") + + if isinstance(padding, tuple): + padding = list(padding) + + if isinstance(padding, list) and len(padding) not in [1, 2, 4]: + raise ValueError( + "Padding must be an int or a 1, 2, or 4 element tuple, not a " + + "{} element tuple".format(len(padding)) + ) + + if padding_mode not in ["constant", "edge", "reflect", "symmetric"]: + raise ValueError( + "Padding mode should be either constant, edge, reflect or symmetric" + ) + + if isinstance(padding, int): + pad_left = pad_right = pad_top = pad_bottom = padding + elif len(padding) == 1: + pad_left = pad_right = pad_top = pad_bottom = padding[0] + elif len(padding) == 2: + pad_left = pad_right = padding[0] + pad_top = pad_bottom = padding[1] + else: + pad_left = padding[0] + pad_top = padding[1] + pad_right = padding[2] + pad_bottom = padding[3] + + p = [pad_left, pad_right, pad_top, pad_bottom] + + if padding_mode == "edge": + # remap padding_mode str + padding_mode = "replicate" + elif padding_mode == "symmetric": + # route to another implementation + return _pad_symmetric(img, p) + + need_squeeze = False + if img.ndim < 4: + img = img.unsqueeze(dim=0) + need_squeeze = True + + out_dtype = img.dtype + need_cast = False + if (padding_mode != "constant") and img.dtype not in (flow.float32, flow.float64): + # Here we temporary cast input tensor to float + # until pytorch issue is resolved : + # https://github.com/pytorch/pytorch/issues/40763 + need_cast = True + img = img.to(flow.float32) + img = flow.F.pad(img, pad=p, mode=padding_mode, value=float(fill)) + + if need_squeeze: + img = img.squeeze(dim=0) + + if need_cast: + img = img.to(out_dtype) + return img + + def resize(img: Tensor, size: List[int], interpolation: str = "bilinear") -> Tensor: _assert_image_tensor(img) @@ -121,7 +325,7 @@ def resize(img: Tensor, size: List[int], interpolation: str = "bilinear") -> Ten # Define align_corners to avoid warnings align_corners = False if interpolation in ["bilinear", "bicubic"] else None - img = flow.F.interpolate( + img = flow.nn.functional.interpolate( img, size=[size_h, size_w], mode=interpolation, align_corners=align_corners ) @@ -133,3 +337,48 @@ def resize(img: Tensor, size: List[int], interpolation: str = "bilinear") -> Ten ) return img + + +def _assert_grid_transform_inputs( + img: Tensor, + matrix: Optional[List[float]], + interpolation: str, + fill: Optional[List[float]], + supported_interpolation_modes: List[str], + coeffs: Optional[List[float]] = None, +): + + if not (isinstance(img, flow.Tensor)): + raise 
TypeError("Input img should be Tensor") + + _assert_image_tensor(img) + + if matrix is not None and not isinstance(matrix, list): + raise TypeError("Argument matrix should be a list") + + if matrix is not None and len(matrix) != 6: + raise ValueError("Argument matrix should have 6 float values") + + if coeffs is not None and len(coeffs) != 8: + raise ValueError("Argument coeffs should have 8 float values") + + if fill is not None and not isinstance(fill, (int, float, tuple, list)): + warnings.warn("Argument fill should be either int, float, tuple or list") + + # Check fill + num_channels = _get_image_num_channels(img) + if isinstance(fill, (tuple, list)) and ( + len(fill) > 1 and len(fill) != num_channels + ): + msg = ( + "The number of elements in 'fill' cannot broadcast to match the number of " + "channels of the image ({} != {})" + ) + raise ValueError(msg.format(len(fill), num_channels)) + + if interpolation not in supported_interpolation_modes: + raise ValueError( + "Interpolation mode '{}' is unsupported with Tensor input".format( + interpolation + ) + ) diff --git a/python/oneflow/utils/vision/transforms/transforms.py b/python/oneflow/utils/vision/transforms/transforms.py index 58d5d1644cc9b7e2c3022adc234b4b98fe4881b9..b2def901ab7b99f550a5dc65ad9710c4b3239f68 100644 --- a/python/oneflow/utils/vision/transforms/transforms.py +++ b/python/oneflow/utils/vision/transforms/transforms.py @@ -14,52 +14,21 @@ See the License for the specific language governing permissions and limitations under the License. """ import warnings +import numbers from collections.abc import Sequence +from typing import Tuple, List + +import numpy as np +import random +import math from . import functional as F from .functional import InterpolationMode, _interpolation_modes_from_int - - -from oneflow.nn.module import Module +import oneflow as flow +from oneflow.nn import Module from oneflow.framework.tensor import Tensor -class Normalize(Module): - """Normalize a tensor image with mean and standard deviation. - This transform does not support PIL Image. - Given mean: ``(mean[1],...,mean[n])`` and std: ``(std[1],..,std[n])`` for ``n`` - channels, this transform will normalize each channel of the input - ``flow.*Tensor`` i.e., - ``output[channel] = (input[channel] - mean[channel]) / std[channel]`` - .. note:: - This transform acts out of place, i.e., it does not mutate the input tensor. - Args: - mean (sequence): Sequence of means for each channel. - std (sequence): Sequence of standard deviations for each channel. - inplace(bool,optional): Bool to make this operation in-place. - """ - - def __init__(self, mean, std, inplace=False): - super().__init__() - self.mean = mean - self.std = std - self.inplace = inplace - - def forward(self, tensor: Tensor) -> Tensor: - """ - Args: - tensor (Tensor): Tensor image to be normalized. - Returns: - Tensor: Normalized Tensor image. - """ - return F.normalize(tensor, self.mean, self.std, self.inplace) - - def __repr__(self): - return self.__class__.__name__ + "(mean={0}, std={1})".format( - self.mean, self.std - ) - - class Compose: """Composes several transforms together. Please, see the note below. @@ -98,16 +67,18 @@ class Compose: class ToTensor: - """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor. + r"""Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor. 
+ Converts a PIL Image or numpy.ndarray (H x W x C) in the range [0, 255] to a flow.FloatTensor of shape (C x H x W) in the range [0.0, 1.0] if the PIL Image belongs to one of the modes (L, LA, P, I, F, RGB, YCbCr, RGBA, CMYK, 1) or if the numpy.ndarray has dtype = np.uint8 In the other cases, tensors are returned without scaling. + .. note:: + Because the input image is scaled to [0.0, 1.0], this transformation should not be used when - transforming target image masks. See the `references`_ for implementing the transforms for image masks. - .. _references: https://github.com/pytorch/vision/tree/master/references/segmentation + transforming target image masks. """ def __call__(self, pic): @@ -123,8 +94,133 @@ class ToTensor: return self.__class__.__name__ + "()" +class PILToTensor: + """Convert a ``PIL Image`` to a tensor of the same type + + Converts a PIL Image (H x W x C) to a Tensor of shape (C x H x W). + """ + + def __call__(self, pic): + """ + Args: + pic (PIL Image): Image to be converted to tensor. + + Returns: + Tensor: Converted image. + """ + return F.pil_to_tensor(pic) + + def __repr__(self): + return self.__class__.__name__ + "()" + + +class ConvertImageDtype(Module): + """Convert a tensor image to the given ``dtype`` and scale the values accordingly + This function does not support PIL Image. + + Args: + dtype (flow.dtype): Desired data type of the output + + .. note:: + + When converting from a smaller to a larger integer ``dtype`` the maximum values are **not** mapped exactly. + If converted back and forth, this mismatch has no effect. + + Raises: + RuntimeError: When trying to cast :class:`flow.float32` to :class:`flow.int32` or :class:`flow.int64` as + well as for trying to cast :class:`flow.float64` to :class:`flow.int64`. These conversions might lead to + overflow errors since the floating point ``dtype`` cannot store consecutive integers over the whole range + of the integer ``dtype``. + """ + + def __init__(self, dtype: flow.dtype) -> None: + super().__init__() + self.dtype = dtype + + def forward(self, image): + return F.convert_image_dtype(image, self.dtype) + + +class ToPILImage: + """Convert a tensor or an ndarray to PIL Image. + + Converts a flow.Tensor of shape C x H x W or a numpy ndarray of shape + H x W x C to a PIL Image while preserving the value range. + + Args: + mode (`PIL.Image mode`_): color space and pixel depth of input data (optional). + If ``mode`` is ``None`` (default) there are some assumptions made about the input data: + - If the input has 4 channels, the ``mode`` is assumed to be ``RGBA``. + - If the input has 3 channels, the ``mode`` is assumed to be ``RGB``. + - If the input has 2 channels, the ``mode`` is assumed to be ``LA``. + - If the input has 1 channel, the ``mode`` is determined by the data type (i.e ``int``, ``float``, + ``short``). + + .. _PIL.Image mode: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#concept-modes + """ + + def __init__(self, mode=None): + self.mode = mode + + def __call__(self, pic): + """ + Args: + pic (Tensor or numpy.ndarray): Image to be converted to PIL Image. + + Returns: + PIL Image: Image converted to PIL Image. + + """ + return F.to_pil_image(pic, self.mode) + + def __repr__(self): + format_string = self.__class__.__name__ + "(" + if self.mode is not None: + format_string += "mode={0}".format(self.mode) + format_string += ")" + return format_string + + +class Normalize(Module): + r"""Normalize a tensor image with mean and standard deviation. + This transform does not support PIL Image. 
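(Editorial sketch, not part of the patch.) The practical difference between ToTensor and PILToTensor above: the former scales uint8 input into a float tensor in [0.0, 1.0], the latter keeps the original integer values and dtype:

import numpy as np
from PIL import Image
import oneflow.utils.vision.transforms as transforms

pic = Image.fromarray(np.full((4, 4, 3), 255, dtype=np.uint8))
scaled = transforms.ToTensor()(pic)   # float tensor, values in [0.0, 1.0]
raw = transforms.PILToTensor()(pic)   # same data as a uint8 tensor, values 0..255
print(scaled.dtype, raw.dtype)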
+ Given mean: ``(mean[1],...,mean[n])`` and std: ``(std[1],..,std[n])`` for ``n`` + channels, this transform will normalize each channel of the input + ``flow.*Tensor`` i.e., + ``output[channel] = (input[channel] - mean[channel]) / std[channel]`` + + .. note:: + This transform acts out of place, i.e., it does not mutate the input tensor. + + Args: + mean (sequence): Sequence of means for each channel. + std (sequence): Sequence of standard deviations for each channel. + inplace(bool,optional): Bool to make this operation in-place. + """ + + def __init__(self, mean, std, inplace=False): + super().__init__() + self.mean = mean + self.std = std + self.inplace = inplace + + def forward(self, tensor: Tensor) -> Tensor: + """ + Args: + tensor (Tensor): Tensor image to be normalized. + Returns: + Tensor: Normalized Tensor image. + """ + return F.normalize(tensor, self.mean, self.std, self.inplace) + + def __repr__(self): + return self.__class__.__name__ + "(mean={0}, std={1})".format( + self.mean, self.std + ) + + class Resize(Module): - """Resize the input image to the given size. + r"""Resize the input image to the given size. If the image is oneflow Tensor, it is expected to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions @@ -175,3 +271,826 @@ class Resize(Module): return self.__class__.__name__ + "(size={0}, interpolation={1})".format( self.size, interpolate_str ) + + +class Scale(Resize): + r""" + Note: This transform is deprecated in favor of Resize. + """ + + def __init__(self, *args, **kwargs): + warnings.warn( + "The use of the transforms.Scale transform is deprecated, " + + "please use transforms.Resize instead." + ) + super(Scale, self).__init__(*args, **kwargs) + + +class CenterCrop(Module): + r"""Crops the given image at the center. + If the image is oneflow Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. + If image size is smaller than output size along any edge, image is padded with 0 and then center cropped. + + Args: + size (sequence or int): Desired output size of the crop. If size is an + int instead of sequence like (h, w), a square crop (size, size) is + made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]). + """ + + def __init__(self, size): + super().__init__() + self.size = _setup_size( + size, error_msg="Please provide only two dimensions (h, w) for size." + ) + + def forward(self, img): + """ + Args: + img (PIL Image or Tensor): Image to be cropped. + + Returns: + PIL Image or Tensor: Cropped image. + """ + return F.center_crop(img, self.size) + + def __repr__(self): + return self.__class__.__name__ + "(size={0})".format(self.size) + + +class Pad(Module): + r"""Pad the given image on all sides with the given "pad" value. + If the image is oneflow Tensor, it is expected + to have [..., H, W] shape, where ... means at most 2 leading dimensions for mode reflect and symmetric, + at most 3 leading dimensions for mode edge, + and an arbitrary number of leading dimensions for mode constant + + Args: + padding (int or sequence): Padding on each border. If a single int is provided this + is used to pad all borders. If sequence of length 2 is provided this is the padding + on left/right and top/bottom respectively. If a sequence of length 4 is provided + this is the padding for the left, top, right and bottom borders respectively. + + fill (number or str or tuple): Pixel fill value for constant fill. Default is 0. 
If a tuple of + length 3, it is used to fill R, G, B channels respectively. + This value is only used when the padding_mode is constant. + Only number is supported for oneflow Tensor. + Only int or str or tuple value is supported for PIL Image. + padding_mode (str): Type of padding. Should be: constant, edge, reflect or symmetric. + Default is constant. + + - constant: pads with a constant value, this value is specified with fill + + - edge: pads with the last value at the edge of the image. + If input a 5D oneflow Tensor, the last 3 dimensions will be padded instead of the last 2 + + - reflect: pads with reflection of image without repeating the last value on the edge. + For example, padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode + will result in [3, 2, 1, 2, 3, 4, 3, 2] + + - symmetric: pads with reflection of image repeating the last value on the edge. + For example, padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode + will result in [2, 1, 1, 2, 3, 4, 4, 3] + """ + + def __init__(self, padding, fill=0, padding_mode="constant"): + super().__init__() + if not isinstance(padding, (numbers.Number, tuple, list)): + raise TypeError("Got inappropriate padding arg") + + if not isinstance(fill, (numbers.Number, str, tuple)): + raise TypeError("Got inappropriate fill arg") + + if padding_mode not in ["constant", "edge", "reflect", "symmetric"]: + raise ValueError( + "Padding mode should be either constant, edge, reflect or symmetric" + ) + + if isinstance(padding, Sequence) and len(padding) not in [1, 2, 4]: + raise ValueError( + "Padding must be an int or a 1, 2, or 4 element tuple, not a " + + "{} element tuple".format(len(padding)) + ) + + self.padding = padding + self.fill = fill + self.padding_mode = padding_mode + + def forward(self, img): + """ + Args: + img (PIL Image or Tensor): Image to be padded. + + Returns: + PIL Image or Tensor: Padded image. + """ + return F.pad(img, self.padding, self.fill, self.padding_mode) + + def __repr__(self): + return ( + self.__class__.__name__ + + "(padding={0}, fill={1}, padding_mode={2})".format( + self.padding, self.fill, self.padding_mode + ) + ) + + +class Lambda: + r"""Apply a user-defined lambda as a transform. + + Args: + lambd (function): Lambda/function to be used for transform. 
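(Editorial sketch, not part of the patch.) Pad above composes naturally with RandomCrop and RandomHorizontalFlip, which are defined further down in this hunk; a common CIFAR-style augmentation pipeline:

import oneflow.utils.vision.transforms as transforms

train_transform = transforms.Compose([
    transforms.Pad(4, padding_mode="reflect"),
    transforms.RandomCrop(32),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
])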
+ """ + + def __init__(self, lambd): + if not callable(lambd): + raise TypeError( + "Argument lambd should be callable, got {}".format( + repr(type(lambd).__name__) + ) + ) + self.lambd = lambd + + def __call__(self, img): + return self.lambd(img) + + def __repr__(self): + return self.__class__.__name__ + "()" + + +def _setup_size(size, error_msg): + if isinstance(size, numbers.Number): + return int(size), int(size) + + if isinstance(size, Sequence) and len(size) == 1: + return size[0], size[0] + + if len(size) != 2: + raise ValueError(error_msg) + + return size + + +class RandomTransforms: + r"""Base class for a list of transformations with randomness + + Args: + transforms (sequence): list of transformations + """ + + def __init__(self, transforms): + if not isinstance(transforms, Sequence): + raise TypeError("Argument transforms should be a sequence") + self.transforms = transforms + + def __call__(self, *args, **kwargs): + raise NotImplementedError() + + def __repr__(self): + format_string = self.__class__.__name__ + "(" + for t in self.transforms: + format_string += "\n" + format_string += " {0}".format(t) + format_string += "\n)" + return format_string + + +class RandomApply(flow.nn.Module): + """Apply randomly a list of transformations with a given probability. + + .. note:: + In order to script the transformation, please use ``flow.nn.ModuleList`` as input instead of list/tuple of + transforms as shown below: + + >>> transforms = transforms.RandomApply(flow.nn.ModuleList([ + >>> transforms.ColorJitter(), + >>> ]), p=0.3) + + + Make sure to use only scriptable transformations, i.e. that work with ``flow.Tensor``, does not require + `lambda` functions or ``PIL.Image``. + + Args: + transforms (sequence or flow.nn.Module): list of transformations + p (float): probability + """ + + def __init__(self, transforms, p=0.5): + super().__init__() + self.transforms = transforms + self.p = p + + def forward(self, img): + # TODO:replace with flow.rand(1) + if self.p < np.random.rand(1): + return img + for t in self.transforms: + img = t(img) + return img + + def __repr__(self): + format_string = self.__class__.__name__ + "(" + format_string += "\n p={}".format(self.p) + for t in self.transforms: + format_string += "\n" + format_string += " {0}".format(t) + format_string += "\n)" + return format_string + + +class RandomOrder(RandomTransforms): + """Apply a list of transformations in a random order. + """ + + def __call__(self, img): + order = list(range(len(self.transforms))) + random.shuffle(order) + for i in order: + img = self.transforms[i](img) + return img + + +class RandomChoice(RandomTransforms): + """Apply single transformation randomly picked from a list. + """ + + def __call__(self, img): + t = random.choice(self.transforms) + return t(img) + + +class RandomCrop(flow.nn.Module): + """Crop the given image at a random location. + If the image is oneflow Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions, + but if non-constant padding is used, the input is expected to have at most 2 leading dimensions + + Args: + size (sequence or int): Desired output size of the crop. If size is an + int instead of sequence like (h, w), a square crop (size, size) is + made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]). + padding (int or sequence, optional): Optional padding on each border + of the image. Default is None. If a single int is provided this + is used to pad all borders. 
If a sequence of length 2 is provided this is the padding + on left/right and top/bottom respectively. If a sequence of length 4 is provided + this is the padding for the left, top, right and bottom borders respectively. + + pad_if_needed (boolean): It will pad the image if smaller than the + desired size to avoid raising an exception. Since cropping is done + after padding, the padding seems to be done at a random offset. + fill (number or str or tuple): Pixel fill value for constant fill. Default is 0. If a tuple of + length 3, it is used to fill R, G, B channels respectively. + This value is only used when the padding_mode is constant. + Only number is supported for flow Tensor. + Only int or str or tuple value is supported for PIL Image. + padding_mode (str): Type of padding. Should be: constant, edge, reflect or symmetric. + Default is constant. + + - constant: pads with a constant value, this value is specified with fill + + - edge: pads with the last value at the edge of the image. + If input a 5D flow Tensor, the last 3 dimensions will be padded instead of the last 2 + + - reflect: pads with reflection of image without repeating the last value on the edge. + For example, padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode + will result in [3, 2, 1, 2, 3, 4, 3, 2] + + - symmetric: pads with reflection of image repeating the last value on the edge. + For example, padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode + will result in [2, 1, 1, 2, 3, 4, 4, 3] + """ + + @staticmethod + def get_params( + img: Tensor, output_size: Tuple[int, int] + ) -> Tuple[int, int, int, int]: + """Get parameters for ``crop`` for a random crop. + + Args: + img (PIL Image or Tensor): Image to be cropped. + output_size (tuple): Expected output size of the crop. + + Returns: + tuple: params (i, j, h, w) to be passed to ``crop`` for random crop. + """ + w, h = F._get_image_size(img) + th, tw = output_size + + if h + 1 < th or w + 1 < tw: + raise ValueError( + "Required crop size {} is larger than input image size {}".format( + (th, tw), (h, w) + ) + ) + + if w == tw and h == th: + return 0, 0, h, w + + # TODO: replace with flow.randint + # i = flow.randint(0, h - th + 1, size=(1, )).item() + # j = flow.randint(0, w - tw + 1, size=(1, )).item() + i = np.random.randint(low=0, high=h - th + 1, size=(1,), dtype=np.int32) + j = np.random.randint(low=0, high=w - tw + 1, size=(1,), dtype=np.int32) + return i, j, th, tw + + def __init__( + self, size, padding=None, pad_if_needed=False, fill=0, padding_mode="constant" + ): + super().__init__() + + self.size = tuple( + _setup_size( + size, error_msg="Please provide only two dimensions (h, w) for size." + ) + ) + + self.padding = padding + self.pad_if_needed = pad_if_needed + self.fill = fill + self.padding_mode = padding_mode + + def forward(self, img): + """ + Args: + img (PIL Image or Tensor): Image to be cropped. + + Returns: + PIL Image or Tensor: Cropped image.
+ """ + if self.padding is not None: + img = F.pad(img, self.padding, self.fill, self.padding_mode) + + width, height = F._get_image_size(img) + # pad the width if needed + if self.pad_if_needed and width < self.size[1]: + padding = [self.size[1] - width, 0] + img = F.pad(img, padding, self.fill, self.padding_mode) + # pad the height if needed + if self.pad_if_needed and height < self.size[0]: + padding = [0, self.size[0] - height] + img = F.pad(img, padding, self.fill, self.padding_mode) + + i, j, h, w = self.get_params(img, self.size) + + return F.crop(img, i, j, h, w) + + def __repr__(self): + return self.__class__.__name__ + "(size={0}, padding={1})".format( + self.size, self.padding + ) + + +class RandomHorizontalFlip(flow.nn.Module): + """Horizontally flip the given image randomly with a given probability. + If the image is flow Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading + dimensions + + Args: + p (float): probability of the image being flipped. Default value is 0.5 + """ + + def __init__(self, p=0.5): + super().__init__() + self.p = p + + def forward(self, img): + """ + Args: + img (PIL Image or Tensor): Image to be flipped. + + Returns: + PIL Image or Tensor: Randomly flipped image. + """ + # TODO: replace with flow.rand(1): + if np.random.rand(1) < self.p: + return F.hflip(img) + return img + + def __repr__(self): + return self.__class__.__name__ + "(p={})".format(self.p) + + +class RandomVerticalFlip(flow.nn.Module): + """Vertically flip the given image randomly with a given probability. + If the image is flow Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading + dimensions + + Args: + p (float): probability of the image being flipped. Default value is 0.5 + """ + + def __init__(self, p=0.5): + super().__init__() + self.p = p + + def forward(self, img): + """ + Args: + img (PIL Image or Tensor): Image to be flipped. + + Returns: + PIL Image or Tensor: Randomly flipped image. + """ + # TODO:replace with flow.rand(1) + if np.random.rand(1) < self.p: + return F.vflip(img) + return img + + def __repr__(self): + return self.__class__.__name__ + "(p={})".format(self.p) + + +class RandomResizedCrop(flow.nn.Module): + """Crop a random portion of image and resize it to a given size. + + If the image is flow Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions + + A crop of the original image is made: the crop has a random area (H * W) + and a random aspect ratio. This crop is finally resized to the given + size. This is popularly used to train the Inception networks. + + Args: + size (int or sequence): expected output size of the crop, for each edge. If size is an + int instead of sequence like (h, w), a square output size ``(size, size)`` is + made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]). + scale (tuple of float): Specifies the lower and upper bounds for the random area of the crop, + before resizing. The scale is defined with respect to the area of the original image. + ratio (tuple of float): lower and upper bounds for the random aspect ratio of the crop, before + resizing. + interpolation (InterpolationMode): Desired interpolation enum defined by + :class:`flow.utils.vision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``. + If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` and + ``InterpolationMode.BICUBIC`` are supported. 
+ For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable. + + """ + + def __init__( + self, + size, + scale=(0.08, 1.0), + ratio=(3.0 / 4.0, 4.0 / 3.0), + interpolation=InterpolationMode.BILINEAR, + ): + super().__init__() + self.size = _setup_size( + size, error_msg="Please provide only two dimensions (h, w) for size." + ) + + if not isinstance(scale, Sequence): + raise TypeError("Scale should be a sequence") + if not isinstance(ratio, Sequence): + raise TypeError("Ratio should be a sequence") + if (scale[0] > scale[1]) or (ratio[0] > ratio[1]): + warnings.warn("Scale and ratio should be of kind (min, max)") + + # Backward compatibility with integer value + if isinstance(interpolation, int): + warnings.warn( + "Argument interpolation should be of type InterpolationMode instead of int. " + "Please, use InterpolationMode enum." + ) + interpolation = _interpolation_modes_from_int(interpolation) + + self.interpolation = interpolation + self.scale = scale + self.ratio = ratio + + @staticmethod + def get_params( + img: Tensor, scale: List[float], ratio: List[float] + ) -> Tuple[int, int, int, int]: + """Get parameters for ``crop`` for a random sized crop. + + Args: + img (PIL Image or Tensor): Input image. + scale (list): range of scale of the origin size cropped + ratio (list): range of aspect ratio of the origin aspect ratio cropped + + Returns: + tuple: params (i, j, h, w) to be passed to ``crop`` for a random + sized crop. + """ + width, height = F._get_image_size(img) + area = height * width + + log_ratio = flow.log(flow.tensor(ratio)) + for _ in range(10): + target_area = area * flow.empty(1).uniform_(scale[0], scale[1]).item() + aspect_ratio = flow.exp( + flow.empty(1).uniform_(log_ratio[0], log_ratio[1]) + ).item() + + w = int(round(math.sqrt(target_area * aspect_ratio))) + h = int(round(math.sqrt(target_area / aspect_ratio))) + + if 0 < w <= width and 0 < h <= height: + # TODO:replace with flow.randint + # i = flow.randint(0, height - h + 1, size=(1,)).item() + # j = flow.randint(0, width - w + 1, size=(1,)).item() + i = np.random.randint( + low=0, high=height - h + 1, size=(1,), dtype=np.int32 + ) + j = np.random.randint( + low=0, high=width - w + 1, size=(1,), dtype=np.int32 + ) + return i, j, h, w + + # Fallback to central crop + in_ratio = float(width) / float(height) + if in_ratio < min(ratio): + w = width + h = int(round(w / min(ratio))) + elif in_ratio > max(ratio): + h = height + w = int(round(h * max(ratio))) + else: # whole image + w = width + h = height + i = (height - h) // 2 + j = (width - w) // 2 + return i, j, h, w + + def forward(self, img): + """ + Args: + img (PIL Image or Tensor): Image to be cropped and resized. + + Returns: + PIL Image or Tensor: Randomly cropped and resized image. + """ + i, j, h, w = self.get_params(img, self.scale, self.ratio) + return F.resized_crop(img, i, j, h, w, self.size, self.interpolation) + + def __repr__(self): + interpolate_str = self.interpolation.value + format_string = self.__class__.__name__ + "(size={0}".format(self.size) + format_string += ", scale={0}".format(tuple(round(s, 4) for s in self.scale)) + format_string += ", ratio={0}".format(tuple(round(r, 4) for r in self.ratio)) + format_string += ", interpolation={0})".format(interpolate_str) + return format_string + + +class RandomSizedCrop(RandomResizedCrop): + """ + Note: This transform is deprecated in favor of RandomResizedCrop. 
+ """ + + def __init__(self, *args, **kwargs): + warnings.warn( + "The use of the transforms.RandomSizedCrop transform is deprecated, " + + "please use transforms.RandomResizedCrop instead." + ) + super(RandomSizedCrop, self).__init__(*args, **kwargs) + + +class FiveCrop(flow.nn.Module): + """Crop the given image into four corners and the central crop. + If the image is flow Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading + dimensions + + .. Note:: + This transform returns a tuple of images and there may be a mismatch in the number of + inputs and targets your Dataset returns. See below for an example of how to deal with + this. + + Args: + size (sequence or int): Desired output size of the crop. If size is an ``int`` + instead of sequence like (h, w), a square crop of size (size, size) is made. + If provided a sequence of length 1, it will be interpreted as (size[0], size[0]). + + Example: + >>> transform = Compose([ + >>> FiveCrop(size), # this is a list of PIL Images + >>> Lambda(lambda crops: flow.stack([ToTensor()(crop) for crop in crops])) # returns a 4D tensor + >>> ]) + >>> #In your test loop you can do the following: + >>> input, target = batch # input is a 5d tensor, target is 2d + >>> bs, ncrops, c, h, w = input.size() + >>> result = model(input.view(-1, c, h, w)) # fuse batch size and ncrops + >>> result_avg = result.view(bs, ncrops, -1).mean(1) # avg over crops + """ + + def __init__(self, size): + super().__init__() + self.size = _setup_size( + size, error_msg="Please provide only two dimensions (h, w) for size." + ) + + def forward(self, img): + """ + Args: + img (PIL Image or Tensor): Image to be cropped. + + Returns: + tuple of 5 images. Image can be PIL Image or Tensor + """ + return F.five_crop(img, self.size) + + def __repr__(self): + return self.__class__.__name__ + "(size={0})".format(self.size) + + +class TenCrop(flow.nn.Module): + """Crop the given image into four corners and the central crop plus the flipped version of + these (horizontal flipping is used by default). + If the image is flow Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading + dimensions + + .. Note:: + This transform returns a tuple of images and there may be a mismatch in the number of + inputs and targets your Dataset returns. See below for an example of how to deal with + this. + + Args: + size (sequence or int): Desired output size of the crop. If size is an + int instead of sequence like (h, w), a square crop (size, size) is + made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]). + vertical_flip (bool): Use vertical flipping instead of horizontal + + Example: + >>> transform = Compose([ + >>> TenCrop(size), # this is a list of PIL Images + >>> Lambda(lambda crops: flow.stack([ToTensor()(crop) for crop in crops])) # returns a 4D tensor + >>> ]) + >>> #In your test loop you can do the following: + >>> input, target = batch # input is a 5d tensor, target is 2d + >>> bs, ncrops, c, h, w = input.size() + >>> result = model(input.view(-1, c, h, w)) # fuse batch size and ncrops + >>> result_avg = result.view(bs, ncrops, -1).mean(1) # avg over crops + """ + + def __init__(self, size, vertical_flip=False): + super().__init__() + self.size = _setup_size( + size, error_msg="Please provide only two dimensions (h, w) for size." + ) + self.vertical_flip = vertical_flip + + def forward(self, img): + """ + Args: + img (PIL Image or Tensor): Image to be cropped. 
+ + Returns: + tuple of 10 images. Image can be PIL Image or Tensor + """ + return F.ten_crop(img, self.size, self.vertical_flip) + + def __repr__(self): + return self.__class__.__name__ + "(size={0}, vertical_flip={1})".format( + self.size, self.vertical_flip + ) + + +class RandomRotation(flow.nn.Module): + """Rotate the image by angle. + If the image is flow Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. + + Args: + degrees (sequence or number): Range of degrees to select from. + If degrees is a number instead of sequence like (min, max), the range of degrees + will be (-degrees, +degrees). + interpolation (InterpolationMode): Desired interpolation enum defined by + :class:`flow.utils.vision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``. + If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported. + For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable. + expand (bool, optional): Optional expansion flag. + If true, expands the output to make it large enough to hold the entire rotated image. + If false or omitted, make the output image the same size as the input image. + Note that the expand flag assumes rotation around the center and no translation. + center (sequence, optional): Optional center of rotation, (x, y). Origin is the upper left corner. + Default is the center of the image. + fill (sequence or number): Pixel fill value for the area outside the rotated + image. Default is ``0``. If given a number, the value is used for all bands respectively. + resample (int, optional): deprecated argument and will be removed since v0.10.0. + Please use the ``interpolation`` parameter instead. + + .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters + + """ + + def __init__( + self, + degrees, + interpolation=InterpolationMode.NEAREST, + expand=False, + center=None, + fill=0, + resample=None, + ): + super().__init__() + if resample is not None: + warnings.warn( + "Argument resample is deprecated and will be removed since v0.10.0. Please, use interpolation instead" + ) + interpolation = _interpolation_modes_from_int(resample) + + # Backward compatibility with integer value + if isinstance(interpolation, int): + warnings.warn( + "Argument interpolation should be of type InterpolationMode instead of int. " + "Please, use InterpolationMode enum." + ) + interpolation = _interpolation_modes_from_int(interpolation) + + self.degrees = _setup_angle(degrees, name="degrees", req_sizes=(2,)) + + if center is not None: + _check_sequence_input(center, "center", req_sizes=(2,)) + + self.center = center + + self.resample = self.interpolation = interpolation + self.expand = expand + + if fill is None: + fill = 0 + elif not isinstance(fill, (Sequence, numbers.Number)): + raise TypeError("Fill should be either a sequence or a number.") + + self.fill = fill + + @staticmethod + def get_params(degrees: List[float]) -> float: + """Get parameters for ``rotate`` for a random rotation. + + Returns: + float: angle parameter to be passed to ``rotate`` for random rotation. + """ + angle = float( + flow.empty(1).uniform_(float(degrees[0]), float(degrees[1])).item() + ) + return angle + + def forward(self, img): + """ + Args: + img (PIL Image or Tensor): Image to be rotated. + + Returns: + PIL Image or Tensor: Rotated image. 
+ """ + fill = self.fill + if isinstance(img, Tensor): + if isinstance(fill, (int, float)): + fill = [float(fill)] * F._get_image_num_channels(img) + else: + fill = [float(f) for f in fill] + angle = self.get_params(self.degrees) + + return F.rotate(img, angle, self.resample, self.expand, self.center, fill) + + def __repr__(self): + interpolate_str = self.interpolation.value + format_string = self.__class__.__name__ + "(degrees={0}".format(self.degrees) + format_string += ", interpolation={0}".format(interpolate_str) + format_string += ", expand={0}".format(self.expand) + if self.center is not None: + format_string += ", center={0}".format(self.center) + if self.fill is not None: + format_string += ", fill={0}".format(self.fill) + format_string += ")" + return format_string + + +def _setup_size(size, error_msg): + if isinstance(size, numbers.Number): + return int(size), int(size) + + if isinstance(size, Sequence) and len(size) == 1: + return size[0], size[0] + + if len(size) != 2: + raise ValueError(error_msg) + + return size + + +def _check_sequence_input(x, name, req_sizes): + msg = ( + req_sizes[0] if len(req_sizes) < 2 else " or ".join([str(s) for s in req_sizes]) + ) + if not isinstance(x, Sequence): + raise TypeError("{} should be a sequence of length {}.".format(name, msg)) + if len(x) not in req_sizes: + raise ValueError("{} should be sequence of length {}.".format(name, msg)) + + +def _setup_angle(x, name, req_sizes=(2,)): + if isinstance(x, numbers.Number): + if x < 0: + raise ValueError( + "If {} is a single number, it must be positive.".format(name) + ) + x = [-x, x] + else: + _check_sequence_input(x, name, req_sizes) + + return [float(d) for d in x]