[Feature] Add module test process of mobilenetv3 (#5442)

* add_readme * update * update_dir * add_falsely_delete_README * update

[Feature] Add module test process of mobilenetv3 (#5442)
* add_readme * update * update_dir * add_falsely_delete_README * update
c7ba8c44 · shiyutang · GitHub · fbccf996 · c7ba8c44 · c7ba8c44
72 changed file
--- a/tutorials/mobilenetv3_prod/Step1-5/01_test_forward.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/01_test_forward.py
+import torch
+import paddle
+import numpy as np
+from reprod_log import ReprodDiffHelper
+from reprod_log import ReprodLogger
+
+from mobilenetv3_paddle.paddlevision.models import mobilenet_v3_small as mv3_small_paddle
+from mobilenetv3_ref.torchvision.models import mobilenet_v3_small as mv3_small_torch
+
+
+def test_forward():
+    # load paddle model
+    paddle_model = mv3_small_paddle()
+    paddle_model.eval()
+    paddle_state_dict = paddle.load("./data/mv3_small_paddle.pdparams")
+    paddle_model.set_dict(paddle_state_dict)
+
+    # load torch model
+    torch_model = mv3_small_torch()
+    torch_model.eval()
+    torch_state_dict = torch.load("./data/mobilenet_v3_small-047dcff4.pth")
+    torch_model.load_state_dict(torch_state_dict)
+
+    # load data
+    inputs = np.load("./data/fake_data.npy")
+
+    # save the paddle output
+    reprod_logger = ReprodLogger()
+    paddle_out = paddle_model(paddle.to_tensor(inputs, dtype="float32"))
+    reprod_logger.add("logits", paddle_out.cpu().detach().numpy())
+    reprod_logger.save("./result/forward_paddle.npy")
+
+    # save the torch output
+    torch_out = torch_model(torch.tensor(inputs, dtype=torch.float32))
+    reprod_logger.add("logits", torch_out.cpu().detach().numpy())
+    reprod_logger.save("./result/forward_ref.npy")
+
+
+if __name__ == "__main__":
+    test_forward()
+
+    # load data
+    diff_helper = ReprodDiffHelper()
+    torch_info = diff_helper.load_info("./result/forward_ref.npy")
+    paddle_info = diff_helper.load_info("./result/forward_paddle.npy")
+
+    # compare result and produce log
+    diff_helper.compare_info(torch_info, paddle_info)
+    diff_helper.report(path="./result/log/forward_diff.log")
--- a/tutorials/mobilenetv3_prod/Step1-5/02_test_data.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/02_test_data.py
+import os
+import sys
+import torch
+import paddle
+import numpy as np
+from PIL import Image
+from reprod_log import ReprodLogger, ReprodDiffHelper
+import mobilenetv3_paddle.presets as presets_paddle
+import mobilenetv3_paddle.paddlevision as paddlevision
+import mobilenetv3_ref.presets as presets_torch
+import mobilenetv3_ref.torchvision as torchvision
+
+
+def build_paddle_data_pipeline():
+    # dataset & data_loader
+    dataset_test = paddlevision.datasets.ImageFolder(
+        "./lite_data/val/",
+        presets_paddle.ClassificationPresetEval(
+            crop_size=224, resize_size=256))
+
+    test_sampler = paddle.io.SequenceSampler(dataset_test)
+
+    test_batch_sampler = paddle.io.BatchSampler(
+        sampler=test_sampler, batch_size=4)
+
+    data_loader_test = paddle.io.DataLoader(
+        dataset_test, batch_sampler=test_batch_sampler, num_workers=0)
+
+    return dataset_test, data_loader_test
+
+
+def build_torch_data_pipeline():
+    dataset_test = torchvision.datasets.ImageFolder(
+        "./lite_data/val/",
+        presets_torch.ClassificationPresetEval(
+            crop_size=224, resize_size=256),
+        is_valid_file=None)
+
+    test_sampler = torch.utils.data.SequentialSampler(dataset_test)
+
+    data_loader_test = torch.utils.data.DataLoader(
+        dataset_test,
+        batch_size=4,
+        sampler=test_sampler,
+        num_workers=0,
+        pin_memory=True)
+    return dataset_test, data_loader_test
+
+
+def test_data_pipeline():
+    paddle_dataset, paddle_dataloader = build_paddle_data_pipeline()
+    torch_dataset, torch_dataloader = build_torch_data_pipeline()
+
+    logger_paddle_data = ReprodLogger()
+    logger_torch_data = ReprodLogger()
+
+    logger_paddle_data.add("length", np.array(len(paddle_dataset)))
+    logger_torch_data.add("length", np.array(len(torch_dataset)))
+
+    for idx, (paddle_batch, torch_batch
+              ) in enumerate(zip(paddle_dataloader, torch_dataloader)):
+        if idx >= 5:
+            break
+        logger_paddle_data.add(f"dataloader_{idx}", paddle_batch[0].numpy())
+        logger_torch_data.add(f"dataloader_{idx}",
+                              torch_batch[0].detach().cpu().numpy())
+    logger_paddle_data.save("./result/data_paddle.npy")
+    logger_torch_data.save("./result/data_ref.npy")
+
+
+if __name__ == "__main__":
+    test_data_pipeline()
+
+    # load data
+    diff_helper = ReprodDiffHelper()
+    torch_info = diff_helper.load_info("./result/data_ref.npy")
+    paddle_info = diff_helper.load_info("./result/data_paddle.npy")
+
+    # compare result and produce log
+    diff_helper.compare_info(torch_info, paddle_info)
+    diff_helper.report(path="./result/log/data_diff.log")
--- a/tutorials/mobilenetv3_prod/Step1-5/03_test_metric.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/03_test_metric.py
+# add test metric code paddle vs torch
+
+import torch
+import paddle
+import numpy as np
+from reprod_log import ReprodLogger
+from reprod_log import ReprodDiffHelper
+
+from mobilenetv3_paddle.paddlevision.models import mobilenet_v3_small as mv3_small_paddle
+from mobilenetv3_ref.torchvision.models import mobilenet_v3_small as mv3_small_torch
+from mobilenetv3_ref import accuracy_torch
+from mobilenetv3_paddle import accuracy_paddle
+
+
+def evaluate(image, labels, model, acc, tag, reprod_logger):
+    model.eval()
+    output = model(image)
+
+    accracy = acc(output, labels, topk=(1, 5))
+
+    reprod_logger.add("acc_top1", np.array(accracy[0]))
+    reprod_logger.add("acc_top5", np.array(accracy[1]))
+
+    reprod_logger.save("./result/metric_{}.npy".format(tag))
+
+
+def test_forward():
+    # load paddle model
+    paddle_model = mv3_small_paddle()
+    paddle_model.eval()
+    paddle_state_dict = paddle.load("./data/mv3_small_paddle.pdparams")
+    paddle_model.set_dict(paddle_state_dict)
+
+    # load torch model
+    torch_model = mv3_small_torch()
+    torch_model.eval()
+    torch_state_dict = torch.load("./data/mobilenet_v3_small-047dcff4.pth")
+    torch_model.load_state_dict(torch_state_dict)
+
+    # prepare logger & load data
+    reprod_logger = ReprodLogger()
+    inputs = np.load("./data/fake_data.npy")
+    labels = np.load("./data/fake_label.npy")
+    image = paddle.to_tensor(inputs, dtype="float32")
+    target = paddle.to_tensor(labels, dtype="int64")
+
+    evaluate(
+        paddle.to_tensor(
+            inputs, dtype="float32"),
+        paddle.to_tensor(
+            labels, dtype="int64"),
+        paddle_model,
+        accuracy_paddle,
+        'paddle',
+        reprod_logger)
+    evaluate(
+        torch.tensor(
+            inputs, dtype=torch.float32),
+        torch.tensor(
+            labels, dtype=torch.int64),
+        torch_model,
+        accuracy_torch,
+        'ref',
+        reprod_logger)
+
+
+if __name__ == "__main__":
+    test_forward()
+
+    # load data
+    diff_helper = ReprodDiffHelper()
+    torch_info = diff_helper.load_info("./result/metric_ref.npy")
+    paddle_info = diff_helper.load_info("./result/metric_paddle.npy")
+
+    # compare result and produce log
+    diff_helper.compare_info(torch_info, paddle_info)
+    diff_helper.report(path="./result/log/metric_diff.log")
--- a/tutorials/mobilenetv3_prod/Step1-5/04_test_loss.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/04_test_loss.py
+# add loss comparing code
+
+import torch
+import paddle
+import numpy as np
+from reprod_log import ReprodLogger
+from reprod_log import ReprodDiffHelper
+
+from mobilenetv3_paddle.paddlevision.models import mobilenet_v3_small as mv3_small_paddle
+from mobilenetv3_ref.torchvision.models import mobilenet_v3_small as mv3_small_torch
+
+
+def test_forward():
+    # init loss
+    criterion_paddle = paddle.nn.CrossEntropyLoss()
+    criterion_torch = torch.nn.CrossEntropyLoss()
+
+    # load paddle model
+    paddle_model = mv3_small_paddle()
+    paddle_model.eval()
+    paddle_state_dict = paddle.load("./data/mv3_small_paddle.pdparams")
+    paddle_model.set_dict(paddle_state_dict)
+
+    # load torch model
+    torch_model = mv3_small_torch()
+    torch_model.eval()
+    torch_state_dict = torch.load("./data/mobilenet_v3_small-047dcff4.pth")
+    torch_model.load_state_dict(torch_state_dict)
+
+    # prepare logger & load data
+    reprod_logger = ReprodLogger()
+    inputs = np.load("./data/fake_data.npy")
+    labels = np.load("./data/fake_label.npy")
+
+    # save the paddle output
+    paddle_out = paddle_model(paddle.to_tensor(inputs, dtype="float32"))
+    loss_paddle = criterion_paddle(
+        paddle_out, paddle.to_tensor(
+            labels, dtype="int64"))
+    reprod_logger.add("loss", loss_paddle.cpu().detach().numpy())
+    reprod_logger.save("./result/loss_paddle.npy")
+
+    # save the torch output
+    torch_out = torch_model(torch.tensor(inputs, dtype=torch.float32))
+    loss_torch = criterion_torch(
+        torch_out, torch.tensor(
+            labels, dtype=torch.int64))
+    reprod_logger.add("loss", loss_torch.cpu().detach().numpy())
+    reprod_logger.save("./result/loss_ref.npy")
+
+
+if __name__ == "__main__":
+    test_forward()
+
+    # load data
+    diff_helper = ReprodDiffHelper()
+    torch_info = diff_helper.load_info("./result/loss_ref.npy")
+    paddle_info = diff_helper.load_info("./result/loss_paddle.npy")
+
+    # compare result and produce log
+    diff_helper.compare_info(torch_info, paddle_info)
+    diff_helper.report(path="./result/log/loss_diff.log")
--- a/tutorials/mobilenetv3_prod/Step1-5/05_test_backward.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/05_test_backward.py
+import paddle
+import numpy as np
+import torch
+import torch.optim.lr_scheduler as lr_scheduler
+from reprod_log import ReprodLogger
+from reprod_log import ReprodDiffHelper
+
+from mobilenetv3_paddle.paddlevision.models import mobilenet_v3_small as mv3_small_paddle
+from mobilenetv3_ref.torchvision.models import mobilenet_v3_small as mv3_small_torch
+
+
+def train_one_epoch_paddle(inputs, labels, model, criterion, optimizer,
+                           lr_scheduler, max_iter, reprod_logger):
+    for idx in range(max_iter):
+        image = paddle.to_tensor(inputs, dtype="float32")
+        target = paddle.to_tensor(labels, dtype="int64")
+        # import pdb; pdb.set_trace()
+
+        output = model(image)
+        loss = criterion(output, target)
+
+        reprod_logger.add("loss_{}".format(idx), loss.cpu().detach().numpy())
+        reprod_logger.add("lr_{}".format(idx), np.array(lr_scheduler.get_lr()))
+
+        optimizer.clear_grad()
+        loss.backward()
+        optimizer.step()
+        # lr_scheduler.step() 
+
+    reprod_logger.save("./result/losses_paddle.npy")
+
+
+def train_one_epoch_torch(inputs, labels, model, criterion, optimizer,
+                          lr_scheduler, max_iter, reprod_logger):
+    for idx in range(max_iter):
+        image = torch.tensor(inputs, dtype=torch.float32).cuda()
+        target = torch.tensor(labels, dtype=torch.int64).cuda()
+        model = model.cuda()
+
+        output = model(image)
+        loss = criterion(output, target)
+
+        reprod_logger.add("loss_{}".format(idx), loss.cpu().detach().numpy())
+        reprod_logger.add("lr_{}".format(idx),
+                          np.array(lr_scheduler.get_last_lr()))
+
+        optimizer.zero_grad()
+        loss.backward()
+        optimizer.step()
+        # lr_scheduler.step()
+
+    reprod_logger.save("./result/losses_ref.npy")
+
+
+def test_backward():
+    max_iter = 3
+    lr = 1e-3
+    momentum = 0.9
+    lr_gamma = 0.1
+
+    # set determinnistic flag
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+    FLAGS_cudnn_deterministic = True
+
+    # load paddle model
+    paddle.set_device("gpu")
+    paddle_model = mv3_small_paddle(dropout=0.0)
+    paddle_model.eval()
+    paddle_state_dict = paddle.load("./data/mv3_small_paddle.pdparams")
+    paddle_model.set_dict(paddle_state_dict)
+
+    # load torch model
+    torch_model = mv3_small_torch(dropout=0.0)
+    torch_model.eval()
+    torch_state_dict = torch.load("./data/mobilenet_v3_small-047dcff4.pth")
+    torch_model.load_state_dict(torch_state_dict, strict=False)
+
+    # init loss
+    criterion_paddle = paddle.nn.CrossEntropyLoss()
+    criterion_torch = torch.nn.CrossEntropyLoss()
+
+    # init optimizer
+    lr_scheduler_paddle = paddle.optimizer.lr.StepDecay(
+        lr, step_size=max_iter // 3, gamma=lr_gamma)
+    opt_paddle = paddle.optimizer.Momentum(
+        learning_rate=lr,
+        momentum=momentum,
+        parameters=paddle_model.parameters())
+
+    opt_torch = torch.optim.SGD(torch_model.parameters(),
+                                lr=lr,
+                                momentum=momentum)
+    lr_scheduler_torch = lr_scheduler.StepLR(
+        opt_torch, step_size=max_iter // 3, gamma=lr_gamma)
+
+    # prepare logger & load data
+    reprod_logger = ReprodLogger()
+    inputs = np.load("./data/fake_data.npy")
+    labels = np.load("./data/fake_label.npy")
+
+    train_one_epoch_paddle(inputs, labels, paddle_model, criterion_paddle,
+                           opt_paddle, lr_scheduler_paddle, max_iter,
+                           reprod_logger)
+
+    train_one_epoch_torch(inputs, labels, torch_model, criterion_torch,
+                          opt_torch, lr_scheduler_torch, max_iter,
+                          reprod_logger)
+
+
+if __name__ == "__main__":
+    test_backward()
+
+    # load data
+    diff_helper = ReprodDiffHelper()
+    torch_info = diff_helper.load_info("./result/losses_ref.npy")
+    paddle_info = diff_helper.load_info("./result/losses_paddle.npy")
+
+    # compare result and produce log
+    diff_helper.compare_info(torch_info, paddle_info)
+    diff_helper.report(path="./result/log/backward_diff.log")
--- a/tutorials/mobilenetv3_prod/Step1-5/README.md
+++ b/tutorials/mobilenetv3_prod/Step1-5/README.md
+# MobileNetV3
+
+## 目录
+
+
+- [1. 简介](#1)
+- [2. 复现流程](#2)
+    - [2.1 reprod_log简介](#2.1)
+- [3. 准备数据与环境](#3)
+    - [3.1 准备环境](#3.1)
+    - [3.2 生成伪数据](#3.2)
+    - [3.3 准备模型](#3.3)
+- [4. 开始使用](#4)
+    - [4.1 模型前向对齐](#4.1)
+    - [4.2 数据加载对齐](#4.2)
+    - [4.3 评估指标对齐](#4.3)
+    - [4.4 损失对齐](#4.4)
+    - [4.5 反向梯度对齐](#4.5)
+    - [4.6 训练对齐](#4.6)
+
+<a name="1"></a>
+## 1. 简介
+
+本部分内容包含基于 [MobileNetV3](https://arxiv.org/abs/1905.02244) 的复现对齐过程，可以结合[论文复现指南]()进行学习。
+
+<a name="2"></a>
+## 2. 复现流程
+在论文复现中，我们可以根据网络训练的流程，将对齐流程划分为数据加载对齐、模型前向对齐、评估指标对齐、损失对齐、反向梯度对齐和训练对齐。不同的对齐部分我们会在下方详细介绍。
+
+在对齐验证的流程中，我们依靠 reprod_log 日志工具查看 paddle 和官方同样输入下的输出是否相同，这样的查看方式具有标准统一，比较过程方便等优势。
+
+<a name="2.1"></a>
+### 2.1 reprod_log 简介
+reprod_log 是一个用于 numpy 数据记录和对比的工具，通过传入需要对比的两个 numpy 数组，就可以在指定的规则下得到数据之差是否满足期望的结论。其主要接口的说明可以看它的 [github 主页](https://github.com/WenmuZhou/reprod_log)。
+
+<a name="3"></a>
+## 3. 准备数据与环境
+在进行我们的对齐验证之前，我们需要准备运行环境、用于输入的伪数据、paddle 模型参数和官方模型权重参数。
+
+<a name="3.1"></a>
+### 3.1 准备环境
+* 克隆本项目
+
+```bash
+git clone https://github.com/PaddlePaddle/models.git
+cd models/tutorials/mobilenetv3_prod/
+```
+
+* 安装paddlepaddle
+
+```bash
+# 需要安装2.2及以上版本的Paddle
+# 安装GPU版本的Paddle
+pip install paddlepaddle-gpu==2.2.0
+# 安装CPU版本的Paddle
+pip install paddlepaddle==2.2.0
+```
+
+更多版本或者环境下的安装可以参考：[Paddle安装指南](https://www.paddlepaddle.org.cn/)
+* 安装requirements
+
+```bash
+pip install -r requirements.txt
+```
+<a name="3.2"></a>
+
+### 3.2 生成伪数据
+为了保证模型对齐不会受到输入数据的影响，我们生成一组数据作为两个模型的输入。
+伪数据可以通过如下代码生成，我们在本地目录下也提供了生成好的伪数据（./data/fake_*.npy）。
+
+```python
+def gen_fake_data():
+    fake_data = np.random.rand(1, 3, 224, 224).astype(np.float32) - 0.5
+    fake_label = np.arange(1).astype(np.int64)
+    np.save("fake_data.npy", fake_data)
+    np.save("fake_label.npy", fake_label)
+```
+
+<a name="3.3"></a>
+### 3.3 准备模型
+为了保证模型前向对齐不受到模型参数不一致的影响，我们使用相同的权重参数对模型进行初始化。
+
+生成相同权重参数分为以下 2 步：
+1. 随机初始化官方模型参数并保存成 mobilenet_v3_small-047dcff4.pth
+2. 将 mobilenet_v3_small-047dcff4.pth 通过 ./torch2paddle.py 转换生成 mv3_small_paddle.pdparams
+
+转换模型时，torch 和 paddle 存在参数需要转换的部分，主要是bn层、全连接层、num_batches_tracked等，可以参见转换脚本(./torch2paddle.py)。
+
+<a name="4"></a>
+## 4. 开始使用
+准备好数据之后，我们通过下面对应训练流程的拆解步骤进行复现对齐。
+
+<a name="4.1"></a>
+### 4.1 模型前向对齐
+论文复现中，最重要的一步是前向对齐的验证，验证流程如下图所示:
+
+<div align="center">
+    <img src="./images/forward.png" width="500">
+</div>
+
+这里，为了判断模型组网部分能否获得和原论文同样的输出，我们将两个模型参数固定，并输入相同伪数据，观察 paddle 模型产出的 logit 是否和官方模型一致。
+
+我们的示例代码如下所示：
+
+```python
+def test_forward():
+    # load paddle model
+    paddle_model = mv3_small_paddle()
+    paddle_model.eval()
+    paddle_state_dict = paddle.load("./data/mv3_small_paddle.pdparams")
+    paddle_model.set_dict(paddle_state_dict)
+
+    # load torch model
+    torch_model = mv3_small_torch()
+    torch_model.eval()
+    torch_state_dict = torch.load("./data/mobilenet_v3_small-047dcff4.pth")
+    torch_model.load_state_dict(torch_state_dict)
+
+    # load data
+    inputs = np.load("./data/fake_data.npy")
+
+    # save the paddle output
+    reprod_logger = ReprodLogger()
+    paddle_out = paddle_model(paddle.to_tensor(inputs, dtype="float32"))
+    reprod_logger.add("logits", paddle_out.cpu().detach().numpy())
+    reprod_logger.save("./result/forward_paddle.npy")
+
+    # save the torch output
+    torch_out = torch_model(torch.tensor(inputs, dtype=torch.float32))
+    reprod_logger.add("logits", torch_out.cpu().detach().numpy())
+    reprod_logger.save("./result/forward_ref.npy")
+```
+
+可以看到，我们在代码中加载准备的相同的模型参数、并固定输入，从而获得两个模型的输出。输出结果使用相同的 key 值存到 numpy 文件中，随后使用下列代码加载并比较：
+
+```python
+    # load data
+    diff_helper = ReprodDiffHelper()
+    torch_info = diff_helper.load_info("./result/forward_ref.npy")
+    paddle_info = diff_helper.load_info("./result/forward_paddle.npy")
+
+    # compare result and produce log
+    diff_helper.compare_info(torch_info, paddle_info)
+    diff_helper.report(path="./result/log/forward_diff.log")
+```
+
+在代码示例中也可以学习到 reprod_log 的主要接口，包含 add、save、load_info、compare_info、report 的用法。
+
+【**运行文件**】
+通过运行以下代码，我们验证前向对齐效果。
+```bash
+cd models/tutorials/mobilenetv3_prod/Step1-5/
+python 01_test_forward.py
+```
+
+【**获得结果**】
+根据示例代码可以看到，我们将结果保存在`result/log/forward_diff.log`中，打开对应文件或者直接观察命令行输出，就会有下列结果：
+
+```bash
+[2021/12/21 15:00:38] root INFO: logits:
+[2021/12/21 15:00:38] root INFO:     mean diff: check passed: False, value: 2.308018565599923e-06
+[2021/12/21 15:00:38] root INFO: diff check failed
+```
+
+这里我们发现在`reprod_log`默认的平均差异小于1e-6的标准下，当前前向对齐是不符合条件的，但是这是由于前向 op 计算导致的微小的差异。
+
+一般说来前向误差在 1e-5 左右都是可以接受的，到这里我们就验证了网络的前向是对齐的，完成了第一个打卡点。
+
+<a name="4.2"></a>
+### 4.2 数据加载对齐
+
+在验证了模型的前向对齐之后，我们验证数据读取部分，这一部分，我们比较从数据读取到模型传入之间我们进行的操作是否和参考操作一致。
+
+主要代码如下所示，我们读取相同的输入，比较数据增强后输出之间的差异，即可知道我们的数据增强是否和参考实现保持一致：
+
+```python
+def build_torch_data_pipeline():
+    dataset_test = torchvision.datasets.ImageFolder(
+        "./lite_data/val/",
+        presets_torch.ClassificationPresetEval(
+            crop_size=224, resize_size=256), is_valid_file=None)
+
+    test_sampler = torch.utils.data.SequentialSampler(dataset_test)
+
+    data_loader_test = torch.utils.data.DataLoader(
+        dataset_test,
+        batch_size=4,
+        sampler=test_sampler,
+        num_workers=0,
+        pin_memory=True)
+    return dataset_test, data_loader_test
+
+
+def test_data_pipeline():
+    paddle_dataset, paddle_dataloader = build_paddle_data_pipeline()
+    torch_dataset, torch_dataloader = build_torch_data_pipeline()
+
+    logger_paddle_data = ReprodLogger()
+    logger_torch_data = ReprodLogger()
+
+    logger_paddle_data.add("length", np.array(len(paddle_dataset)))
+    logger_torch_data.add("length", np.array(len(torch_dataset)))
+
+
+    for idx, (paddle_batch, torch_batch
+              ) in enumerate(zip(paddle_dataloader, torch_dataloader)):
+        if idx >= 5:
+            break
+        logger_paddle_data.add(f"dataloader_{idx}", paddle_batch[0].numpy())
+        logger_torch_data.add(f"dataloader_{idx}",
+                              torch_batch[0].detach().cpu().numpy())
+    logger_paddle_data.save("./result/data_paddle.npy")
+    logger_torch_data.save("./result/data_ref.npy")
+
+```
+
+【**运行文件**】
+通过运行以下指令，我们进行测试，测试数据可以解压我们准备的 [lite_data.tar](https://github.com/PaddlePaddle/models/blob/release%2F2.2/tutorials/mobilenetv3_prod/Step6/test_images/lite_data.tar) 获得，对于自身的数据，也可以抽取几张 validationset 的图片用作验证。
+
+```bash
+cd models/tutorials/mobilenetv3_prod/Step1-5/
+tar -xvf lite_data.tar
+python 02_test_data.py
+```
+
+【**获得结果**】
+运行文件之后，我们获得以下命令行输出，可以发现我们的验证结果满足预期，数据加载部分验证通过：
+
+```bash
+[2021/12/23 17:21:22] root INFO: length:
+[2021/12/23 17:21:22] root INFO:        mean diff: check passed: True, value: 0.0
+[2021/12/23 17:21:22] root INFO: dataloader_0:
+[2021/12/23 17:21:22] root INFO:        mean diff: check passed: True, value: 0.0
+[2021/12/23 17:21:22] root INFO: dataloader_1:
+[2021/12/23 17:21:22] root INFO:        mean diff: check passed: True, value: 0.0
+[2021/12/23 17:21:22] root INFO: dataloader_2:
+[2021/12/23 17:21:22] root INFO:        mean diff: check passed: True, value: 0.0
+[2021/12/23 17:21:22] root INFO: dataloader_3:
+[2021/12/23 17:21:22] root INFO:        mean diff: check passed: True, value: 0.0
+[2021/12/23 17:21:22] root INFO: diff check passed
+```
+
+<a name="4.3"></a>
+### 4.3 评估指标对齐
+随后我们来到评估指标对齐，对齐流程如图所示：
+
+<div align="center">
+    <img src="./images/metric.png" width="500">
+</div>
+
+这部分的对齐流程主要差异在于我们在模型基础上添加了对应参考代码实现 metric，并导入到测试文件中。在论文复现中，我们尽量将模型的不同部分封装起来，之后就可以通过我们这样导入的方式进行验证。
+
+这部分的参考代码如下：
+
+```python
+
+def evaluate(image, labels, model, acc, tag, reprod_logger):
+    model.eval()
+    output = model(image)
+
+    accracy = acc(output, labels, topk=(1, 5))
+
+    reprod_logger.add("acc_top1", np.array(accracy[0]))
+    reprod_logger.add("acc_top5", np.array(accracy[1]))
+
+    reprod_logger.save("./result/metric_{}.npy".format(tag))
+
+
+def test_forward():
+    # load model & data
+
+    evaluate(
+        paddle.to_tensor(
+            inputs, dtype="float32"),
+        paddle.to_tensor(
+            labels, dtype="int64"),
+        paddle_model,
+        accuracy_paddle,
+        'paddle', reprod_logger)
+    evaluate(
+        torch.tensor(
+            inputs, dtype=torch.float32),
+        torch.tensor(
+            labels, dtype=torch.int64),
+        torch_model,
+        accuracy_torch,
+        'ref', reprod_logger)
+```
+这部分模型和输入的导入均和之前一致，只是在之前的基础上增加了模型计算评估指标的部分。
+
+由于我们之前验证了模型的输出一致。那么也就是评估指标的输入相同，我们只需要对比输出是否一致，即可确定评估指标的实现是否正确。
+
+【**运行文件**】
+通过运行以下代码，我们验证评估指标对齐效果。
+
+```bash
+cd models/tutorials/mobilenetv3_prod/Step1-5/
+python 03_test_metric.py
+```
+
+【**获得结果**】
+
+进入`result/log/metric_diff.log`中，就会有下列结果，而结果说明我们评估指标的实现正确， 从而完成第二个打卡点：
+```bash
+[2021/12/21 19:28:49] root INFO: acc_top1:
+[2021/12/21 19:28:49] root INFO:     mean diff: check passed: True, value: 0.0
+[2021/12/21 19:28:49] root INFO: acc_top5:
+[2021/12/21 19:28:49] root INFO:     mean diff: check passed: True, value: 0.0
+[2021/12/21 19:28:49] root INFO: diff check passed
+```
+
+<a name="4.4"></a>
+### 4.4 损失对齐
+进一步，我们验证损失实现的正确性，验证流程如下：
+
+<div align="center">
+    <img src="./images/losses.png" width="500">
+</div>
+
+这部分的对齐流程主要差异在于我们在模型基础上添加了对应参考代码实现的 loss。这部分的参考代码如下：
+
+```python
+def test_forward():
+    # init loss
+    criterion_paddle = paddle.nn.CrossEntropyLoss()
+    criterion_torch = torch.nn.CrossEntropyLoss()
+
+    # load model & data
+
+    # save the paddle output
+    paddle_out = paddle_model(paddle.to_tensor(inputs, dtype="float32"))
+    loss_paddle = criterion_paddle(
+        paddle_out, paddle.to_tensor(
+            labels, dtype="int64"))
+    reprod_logger.add("loss", loss_paddle.cpu().detach().numpy())
+    reprod_logger.save("./result/loss_paddle.npy")
+
+    # save the torch output
+    torch_out = torch_model(torch.tensor(inputs, dtype=torch.float32))
+    loss_torch = criterion_torch(
+        torch_out, torch.tensor(
+            labels, dtype=torch.int64))
+    reprod_logger.add("loss", loss_torch.cpu().detach().numpy())
+    reprod_logger.save("./result/loss_ref.npy")
+```
+这部分代码进一步增加损失导入的部分，由于我们之前验证了模型的输出一致，也就是损失的输入相同，我们只需要对比输出是否一致，即可确定损失的实现是否正确。
+
+【**运行文件**】
+通过运行以下代码，我们验证损失对齐效果。
+```bash
+cd models/tutorials/mobilenetv3_prod/Step1-5/
+python 04_test_loss.py
+```
+
+【**获得结果**】
+
+进入`result/log/loss_diff.log`中，就会有下列结果，而结果说明我们损失的实现正确，完成第三个打卡点：
+```bash
+[2021/12/22 20:13:41] root INFO: loss:
+[2021/12/22 20:13:41] root INFO:     mean diff: check passed: True, value: 0.0
+[2021/12/22 20:13:41] root INFO: diff check passed
+```
+
+<a name="4.5"></a>
+### 4.5 反向梯度对齐
+
+结合模型和损失，我们就可以验证反向过程。反向梯度传导是否正确，取决于优化器、学习率以及梯度的计算，而验证过程只需要多观察几轮损失即可明确反向是否正确传导，主要验证流程如下所示：
+
+<div align="center">
+    <img src="./images/backward.png" width="500">
+</div>
+
+以上参考流程可以使用以下代码实现：
+
+```python
+def train_one_epoch_torch(inputs, labels, model, criterion, optimizer,
+                          max_iter, reprod_logger):
+    for idx in range(max_iter):
+        image = torch.tensor(inputs, dtype=torch.float32).cuda()
+        target = torch.tensor(labels, dtype=torch.int64).cuda()
+        model = model.cuda()
+
+        output = model(image)
+        loss = criterion(output, target)
+
+        reprod_logger.add("loss_{}".format(idx), loss.cpu().detach().numpy())
+
+        optimizer.zero_grad()
+        loss.backward()
+        optimizer.step()
+
+    reprod_logger.save("./result/losses_ref.npy")
+
+
+def test_backward():
+    max_iter = 3
+    lr = 1e-3
+    momentum = 0.9
+
+    # load model, loss, data
+
+    # init optimizer
+    opt_paddle = paddle.optimizer.Momentum(
+        learning_rate=lr,
+        momentum=momentum,
+        parameters=paddle_model.parameters())
+
+    opt_torch = torch.optim.SGD(torch_model.parameters(), lr=lr, momentum=momentum)
+
+    train_one_epoch_paddle(inputs, labels, paddle_model, criterion_paddle,
+                           opt_paddle, max_iter, reprod_logger)
+
+    train_one_epoch_torch(inputs, labels, torch_model, criterion_torch,
+                          opt_torch, max_iter, reprod_logger)
+```
+
+代码中增加了optimizer用于迭代网络参数，其他则基本一致。
+
+【**运行文件**】
+通过运行以下代码，我们验证反向传播对齐效果。
+```bash
+cd models/tutorials/mobilenetv3_prod/Step1-5/
+python 05_test_backward.py
+```
+
+【**获得结果**】
+进入`result/log/backward_diff.log`中，就会有下列结果，结果表示三轮损失的差异在 1e-6 至 1e-5 量级，说明我们反向传播的实现对齐， 完成第四个打卡点：
+
+```bash
+[2021/12/23 15:51:16] root INFO: loss_0:
+[2021/12/23 15:51:16] root INFO:     mean diff: check passed: False, value: 1.9073486328125e-06
+[2021/12/23 15:51:16] root INFO: lr_0:
+[2021/12/23 15:51:16] root INFO:     mean diff: check passed: True, value: 0.0
+[2021/12/23 15:51:16] root INFO: loss_1:
+[2021/12/23 15:51:16] root INFO:     mean diff: check passed: False, value: 2.384185791015625e-06
+[2021/12/23 15:51:16] root INFO: lr_1:
+[2021/12/23 15:51:16] root INFO:     mean diff: check passed: True, value: 0.0
+[2021/12/23 15:51:16] root INFO: loss_2:
+[2021/12/23 15:51:16] root INFO:     mean diff: check passed: False, value: 1.1920928955078125e-05
+[2021/12/23 15:51:16] root INFO: lr_2:
+[2021/12/23 15:51:16] root INFO:     mean diff: check passed: True, value: 0.0
+[2021/12/23 15:51:16] root INFO: diff check failed
+
+
+```
+
+<a name="4.6"></a>
+### 4.6 训练对齐
+通过以上步骤，我们验证了模型、数据、评估指标、损失、反向传播的正确性，也就为我们的训练对齐打下了良好的基础。
+
+接下来，我们按照以下流程验证训练对齐结果，即对网络进行训练，并在训练后验证精度是否达到指标：
+
+<div align="center">
+    <img src="./images/train.png" width="500">
+</div>
+
+
+我们可以使用 reprod_log 对比精度，也可以直接肉眼观察结果对比：
+
+```python
+if paddle.distributed.get_rank() == 0:
+    reprod_logger = ReprodLogger()
+    reprod_logger.add("top1", np.array([top1]))
+    reprod_logger.save("train_align_paddle.npy")
+```
+
+【**运行文件**】
+
+```bash
+cd models/tutorials/mobilenetv3_prod/Checkpoint6
+python train.py
+```
+
+【**获得结果**】
+最终训练精度超过原模型精度，我们的复现到这里就圆满结束，如果还有任何问题，欢迎随时向我们[提问](https://github.com/PaddlePaddle/Paddle/issues)！
--- a/tutorials/mobilenetv3_prod/Step1-5/data/fake_data.npy
+++ b/tutorials/mobilenetv3_prod/Step1-5/data/fake_data.npy
--- a/tutorials/mobilenetv3_prod/Step1-5/data/fake_label.npy
+++ b/tutorials/mobilenetv3_prod/Step1-5/data/fake_label.npy
--- a/tutorials/mobilenetv3_prod/Step1-5/data/mobilenet_v3_small-047dcff4.pth
+++ b/tutorials/mobilenetv3_prod/Step1-5/data/mobilenet_v3_small-047dcff4.pth
--- a/tutorials/mobilenetv3_prod/Step1-5/data/mv3_small_paddle.pdparams
+++ b/tutorials/mobilenetv3_prod/Step1-5/data/mv3_small_paddle.pdparams
--- a/tutorials/mobilenetv3_prod/Step1-5/images/backward.png
+++ b/tutorials/mobilenetv3_prod/Step1-5/images/backward.png
--- a/tutorials/mobilenetv3_prod/Step1-5/images/forward.png
+++ b/tutorials/mobilenetv3_prod/Step1-5/images/forward.png
--- a/tutorials/mobilenetv3_prod/Step1-5/images/losses.png
+++ b/tutorials/mobilenetv3_prod/Step1-5/images/losses.png
--- a/tutorials/mobilenetv3_prod/Step1-5/images/metric.png
+++ b/tutorials/mobilenetv3_prod/Step1-5/images/metric.png
--- a/tutorials/mobilenetv3_prod/Step1-5/images/train.png
+++ b/tutorials/mobilenetv3_prod/Step1-5/images/train.png
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_paddle/__init__.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_paddle/__init__.py
+from .metric import accuracy_paddle
+from .presets import *
\ No newline at end of file
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_paddle/metric.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_paddle/metric.py
+import paddle
+
+
+def accuracy_paddle(output, target, topk=(1, )):
+    """Computes the accuracy over the k top predictions for the specified values of k"""
+    with paddle.no_grad():
+        maxk = max(topk)
+        batch_size = target.shape[0]
+
+        _, pred = output.topk(maxk, 1, True, True)
+        pred = pred.t()
+        correct = pred.equal(target)
+
+        res = []
+        for k in topk:
+            correct_k = correct.astype(paddle.int32)[:k].flatten().sum(
+                dtype='float32')
+            res.append(correct_k / batch_size)
+        return res
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_paddle/paddlevision/__init__.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_paddle/paddlevision/__init__.py
+from .datasets import *
+from .models import *
+from .transforms import *
\ No newline at end of file
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_paddle/paddlevision/datasets/__init__.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_paddle/paddlevision/datasets/__init__.py
+from .folder import ImageFolder, DatasetFolder
+from .vision import VisionDataset
+
+__all__ = ('ImageFolder', 'DatasetFolder', 'VisionDataset')
\ No newline at end of file
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_paddle/paddlevision/datasets/folder.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_paddle/paddlevision/datasets/folder.py
+from .vision import VisionDataset
+
+from PIL import Image
+
+import os
+import os.path
+from typing import Any, Callable, cast, Dict, List, Optional, Tuple
+
+
+def has_file_allowed_extension(filename: str,
+                               extensions: Tuple[str, ...]) -> bool:
+    """Checks if a file is an allowed extension.
+
+    Args:
+        filename (string): path to a file
+        extensions (tuple of strings): extensions to consider (lowercase)
+
+    Returns:
+        bool: True if the filename ends with one of given extensions
+    """
+    return filename.lower().endswith(extensions)
+
+
+def is_image_file(filename: str) -> bool:
+    """Checks if a file is an allowed image extension.
+
+    Args:
+        filename (string): path to a file
+
+    Returns:
+        bool: True if the filename ends with a known image extension
+    """
+    return has_file_allowed_extension(filename, IMG_EXTENSIONS)
+
+
+def find_classes(directory: str) -> Tuple[List[str], Dict[str, int]]:
+    """Collect the class folders found directly under ``directory``.
+
+    Every immediate sub-directory counts as one class. Names are sorted and
+    each one is mapped to its position in that sorted list.
+
+    See :class:`DatasetFolder` for details.
+
+    Raises:
+        FileNotFoundError: If ``directory`` has no sub-directories.
+    """
+    class_names = [
+        entry.name for entry in os.scandir(directory) if entry.is_dir()
+    ]
+    class_names.sort()
+    if len(class_names) == 0:
+        raise FileNotFoundError(
+            f"Couldn't find any class folder in {directory}.")
+
+    index_by_name = dict(zip(class_names, range(len(class_names))))
+    return class_names, index_by_name
+
+
+def make_dataset(
+        directory: str,
+        class_to_idx: Optional[Dict[str, int]]=None,
+        extensions: Optional[Tuple[str, ...]]=None,
+        is_valid_file: Optional[Callable[[str], bool]]=None, ) -> List[Tuple[
+            str, int]]:
+    """Generates a list of samples of a form (path_to_sample, class).
+
+    See :class:`DatasetFolder` for details.
+
+    Note: The class_to_idx parameter is here optional and will use the logic of the ``find_classes`` function
+    by default.
+
+    Args:
+        directory (str): root dataset directory; ``~`` is expanded.
+        class_to_idx (optional): mapping from class folder name to class index.
+        extensions (optional): tuple of accepted (lowercase) file suffixes.
+        is_valid_file (optional): predicate called with the full path of each
+            candidate file. Exactly one of ``extensions`` and
+            ``is_valid_file`` must be given.
+
+    Raises:
+        ValueError: If ``class_to_idx`` is an empty mapping.
+        ValueError: If ``extensions`` and ``is_valid_file`` are both None or
+            both not None.
+
+    Returns:
+        List[Tuple[str, int]]: ``(path, class_index)`` pairs. Classes are
+        visited in sorted name order, and files in sorted order within each
+        walked directory. A class whose folder is missing or holds no valid
+        file simply contributes no samples; no error is raised for it.
+    """
+    directory = os.path.expanduser(directory)
+
+    if class_to_idx is None:
+        _, class_to_idx = find_classes(directory)
+    elif not class_to_idx:
+        raise ValueError(
+            "'class_to_index' must have at least one entry to collect any samples."
+        )
+
+    both_none = extensions is None and is_valid_file is None
+    both_something = extensions is not None and is_valid_file is not None
+    if both_none or both_something:
+        raise ValueError(
+            "Both extensions and is_valid_file cannot be None or not None at the same time"
+        )
+
+    if extensions is not None:
+
+        def is_valid_file(x: str) -> bool:
+            return has_file_allowed_extension(
+                x, cast(Tuple[str, ...], extensions))
+
+    is_valid_file = cast(Callable[[str], bool], is_valid_file)
+
+    instances = []
+    for target_class in sorted(class_to_idx.keys()):
+        class_index = class_to_idx[target_class]
+        target_dir = os.path.join(directory, target_class)
+        if not os.path.isdir(target_dir):
+            continue
+        for root, _, fnames in sorted(os.walk(target_dir, followlinks=True)):
+            for fname in sorted(fnames):
+                # Hand the predicate the full path, not the bare file name:
+                # a user-supplied ``is_valid_file`` may need to open the file.
+                path = os.path.join(root, fname)
+                if is_valid_file(path):
+                    instances.append((path, class_index))
+
+    return instances
+
+
+class DatasetFolder(VisionDataset):
+    """Generic dataset built from a directory with one sub-folder per class.
+
+    Class discovery and sample collection are delegated to
+    :meth:`find_classes` and :meth:`make_dataset`; both can be overridden to
+    support other layouts.
+
+    Args:
+        root (string): Root directory path.
+        loader (callable): Called with a sample path, returns the sample.
+        extensions (tuple[string]): Allowed file suffixes. Must not be
+            combined with ``is_valid_file``.
+        transform (callable, optional): Applied to every loaded sample,
+            e.g. ``transforms.RandomCrop`` for images.
+        target_transform (callable, optional): Applied to every target.
+        is_valid_file (callable, optional): Predicate deciding whether a
+            file is kept. Must not be combined with ``extensions``.
+
+     Attributes:
+        classes (list): Class names as returned by :meth:`find_classes`.
+        class_to_idx (dict): Dict with items (class_name, class_index).
+        samples (list): List of (sample path, class_index) tuples
+        targets (list): The class_index value for each entry of ``samples``
+    """
+
+    def __init__(
+            self,
+            root: str,
+            loader: Callable[[str], Any],
+            extensions: Optional[Tuple[str, ...]]=None,
+            transform: Optional[Callable]=None,
+            target_transform: Optional[Callable]=None,
+            is_valid_file: Optional[Callable[[str], bool]]=None, ) -> None:
+        super().__init__(
+            root, transform=transform, target_transform=target_transform)
+
+        # Discover classes first, then gather the samples for exactly those.
+        found_classes, found_mapping = self.find_classes(self.root)
+        collected = self.make_dataset(self.root, found_mapping, extensions,
+                                      is_valid_file)
+
+        self.loader = loader
+        self.extensions = extensions
+
+        self.classes = found_classes
+        self.class_to_idx = found_mapping
+        self.samples = collected
+        self.targets = [entry[1] for entry in collected]
+
+    @staticmethod
+    def make_dataset(
+            directory: str,
+            class_to_idx: Dict[str, int],
+            extensions: Optional[Tuple[str, ...]]=None,
+            is_valid_file: Optional[Callable[[str], bool]]=None, ) -> List[
+                Tuple[str, int]]:
+        """Build the ``(path_to_sample, class)`` list for this dataset.
+
+        Thin wrapper around the module-level ``make_dataset`` function that
+        insists on an explicit ``class_to_idx``. Override it to read samples
+        from another source, e.g. a compressed archive.
+
+        Args:
+            directory (str): root dataset directory, corresponding to ``self.root``.
+            class_to_idx (Dict[str, int]): Dictionary mapping class name to class index.
+            extensions (optional): Allowed extensions. Defaults to None.
+            is_valid_file (optional): Predicate selecting valid files.
+                Defaults to None. Exactly one of ``extensions`` and
+                ``is_valid_file`` has to be given.
+
+        Raises:
+            ValueError: In case ``class_to_idx`` is None or empty.
+            ValueError: In case ``extensions`` and ``is_valid_file`` are None or both are not None.
+
+        Returns:
+            List[Tuple[str, int]]: samples of a form (path_to_sample, class)
+        """
+        # A None mapping would make the module-level function fall back to the
+        # module-level find_classes(), silently bypassing an overridden
+        # find_classes() method - refuse instead.
+        if class_to_idx is None:
+            raise ValueError("The class_to_idx parameter cannot be None.")
+        return make_dataset(directory, class_to_idx, extensions,
+                            is_valid_file)
+
+    def find_classes(self, directory: str) -> Tuple[List[str], Dict[str, int]]:
+        """Locate the class folders of a dataset laid out as::
+
+            directory/
+                class_x/
+                    xxx.ext
+                    ...
+                class_y/
+                    123.ext
+                    ...
+
+        Delegates to the module-level ``find_classes`` function. Override it
+        to restrict the classes or to support another directory structure.
+
+        Args:
+            directory(str): Root directory path, corresponding to ``self.root``
+
+        Raises:
+            FileNotFoundError: If ``directory`` has no class folders.
+
+        Returns:
+            (Tuple[List[str], Dict[str, int]]): List of all classes and dictionary mapping each class to an index.
+        """
+        discovered = find_classes(directory)
+        return discovered
+
+    def __getitem__(self, index: int) -> Tuple[Any, Any]:
+        """Load and transform the sample stored at ``index``.
+
+        Args:
+            index (int): Index
+
+        Returns:
+            tuple: (sample, target) where target is class_index of the target class.
+        """
+        sample_path, label = self.samples[index]
+        loaded = self.loader(sample_path)
+        if self.transform is not None:
+            loaded = self.transform(loaded)
+        if self.target_transform is not None:
+            label = self.target_transform(label)
+        return loaded, label
+
+    def __len__(self) -> int:
+        """Number of collected samples."""
+        sample_count = len(self.samples)
+        return sample_count
+
+
+# Lowercase filename suffixes treated as images: used by ``is_image_file`` and
+# passed by ``ImageFolder`` as ``extensions`` when no ``is_valid_file`` is given.
+IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif',
+                  '.tiff', '.webp')
+
+
+def pil_loader(path: str) -> Image.Image:
+    """Open the file at ``path`` with PIL and return it converted to RGB."""
+    # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
+    with open(path, 'rb') as stream:
+        picture = Image.open(stream)
+        rgb_picture = picture.convert('RGB')
+    return rgb_picture
+
+
+def default_loader(path: str) -> Any:
+    """Default sample loader; delegates to ``pil_loader``."""
+    loaded_image = pil_loader(path)
+    return loaded_image
+
+
+class ImageFolder(DatasetFolder):
+    """Image dataset read from a folder tree, one sub-folder per class: ::
+
+        root/dog/xxx.png
+        root/dog/xxy.png
+        root/dog/[...]/xxz.png
+
+        root/cat/123.png
+        root/cat/nsdf3.png
+        root/cat/[...]/asd932_.png
+    Args:
+        root (string): Root directory path.
+        transform (callable, optional): Applied to each loaded image,
+            e.g. ``transforms.RandomCrop``
+        target_transform (callable, optional): Applied to each target.
+        loader (callable, optional): Loads an image given its path;
+            defaults to ``default_loader``.
+        is_valid_file (callable, optional): Predicate selecting the files to
+            keep. When omitted, files are filtered by ``IMG_EXTENSIONS``.
+
+     Attributes:
+        classes (list): List of the class names sorted alphabetically.
+        class_to_idx (dict): Dict with items (class_name, class_index).
+        imgs (list): List of (image path, class_index) tuples
+    """
+
+    def __init__(
+            self,
+            root: str,
+            transform: Optional[Callable]=None,
+            target_transform: Optional[Callable]=None,
+            loader: Callable[[str], Any]=default_loader,
+            is_valid_file: Optional[Callable[[str], bool]]=None, ):
+        # Extension filtering and a custom predicate are mutually exclusive,
+        # so the extension tuple is only supplied when no predicate is given.
+        if is_valid_file is not None:
+            allowed_suffixes = None
+        else:
+            allowed_suffixes = IMG_EXTENSIONS
+        super().__init__(
+            root,
+            loader,
+            allowed_suffixes,
+            transform=transform,
+            target_transform=target_transform,
+            is_valid_file=is_valid_file)
+        # ``imgs`` is an alias of ``samples`` (same list object).
+        self.imgs = self.samples
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_paddle/paddlevision/datasets/vision.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_paddle/paddlevision/datasets/vision.py
+import os
+import paddle
+from typing import Any, Callable, List, Optional, Tuple
+
+
+class VisionDataset(paddle.io.Dataset):
+    """
+    Base Class For making datasets which are compatible with our model.
+    It is necessary to override the ``__getitem__`` and ``__len__`` method.
+
+    Args:
+        root (string): Root directory of dataset.
+        transforms (callable, optional): A function/transforms that takes in
+            an image and a label and returns the transformed versions of both.
+        transform (callable, optional): A function/transform that  takes in an PIL image
+            and returns a transformed version. E.g, ``transforms.RandomCrop``
+        target_transform (callable, optional): A function/transform that takes in the
+            target and transforms it.
+
+    Raises:
+        ValueError: If ``transforms`` is combined with ``transform`` or
+            ``target_transform``.
+
+    .. note::
+
+        :attr:`transforms` and the combination of :attr:`transform` and :attr:`target_transform` are mutually exclusive.
+    """
+    # Number of spaces used to indent the body lines of ``__repr__``.
+    _repr_indent = 4
+
+    def __init__(
+            self,
+            root: str,
+            transforms: Optional[Callable]=None,
+            transform: Optional[Callable]=None,
+            target_transform: Optional[Callable]=None, ) -> None:
+        # Expand ``~`` for str/bytes roots; any other value is stored as given.
+        # isinstance() needs *types* here: the former ``bytes()`` was the
+        # instance ``b''`` and raised TypeError for every non-str root.
+        if isinstance(root, (str, bytes)):
+            root = os.path.expanduser(root)
+        self.root = root
+
+        has_transforms = transforms is not None
+        has_separate_transform = transform is not None or target_transform is not None
+        if has_transforms and has_separate_transform:
+            raise ValueError(
+                "Only transforms or transform/target_transform can "
+                "be passed as argument")
+
+        # for backwards-compatibility
+        self.transform = transform
+        self.target_transform = target_transform
+
+        # Separate transforms are additionally bundled into one joint callable.
+        if has_separate_transform:
+            transforms = StandardTransform(transform, target_transform)
+        self.transforms = transforms
+
+    def __getitem__(self, index: int) -> Any:
+        """
+        Args:
+            index (int): Index
+
+        Returns:
+            (Any): Sample and meta data, optionally transformed by the respective transforms.
+        """
+        raise NotImplementedError
+
+    def __len__(self) -> int:
+        """Number of samples; must be provided by subclasses."""
+        raise NotImplementedError
+
+    def __repr__(self) -> str:
+        """Multi-line summary: class name, size, root, extras and transforms."""
+        head = "Dataset " + self.__class__.__name__
+        body = ["Number of datapoints: {}".format(self.__len__())]
+        if self.root is not None:
+            body.append("Root location: {}".format(self.root))
+        body += self.extra_repr().splitlines()
+        if hasattr(self, "transforms") and self.transforms is not None:
+            body += [repr(self.transforms)]
+        lines = [head] + [" " * self._repr_indent + line for line in body]
+        return '\n'.join(lines)
+
+    def _format_transform_repr(self, transform: Callable,
+                               head: str) -> List[str]:
+        """Prefix the first repr line of ``transform`` with ``head`` and
+        indent the remaining lines by ``len(head)`` spaces."""
+        lines = transform.__repr__().splitlines()
+        return (["{}{}".format(head, lines[0])] +
+                ["{}{}".format(" " * len(head), line) for line in lines[1:]])
+
+    def extra_repr(self) -> str:
+        """Hook for subclasses: extra text inserted into ``__repr__``."""
+        return ""
+
+
+class StandardTransform(object):
+    """Pair an input transform with a target transform.
+
+    Calling the instance applies ``transform`` to the input and
+    ``target_transform`` to the target; a transform left as ``None`` passes
+    its value through unchanged.
+    """
+
+    def __init__(self,
+                 transform: Optional[Callable]=None,
+                 target_transform: Optional[Callable]=None) -> None:
+        self.transform = transform
+        self.target_transform = target_transform
+
+    def __call__(self, input: Any, target: Any) -> Tuple[Any, Any]:
+        """Return ``(input, target)`` after applying the configured transforms."""
+        new_input = input if self.transform is None else self.transform(
+            input)
+        new_target = (target if self.target_transform is None else
+                      self.target_transform(target))
+        return new_input, new_target
+
+    def _format_transform_repr(self, transform: Callable,
+                               head: str) -> List[str]:
+        """Prefix the first repr line of ``transform`` with ``head`` and pad
+        every following line with ``len(head)`` spaces."""
+        repr_lines = transform.__repr__().splitlines()
+        padding = " " * len(head)
+        formatted = [f"{head}{repr_lines[0]}"]
+        formatted.extend(f"{padding}{line}" for line in repr_lines[1:])
+        return formatted
+
+    def __repr__(self) -> str:
+        """Class name followed by one labelled section per set transform."""
+        sections = [self.__class__.__name__]
+        labelled = (
+            (self.transform, "Transform: "),
+            (self.target_transform, "Target transform: "), )
+        for fn, label in labelled:
+            if fn is not None:
+                sections.extend(self._format_transform_repr(fn, label))
+
+        return '\n'.join(sections)
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_paddle/paddlevision/models/__init__.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_paddle/paddlevision/models/__init__.py
+from .mobilenet_v3_paddle import mobilenet_v3_large, mobilenet_v3_small
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_paddle/paddlevision/models/misc_paddle.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_paddle/paddlevision/models/misc_paddle.py
+from typing import Any, Callable, List, Optional, Sequence
+
+import paddle
+import paddle.nn as nn
+
+
+class ConvNormActivation(nn.Sequential):
+    """Sequential block: ``Conv2D``, then optional norm, then optional activation.
+
+    Args:
+        in_channels (int): Input channels of the convolution.
+        out_channels (int): Output channels of the convolution; also stored
+            on the instance as ``out_channels``.
+        kernel_size (int): Convolution kernel size.
+        stride (int): Convolution stride.
+        padding (Optional[int]): Convolution padding. When None it is set to
+            ``(kernel_size - 1) // 2 * dilation``.
+        groups (int): Convolution groups.
+        norm_layer: Callable invoked with ``out_channels`` to build the norm
+            layer; None skips it.
+        activation_layer: Callable invoked without arguments to build the
+            activation; None skips it.
+        dilation (int): Convolution dilation.
+        bias (Optional[bool]): Passed to the convolution as ``bias_attr``.
+            When None it is True only if ``norm_layer`` is None.
+    """
+
+    def __init__(
+            self,
+            in_channels: int,
+            out_channels: int,
+            kernel_size: int=3,
+            stride: int=1,
+            padding: Optional[int]=None,
+            groups: int=1,
+            norm_layer: Optional[Callable[..., nn.Layer]]=nn.BatchNorm2D,
+            activation_layer: Optional[Callable[..., nn.Layer]]=nn.ReLU,
+            dilation: int=1,
+            bias: Optional[bool]=None, ) -> None:
+        # Resolve the derived defaults before building any layer.
+        if padding is None:
+            padding = (kernel_size - 1) // 2 * dilation
+        if bias is None:
+            bias = norm_layer is None
+
+        conv = nn.Conv2D(
+            in_channels,
+            out_channels,
+            kernel_size,
+            stride=stride,
+            padding=padding,
+            dilation=dilation,
+            groups=groups,
+            bias_attr=bias, )
+        stages = [conv]
+        if norm_layer is not None:
+            stages.append(norm_layer(out_channels))
+        if activation_layer is not None:
+            stages.append(activation_layer())
+
+        super().__init__(*stages)
+        self.out_channels = out_channels
+
+
+class SqueezeExcitation(nn.Layer):
+    """Squeeze-and-excitation layer that rescales its input channel-wise.
+
+    The scale comes from adaptive average pooling to 1x1, a 1x1 convolution
+    down to ``squeeze_channels``, ``activation``, a 1x1 convolution back to
+    ``input_channels`` and finally ``scale_activation``.
+
+    Args:
+        input_channels (int): Channels of the input tensor.
+        squeeze_channels (int): Channels between the two 1x1 convolutions.
+        activation: Callable building the inner activation layer.
+        scale_activation: Callable building the final gating activation.
+    """
+
+    def __init__(
+            self,
+            input_channels: int,
+            squeeze_channels: int,
+            activation: Callable[..., nn.Layer]=nn.ReLU,
+            scale_activation: Callable[..., nn.Layer]=nn.Sigmoid, ) -> None:
+        super().__init__()
+        self.avgpool = nn.AdaptiveAvgPool2D(1)
+        self.fc1 = nn.Conv2D(input_channels, squeeze_channels, 1)
+        self.fc2 = nn.Conv2D(squeeze_channels, input_channels, 1)
+        self.activation = activation()
+        self.scale_activation = scale_activation()
+
+    def _scale(self, input: paddle.Tensor) -> paddle.Tensor:
+        """Compute the scale tensor that ``forward`` multiplies with ``input``."""
+        squeezed = self.fc1(self.avgpool(input))
+        excited = self.fc2(self.activation(squeezed))
+        return self.scale_activation(excited)
+
+    def forward(self, input: paddle.Tensor) -> paddle.Tensor:
+        """Return ``input`` multiplied by its computed scale."""
+        return self._scale(input) * input
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_paddle/paddlevision/models/mobilenet_v3_paddle.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_paddle/paddlevision/models/mobilenet_v3_paddle.py
+import warnings
+from functools import partial
+from typing import Any, Callable, List, Optional, Sequence
+
+import paddle
+import paddle.nn as nn
+
+from .misc_paddle import ConvNormActivation, SqueezeExcitation as SElayer
+
+__all__ = ["MobileNetV3", "mobilenet_v3_large", "mobilenet_v3_small"]
+
+
+def _make_divisible(v: float, divisor: int,
+                    min_value: Optional[int]=None) -> int:
+    """
+    Round ``v`` to the nearest multiple of ``divisor``, never going below
+    ``min_value`` (which defaults to ``divisor``).
+
+    This function is taken from the original tf repo.
+    It can be seen here:
+    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
+    """
+    lower_bound = divisor if min_value is None else min_value
+    nearest_multiple = int(v + divisor / 2) // divisor * divisor
+    candidate = max(lower_bound, nearest_multiple)
+    # Make sure that round down does not go down by more than 10%.
+    if candidate < 0.9 * v:
+        candidate += divisor
+    return candidate
+
+
+class SqueezeExcitation(SElayer):
+    """Squeeze-excitation variant sized by a squeeze factor.
+
+    The squeeze width is ``_make_divisible(input_channels // squeeze_factor, 8)``
+    and the gating activation is ``nn.Hardsigmoid``. The inner activation is
+    exposed as ``relu`` instead of ``activation``.
+
+    Args:
+        input_channels (int): Channels of the input tensor.
+        squeeze_factor (int): Divisor applied to ``input_channels`` to get
+            the squeeze width before rounding.
+    """
+
+    def __init__(self, input_channels: int, squeeze_factor: int=4):
+        squeeze_channels = _make_divisible(input_channels // squeeze_factor, 8)
+        super().__init__(
+            input_channels, squeeze_channels, scale_activation=nn.Hardsigmoid)
+        self.relu = self.activation
+        delattr(self, "activation")
+
+    def _scale(self, input: paddle.Tensor) -> paddle.Tensor:
+        """Compute the channel scale using ``self.relu``.
+
+        The base implementation reads ``self.activation``, which ``__init__``
+        removes, so without this override ``forward`` fails with
+        AttributeError.
+        """
+        scale = self.avgpool(input)
+        scale = self.fc1(scale)
+        scale = self.relu(scale)
+        scale = self.fc2(scale)
+        return self.scale_activation(scale)
+
+
+class InvertedResidualConfig:
+    """Settings of one inverted residual block.
+
+    Stores information listed at Tables 1 and 2 of the MobileNetV3 paper.
+    The three channel counts are scaled by ``width_mult`` and rounded through
+    :meth:`adjust_channels`; ``activation == "HS"`` is stored as ``use_hs``.
+    """
+
+    def __init__(
+            self,
+            input_channels: int,
+            kernel: int,
+            expanded_channels: int,
+            out_channels: int,
+            use_se: bool,
+            activation: str,
+            stride: int,
+            dilation: int,
+            width_mult: float, ):
+        scale = self.adjust_channels
+        self.input_channels = scale(input_channels, width_mult)
+        self.kernel = kernel
+        self.expanded_channels = scale(expanded_channels, width_mult)
+        self.out_channels = scale(out_channels, width_mult)
+        self.use_se = use_se
+        self.use_hs = activation == "HS"
+        self.stride = stride
+        self.dilation = dilation
+
+    @staticmethod
+    def adjust_channels(channels: int, width_mult: float):
+        """Scale ``channels`` by ``width_mult`` and pass it to ``_make_divisible`` with divisor 8."""
+        scaled = channels * width_mult
+        return _make_divisible(scaled, 8)
+
+
+class InvertedResidual(nn.Layer):
+    """Inverted residual block.
+
+    Implemented as described at section 5 of MobileNetV3 paper. The block is
+    an optional 1x1 expansion, a depthwise convolution, an optional
+    squeeze-excitation layer and a 1x1 projection without activation. The
+    input is added to the result when the configured stride is 1 and the
+    input and output channel counts match.
+
+    Args:
+        cnf (InvertedResidualConfig): Settings of this block.
+        norm_layer: Callable building the normalization layers.
+        se_layer: Callable building the squeeze-excitation layer; called with
+            the expanded channel count and the squeeze channel count.
+
+    Raises:
+        ValueError: If ``cnf.stride`` is outside ``[1, 2]``.
+    """
+
+    def __init__(
+            self,
+            cnf: InvertedResidualConfig,
+            norm_layer: Callable[..., nn.Layer],
+            se_layer: Callable[..., nn.Layer]=partial(
+                SElayer, scale_activation=nn.Hardsigmoid), ):
+        super().__init__()
+        if cnf.stride < 1 or cnf.stride > 2:
+            raise ValueError("illegal stride value")
+
+        same_shape = cnf.input_channels == cnf.out_channels
+        self.use_res_connect = cnf.stride == 1 and same_shape
+
+        if cnf.use_hs:
+            act = nn.Hardswish
+        else:
+            act = nn.ReLU
+
+        stages: List[nn.Layer] = []
+
+        # expand: only needed when the block actually widens its input
+        if cnf.expanded_channels != cnf.input_channels:
+            expand = ConvNormActivation(
+                cnf.input_channels,
+                cnf.expanded_channels,
+                kernel_size=1,
+                norm_layer=norm_layer,
+                activation_layer=act, )
+            stages.append(expand)
+
+        # depthwise: a dilated block keeps stride 1
+        depthwise_stride = cnf.stride if cnf.dilation <= 1 else 1
+        depthwise = ConvNormActivation(
+            cnf.expanded_channels,
+            cnf.expanded_channels,
+            kernel_size=cnf.kernel,
+            stride=depthwise_stride,
+            dilation=cnf.dilation,
+            groups=cnf.expanded_channels,
+            norm_layer=norm_layer,
+            activation_layer=act, )
+        stages.append(depthwise)
+
+        if cnf.use_se:
+            se_width = _make_divisible(cnf.expanded_channels // 4, 8)
+            stages.append(se_layer(cnf.expanded_channels, se_width))
+
+        # project: linear bottleneck, hence no activation
+        project = ConvNormActivation(
+            cnf.expanded_channels,
+            cnf.out_channels,
+            kernel_size=1,
+            norm_layer=norm_layer,
+            activation_layer=None)
+        stages.append(project)
+
+        self.block = nn.Sequential(*stages)
+        self.out_channels = cnf.out_channels
+        self._is_cn = cnf.stride > 1
+
+    def forward(self, input: paddle.Tensor) -> paddle.Tensor:
+        """Run the block and add ``input`` when the residual connection is enabled."""
+        out = self.block(input)
+        if not self.use_res_connect:
+            return out
+        out += input
+        return out
+
+
+class MobileNetV3(nn.Layer):
+    """MobileNet V3 network: feature extractor, global pooling and classifier."""
+
+    def __init__(
+            self,
+            inverted_residual_setting: List[InvertedResidualConfig],
+            last_channel: int,
+            num_classes: int=1000,
+            block: Optional[Callable[..., nn.Layer]]=None,
+            norm_layer: Optional[Callable[..., nn.Layer]]=None,
+            dropout: float=0.2,
+            **kwargs: Any, ) -> None:
+        """
+        MobileNet V3 main class
+
+        Args:
+            inverted_residual_setting (List[InvertedResidualConfig]): Network structure
+            last_channel (int): The number of channels on the penultimate layer
+            num_classes (int): Number of classes
+            block (Optional[Callable[..., nn.Layer]]): Module specifying inverted residual building block for mobilenet
+            norm_layer (Optional[Callable[..., nn.Layer]]): Module specifying the normalization layer to use
+            dropout (float): The dropout probability
+            **kwargs: Accepted and ignored by this constructor.
+
+        Raises:
+            ValueError: If ``inverted_residual_setting`` is empty.
+            TypeError: If ``inverted_residual_setting`` is not a sequence of
+                ``InvertedResidualConfig``.
+        """
+        super().__init__()
+        if not inverted_residual_setting:
+            raise ValueError(
+                "The inverted_residual_setting should not be empty")
+        elif not (isinstance(inverted_residual_setting, Sequence) and all(
+                isinstance(s, InvertedResidualConfig)
+                for s in inverted_residual_setting)):
+            raise TypeError(
+                "The inverted_residual_setting should be List[InvertedResidualConfig]"
+            )
+
+        if block is None:
+            block = InvertedResidual
+
+        if norm_layer is None:
+            # Paddle updates running statistics as
+            #   moving = moving * momentum + batch * (1 - momentum),
+            # i.e. momentum weights the OLD value, the opposite of PyTorch.
+            # The reference model's PyTorch momentum of 0.01 is therefore
+            # 0.99 here.
+            norm_layer = partial(nn.BatchNorm2D, epsilon=0.001, momentum=0.99)
+
+        layers: List[nn.Layer] = []
+
+        # building first layer
+        firstconv_output_channels = inverted_residual_setting[0].input_channels
+        layers.append(
+            ConvNormActivation(
+                3,
+                firstconv_output_channels,
+                kernel_size=3,
+                stride=2,
+                norm_layer=norm_layer,
+                activation_layer=nn.Hardswish, ))
+
+        # building inverted residual blocks
+        for cnf in inverted_residual_setting:
+            layers.append(block(cnf, norm_layer))
+
+        # building last several layers
+        lastconv_input_channels = inverted_residual_setting[-1].out_channels
+        lastconv_output_channels = 6 * lastconv_input_channels
+        layers.append(
+            ConvNormActivation(
+                lastconv_input_channels,
+                lastconv_output_channels,
+                kernel_size=1,
+                norm_layer=norm_layer,
+                activation_layer=nn.Hardswish, ))
+
+        self.features = nn.Sequential(*layers)
+        self.avgpool = nn.AdaptiveAvgPool2D(1)
+        self.classifier = nn.Sequential(
+            nn.Linear(lastconv_output_channels, last_channel),
+            nn.Hardswish(),
+            nn.Dropout(p=dropout),
+            nn.Linear(last_channel, num_classes), )
+
+    def forward(self, x: paddle.Tensor) -> paddle.Tensor:
+        """Apply features, global average pooling, flatten (from axis 1) and classifier."""
+        x = self.features(x)
+        x = self.avgpool(x)
+        x = paddle.flatten(x, 1)
+        x = self.classifier(x)
+        return x
+
+
+def _mobilenet_v3_conf(arch: str,
+                       width_mult: float=1.0,
+                       reduced_tail: bool=False,
+                       dilated: bool=False,
+                       **kwargs: Any):
+    """Build the block configuration list and classifier width for ``arch``.
+
+    Args:
+        arch (str): ``"mobilenet_v3_large"`` or ``"mobilenet_v3_small"``.
+        width_mult (float): Channel multiplier handed to every block config
+            and used for the returned ``last_channel``.
+        reduced_tail (bool): When True, the channel counts of the last three
+            blocks (C4 stage) and of ``last_channel`` are halved.
+        dilated (bool): When True, the last three blocks use dilation 2.
+        **kwargs: Accepted and ignored.
+
+    Raises:
+        ValueError: If ``arch`` is not one of the two supported names.
+
+    Returns:
+        tuple: ``(inverted_residual_setting, last_channel)``.
+    """
+    if arch not in ("mobilenet_v3_large", "mobilenet_v3_small"):
+        raise ValueError(f"Unsupported model type {arch}")
+
+    tail_divider = 2 if reduced_tail else 1
+    tail_dilation = 2 if dilated else 1
+
+    # Row layout: (input_channels, kernel, expanded_channels, out_channels,
+    #              use_se, activation, stride, dilation)
+    if arch == "mobilenet_v3_large":
+        tail_out = 160 // tail_divider
+        tail_expanded = 960 // tail_divider
+        rows = [
+            (16, 3, 16, 16, False, "RE", 1, 1),
+            (16, 3, 64, 24, False, "RE", 2, 1),  # C1
+            (24, 3, 72, 24, False, "RE", 1, 1),
+            (24, 5, 72, 40, True, "RE", 2, 1),  # C2
+            (40, 5, 120, 40, True, "RE", 1, 1),
+            (40, 5, 120, 40, True, "RE", 1, 1),
+            (40, 3, 240, 80, False, "HS", 2, 1),  # C3
+            (80, 3, 200, 80, False, "HS", 1, 1),
+            (80, 3, 184, 80, False, "HS", 1, 1),
+            (80, 3, 184, 80, False, "HS", 1, 1),
+            (80, 3, 480, 112, True, "HS", 1, 1),
+            (112, 3, 672, 112, True, "HS", 1, 1),
+            (112, 5, 672, tail_out, True, "HS", 2, tail_dilation),  # C4
+            (tail_out, 5, tail_expanded, tail_out, True, "HS", 1,
+             tail_dilation),
+            (tail_out, 5, tail_expanded, tail_out, True, "HS", 1,
+             tail_dilation),
+        ]
+        head_channels = 1280 // tail_divider  # C5
+    else:
+        tail_out = 96 // tail_divider
+        tail_expanded = 576 // tail_divider
+        rows = [
+            (16, 3, 16, 16, True, "RE", 2, 1),  # C1
+            (16, 3, 72, 24, False, "RE", 2, 1),  # C2
+            (24, 3, 88, 24, False, "RE", 1, 1),
+            (24, 5, 96, 40, True, "HS", 2, 1),  # C3
+            (40, 5, 240, 40, True, "HS", 1, 1),
+            (40, 5, 240, 40, True, "HS", 1, 1),
+            (40, 5, 120, 48, True, "HS", 1, 1),
+            (48, 5, 144, 48, True, "HS", 1, 1),
+            (48, 5, 288, tail_out, True, "HS", 2, tail_dilation),  # C4
+            (tail_out, 5, tail_expanded, tail_out, True, "HS", 1,
+             tail_dilation),
+            (tail_out, 5, tail_expanded, tail_out, True, "HS", 1,
+             tail_dilation),
+        ]
+        head_channels = 1024 // tail_divider  # C5
+
+    inverted_residual_setting = [
+        InvertedResidualConfig(*row, width_mult=width_mult) for row in rows
+    ]
+    last_channel = InvertedResidualConfig.adjust_channels(
+        head_channels, width_mult=width_mult)
+
+    return inverted_residual_setting, last_channel
+
+
+def _mobilenet_v3(
+        arch: str,
+        inverted_residual_setting: List[InvertedResidualConfig],
+        last_channel: int,
+        pretrained: bool,
+        progress: bool,
+        **kwargs: Any, ):
+    """Instantiate ``MobileNetV3`` and optionally load weights into it.
+
+    Args:
+        arch (str): Architecture name; not used inside this function.
+        inverted_residual_setting: Block configurations for the network.
+        last_channel (int): Width of the penultimate classifier layer.
+        pretrained: When truthy, the value itself is handed to
+            ``paddle.load`` and the result is applied with ``set_dict``.
+            NOTE(review): despite the ``bool`` annotation this presumably
+            has to be a checkpoint path - confirm against callers.
+        progress (bool): Not used inside this function.
+        **kwargs: Forwarded to the ``MobileNetV3`` constructor.
+
+    Returns:
+        The constructed model.
+    """
+    net = MobileNetV3(inverted_residual_setting, last_channel, **kwargs)
+    if not pretrained:
+        return net
+    weights = paddle.load(pretrained)
+    net.set_dict(weights)
+    return net
+
+
+def mobilenet_v3_large(pretrained: bool=False,
+                       progress: bool=True,
+                       **kwargs: Any) -> MobileNetV3:
+    """
+    Constructs a large MobileNetV3 architecture from
+    `"Searching for MobileNetV3" <https://arxiv.org/abs/1905.02244>`_.
+
+    Args:
+        pretrained: Forwarded to ``_mobilenet_v3``, which loads weights via
+            ``paddle.load(pretrained)`` when the value is truthy.
+        progress (bool): Forwarded to ``_mobilenet_v3``, which does not use it.
+        **kwargs: Passed both to ``_mobilenet_v3_conf`` and on to the
+            ``MobileNetV3`` constructor.
+    """
+    settings_and_width = _mobilenet_v3_conf("mobilenet_v3_large", **kwargs)
+    return _mobilenet_v3("mobilenet_v3_large", *settings_and_width,
+                         pretrained, progress, **kwargs)
+
+
+def mobilenet_v3_small(pretrained: bool=False,
+                       progress: bool=True,
+                       **kwargs: Any) -> MobileNetV3:
+    """
+    Constructs a small MobileNetV3 architecture from
+    `"Searching for MobileNetV3" <https://arxiv.org/abs/1905.02244>`_.
+
+    Args:
+        pretrained: Forwarded to ``_mobilenet_v3``, which loads weights via
+            ``paddle.load(pretrained)`` when the value is truthy.
+        progress (bool): Forwarded to ``_mobilenet_v3``, which does not use it.
+        **kwargs: Passed both to ``_mobilenet_v3_conf`` and on to the
+            ``MobileNetV3`` constructor.
+    """
+    settings_and_width = _mobilenet_v3_conf("mobilenet_v3_small", **kwargs)
+    return _mobilenet_v3("mobilenet_v3_small", *settings_and_width,
+                         pretrained, progress, **kwargs)
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_paddle/paddlevision/transforms/__init__.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_paddle/paddlevision/transforms/__init__.py
+from .transforms import *
+from .autoaugment import *
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_paddle/paddlevision/transforms/autoaugment.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_paddle/paddlevision/transforms/autoaugment.py
+import math
+import paddle
+
+from enum import Enum
+from paddle import Tensor
+from typing import List, Tuple, Optional
+
+from . import functional as f
+from .functional import InterpolationMode
+
+__all__ = ["AutoAugmentPolicy", "AutoAugment"]
+
+
+class AutoAugmentPolicy(Enum):
+    """Names of the datasets an AutoAugment policy was learned on.
+
+    Available policies are IMAGENET, CIFAR10 and SVHN; each member's value
+    is the lowercase dataset name.
+    """
+    # Policy learned on ImageNet.
+    IMAGENET = "imagenet"
+    # Policy learned on CIFAR-10.
+    CIFAR10 = "cifar10"
+    # Policy learned on SVHN.
+    SVHN = "svhn"
+
+
+def _get_transforms(policy: AutoAugmentPolicy):
+    """Return the sub-policy table for ``policy``.
+
+    Every table holds 25 sub-policies. A sub-policy is a pair of
+    ``(op_name, probability, magnitude_index)`` triples; ``magnitude_index``
+    is ``None`` for operations that take no magnitude.
+
+    Args:
+        policy (AutoAugmentPolicy): Which learned policy to fetch.
+
+    Returns:
+        list or None: The matching table, or ``None`` when ``policy`` equals
+        none of the known members.
+    """
+    sub_policies = None
+
+    if policy == AutoAugmentPolicy.IMAGENET:
+        sub_policies = [
+            (("Posterize", 0.4, 8), ("Rotate", 0.6, 9)),
+            (("Solarize", 0.6, 5), ("AutoContrast", 0.6, None)),
+            (("Equalize", 0.8, None), ("Equalize", 0.6, None)),
+            (("Posterize", 0.6, 7), ("Posterize", 0.6, 6)),
+            (("Equalize", 0.4, None), ("Solarize", 0.2, 4)),
+            (("Equalize", 0.4, None), ("Rotate", 0.8, 8)),
+            (("Solarize", 0.6, 3), ("Equalize", 0.6, None)),
+            (("Posterize", 0.8, 5), ("Equalize", 1.0, None)),
+            (("Rotate", 0.2, 3), ("Solarize", 0.6, 8)),
+            (("Equalize", 0.6, None), ("Posterize", 0.4, 6)),
+            (("Rotate", 0.8, 8), ("Color", 0.4, 0)),
+            (("Rotate", 0.4, 9), ("Equalize", 0.6, None)),
+            (("Equalize", 0.0, None), ("Equalize", 0.8, None)),
+            (("Invert", 0.6, None), ("Equalize", 1.0, None)),
+            (("Color", 0.6, 4), ("Contrast", 1.0, 8)),
+            (("Rotate", 0.8, 8), ("Color", 1.0, 2)),
+            (("Color", 0.8, 8), ("Solarize", 0.8, 7)),
+            (("Sharpness", 0.4, 7), ("Invert", 0.6, None)),
+            (("ShearX", 0.6, 5), ("Equalize", 1.0, None)),
+            (("Color", 0.4, 0), ("Equalize", 0.6, None)),
+            (("Equalize", 0.4, None), ("Solarize", 0.2, 4)),
+            (("Solarize", 0.6, 5), ("AutoContrast", 0.6, None)),
+            (("Invert", 0.6, None), ("Equalize", 1.0, None)),
+            (("Color", 0.6, 4), ("Contrast", 1.0, 8)),
+            (("Equalize", 0.8, None), ("Equalize", 0.6, None)),
+        ]
+    elif policy == AutoAugmentPolicy.CIFAR10:
+        sub_policies = [
+            (("Invert", 0.1, None), ("Contrast", 0.2, 6)),
+            (("Rotate", 0.7, 2), ("TranslateX", 0.3, 9)),
+            (("Sharpness", 0.8, 1), ("Sharpness", 0.9, 3)),
+            (("ShearY", 0.5, 8), ("TranslateY", 0.7, 9)),
+            (("AutoContrast", 0.5, None), ("Equalize", 0.9, None)),
+            (("ShearY", 0.2, 7), ("Posterize", 0.3, 7)),
+            (("Color", 0.4, 3), ("Brightness", 0.6, 7)),
+            (("Sharpness", 0.3, 9), ("Brightness", 0.7, 9)),
+            (("Equalize", 0.6, None), ("Equalize", 0.5, None)),
+            (("Contrast", 0.6, 7), ("Sharpness", 0.6, 5)),
+            (("Color", 0.7, 7), ("TranslateX", 0.5, 8)),
+            (("Equalize", 0.3, None), ("AutoContrast", 0.4, None)),
+            (("TranslateY", 0.4, 3), ("Sharpness", 0.2, 6)),
+            (("Brightness", 0.9, 6), ("Color", 0.2, 8)),
+            (("Solarize", 0.5, 2), ("Invert", 0.0, None)),
+            (("Equalize", 0.2, None), ("AutoContrast", 0.6, None)),
+            (("Equalize", 0.2, None), ("Equalize", 0.6, None)),
+            (("Color", 0.9, 9), ("Equalize", 0.6, None)),
+            (("AutoContrast", 0.8, None), ("Solarize", 0.2, 8)),
+            (("Brightness", 0.1, 3), ("Color", 0.7, 0)),
+            (("Solarize", 0.4, 5), ("AutoContrast", 0.9, None)),
+            (("TranslateY", 0.9, 9), ("TranslateY", 0.7, 9)),
+            (("AutoContrast", 0.9, None), ("Solarize", 0.8, 3)),
+            (("Equalize", 0.8, None), ("Invert", 0.1, None)),
+            (("TranslateY", 0.7, 9), ("AutoContrast", 0.9, None)),
+        ]
+    elif policy == AutoAugmentPolicy.SVHN:
+        sub_policies = [
+            (("ShearX", 0.9, 4), ("Invert", 0.2, None)),
+            (("ShearY", 0.9, 8), ("Invert", 0.7, None)),
+            (("Equalize", 0.6, None), ("Solarize", 0.6, 6)),
+            (("Invert", 0.9, None), ("Equalize", 0.6, None)),
+            (("Equalize", 0.6, None), ("Rotate", 0.9, 3)),
+            (("ShearX", 0.9, 4), ("AutoContrast", 0.8, None)),
+            (("ShearY", 0.9, 8), ("Invert", 0.4, None)),
+            (("ShearY", 0.9, 5), ("Solarize", 0.2, 6)),
+            (("Invert", 0.9, None), ("AutoContrast", 0.8, None)),
+            (("Equalize", 0.6, None), ("Rotate", 0.9, 3)),
+            (("ShearX", 0.9, 4), ("Solarize", 0.3, 3)),
+            (("ShearY", 0.8, 8), ("Invert", 0.7, None)),
+            (("Equalize", 0.9, None), ("TranslateY", 0.6, 6)),
+            (("Invert", 0.9, None), ("Equalize", 0.6, None)),
+            (("Contrast", 0.3, 3), ("Rotate", 0.8, 4)),
+            (("Invert", 0.8, None), ("TranslateY", 0.0, 2)),
+            (("ShearY", 0.7, 6), ("Solarize", 0.4, 8)),
+            (("Invert", 0.6, None), ("Rotate", 0.8, 4)),
+            (("ShearY", 0.3, 7), ("TranslateX", 0.9, 3)),
+            (("ShearX", 0.1, 6), ("Invert", 0.6, None)),
+            (("Solarize", 0.7, 2), ("TranslateY", 0.6, 7)),
+            (("ShearY", 0.8, 4), ("Invert", 0.8, None)),
+            (("ShearX", 0.7, 9), ("TranslateY", 0.8, 3)),
+            (("ShearY", 0.8, 5), ("AutoContrast", 0.7, None)),
+            (("ShearX", 0.7, 2), ("Invert", 0.1, None)),
+        ]
+
+    return sub_policies
+
+
+def _get_magnitudes():
+    """Build the per-operation magnitude lookup table.
+
+    Returns:
+        dict: Maps an operation name to ``(magnitudes, signed)``.
+        ``magnitudes`` is a 10-entry tensor indexed by the magnitude index
+        stored in the policy tables, or ``None`` for operations that take no
+        magnitude. ``signed`` is True when the chosen magnitude may be
+        negated, False when it is used as-is, and ``None`` when there is no
+        magnitude at all.
+    """
+    num_bins = 10
+    return {
+        # name: (magnitudes, signed)
+        "ShearX": (paddle.linspace(0.0, 0.3, num_bins), True),
+        "ShearY": (paddle.linspace(0.0, 0.3, num_bins), True),
+        "TranslateX": (paddle.linspace(0.0, 150.0 / 331.0, num_bins), True),
+        "TranslateY": (paddle.linspace(0.0, 150.0 / 331.0, num_bins), True),
+        "Rotate": (paddle.linspace(0.0, 30.0, num_bins), True),
+        "Brightness": (paddle.linspace(0.0, 0.9, num_bins), True),
+        "Color": (paddle.linspace(0.0, 0.9, num_bins), True),
+        "Contrast": (paddle.linspace(0.0, 0.9, num_bins), True),
+        "Sharpness": (paddle.linspace(0.0, 0.9, num_bins), True),
+        # ``paddle.to_tensor`` is the tensor constructor; ``paddle.tensor``
+        # is a sub-package and cannot be called.
+        "Posterize":
+        (paddle.to_tensor([8, 8, 7, 7, 6, 6, 5, 5, 4, 4]), False),
+        "Solarize": (paddle.linspace(256.0, 0.0, num_bins), False),
+        "AutoContrast": (None, None),
+        "Equalize": (None, None),
+        "Invert": (None, None),
+    }
+
+
+class AutoAugment(paddle.nn.Layer):
+    r"""AutoAugment data augmentation method based on
+    `"AutoAugment: Learning Augmentation Strategies from Data" <https://arxiv.org/pdf/1805.09501.pdf>`_.
+    If the image is paddle Tensor, it should be of type paddle.uint8, and it is expected
+    to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
+    If img is PIL Image, it is expected to be in mode "L" or "RGB".
+
+    Args:
+        policy (AutoAugmentPolicy): Default is ``AutoAugmentPolicy.IMAGENET``.
+        interpolation (InterpolationMode): Default is ``InterpolationMode.NEAREST``.
+            If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
+        fill (sequence or number, optional): Pixel fill value for the area outside the transformed
+            image. If given a number, the value is used for all bands respectively.
+
+    Raises:
+        ValueError: If no sub-policy table exists for ``policy``.
+    """
+
+    def __init__(self,
+                 policy: AutoAugmentPolicy=AutoAugmentPolicy.IMAGENET,
+                 interpolation: InterpolationMode=InterpolationMode.NEAREST,
+                 fill: Optional[List[float]]=None):
+        super().__init__()
+        self.policy = policy
+        self.interpolation = interpolation
+        self.fill = fill
+
+        # ``_get_transforms`` returns None for a policy it does not know.
+        self.transforms = _get_transforms(policy)
+        if self.transforms is None:
+            raise ValueError(
+                "The provided policy {} is not recognized.".format(policy))
+        self._op_meta = _get_magnitudes()
+
+    @staticmethod
+    def get_params(transform_num: int) -> Tuple[int, Tensor, Tensor]:
+        """Get parameters for autoaugment transformation
+
+        Args:
+            transform_num (int): Number of available sub-policies; used as
+                the ``high`` bound when drawing the sub-policy index.
+
+        Returns:
+            tuple: ``(policy_id, probs, signs)`` where ``policy_id`` is the
+            drawn sub-policy index, ``probs`` holds two ``paddle.rand``
+            samples (one per operation of the sub-policy) and ``signs`` holds
+            two ``paddle.randint(low=0, high=2)`` samples used to decide
+            whether a signed magnitude is negated.
+        """
+        policy_id = int(paddle.randint(low=0, high=transform_num, shape=(1, )))
+        probs = paddle.rand((2, ))
+        signs = paddle.randint(low=0, high=2, shape=(2, ))
+
+        return policy_id, probs, signs
+
+    def _get_op_meta(self,
+                     name: str) -> Tuple[Optional[Tensor], Optional[bool]]:
+        """Look up the ``(magnitudes, signed)`` entry for operation ``name``."""
+        return self._op_meta[name]
+
+    def forward(self, img: Tensor):
+        """Apply one randomly chosen sub-policy to ``img``.
+
+        Args:
+            img (PIL Image or Tensor): Image to be transformed.
+
+        Returns:
+            PIL Image or Tensor: AutoAugmented image.
+
+        Raises:
+            ValueError: If a sub-policy names an unknown operation.
+        """
+        # The module header binds the functional helpers to a different
+        # alias, so ``F`` has to be bound here; without it every ``F.*`` call
+        # below raises NameError.
+        # NOTE(review): confirm that ``functional`` really exposes every
+        # helper used below (affine, rotate, adjust_*, posterize, solarize,
+        # autocontrast, equalize, invert, _get_image_num_channels).
+        from . import functional as F
+
+        fill = self.fill
+        if isinstance(img, Tensor):
+            if isinstance(fill, (int, float)):
+                # A scalar fill is broadcast to one value per channel.
+                fill = [float(fill)] * F._get_image_num_channels(img)
+            elif fill is not None:
+                fill = [float(value) for value in fill]
+
+        transform_id, probs, signs = self.get_params(len(self.transforms))
+
+        for i, (op_name, p,
+                magnitude_id) in enumerate(self.transforms[transform_id]):
+            # Each operation of the sub-policy fires with its own probability.
+            if probs[i] <= p:
+                magnitudes, signed = self._get_op_meta(op_name)
+                magnitude = float(magnitudes[magnitude_id].item()) \
+                    if magnitudes is not None and magnitude_id is not None else 0.0
+                # Signed operations flip direction when the drawn sign is 0.
+                if signed is not None and signed and signs[i] == 0:
+                    magnitude *= -1.0
+
+                if op_name == "ShearX":
+                    img = F.affine(
+                        img,
+                        angle=0.0,
+                        translate=[0, 0],
+                        scale=1.0,
+                        shear=[math.degrees(magnitude), 0.0],
+                        interpolation=self.interpolation,
+                        fill=fill)
+                elif op_name == "ShearY":
+                    img = F.affine(
+                        img,
+                        angle=0.0,
+                        translate=[0, 0],
+                        scale=1.0,
+                        shear=[0.0, math.degrees(magnitude)],
+                        interpolation=self.interpolation,
+                        fill=fill)
+                elif op_name == "TranslateX":
+                    # The magnitude is a fraction of the image width.
+                    img = F.affine(
+                        img,
+                        angle=0.0,
+                        translate=[
+                            int(F._get_image_size(img)[0] * magnitude), 0
+                        ],
+                        scale=1.0,
+                        interpolation=self.interpolation,
+                        shear=[0.0, 0.0],
+                        fill=fill)
+                elif op_name == "TranslateY":
+                    # The magnitude is a fraction of the image height.
+                    img = F.affine(
+                        img,
+                        angle=0.0,
+                        translate=[
+                            0, int(F._get_image_size(img)[1] * magnitude)
+                        ],
+                        scale=1.0,
+                        interpolation=self.interpolation,
+                        shear=[0.0, 0.0],
+                        fill=fill)
+                elif op_name == "Rotate":
+                    img = F.rotate(
+                        img,
+                        magnitude,
+                        interpolation=self.interpolation,
+                        fill=fill)
+                elif op_name == "Brightness":
+                    img = F.adjust_brightness(img, 1.0 + magnitude)
+                elif op_name == "Color":
+                    img = F.adjust_saturation(img, 1.0 + magnitude)
+                elif op_name == "Contrast":
+                    img = F.adjust_contrast(img, 1.0 + magnitude)
+                elif op_name == "Sharpness":
+                    img = F.adjust_sharpness(img, 1.0 + magnitude)
+                elif op_name == "Posterize":
+                    img = F.posterize(img, int(magnitude))
+                elif op_name == "Solarize":
+                    img = F.solarize(img, magnitude)
+                elif op_name == "AutoContrast":
+                    img = F.autocontrast(img)
+                elif op_name == "Equalize":
+                    img = F.equalize(img)
+                elif op_name == "Invert":
+                    img = F.invert(img)
+                else:
+                    raise ValueError(
+                        "The provided operator {} is not recognized.".format(
+                            op_name))
+
+        return img
+
+    def __repr__(self):
+        return self.__class__.__name__ + '(policy={}, fill={})'.format(
+            self.policy, self.fill)
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_paddle/paddlevision/transforms/functional.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_paddle/paddlevision/transforms/functional.py
+import numbers
+import warnings
+from enum import Enum
+
+import numpy as np
+
+import paddle
+from paddle import Tensor
+from typing import List, Tuple, Any, Optional
+
+try:
+    import accimage
+except ImportError:
+    accimage = None
+
+from . import functional_pil as F_pil
+from . import functional_tensor as F_t
+
+
+class InterpolationMode(Enum):
+    """Interpolation modes
+    Available interpolation methods are ``nearest``, ``bilinear``, ``bicubic``, ``box``, ``hamming``, and ``lanczos``.
+
+    Each member's value is the lower-case method name.
+    """
+    NEAREST = "nearest"
+    BILINEAR = "bilinear"
+    BICUBIC = "bicubic"
+    # For PIL compatibility
+    BOX = "box"
+    HAMMING = "hamming"
+    LANCZOS = "lanczos"
+
+
+def _interpolation_modes_from_int(i: int) -> InterpolationMode:
+    """Translate a legacy integer interpolation code into an enum member.
+
+    Args:
+        i (int): Integer code, one of 0 through 5.
+
+    Returns:
+        InterpolationMode: The member associated with ``i``.
+
+    Raises:
+        KeyError: If ``i`` is not a known code.
+    """
+    code_to_mode = dict((
+        (0, InterpolationMode.NEAREST),
+        (1, InterpolationMode.LANCZOS),
+        (2, InterpolationMode.BILINEAR),
+        (3, InterpolationMode.BICUBIC),
+        (4, InterpolationMode.BOX),
+        (5, InterpolationMode.HAMMING), ))
+    return code_to_mode[i]
+
+
+# Maps each InterpolationMode to the integer code handed to the PIL backend
+# by ``resize``; it is the inverse of the table in
+# ``_interpolation_modes_from_int``.
+pil_modes_mapping = {
+    InterpolationMode.NEAREST: 0,
+    InterpolationMode.BILINEAR: 2,
+    InterpolationMode.BICUBIC: 3,
+    InterpolationMode.BOX: 4,
+    InterpolationMode.HAMMING: 5,
+    InterpolationMode.LANCZOS: 1,
+}
+
+
+def _is_numpy(img: Any) -> bool:
+    """Return True when ``img`` is a ``numpy.ndarray`` instance."""
+    is_ndarray = isinstance(img, np.ndarray)
+    return is_ndarray
+
+
+def _is_numpy_image(img: Any) -> bool:
+    """Return True when ``img.ndim`` is 2 or 3.
+
+    ``img`` only needs an ``ndim`` attribute; its type is not checked here.
+    """
+    ndim = img.ndim
+    return ndim == 2 or ndim == 3
+
+
+def to_tensor(pic):
+    """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.
+
+    The result is laid out channel-first (C, H, W). When the tensor's dtype
+    does not compare equal to the default float dtype, it is cast to that
+    dtype and divided by 255.
+
+    See :class:`~paddlevision.transforms.ToTensor` for more details.
+
+    Args:
+        pic (PIL Image or numpy.ndarray): Image to be converted to tensor.
+
+    Returns:
+        Tensor: Converted image.
+
+    Raises:
+        TypeError: If ``pic`` is neither a PIL image nor an ndarray.
+        ValueError: If ``pic`` is an ndarray that is not 2- or 3-dimensional.
+    """
+    pic_is_ndarray = _is_numpy(pic)
+    if not (pic_is_ndarray or F_pil._is_pil_image(pic)):
+        raise TypeError('pic should be PIL Image or ndarray. Got {}'.format(
+            type(pic)))
+
+    if pic_is_ndarray and not _is_numpy_image(pic):
+        raise ValueError('pic should be 2/3 dimensional. Got {} dimensions.'.
+                         format(pic.ndim))
+
+    # NOTE(review): the default dtype is compared directly against
+    # ``Tensor.dtype`` below - confirm that comparison behaves as intended on
+    # the targeted Paddle version.
+    default_float_dtype = paddle.get_default_dtype()
+
+    if pic_is_ndarray:
+        # Grayscale arrays get a trailing channel axis before HWC -> CHW.
+        hwc = pic[:, :, None] if pic.ndim == 2 else pic
+        img = paddle.to_tensor(hwc.transpose((2, 0, 1)))
+        if img.dtype == default_float_dtype:
+            return img
+        # backward compatibility
+        img = img.astype(dtype=default_float_dtype)
+        return img.divide(paddle.full_like(img, 255))
+
+    if accimage is not None and isinstance(pic, accimage.Image):
+        buffer = np.zeros(
+            [pic.channels, pic.height, pic.width], dtype=np.float32)
+        pic.copyto(buffer)
+        return paddle.to_tensor(buffer).astype(dtype=default_float_dtype)
+
+    # From here on ``pic`` is a PIL Image.
+    np_dtype = {
+        'I': np.int32,
+        'I;16': np.int16,
+        'F': np.float32
+    }.get(pic.mode, np.uint8)
+    img = paddle.to_tensor(np.array(pic, np_dtype, copy=True))
+
+    if pic.mode == '1':
+        img = 255 * img
+    width, height = pic.size
+    img = img.reshape([height, width, len(pic.getbands())])
+
+    needs_scaling = not img.dtype == default_float_dtype
+    if needs_scaling:
+        img = img.astype(dtype=default_float_dtype)
+    # put it from HWC to CHW format
+    img = img.transpose((2, 0, 1))
+    if needs_scaling:
+        return img.divide(paddle.full_like(img, 255))
+    return img
+
+
+def normalize(tensor: Tensor,
+              mean: List[float],
+              std: List[float],
+              inplace: bool=False) -> Tensor:
+    """Normalize a float tensor image with mean and standard deviation.
+    This transform does not support PIL Image.
+
+    The result is ``(tensor - mean) / std``, with 1-D ``mean``/``std``
+    reshaped to ``(-1, 1, 1)`` so they line up with the channel axis.
+
+    .. note::
+        By default the input is cloned first, so it is not mutated.
+        NOTE(review): the arithmetic uses ``subtract``/``divide`` and returns
+        their result, so the input may stay unmodified even when
+        ``inplace=True`` - confirm whether true in-place behavior is needed.
+
+    See :class:`~paddlevision.transforms.Normalize` for more details.
+
+    Args:
+        tensor (Tensor): Float tensor image of size (C, H, W) or (B, C, H, W) to be normalized.
+        mean (sequence): Sequence of means for each channel.
+        std (sequence): Sequence of standard deviations for each channel.
+        inplace(bool,optional): When True, skip the defensive clone.
+
+    Returns:
+        Tensor: Normalized Tensor image.
+
+    Raises:
+        TypeError: If ``tensor`` is not a paddle tensor or not a float tensor.
+        ValueError: If ``tensor`` has fewer than 3 dimensions or ``std``
+            contains a zero.
+    """
+    if not isinstance(tensor, paddle.Tensor):
+        raise TypeError('Input tensor should be a paddle tensor. Got {}.'.
+                        format(type(tensor)))
+
+    float_dtypes = (paddle.float16, paddle.float32, paddle.float64)
+    if tensor.dtype not in float_dtypes:
+        raise TypeError('Input tensor should be a float tensor. Got {}.'.
+                        format(tensor.dtype))
+
+    if tensor.ndim < 3:
+        raise ValueError(
+            'Expected tensor to be a tensor image of size (..., C, H, W). Got tensor.shape() = '
+            '{}.'.format(tensor.shape))
+
+    work = tensor if inplace else tensor.clone()
+
+    # Statistics are created with the image's dtype and on its device.
+    mean_t = paddle.to_tensor(mean, dtype=work.dtype, place=work.place)
+    std_t = paddle.to_tensor(std, dtype=work.dtype, place=work.place)
+    if (std_t == 0).any():
+        raise ValueError('std evaluated to zero, leading to division by zero.')
+
+    per_channel = (-1, 1, 1)
+    if mean_t.ndim == 1:
+        mean_t = mean_t.reshape(per_channel)
+    if std_t.ndim == 1:
+        std_t = std_t.reshape(per_channel)
+    return work.subtract(mean_t).divide(std_t)
+
+
+def resize(img: Tensor,
+           size: List[int],
+           interpolation: InterpolationMode=InterpolationMode.BILINEAR,
+           max_size: Optional[int]=None,
+           antialias: Optional[bool]=None) -> Tensor:
+    r"""Resize the input image to the given size.
+    A paddle Tensor input is expected to have [..., H, W] shape, where ...
+    means an arbitrary number of leading dimensions. Tensors are handled by
+    the tensor backend, everything else by the PIL backend.
+
+    .. warning::
+        The output image might be different depending on its type: when downsampling, the interpolation of PIL images
+        and tensors is slightly different, because PIL applies antialiasing. This may lead to significant differences
+        in the performance of a network. Therefore, it is preferable to train and serve a model with the same input
+        types. The ``antialias`` parameter can help bring the two closer.
+
+    Args:
+        img (PIL Image or Tensor): Image to be resized.
+        size (sequence or int): Desired output size. If size is a sequence like
+            (h, w), the output size will be matched to this. If size is an int,
+            the smaller edge of the image will be matched to this number maintaining
+            the aspect ratio. i.e, if height > width, then image will be rescaled to
+            :math:`\left(\text{size} \times \frac{\text{height}}{\text{width}}, \text{size}\right)`.
+        interpolation (InterpolationMode): Desired interpolation enum defined by
+            :class:`paddlevision.transforms.InterpolationMode`.
+            Default is ``InterpolationMode.BILINEAR``. If input is Tensor, only ``InterpolationMode.NEAREST``,
+            ``InterpolationMode.BILINEAR`` and ``InterpolationMode.BICUBIC`` are supported.
+            Integer values (e.g. ``PIL.Image.NEAREST``) are still accepted, with a warning.
+        max_size (int, optional): The maximum allowed for the longer edge of
+            the resized image: if the longer edge of the image is greater
+            than ``max_size`` after being resized according to ``size``, then
+            the image is resized again so that the longer edge is equal to
+            ``max_size``. As a result, ``size`` might be overruled, i.e the
+            smaller edge may be shorter than ``size``.
+        antialias (bool, optional): antialias flag. If ``img`` is PIL Image, the flag is ignored and anti-alias
+            is always used. If ``img`` is Tensor, the flag is False by default and can be set to True for
+            ``InterpolationMode.BILINEAR`` only mode.
+
+            .. warning::
+                There is no autodiff support for ``antialias=True`` option with input ``img`` as Tensor.
+
+    Returns:
+        PIL Image or Tensor: Resized image.
+
+    Raises:
+        TypeError: If ``interpolation`` is neither an int nor an
+            ``InterpolationMode``.
+    """
+    # Backward compatibility with integer value
+    if isinstance(interpolation, int):
+        warnings.warn(
+            "Argument interpolation should be of type InterpolationMode instead of int. "
+            "Please, use InterpolationMode enum.")
+        interpolation = _interpolation_modes_from_int(interpolation)
+
+    if not isinstance(interpolation, InterpolationMode):
+        raise TypeError("Argument interpolation should be a InterpolationMode")
+
+    if isinstance(img, paddle.Tensor):
+        return F_t.resize(
+            img,
+            size=size,
+            interpolation=interpolation.value,
+            max_size=max_size,
+            antialias=antialias)
+
+    # PIL path: an explicitly falsy ``antialias`` cannot be honored.
+    if not (antialias is None or antialias):
+        warnings.warn(
+            "Anti-alias option is always applied for PIL Image input. Argument antialias is ignored."
+        )
+    pil_code = pil_modes_mapping[interpolation]
+    return F_pil.resize(
+        img, size=size, interpolation=pil_code, max_size=max_size)
+
+
+def _get_image_size(img: Tensor) -> List[int]:
+    """Returns image size as [w, h]
+
+    Paddle tensors are delegated to the tensor backend, anything else to the
+    PIL backend.
+    """
+    backend = F_t if isinstance(img, paddle.Tensor) else F_pil
+    return backend._get_image_size(img)
+
+
+def pad(img: Tensor,
+        padding: List[int],
+        fill: int=0,
+        padding_mode: str="constant") -> Tensor:
+    r"""Pad the given image on all sides with the given "pad" value.
+    A paddle Tensor input is expected to have [..., H, W] shape, where ...
+    means at most 2 leading dimensions for mode reflect and symmetric,
+    at most 3 leading dimensions for mode edge,
+    and an arbitrary number of leading dimensions for mode constant.
+    Tensors are handled by the tensor backend, everything else by the PIL
+    backend.
+
+    Args:
+        img (PIL Image or Tensor): Image to be padded.
+        padding (int or sequence): Padding on each border. If a single int is provided this
+            is used to pad all borders. If sequence of length 2 is provided this is the padding
+            on left/right and top/bottom respectively. If a sequence of length 4 is provided
+            this is the padding for the left, top, right and bottom borders respectively.
+        fill (number or str or tuple): Pixel fill value for constant fill. Default is 0.
+            If a tuple of length 3, it is used to fill R, G, B channels respectively.
+            This value is only used when the padding_mode is constant.
+            Only number is supported for paddle Tensor.
+            Only int or str or tuple value is supported for PIL Image.
+        padding_mode (str): Type of padding. Should be: constant, edge, reflect or symmetric.
+            Default is constant.
+
+            - constant: pads with a constant value, this value is specified with fill
+
+            - edge: pads with the last value at the edge of the image.
+              If input a 5D paddle Tensor, the last 3 dimensions will be padded instead of the last 2
+
+            - reflect: pads with reflection of image without repeating the last value on the edge.
+              For example, padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode
+              will result in [3, 2, 1, 2, 3, 4, 3, 2]
+
+            - symmetric: pads with reflection of image repeating the last value on the edge.
+              For example, padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode
+              will result in [2, 1, 1, 2, 3, 4, 4, 3]
+
+    Returns:
+        PIL Image or Tensor: Padded image.
+    """
+    pad_impl = F_t.pad if isinstance(img, paddle.Tensor) else F_pil.pad
+    return pad_impl(
+        img, padding=padding, fill=fill, padding_mode=padding_mode)
+
+
+def crop(img: Tensor, top: int, left: int, height: int, width: int) -> Tensor:
+    """Crop the given image at specified location and output size.
+    A paddle Tensor input is expected to have [..., H, W] shape, where ...
+    means an arbitrary number of leading dimensions.
+    If image size is smaller than output size along any edge, image is padded with 0 and then cropped.
+
+    Args:
+        img (PIL Image or Tensor): Image to be cropped. (0,0) denotes the top left corner of the image.
+        top (int): Vertical component of the top left corner of the crop box.
+        left (int): Horizontal component of the top left corner of the crop box.
+        height (int): Height of the crop box.
+        width (int): Width of the crop box.
+
+    Returns:
+        PIL Image or Tensor: Cropped image.
+    """
+    crop_impl = F_t.crop if isinstance(img, paddle.Tensor) else F_pil.crop
+    return crop_impl(img, top, left, height, width)
+
+
+def center_crop(img: Tensor, output_size: List[int]) -> Tensor:
+    """Crops the given image at the center.
+    A paddle Tensor input is expected to have [..., H, W] shape, where ...
+    means an arbitrary number of leading dimensions.
+    If image size is smaller than output size along any edge, image is padded with 0 and then center cropped.
+
+    Args:
+        img (PIL Image or Tensor): Image to be cropped.
+        output_size (sequence or int): (height, width) of the crop box. If int or sequence with single int,
+            it is used for both directions.
+
+    Returns:
+        PIL Image or Tensor: Cropped image.
+    """
+    if isinstance(output_size, numbers.Number):
+        side = int(output_size)
+        output_size = (side, side)
+    elif isinstance(output_size, (tuple, list)) and len(output_size) == 1:
+        output_size = (output_size[0], output_size[0])
+
+    image_width, image_height = _get_image_size(img)
+    crop_height, crop_width = output_size
+
+    # Positive values mean the crop box is larger than the image on that axis.
+    extra_w = crop_width - image_width
+    extra_h = crop_height - image_height
+    if extra_w > 0 or extra_h > 0:
+        # Left, top, right, bottom; the odd pixel goes to right/bottom.
+        padding_ltrb = [
+            extra_w // 2 if extra_w > 0 else 0,
+            extra_h // 2 if extra_h > 0 else 0,
+            (extra_w + 1) // 2 if extra_w > 0 else 0,
+            (extra_h + 1) // 2 if extra_h > 0 else 0,
+        ]
+        img = pad(img, padding_ltrb, fill=0)  # PIL uses fill value 0
+        image_width, image_height = _get_image_size(img)
+        if crop_width == image_width and crop_height == image_height:
+            return img
+
+    crop_top = int(round((image_height - crop_height) / 2.))
+    crop_left = int(round((image_width - crop_width) / 2.))
+    return crop(img, crop_top, crop_left, crop_height, crop_width)
+
+
+def resized_crop(
+        img: Tensor,
+        top: int,
+        left: int,
+        height: int,
+        width: int,
+        size: List[int],
+        interpolation: InterpolationMode=InterpolationMode.BILINEAR) -> Tensor:
+    """Crop the given image and resize it to desired size.
+    A paddle Tensor input is expected to have [..., H, W] shape, where ...
+    means an arbitrary number of leading dimensions.
+
+    Args:
+        img (PIL Image or Tensor): Image to be cropped. (0,0) denotes the top left corner of the image.
+        top (int): Vertical component of the top left corner of the crop box.
+        left (int): Horizontal component of the top left corner of the crop box.
+        height (int): Height of the crop box.
+        width (int): Width of the crop box.
+        size (sequence or int): Desired output size. Same semantics as ``resize``.
+        interpolation (InterpolationMode): Desired interpolation enum defined by
+            :class:`paddlevision.transforms.InterpolationMode`.
+            Default is ``InterpolationMode.BILINEAR``. If input is Tensor, only ``InterpolationMode.NEAREST``,
+            ``InterpolationMode.BILINEAR`` and ``InterpolationMode.BICUBIC`` are supported.
+            For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable.
+
+    Returns:
+        PIL Image or Tensor: Cropped image.
+    """
+    cropped = crop(img, top, left, height, width)
+    return resize(cropped, size, interpolation)
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_paddle/paddlevision/transforms/functional_pil.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_paddle/paddlevision/transforms/functional_pil.py
+import numbers
+from typing import Any, List, Sequence
+
+import numpy as np
+from PIL import Image, ImageOps, ImageEnhance
+
+try:
+    import accimage
+except ImportError:
+    accimage = None
+
+
+def _is_pil_image(img: Any) -> bool:
+    """Return True when ``img`` is a PIL image.
+
+    ``accimage.Image`` instances also count when ``accimage`` was imported.
+    """
+    if accimage is None:
+        accepted = (Image.Image, )
+    else:
+        accepted = (Image.Image, accimage.Image)
+    return isinstance(img, accepted)
+
+
+def _get_image_size(img: Any) -> List[int]:
+    """Return ``img.size`` for a PIL image.
+
+    Raises:
+        TypeError: If ``img`` is not a PIL image.
+    """
+    if not _is_pil_image(img):
+        raise TypeError("Unexpected type {}".format(type(img)))
+    return img.size
+
+
+def _get_image_num_channels(img: Any) -> int:
+    """Return 1 for a mode ``'L'`` PIL image and 3 for any other mode.
+
+    Raises:
+        TypeError: If ``img`` is not a PIL image.
+    """
+    if not _is_pil_image(img):
+        raise TypeError("Unexpected type {}".format(type(img)))
+    if img.mode == 'L':
+        return 1
+    return 3
+
+
+def hflip(img):
+    """Return ``img`` transposed with ``Image.FLIP_LEFT_RIGHT``.
+
+    Raises:
+        TypeError: If ``img`` is not a PIL image.
+    """
+    if _is_pil_image(img):
+        return img.transpose(Image.FLIP_LEFT_RIGHT)
+
+    raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+
+
+def vflip(img):
+    """Return ``img`` transposed with ``Image.FLIP_TOP_BOTTOM``.
+
+    Raises:
+        TypeError: If ``img`` is not a PIL image.
+    """
+    if _is_pil_image(img):
+        return img.transpose(Image.FLIP_TOP_BOTTOM)
+
+    raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+
+
+def adjust_brightness(img, brightness_factor):
+    """Apply ``ImageEnhance.Brightness`` to ``img`` with ``brightness_factor``.
+
+    Raises:
+        TypeError: If ``img`` is not a PIL image.
+    """
+    if not _is_pil_image(img):
+        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+
+    return ImageEnhance.Brightness(img).enhance(brightness_factor)
+
+
+def adjust_contrast(img, contrast_factor):
+    """Apply ``ImageEnhance.Contrast`` to ``img`` with ``contrast_factor``.
+
+    Raises:
+        TypeError: If ``img`` is not a PIL image.
+    """
+    if not _is_pil_image(img):
+        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+
+    return ImageEnhance.Contrast(img).enhance(contrast_factor)
+
+
+def adjust_saturation(img, saturation_factor):
+    """Apply ``ImageEnhance.Color`` to ``img`` with ``saturation_factor``.
+
+    Raises:
+        TypeError: If ``img`` is not a PIL image.
+    """
+    if not _is_pil_image(img):
+        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+
+    return ImageEnhance.Color(img).enhance(saturation_factor)
+
+
+def adjust_hue(img, hue_factor):
+    """Shift the hue channel of a PIL image.
+
+    The image is converted to HSV, ``hue_factor * 255`` is added to the hue
+    channel with uint8 wrap-around, and the result is converted back to the
+    original mode. Images in mode ``'L'``, ``'1'``, ``'I'`` or ``'F'`` are
+    returned unchanged.
+
+    Args:
+        img (PIL Image): Image to adjust.
+        hue_factor (float): Shift amount; must lie in [-0.5, 0.5].
+
+    Raises:
+        ValueError: If ``hue_factor`` is outside [-0.5, 0.5].
+        TypeError: If ``img`` is not a PIL image.
+    """
+    factor_in_range = -0.5 <= hue_factor <= 0.5
+    if not factor_in_range:
+        raise ValueError('hue_factor ({}) is not in [-0.5, 0.5].'.format(
+            hue_factor))
+
+    if not _is_pil_image(img):
+        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+
+    input_mode = img.mode
+    if input_mode in {'L', '1', 'I', 'F'}:
+        return img
+
+    hue, sat, val = img.convert('HSV').split()
+
+    hue_values = np.array(hue, dtype=np.uint8)
+    # uint8 addition take cares of rotation across boundaries
+    with np.errstate(over='ignore'):
+        hue_values += np.uint8(hue_factor * 255)
+    shifted_hue = Image.fromarray(hue_values, 'L')
+
+    return Image.merge('HSV', (shifted_hue, sat, val)).convert(input_mode)
+
+
+def adjust_gamma(img, gamma, gain=1):
+    """Apply gamma correction to a PIL image through a lookup table.
+
+    The image is converted to RGB, each of the 256 levels is mapped to
+    ``(255 + 1 - 1e-3) * gain * (level / 255) ** gamma``, and the result is
+    converted back to the original mode.
+
+    Args:
+        img (PIL Image): Image to adjust.
+        gamma (float): Non-negative exponent.
+        gain (float): Constant multiplier. Default is 1.
+
+    Raises:
+        TypeError: If ``img`` is not a PIL image.
+        ValueError: If ``gamma`` is negative.
+    """
+    if not _is_pil_image(img):
+        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+
+    if gamma < 0:
+        raise ValueError('Gamma should be a non-negative real number')
+
+    input_mode = img.mode
+    rgb = img.convert('RGB')
+    scale = (255 + 1 - 1e-3) * gain
+    # One 256-entry table, repeated for each of the three RGB bands.
+    lookup = [scale * pow(level / 255., gamma) for level in range(256)] * 3
+    # use PIL's point-function to accelerate this part
+    rgb = rgb.point(lookup)
+
+    return rgb.convert(input_mode)
+
+
+def pad(img, padding, fill=0, padding_mode="constant"):
+    """Pad a PIL image on its borders.
+
+    Args:
+        img: PIL Image to pad.
+        padding: A number applied to all four sides, or a tuple/list of
+            1, 2 (left/right, top/bottom) or 4 (left, top, right, bottom)
+            values.
+        fill: Fill value, used only when ``padding_mode`` is "constant".
+        padding_mode: One of "constant", "edge", "reflect", "symmetric".
+            "constant" is delegated to ``ImageOps.expand``; the other modes
+            are forwarded to ``np.pad``, after cropping by any negative
+            padding amounts.
+
+    Returns:
+        The padded PIL Image. Palette ('P') images keep their palette.
+
+    Raises:
+        TypeError: If an argument has an unsupported type.
+        ValueError: If ``padding`` has an unsupported length or
+            ``padding_mode`` is unknown.
+    """
+    if not _is_pil_image(img):
+        raise TypeError("img should be PIL Image. Got {}".format(type(img)))
+
+    if not isinstance(padding, (numbers.Number, tuple, list)):
+        raise TypeError("Got inappropriate padding arg")
+    if not isinstance(fill, (numbers.Number, str, tuple)):
+        raise TypeError("Got inappropriate fill arg")
+    if not isinstance(padding_mode, str):
+        raise TypeError("Got inappropriate padding_mode arg")
+
+    if isinstance(padding, list):
+        padding = tuple(padding)
+
+    if isinstance(padding, tuple) and len(padding) not in [1, 2, 4]:
+        raise ValueError(
+            "Padding must be an int or a 1, 2, or 4 element tuple, not a " +
+            "{} element tuple".format(len(padding)))
+
+    if isinstance(padding, tuple) and len(padding) == 1:
+        # Compatibility with `functional_tensor.pad`
+        padding = padding[0]
+
+    if padding_mode not in ["constant", "edge", "reflect", "symmetric"]:
+        raise ValueError(
+            "Padding mode should be either constant, edge, reflect or symmetric"
+        )
+
+    if padding_mode == "constant":
+        opts = _parse_fill(fill, img, name="fill")
+        if img.mode == "P":
+            # Re-apply the palette on the expanded image.
+            palette = img.getpalette()
+            image = ImageOps.expand(img, border=padding, **opts)
+            image.putpalette(palette)
+            return image
+
+        return ImageOps.expand(img, border=padding, **opts)
+
+    if isinstance(padding, tuple):
+        if len(padding) == 2:
+            pad_left = pad_right = padding[0]
+            pad_top = pad_bottom = padding[1]
+        else:
+            pad_left, pad_top, pad_right, pad_bottom = padding
+    else:
+        # Any scalar that passed validation is accepted here, not only the
+        # built-in int, so the four amounts are always defined.
+        pad_left = pad_right = pad_top = pad_bottom = padding
+
+    p = [pad_left, pad_top, pad_right, pad_bottom]
+    # Negative padding amounts mean "crop by that much" on the given side.
+    cropping = -np.minimum(p, 0)
+
+    if cropping.any():
+        crop_left, crop_top, crop_right, crop_bottom = cropping
+        img = img.crop((crop_left, crop_top, img.width - crop_right,
+                        img.height - crop_bottom))
+
+    pad_left, pad_top, pad_right, pad_bottom = np.maximum(p, 0)
+
+    is_palette = img.mode == 'P'
+    palette = img.getpalette() if is_palette else None
+
+    arr = np.asarray(img)
+    pad_width = ((pad_top, pad_bottom), (pad_left, pad_right))
+    if arr.ndim == 3:
+        # Multi-band image: leave the channel axis unpadded.
+        pad_width = pad_width + ((0, 0), )
+    arr = np.pad(arr, pad_width, padding_mode)
+
+    result = Image.fromarray(arr)
+    if is_palette:
+        result.putpalette(palette)
+    return result
+
+
+def crop(img: Image.Image, top: int, left: int, height: int,
+         width: int) -> Image.Image:
+    """Crop a rectangular region out of a PIL image.
+
+    Args:
+        img: PIL Image to crop.
+        top: Vertical coordinate of the top edge of the box.
+        left: Horizontal coordinate of the left edge of the box.
+        height: Height of the box.
+        width: Width of the box.
+
+    Returns:
+        ``img.crop`` applied to ``(left, top, left + width, top + height)``.
+
+    Raises:
+        TypeError: If ``img`` does not pass the ``_is_pil_image`` check.
+    """
+    if not _is_pil_image(img):
+        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+
+    right = left + width
+    bottom = top + height
+    return img.crop((left, top, right, bottom))
+
+
+def resize(img, size, interpolation=Image.BILINEAR, max_size=None):
+    """Resize a PIL image.
+
+    Args:
+        img: PIL Image to resize.
+        size: Either an int (or 1-element sequence) giving the target length
+            of the shorter edge, with the aspect ratio kept, or a 2-element
+            sequence ``(h, w)``.
+        interpolation: Resampling filter handed to ``img.resize``.
+        max_size: Optional upper bound for the longer edge; only allowed when
+            ``size`` describes the shorter edge.
+
+    Returns:
+        The resized image, or ``img`` itself when the shorter edge already
+        equals ``size``.
+
+    Raises:
+        TypeError: If ``img`` is not a PIL Image or ``size`` is malformed.
+        ValueError: If ``max_size`` is used with a 2-element ``size`` or is
+            not strictly greater than ``size``.
+    """
+    if not _is_pil_image(img):
+        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+
+    size_is_valid = isinstance(size, int) or (isinstance(size, Sequence) and
+                                              len(size) in (1, 2))
+    if not size_is_valid:
+        raise TypeError('Got inappropriate size arg: {}'.format(size))
+
+    if isinstance(size, Sequence) and len(size) == 1:
+        size = size[0]
+
+    if not isinstance(size, int):
+        # Explicit (h, w): PIL expects (w, h), hence the reversal.
+        if max_size is not None:
+            raise ValueError(
+                "max_size should only be passed if size specifies the length of the smaller edge, "
+                "i.e. size should be an int or a sequence of length 1 in deploy mode."
+            )
+        return img.resize(size[::-1], interpolation)
+
+    width, height = img.size
+    width_is_short = width <= height
+    if width_is_short:
+        short_edge, long_edge = width, height
+    else:
+        short_edge, long_edge = height, width
+
+    if short_edge == size:
+        return img
+
+    new_short = size
+    new_long = int(size * long_edge / short_edge)
+
+    if max_size is not None:
+        if max_size <= size:
+            raise ValueError(
+                f"max_size = {max_size} must be strictly greater than the requested "
+                f"size for the smaller edge size = {size}")
+        if new_long > max_size:
+            # Shrink both edges so the longer one lands exactly on max_size.
+            new_short, new_long = int(max_size * new_short /
+                                      new_long), max_size
+
+    if width_is_short:
+        target = (new_short, new_long)
+    else:
+        target = (new_long, new_short)
+    return img.resize(target, interpolation)
+
+
+def _parse_fill(fill, img, name="fillcolor"):
+    """Normalize a fill value into a keyword-argument dict for PIL calls.
+
+    Args:
+        fill: ``None`` (treated as 0), a number, or a list/tuple with one
+            entry per image band. Other values are passed through unchanged.
+        img: Object exposing ``getbands()``.
+        name: Key under which the fill value is returned.
+
+    Returns:
+        ``{name: fill}`` where a scalar has been replicated per band for
+        multi-band images and sequences have been converted to tuples.
+
+    Raises:
+        ValueError: If a list/tuple ``fill`` does not have one entry per band.
+    """
+    band_count = len(img.getbands())
+    if fill is None:
+        fill = 0
+
+    # A scalar is replicated once per band for multi-band images.
+    if band_count > 1 and isinstance(fill, (int, float)):
+        fill = (fill, ) * band_count
+
+    if isinstance(fill, (list, tuple)):
+        if len(fill) != band_count:
+            template = (
+                "The number of elements in 'fill' does not match the number of "
+                "bands of the image ({} != {})")
+            raise ValueError(template.format(len(fill), band_count))
+        fill = tuple(fill)
+
+    return {name: fill}
+
+
+def affine(img, matrix, interpolation=0, fill=None):
+    """Apply an affine transform to a PIL image, keeping its size.
+
+    Args:
+        img: PIL Image to transform.
+        matrix: Coefficients forwarded to ``img.transform`` with
+            ``Image.AFFINE``.
+        interpolation: Resampling filter forwarded to ``img.transform``.
+        fill: Fill value for uncovered areas, normalized by ``_parse_fill``.
+
+    Returns:
+        The transformed image, same size as the input.
+
+    Raises:
+        TypeError: If ``img`` does not pass the ``_is_pil_image`` check.
+    """
+    if _is_pil_image(img):
+        fill_kwargs = _parse_fill(fill, img)
+        return img.transform(img.size, Image.AFFINE, matrix, interpolation,
+                             **fill_kwargs)
+
+    raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+
+
+def rotate(img, angle, interpolation=0, expand=False, center=None, fill=None):
+    """Rotate a PIL image.
+
+    Args:
+        img: PIL Image to rotate.
+        angle: Rotation angle, forwarded to ``img.rotate``.
+        interpolation: Resampling filter forwarded to ``img.rotate``.
+        expand: Expansion flag forwarded to ``img.rotate``.
+        center: Rotation center forwarded to ``img.rotate``.
+        fill: Fill value for uncovered areas, normalized by ``_parse_fill``.
+
+    Returns:
+        The rotated image.
+
+    Raises:
+        TypeError: If ``img`` does not pass the ``_is_pil_image`` check.
+    """
+    if _is_pil_image(img):
+        fill_kwargs = _parse_fill(fill, img)
+        return img.rotate(angle, interpolation, expand, center, **fill_kwargs)
+
+    raise TypeError("img should be PIL Image. Got {}".format(type(img)))
+
+
+def perspective(img,
+                perspective_coeffs,
+                interpolation=Image.BICUBIC,
+                fill=None):
+    """Apply a perspective transform to a PIL image, keeping its size.
+
+    Args:
+        img: PIL Image to transform.
+        perspective_coeffs: Coefficients forwarded to ``img.transform`` with
+            ``Image.PERSPECTIVE``.
+        interpolation: Resampling filter forwarded to ``img.transform``.
+        fill: Fill value for uncovered areas, normalized by ``_parse_fill``.
+
+    Returns:
+        The transformed image, same size as the input.
+
+    Raises:
+        TypeError: If ``img`` does not pass the ``_is_pil_image`` check.
+    """
+    if _is_pil_image(img):
+        fill_kwargs = _parse_fill(fill, img)
+        return img.transform(img.size, Image.PERSPECTIVE, perspective_coeffs,
+                             interpolation, **fill_kwargs)
+
+    raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+
+
+def to_grayscale(img, num_output_channels):
+    """Convert a PIL image to grayscale.
+
+    Args:
+        img: PIL Image to convert.
+        num_output_channels: 1 for a single-band 'L' image, 3 for an 'RGB'
+            image whose three bands all hold the same gray values.
+
+    Returns:
+        The grayscale image.
+
+    Raises:
+        TypeError: If ``img`` does not pass the ``_is_pil_image`` check.
+        ValueError: If ``num_output_channels`` is neither 1 nor 3.
+    """
+    if not _is_pil_image(img):
+        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+
+    if num_output_channels == 1:
+        return img.convert('L')
+
+    if num_output_channels == 3:
+        gray = np.array(img.convert('L'), dtype=np.uint8)
+        # Stack the single gray plane three times along the channel axis.
+        stacked = np.dstack([gray, gray, gray])
+        return Image.fromarray(stacked, 'RGB')
+
+    raise ValueError('num_output_channels should be either 1 or 3')
+
+
+def invert(img):
+    """Return ``ImageOps.invert(img)`` for a PIL image.
+
+    Raises:
+        TypeError: If ``img`` does not pass the ``_is_pil_image`` check.
+    """
+    if _is_pil_image(img):
+        return ImageOps.invert(img)
+
+    raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+
+
+def posterize(img, bits):
+    """Return ``ImageOps.posterize(img, bits)`` for a PIL image.
+
+    Args:
+        img: PIL Image to posterize.
+        bits: Bit count forwarded to ``ImageOps.posterize``.
+
+    Raises:
+        TypeError: If ``img`` does not pass the ``_is_pil_image`` check.
+    """
+    if _is_pil_image(img):
+        return ImageOps.posterize(img, bits)
+
+    raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+
+
+def solarize(img, threshold):
+    """Return ``ImageOps.solarize(img, threshold)`` for a PIL image.
+
+    Args:
+        img: PIL Image to solarize.
+        threshold: Threshold forwarded to ``ImageOps.solarize``.
+
+    Raises:
+        TypeError: If ``img`` does not pass the ``_is_pil_image`` check.
+    """
+    if _is_pil_image(img):
+        return ImageOps.solarize(img, threshold)
+
+    raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+
+
+def adjust_sharpness(img, sharpness_factor):
+    """Adjust the sharpness of a PIL image.
+
+    Args:
+        img: PIL Image to adjust.
+        sharpness_factor: Factor forwarded unchanged to
+            ``ImageEnhance.Sharpness(img).enhance``.
+
+    Returns:
+        The enhanced image.
+
+    Raises:
+        TypeError: If ``img`` does not pass the ``_is_pil_image`` check.
+    """
+    if _is_pil_image(img):
+        return ImageEnhance.Sharpness(img).enhance(sharpness_factor)
+
+    raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+
+
+def autocontrast(img):
+    """Return ``ImageOps.autocontrast(img)`` for a PIL image.
+
+    Raises:
+        TypeError: If ``img`` does not pass the ``_is_pil_image`` check.
+    """
+    if _is_pil_image(img):
+        return ImageOps.autocontrast(img)
+
+    raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+
+
+def equalize(img):
+    """Return ``ImageOps.equalize(img)`` for a PIL image.
+
+    Raises:
+        TypeError: If ``img`` does not pass the ``_is_pil_image`` check.
+    """
+    if _is_pil_image(img):
+        return ImageOps.equalize(img)
+
+    raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_paddle/paddlevision/transforms/functional_tensor.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_paddle/paddlevision/transforms/functional_tensor.py
+import warnings
+
+import paddle
+from paddle import Tensor
+from paddle.nn.functional import grid_sample, conv2d, interpolate, pad as paddle_pad
+from typing import Optional, Tuple, List
+
+
+def _is_tensor_a_paddle_image(x: Tensor) -> bool:
+    """Return True when ``x`` has at least two dimensions."""
+    min_image_rank = 2
+    return x.ndim >= min_image_rank
+
+
+def _assert_image_tensor(img):
+    """Raise ``TypeError`` unless ``img`` passes ``_is_tensor_a_paddle_image``."""
+    if _is_tensor_a_paddle_image(img):
+        return
+    raise TypeError("Tensor is not a paddle image.")
+
+
+def _get_image_size(img: Tensor) -> List[int]:
+    """Return ``[width, height]`` read from the last two dims of ``img``.
+
+    Raises:
+        TypeError: If ``img`` fails ``_assert_image_tensor``.
+    """
+    _assert_image_tensor(img)
+    height, width = img.shape[-2], img.shape[-1]
+    return [width, height]
+
+
+def _cast_squeeze_in(img: Tensor, req_dtypes: List[paddle.dtype]) -> Tuple[
+        Tensor, bool, bool, paddle.dtype]:
+    """Prepare a tensor for an op that needs a leading batch dim and a given dtype.
+
+    Args:
+        img: Input tensor.
+        req_dtypes: Acceptable dtypes. When the dtype of ``img`` is not among
+            them, ``img`` is cast to ``req_dtypes[0]``.
+
+    Returns:
+        Tuple ``(img, need_cast, need_squeeze, out_dtype)``: the prepared
+        tensor, whether a cast happened, whether a leading dimension was
+        added, and the dtype the tensor had before any cast.
+    """
+    need_squeeze = False
+    # make image NCHW
+    if img.ndim < 4:
+        # Paddle names the dimension argument ``axis`` (``dim`` is rejected).
+        img = img.unsqueeze(axis=0)
+        need_squeeze = True
+
+    out_dtype = img.dtype
+    need_cast = out_dtype not in req_dtypes
+    if need_cast:
+        # Paddle tensors are cast with ``astype``.
+        img = img.astype(req_dtypes[0])
+    return img, need_cast, need_squeeze, out_dtype
+
+
+def _cast_squeeze_out(img: Tensor,
+                      need_cast: bool,
+                      need_squeeze: bool,
+                      out_dtype: paddle.dtype):
+    """Undo the leading-dim insertion and dtype cast applied before an op.
+
+    Args:
+        img: Tensor produced by the op.
+        need_cast: Whether to cast ``img`` to ``out_dtype``.
+        need_squeeze: Whether to drop dimension 0.
+        out_dtype: Target dtype when ``need_cast`` is true.
+
+    Returns:
+        The restored tensor. Values are rounded before casting to an integer
+        dtype.
+    """
+    if need_squeeze:
+        # Paddle names the dimension argument ``axis`` (``dim`` is rejected).
+        img = img.squeeze(axis=0)
+
+    if need_cast:
+        integer_dtypes = (paddle.uint8, paddle.int8, paddle.int16,
+                          paddle.int32, paddle.int64)
+        if out_dtype in integer_dtypes:
+            # it is better to round before cast
+            img = paddle.round(img)
+        # Paddle tensors are cast with ``astype``.
+        img = img.astype(out_dtype)
+
+    return img
+
+
+def _pad_symmetric(img: Tensor, padding: List[int]) -> Tensor:
+    """Pad the last two dims of ``img`` by mirroring, edge pixels included.
+
+    Args:
+        img: Tensor of rank 3 or 4 whose last two dims are (H, W).
+        padding: ``[left, right, top, bottom]``. Negative entries crop the
+            corresponding side instead of padding it.
+
+    Returns:
+        The padded (and possibly cropped) tensor.
+
+    Raises:
+        RuntimeError: If ``img`` is neither 3-D nor 4-D.
+    """
+    ndim = img.ndim
+    if ndim not in (3, 4):
+        raise RuntimeError(
+            "Symmetric padding of N-D tensors are not supported yet")
+
+    # crop if needed
+    if any(amount < 0 for amount in padding):
+        crop_left, crop_right, crop_top, crop_bottom = [
+            -min(x, 0) for x in padding
+        ]
+        img = img[..., crop_top:img.shape[-2] - crop_bottom, crop_left:
+                  img.shape[-1] - crop_right]
+        padding = [max(x, 0) for x in padding]
+
+    # ``Tensor.shape`` is the Paddle way to read the dimensions.
+    height, width = img.shape[-2], img.shape[-1]
+
+    def _mirror_indices(length, before, after):
+        # e.g. length=4, before=2, after=1 -> [1, 0, 0, 1, 2, 3, 3]
+        leading = list(range(before - 1, -1, -1))
+        trailing = [length - 1 - i for i in range(after)]
+        return leading + list(range(length)) + trailing
+
+    # Indices are kept non-negative and placed on the same device as ``img``.
+    x_indices = paddle.to_tensor(
+        _mirror_indices(width, padding[0], padding[1]),
+        dtype="int64",
+        place=img.place)
+    y_indices = paddle.to_tensor(
+        _mirror_indices(height, padding[2], padding[3]),
+        dtype="int64",
+        place=img.place)
+
+    # Gather rows first, then columns.
+    img = paddle.index_select(img, y_indices, axis=ndim - 2)
+    return paddle.index_select(img, x_indices, axis=ndim - 1)
+
+
+def crop(img: Tensor, top: int, left: int, height: int, width: int) -> Tensor:
+    """Crop a box from the last two dims of an image tensor.
+
+    When the box extends beyond the image, the visible part is sliced out and
+    the remainder is filled through ``pad(..., fill=0)``.
+
+    Args:
+        img: Image tensor with (H, W) as its last two dims.
+        top: Row of the top edge of the box.
+        left: Column of the left edge of the box.
+        height: Height of the box.
+        width: Width of the box.
+
+    Returns:
+        The cropped tensor.
+    """
+    _assert_image_tensor(img)
+
+    img_w, img_h = _get_image_size(img)
+    right, bottom = left + width, top + height
+
+    inside = left >= 0 and top >= 0 and right <= img_w and bottom <= img_h
+    if inside:
+        return img[..., top:bottom, left:right]
+
+    # Amount by which the box sticks out on each side: left, top, right, bottom.
+    overflow_ltrb = [
+        max(-left, 0), max(-top, 0), max(right - img_w, 0),
+        max(bottom - img_h, 0)
+    ]
+    visible = img[..., max(top, 0):bottom, max(left, 0):right]
+    return pad(visible, overflow_ltrb, fill=0)
+
+
+def pad(img: Tensor,
+        padding: List[int],
+        fill: int=0,
+        padding_mode: str="constant") -> Tensor:
+    """Pad the last two dims of an image tensor.
+
+    Args:
+        img: Image tensor with (H, W) as its last two dims.
+        padding: An int applied to all sides, or a tuple/list of 1, 2
+            (left/right, top/bottom) or 4 (left, top, right, bottom) ints.
+        fill: Fill value, passed as ``value`` to the Paddle pad op.
+        padding_mode: One of "constant", "edge", "reflect", "symmetric".
+            "edge" is forwarded as "replicate"; "symmetric" is handled by
+            ``_pad_symmetric``.
+
+    Returns:
+        The padded tensor, with the input dtype.
+
+    Raises:
+        TypeError: If an argument has an unsupported type.
+        ValueError: If ``padding`` has an unsupported length or
+            ``padding_mode`` is unknown.
+    """
+    _assert_image_tensor(img)
+
+    if not isinstance(padding, (int, tuple, list)):
+        raise TypeError("Got inappropriate padding arg")
+    if not isinstance(fill, (int, float)):
+        raise TypeError("Got inappropriate fill arg")
+    if not isinstance(padding_mode, str):
+        raise TypeError("Got inappropriate padding_mode arg")
+
+    if isinstance(padding, tuple):
+        padding = list(padding)
+
+    if isinstance(padding, list) and len(padding) not in [1, 2, 4]:
+        raise ValueError(
+            "Padding must be an int or a 1, 2, or 4 element tuple, not a " +
+            "{} element tuple".format(len(padding)))
+
+    if padding_mode not in ["constant", "edge", "reflect", "symmetric"]:
+        raise ValueError(
+            "Padding mode should be either constant, edge, reflect or symmetric"
+        )
+
+    if isinstance(padding, int):
+        pad_left = pad_right = pad_top = pad_bottom = padding
+    elif len(padding) == 1:
+        pad_left = pad_right = pad_top = pad_bottom = padding[0]
+    elif len(padding) == 2:
+        pad_left = pad_right = padding[0]
+        pad_top = pad_bottom = padding[1]
+    else:
+        pad_left = padding[0]
+        pad_top = padding[1]
+        pad_right = padding[2]
+        pad_bottom = padding[3]
+
+    # Reordered to left, right, top, bottom for the pad op / _pad_symmetric.
+    p = [pad_left, pad_right, pad_top, pad_bottom]
+
+    if padding_mode == "edge":
+        # remap padding_mode str
+        padding_mode = "replicate"
+    elif padding_mode == "symmetric":
+        # route to another implementation
+        return _pad_symmetric(img, p)
+
+    need_squeeze = False
+    if img.ndim < 4:
+        # Paddle names the dimension argument ``axis`` (``dim`` is rejected).
+        img = img.unsqueeze(axis=0)
+        need_squeeze = True
+
+    out_dtype = img.dtype
+    need_cast = False
+    if (padding_mode != "constant") and img.dtype not in (paddle.float32,
+                                                          paddle.float64):
+        # Here we temporary cast input tensor to float
+        need_cast = True
+        img = img.astype(paddle.float32)
+
+    img = paddle_pad(img, p, mode=padding_mode, value=float(fill))
+
+    if need_squeeze:
+        img = img.squeeze(axis=0)
+
+    if need_cast:
+        img = img.astype(out_dtype)
+
+    return img
+
+
+def resize(img: Tensor,
+           size: List[int],
+           interpolation: str="bilinear",
+           max_size: Optional[int]=None,
+           antialias: Optional[bool]=None) -> Tensor:
+    """Resize an image tensor with ``paddle.nn.functional.interpolate``.
+
+    Args:
+        img: Image tensor with (H, W) as its last two dims.
+        size: An int or 1-element tuple/list giving the target length of the
+            shorter edge (aspect ratio kept), or a 2-element ``(h, w)``.
+        interpolation: "nearest", "bilinear" or "bicubic".
+        max_size: Optional upper bound for the longer edge; only allowed when
+            ``size`` describes the shorter edge.
+        antialias: Validated against ``interpolation`` but not forwarded to
+            the interpolation call.
+
+    Returns:
+        The resized tensor with the input dtype, or ``img`` itself when the
+        shorter edge already has the requested length.
+
+    Raises:
+        TypeError: If ``size`` or ``interpolation`` has an unsupported type.
+        ValueError: If an argument value or combination is unsupported.
+    """
+    _assert_image_tensor(img)
+
+    if not isinstance(size, (int, tuple, list)):
+        raise TypeError("Got inappropriate size arg")
+    if not isinstance(interpolation, str):
+        raise TypeError("Got inappropriate interpolation arg")
+
+    if interpolation not in ["nearest", "bilinear", "bicubic"]:
+        raise ValueError(
+            "This interpolation mode is unsupported with Tensor input")
+
+    if isinstance(size, tuple):
+        size = list(size)
+
+    if isinstance(size, list):
+        if len(size) not in [1, 2]:
+            raise ValueError(
+                "Size must be an int or a 1 or 2 element tuple/list, not a "
+                "{} element tuple/list".format(len(size)))
+        if max_size is not None and len(size) != 1:
+            raise ValueError(
+                "max_size should only be passed if size specifies the length of the smaller edge."
+            )
+
+    if antialias is None:
+        antialias = False
+
+    if antialias and interpolation not in ["bilinear", "bicubic"]:
+        raise ValueError(
+            "Antialias option is supported for bilinear and bicubic interpolation modes only"
+        )
+
+    w, h = _get_image_size(img)
+
+    if isinstance(size, int) or len(
+            size) == 1:  # specified size only for the smallest edge
+        short, long = (w, h) if w <= h else (h, w)
+        requested_new_short = size if isinstance(size, int) else size[0]
+
+        if short == requested_new_short:
+            return img
+
+        new_short, new_long = requested_new_short, int(requested_new_short *
+                                                       long / short)
+
+        if max_size is not None:
+            if max_size <= requested_new_short:
+                raise ValueError(
+                    f"max_size = {max_size} must be strictly greater than the requested "
+                    f"size for the smaller edge size = {size}")
+            if new_long > max_size:
+                # Shrink both edges so the longer one lands on max_size.
+                new_short, new_long = int(max_size * new_short /
+                                          new_long), max_size
+
+        new_w, new_h = (new_short, new_long) if w <= h else (new_long,
+                                                             new_short)
+
+    else:  # specified both h and w
+        new_w, new_h = size[1], size[0]
+
+    img, need_cast, need_squeeze, out_dtype = _cast_squeeze_in(
+        img, [paddle.float32, paddle.float64])
+
+    # Always pass a real boolean: Paddle's ``align_corners`` defaults to False,
+    # so nearest mode gets False rather than None.
+    img = interpolate(
+        img, size=[new_h, new_w], mode=interpolation, align_corners=False)
+
+    if interpolation == "bicubic" and out_dtype == paddle.uint8:
+        # Bicubic can overshoot; keep values inside the uint8 range.
+        img = paddle.clip(img, min=0, max=255)
+
+    img = _cast_squeeze_out(
+        img,
+        need_cast=need_cast,
+        need_squeeze=need_squeeze,
+        out_dtype=out_dtype)
+
+    return img
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_paddle/paddlevision/transforms/transforms.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_paddle/paddlevision/transforms/transforms.py
+import math
+import numbers
+import warnings
+from collections.abc import Sequence
+from typing import Tuple, List
+
+import paddle
+from paddle import Tensor
+
+try:
+    import accimage
+except ImportError:
+    accimage = None
+
+from . import functional as F
+from .functional import InterpolationMode, _interpolation_modes_from_int
+
+__all__ = [
+    "Compose", "ToTensor", "Normalize", "Resize", "CenterCrop",
+    "RandomResizedCrop"
+]
+
+
+class Compose:
+    """Chain several transforms into a single callable.
+
+    Args:
+        transforms (list of ``Transform`` objects): callables applied in order.
+
+    Example:
+        >>> transforms.Compose([
+        >>>     transforms.CenterCrop(10),
+        >>>     transforms.ToTensor(),
+        >>> ])
+    """
+
+    def __init__(self, transforms):
+        self.transforms = transforms
+
+    def __call__(self, img):
+        """Feed ``img`` through every transform and return the final result."""
+        result = img
+        for transform in self.transforms:
+            result = transform(result)
+        return result
+
+    def __repr__(self):
+        # One indented line per transform, wrapped in "ClassName(" ... ")".
+        pieces = [self.__class__.__name__ + '(']
+        for transform in self.transforms:
+            pieces.append('\n')
+            pieces.append('    {0}'.format(transform))
+        pieces.append('\n)')
+        return ''.join(pieces)
+
+
+class ToTensor:
+    """Convert a ``PIL Image`` or ``numpy.ndarray`` to a tensor.
+
+    The conversion itself is performed by ``F.to_tensor``. Per the original
+    notes of this transform: an (H x W x C) input in the range [0, 255] becomes
+    a (C x H x W) paddle tensor in the range [0.0, 1.0] when the PIL Image has
+    one of the modes (L, LA, P, I, F, RGB, YCbCr, RGBA, CMYK, 1) or the
+    numpy.ndarray has dtype = np.uint8; otherwise tensors are returned without
+    scaling.
+
+    .. note::
+        Because the input is rescaled to [0.0, 1.0], this transform should not
+        be applied to target image masks.
+    """
+
+    def __call__(self, pic):
+        """
+        Args:
+            pic (PIL Image or numpy.ndarray): Image to be converted to tensor.
+
+        Returns:
+            Tensor: Converted image.
+        """
+        converted = F.to_tensor(pic)
+        return converted
+
+    def __repr__(self):
+        name = self.__class__.__name__
+        return name + '()'
+
+
+class Normalize(paddle.nn.Layer):
+    """Normalize a tensor image with per-channel mean and standard deviation.
+
+    PIL Images are not supported. For ``n`` channels with
+    mean ``(mean[1],...,mean[n])`` and std ``(std[1],..,std[n])`` the intended
+    result is
+    ``output[channel] = (input[channel] - mean[channel]) / std[channel]``;
+    the computation is carried out by ``F.normalize``.
+
+    Args:
+        mean (sequence): Sequence of means for each channel.
+        std (sequence): Sequence of standard deviations for each channel.
+        inplace(bool,optional): Flag forwarded to ``F.normalize``; defaults to
+            ``False``.
+
+    """
+
+    def __init__(self, mean, std, inplace=False):
+        super().__init__()
+        self.mean, self.std = mean, std
+        self.inplace = inplace
+
+    def forward(self, tensor: Tensor) -> Tensor:
+        """
+        Args:
+            tensor (Tensor): Tensor image to be normalized.
+
+        Returns:
+            Tensor: Normalized Tensor image.
+        """
+        normalized = F.normalize(tensor, self.mean, self.std, self.inplace)
+        return normalized
+
+    def __repr__(self):
+        description = '(mean={0}, std={1})'.format(self.mean, self.std)
+        return self.__class__.__name__ + description
+
+
+class Resize(paddle.nn.Layer):
+    """Resize the input image to the given size.
+
+    A paddle Tensor input is expected to have [..., H, W] shape, where ...
+    means an arbitrary number of leading dimensions. The work is delegated to
+    ``F.resize``.
+
+    .. warning::
+        Per the original notes of this transform, PIL and tensor inputs can
+        give slightly different results when downsampling because PIL applies
+        antialiasing; train and serve a model with the same input type.
+
+    Args:
+        size (sequence or int): Desired output size. A sequence like (h, w)
+            sets the output size directly. An int is matched against the
+            smaller edge of the image, i.e. if height > width the image is
+            rescaled to (size * height / width, size).
+        interpolation (InterpolationMode): Interpolation enum defined by
+            :class:`paddlevision.transforms.InterpolationMode`. Default is
+            ``InterpolationMode.BILINEAR``. Integer values (e.g.
+            ``PIL.Image.NEAREST``) are still accepted, with a warning, and
+            converted through ``_interpolation_modes_from_int``.
+        max_size (int, optional): Maximum allowed length of the longer edge of
+            the resized image; when exceeded, the image is resized again so
+            the longer edge equals ``max_size``, which may leave the smaller
+            edge shorter than ``size``.
+        antialias (bool, optional): Antialias flag, stored and forwarded to
+            ``F.resize``.
+
+    """
+
+    def __init__(self,
+                 size,
+                 interpolation=InterpolationMode.BILINEAR,
+                 max_size=None,
+                 antialias=None):
+        super().__init__()
+        if isinstance(size, Sequence):
+            if len(size) not in (1, 2):
+                raise ValueError(
+                    "If size is a sequence, it should have 1 or 2 values")
+        elif not isinstance(size, int):
+            raise TypeError("Size should be int or sequence. Got {}".format(
+                type(size)))
+        self.size = size
+        self.max_size = max_size
+
+        # Backward compatibility with integer value
+        if isinstance(interpolation, int):
+            warnings.warn(
+                "Argument interpolation should be of type InterpolationMode instead of int. "
+                "Please, use InterpolationMode enum.")
+            interpolation = _interpolation_modes_from_int(interpolation)
+
+        self.interpolation = interpolation
+        self.antialias = antialias
+
+    def forward(self, img):
+        """
+        Args:
+            img (PIL Image or Tensor): Image to be scaled.
+
+        Returns:
+            PIL Image or Tensor: Rescaled image.
+        """
+        resized = F.resize(img, self.size, self.interpolation, self.max_size,
+                           self.antialias)
+        return resized
+
+    def __repr__(self):
+        interpolate_str = self.interpolation.value
+        details = '(size={0}, interpolation={1}, max_size={2}, antialias={3})'.format(
+            self.size, interpolate_str, self.max_size, self.antialias)
+        return self.__class__.__name__ + details
+
+
+class CenterCrop(paddle.nn.Layer):
+    """Crop the given image at its center.
+
+    A paddle Tensor input is expected to have [..., H, W] shape, where ...
+    means an arbitrary number of leading dimensions. The crop is performed by
+    ``F.center_crop``; per the original notes of this transform, an image
+    smaller than the output size along any edge is padded with 0 first.
+
+    Args:
+        size (sequence or int): Desired output size of the crop. An int gives
+            a square crop (size, size); a sequence of length 1 is interpreted
+            as (size[0], size[0]).
+    """
+
+    def __init__(self, size):
+        super().__init__()
+        message = "Please provide only two dimensions (h, w) for size."
+        self.size = _setup_size(size, error_msg=message)
+
+    def forward(self, img):
+        """
+        Args:
+            img (PIL Image or Tensor): Image to be cropped.
+
+        Returns:
+            PIL Image or Tensor: Cropped image.
+        """
+        cropped = F.center_crop(img, self.size)
+        return cropped
+
+    def __repr__(self):
+        details = '(size={0})'.format(self.size)
+        return self.__class__.__name__ + details
+
+
+class RandomResizedCrop(paddle.nn.Layer):
+    """Crop a random portion of image and resize it to a given size.
+
+    If the image is paddle Tensor, it is expected
+    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
+
+    A crop of the original image is made: the crop has a random area (H * W)
+    and a random aspect ratio. This crop is finally resized to the given
+    size. This is popularly used to train the Inception networks.
+
+    Args:
+        size (int or sequence): expected output size of the crop, for each edge. If size is an
+            int instead of sequence like (h, w), a square output size ``(size, size)`` is
+            made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]).
+        scale (tuple of float): Specifies the lower and upper bounds for the random area of the crop,
+            before resizing. The scale is defined with respect to the area of the original image.
+        ratio (tuple of float): lower and upper bounds for the random aspect ratio of the crop, before
+            resizing.
+        interpolation (InterpolationMode): Desired interpolation enum defined by
+            :class:`paddlevision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``.
+            If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` and
+            ``InterpolationMode.BICUBIC`` are supported.
+            For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable.
+
+    """
+
+    def __init__(self,
+                 size,
+                 scale=(0.08, 1.0),
+                 ratio=(3. / 4., 4. / 3.),
+                 interpolation=InterpolationMode.BILINEAR):
+        super().__init__()
+        self.size = _setup_size(
+            size,
+            error_msg="Please provide only two dimensions (h, w) for size.")
+
+        if not isinstance(scale, Sequence):
+            raise TypeError("Scale should be a sequence")
+        if not isinstance(ratio, Sequence):
+            raise TypeError("Ratio should be a sequence")
+        if (scale[0] > scale[1]) or (ratio[0] > ratio[1]):
+            warnings.warn("Scale and ratio should be of kind (min, max)")
+
+        # Backward compatibility with integer value
+        if isinstance(interpolation, int):
+            warnings.warn(
+                "Argument interpolation should be of type InterpolationMode instead of int. "
+                "Please, use InterpolationMode enum.")
+            interpolation = _interpolation_modes_from_int(interpolation)
+
+        self.interpolation = interpolation
+        self.scale = scale
+        self.ratio = ratio
+
+    @staticmethod
+    def get_params(img: Tensor, scale: List[float],
+                   ratio: List[float]) -> Tuple[int, int, int, int]:
+        """Get parameters for ``crop`` for a random sized crop.
+
+        Args:
+            img (PIL Image or Tensor): Input image.
+            scale (list): range of scale of the origin size cropped
+            ratio (list): range of aspect ratio of the origin aspect ratio cropped
+
+        Returns:
+            tuple: params (i, j, h, w) to be passed to ``crop`` for a random
+            sized crop.
+        """
+        width, height = F._get_image_size(img)
+        area = height * width
+
+        log_ratio = paddle.log(paddle.to_tensor(ratio))
+        for _ in range(10):
+            target_area = area * paddle.uniform(
+                shape=[1], min=scale[0], max=scale[1]).numpy().item()
+            aspect_ratio = paddle.exp(
+                paddle.uniform(
+                    shape=[1], min=log_ratio[0], max=log_ratio[1])).numpy(
+                    ).item()
+
+            w = int(round(math.sqrt(target_area * aspect_ratio)))
+            h = int(round(math.sqrt(target_area / aspect_ratio)))
+
+            if 0 < w <= width and 0 < h <= height:
+                i = paddle.randint(
+                    0, height - h + 1, shape=(1, )).numpy().item()
+                j = paddle.randint(
+                    0, width - w + 1, shape=(1, )).numpy().item()
+                return i, j, h, w
+
+        # Fallback to central crop
+        in_ratio = float(width) / float(height)
+        if in_ratio < min(ratio):
+            w = width
+            h = int(round(w / min(ratio)))
+        elif in_ratio > max(ratio):
+            h = height
+            w = int(round(h * max(ratio)))
+        else:  # whole image
+            w = width
+            h = height
+        i = (height - h) // 2
+        j = (width - w) // 2
+        return i, j, h, w
+
+    def forward(self, img):
+        """Crop a randomly chosen region of ``img`` and resize it.
+
+        The crop box comes from ``get_params`` using this instance's
+        ``scale`` and ``ratio``; the result is resized to ``self.size`` with
+        ``self.interpolation``.
+
+        Args:
+            img (PIL Image or Tensor): Image to be cropped and resized.
+
+        Returns:
+            PIL Image or Tensor: Randomly cropped and resized image.
+        """
+        top, left, crop_h, crop_w = self.get_params(img, self.scale,
+                                                    self.ratio)
+        return F.resized_crop(img, top, left, crop_h, crop_w, self.size,
+                              self.interpolation)
+
+    def __repr__(self):
+        """Return ``ClassName(size=..., scale=..., ratio=..., interpolation=...)``.
+
+        Entries of ``scale`` and ``ratio`` are rounded to 4 decimal places;
+        the interpolation is shown through its ``value`` attribute.
+        """
+        interpolate_str = self.interpolation.value
+        rounded_scale = tuple(round(s, 4) for s in self.scale)
+        rounded_ratio = tuple(round(r, 4) for r in self.ratio)
+        fields = [
+            'size={0}'.format(self.size),
+            'scale={0}'.format(rounded_scale),
+            'ratio={0}'.format(rounded_ratio),
+            'interpolation={0}'.format(interpolate_str),
+        ]
+        return self.__class__.__name__ + '(' + ', '.join(fields) + ')'
+
+
+def _setup_size(size, error_msg):
+    """Normalize ``size`` into a pair.
+
+    Args:
+        size: A number, a one-element sequence, or a two-element sequence.
+        error_msg (str): Message for the ``ValueError`` raised when ``size``
+            has a length other than 1 or 2.
+
+    Returns:
+        ``(int(size), int(size))`` for a number, ``(size[0], size[0])`` for a
+        one-element sequence, otherwise ``size`` itself unchanged.
+
+    Raises:
+        ValueError: If ``size`` is not a number and its length is not 1 or 2.
+    """
+    if isinstance(size, numbers.Number):
+        side = int(size)
+        return side, side
+
+    if isinstance(size, Sequence) and len(size) == 1:
+        only = size[0]
+        return only, only
+
+    if len(size) == 2:
+        return size
+
+    raise ValueError(error_msg)
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_paddle/presets.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_paddle/presets.py
+import os
+import sys
+sys.path.append(os.path.abspath(os.path.join(__file__, '../')))
+
+from paddlevision.transforms import autoaugment, transforms
+
+
+class ClassificationPresetTrain:
+    """Training-time preprocessing pipeline for classification.
+
+    The pipeline is: ``RandomResizedCrop(crop_size)``, an optional
+    ``AutoAugment`` stage, ``ToTensor`` and ``Normalize(mean, std)``.
+
+    NOTE: ``hflip_prob`` and ``random_erase_prob`` are accepted for interface
+    compatibility but are not applied; no flip or erasing transform is added.
+    """
+
+    def __init__(self,
+                 crop_size,
+                 mean=(0.485, 0.456, 0.406),
+                 std=(0.229, 0.224, 0.225),
+                 hflip_prob=0.5,
+                 auto_augment_policy=None,
+                 random_erase_prob=0.0):
+        pipeline = [transforms.RandomResizedCrop(crop_size)]
+
+        # AutoAugment is only inserted when a policy name was given.
+        if auto_augment_policy is not None:
+            policy = autoaugment.AutoAugmentPolicy(auto_augment_policy)
+            pipeline.append(autoaugment.AutoAugment(policy=policy))
+
+        pipeline.append(transforms.ToTensor())
+        pipeline.append(transforms.Normalize(mean=mean, std=std))
+
+        self.transforms = transforms.Compose(pipeline)
+
+    def __call__(self, img):
+        """Apply the composed transforms to ``img`` and return the result."""
+        return self.transforms(img)
+
+
+class ClassificationPresetEval:
+    """Evaluation-time preprocessing pipeline for classification.
+
+    The pipeline is: ``Resize(resize_size)``, ``CenterCrop(crop_size)``,
+    ``ToTensor`` and ``Normalize(mean, std)``.
+    """
+
+    def __init__(self,
+                 crop_size,
+                 resize_size=256,
+                 mean=(0.485, 0.456, 0.406),
+                 std=(0.229, 0.224, 0.225)):
+        steps = [
+            transforms.Resize(resize_size),
+            transforms.CenterCrop(crop_size),
+            transforms.ToTensor(),
+            transforms.Normalize(
+                mean=mean, std=std),
+        ]
+        self.transforms = transforms.Compose(steps)
+
+    def __call__(self, img):
+        """Apply the composed transforms to ``img`` and return the result."""
+        return self.transforms(img)
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_paddle/train.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_paddle/train.py
+import datetime
+import os
+import sys
+import time
+
+import paddle
+from paddle import nn
+import paddlevision
+
+import presets
+import utils
+
+import numpy as np
+import random
+
+apex = None
+
+import numpy as np
+from reprod_log import ReprodLogger
+
+
+def train_one_epoch(
+        model,
+        criterion,
+        optimizer,
+        data_loader,
+        device,
+        epoch,
+        print_freq, ):
+    """Run one training pass over ``data_loader`` and log running statistics.
+
+    For each batch the loss is back-propagated, the optimizer is stepped and
+    its gradients cleared. Top-1/top-5 accuracy, data-loading time and compute
+    time are accumulated and printed by rank <= 0 whenever ``batch_idx`` is a
+    positive multiple of ``print_freq``; the accumulators are then reset.
+
+    Args:
+        model: Network to train; switched to train mode here.
+        criterion: Callable computing the loss from ``(output, target)``.
+        optimizer: Optimizer providing ``step``, ``clear_grad`` and ``get_lr``.
+        data_loader: Iterable yielding ``(image, target)`` batches.
+        device: Not used inside this function.
+        epoch: Epoch index, used only in the log message.
+        print_freq: Number of iterations between log messages.
+    """
+    model.train()
+    # training log
+    train_reader_cost = 0.0
+    train_run_cost = 0.0
+    total_samples = 0
+    acc1 = 0.0
+    acc5 = 0.0
+    reader_start = time.time()
+    batch_past = 0
+
+    for batch_idx, (image, target) in enumerate(data_loader):
+        # Time spent waiting for this batch since the previous iteration ended.
+        train_reader_cost += time.time() - reader_start
+        train_start = time.time()
+        output = model(image)
+        loss = criterion(output, target)
+        loss.backward()
+        optimizer.step()
+        optimizer.clear_grad()
+        train_run_cost += time.time() - train_start
+        acc = utils.accuracy(output, target, topk=(1, 5))
+        acc1 += acc[0].item()
+        acc5 += acc[1].item()
+        total_samples += image.shape[0]
+        batch_past += 1
+
+        # Iteration 0 is never logged because of the ``batch_idx > 0`` guard.
+        if batch_idx > 0 and batch_idx % print_freq == 0:
+            msg = "[Epoch {}, iter: {}] top1: {:.5f}, top5: {:.5f}, lr: {:.5f}, loss: {:.5f}, avg_reader_cost: {:.5f} sec, avg_batch_cost: {:.5f} sec, avg_samples: {}, avg_ips: {:.5f} images/sec.".format(
+                epoch, batch_idx, acc1 / batch_past, acc5 / batch_past,
+                optimizer.get_lr(),
+                loss.item(), train_reader_cost / batch_past,
+                (train_reader_cost + train_run_cost) / batch_past,
+                total_samples / batch_past,
+                total_samples / (train_reader_cost + train_run_cost))
+            if paddle.distributed.get_rank() <= 0:
+                print(msg)
+                sys.stdout.flush()
+            # Start a fresh averaging window after every log point.
+            train_reader_cost = 0.0
+            train_run_cost = 0.0
+            total_samples = 0
+            acc1 = 0.0
+            acc5 = 0.0
+            batch_past = 0
+
+        # Restart the reader timer just before the next batch is fetched.
+        reader_start = time.time()
+
+
+def evaluate(model, criterion, data_loader, device, print_freq=100):
+    """Evaluate ``model`` on ``data_loader`` and return the global top-1 average.
+
+    The model is put in eval mode and run without gradient tracking. Loss,
+    top-1 and top-5 accuracy are tracked per batch in a ``MetricLogger``
+    (accuracies weighted by batch size), and a summary line is printed at
+    the end.
+
+    Args:
+        model: Network to evaluate.
+        criterion: Callable computing the loss from ``(output, target)``.
+        data_loader: Iterable yielding ``(image, target)`` batches.
+        device: Not used inside this function.
+        print_freq (int): Logging interval passed to ``log_every``.
+
+    Returns:
+        The ``global_avg`` of the ``acc1`` meter.
+    """
+    model.eval()
+    logger = utils.MetricLogger(delimiter="  ")
+    with paddle.no_grad():
+        for image, target in logger.log_every(data_loader, print_freq,
+                                              'Test:'):
+            output = model(image)
+            loss = criterion(output, target)
+            top1, top5 = utils.accuracy(output, target, topk=(1, 5))
+
+            # FIXME need to take into account that the datasets
+            # could have been padded in distributed setup
+            n_samples = image.shape[0]
+            logger.update(loss=loss.item())
+            logger.meters['acc1'].update(top1.item(), n=n_samples)
+            logger.meters['acc5'].update(top5.item(), n=n_samples)
+
+    # gather the stats from all processes
+    logger.synchronize_between_processes()
+
+    summary = ' * Acc@1 {top1.global_avg:.3f} Acc@5 {top5.global_avg:.3f}'
+    print(summary.format(top1=logger.acc1, top5=logger.acc5))
+    return logger.acc1.global_avg
+
+
+def load_data(traindir, valdir, args):
+    """Build the training/validation datasets and their samplers.
+
+    Args:
+        traindir: Root folder of the training images.
+        valdir: Root folder of the validation images.
+        args: Parsed options; reads ``model``, ``batch_size`` and, when
+            present, ``auto_augment`` and ``random_erase``.
+
+    Returns:
+        tuple: ``(dataset, dataset_test, train_sampler, test_sampler)``.
+    """
+    # Data loading code
+    print("Loading data")
+    if args.model == 'inception_v3':
+        resize_size, crop_size = 342, 299
+    else:
+        resize_size, crop_size = 256, 224
+
+    print("Loading training data")
+    st = time.time()
+    train_preset = presets.ClassificationPresetTrain(
+        crop_size=crop_size,
+        auto_augment_policy=getattr(args, "auto_augment", None),
+        random_erase_prob=getattr(args, "random_erase", 0.0))
+    dataset = paddlevision.datasets.ImageFolder(traindir, train_preset)
+
+    print("Took", time.time() - st)
+
+    print("Loading validation data")
+    eval_preset = presets.ClassificationPresetEval(
+        crop_size=crop_size, resize_size=resize_size)
+    dataset_test = paddlevision.datasets.ImageFolder(valdir, eval_preset)
+
+    print("Creating data loaders")
+    # Training batches are shuffled and the last partial batch is kept.
+    train_sampler = paddle.io.DistributedBatchSampler(
+        dataset=dataset,
+        batch_size=args.batch_size,
+        shuffle=True,
+        drop_last=False)
+    test_sampler = paddle.io.SequenceSampler(dataset_test)
+
+    return dataset, dataset_test, train_sampler, test_sampler
+
+
+def _checkpoint_path(prefix, suffix):
+    """Return the checkpoint file for ``prefix`` with extension ``suffix``.
+
+    Checkpoints are written by ``main`` as ``<name>.pdparams`` /
+    ``<name>.pdopt``, so ``prefix + suffix`` is tried first. The older
+    ``os.path.join(prefix, suffix)`` layout is still honoured when such a
+    file exists. If neither exists, the ``prefix + suffix`` form is returned
+    so the resulting load error names the expected file.
+    """
+    direct = prefix + suffix
+    if os.path.isfile(direct):
+        return direct
+    legacy = os.path.join(prefix, suffix)
+    if os.path.isfile(legacy):
+        return legacy
+    return direct
+
+
+def main(args):
+    """Train and/or evaluate a classification model described by ``args``.
+
+    Args:
+        args: Parsed command-line options (see ``get_args_parser``).
+            ``args.resume`` is a checkpoint prefix such as
+            ``<output_dir>/latest``; ``.pdparams`` and ``.pdopt`` are
+            appended to find the model and optimizer states.
+
+    Returns:
+        In ``--test-only`` mode: the top-1 value from ``evaluate`` on rank 0
+        and ``None`` on every other rank. Otherwise: the best top-1 value
+        seen over all epochs (only rank 0 evaluates, so other ranks return
+        ``0.0``).
+
+    Raises:
+        RuntimeError: If ``args.opt`` is neither ``sgd`` nor ``rmsprop``.
+    """
+    if args.output_dir:
+        utils.mkdir(args.output_dir)
+
+    print(args)
+
+    device = paddle.set_device(args.device)
+
+    # multi cards
+    if paddle.distributed.get_world_size() > 1:
+        paddle.distributed.init_parallel_env()
+
+    train_dir = os.path.join(args.data_path, 'train')
+    val_dir = os.path.join(args.data_path, 'val')
+    dataset, dataset_test, train_sampler, test_sampler = load_data(
+        train_dir, val_dir, args)
+    train_batch_sampler = train_sampler
+    data_loader = paddle.io.DataLoader(
+        dataset=dataset,
+        num_workers=args.workers,
+        return_list=True,
+        batch_sampler=train_batch_sampler)
+    test_batch_sampler = paddle.io.BatchSampler(
+        sampler=test_sampler, batch_size=args.batch_size)
+    data_loader_test = paddle.io.DataLoader(
+        dataset_test,
+        batch_sampler=test_batch_sampler,
+        num_workers=args.workers)
+
+    print("Creating model")
+    model = paddlevision.models.__dict__[args.model](
+        pretrained=args.pretrained)
+
+    criterion = nn.CrossEntropyLoss()
+
+    lr_scheduler = paddle.optimizer.lr.StepDecay(
+        args.lr, step_size=args.lr_step_size, gamma=args.lr_gamma)
+
+    opt_name = args.opt.lower()
+    if opt_name == 'sgd':
+        optimizer = paddle.optimizer.Momentum(
+            learning_rate=lr_scheduler,
+            momentum=args.momentum,
+            parameters=model.parameters(),
+            weight_decay=args.weight_decay)
+    elif opt_name == 'rmsprop':
+        optimizer = paddle.optimizer.RMSprop(
+            learning_rate=lr_scheduler,
+            momentum=args.momentum,
+            parameters=model.parameters(),
+            weight_decay=args.weight_decay,
+            eps=0.0316,
+            alpha=0.9)
+    else:
+        raise RuntimeError(
+            "Invalid optimizer {}. Only SGD and RMSprop are supported.".format(
+                args.opt))
+
+    if args.resume:
+        # The suffix is appended to the prefix; joining it as a path
+        # component would look for a file literally named ".pdparams".
+        layer_state_dict = paddle.load(
+            _checkpoint_path(args.resume, '.pdparams'))
+        model.set_state_dict(layer_state_dict)
+        opt_state_dict = paddle.load(_checkpoint_path(args.resume, '.pdopt'))
+        optimizer.set_state_dict(opt_state_dict)
+
+    # multi cards
+    if paddle.distributed.get_world_size() > 1:
+        model = paddle.DataParallel(model)
+
+    if args.test_only:
+        # Only rank 0 evaluates; the other ranks must stop here as well
+        # instead of falling through into the training loop.
+        if paddle.distributed.get_rank() == 0:
+            return evaluate(model, criterion, data_loader_test, device=device)
+        return None
+
+    print("Start training")
+    start_time = time.time()
+    best_top1 = 0.0
+
+    for epoch in range(args.start_epoch, args.epochs):
+        train_one_epoch(model, criterion, optimizer, data_loader, device,
+                        epoch, args.print_freq)
+        lr_scheduler.step()
+        # Evaluation and checkpointing happen on rank 0 only.
+        if paddle.distributed.get_rank() == 0:
+            top1 = evaluate(model, criterion, data_loader_test, device=device)
+            best_top1 = max(best_top1, top1)
+            if args.output_dir:
+                paddle.save(model.state_dict(),
+                            os.path.join(args.output_dir,
+                                         'model_{}.pdparams'.format(epoch)))
+                paddle.save(optimizer.state_dict(),
+                            os.path.join(args.output_dir,
+                                         'model_{}.pdopt'.format(epoch)))
+                paddle.save(model.state_dict(),
+                            os.path.join(args.output_dir, 'latest.pdparams'))
+                paddle.save(optimizer.state_dict(),
+                            os.path.join(args.output_dir, 'latest.pdopt'))
+
+    total_time = time.time() - start_time
+    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
+    print('Training time {}'.format(total_time_str))
+    return best_top1
+
+
+def get_args_parser(add_help=True):
+    """Build the command-line parser for the training script.
+
+    Args:
+        add_help (bool): Forwarded to ``argparse.ArgumentParser``; controls
+            whether ``-h/--help`` is added.
+
+    Returns:
+        argparse.ArgumentParser: Parser with all training options registered.
+    """
+    import argparse
+    parser = argparse.ArgumentParser(
+        description='PaddlePaddle Classification Training', add_help=add_help)
+
+    parser.add_argument('--data-path', default='../data', help='dataset')
+    parser.add_argument('--model', default='alexnet', help='model')
+    parser.add_argument('--device', default='gpu', help='device')
+    parser.add_argument('-b', '--batch-size', default=32, type=int)
+    parser.add_argument(
+        '--epochs',
+        default=90,
+        type=int,
+        metavar='N',
+        help='number of total epochs to run')
+    parser.add_argument(
+        '-j',
+        '--workers',
+        default=8,
+        type=int,
+        metavar='N',
+        help='number of data loading workers (default: 8)')
+    parser.add_argument('--opt', default='sgd', type=str, help='optimizer')
+    parser.add_argument(
+        '--lr', default=0.00125, type=float, help='initial learning rate')
+    parser.add_argument(
+        '--momentum', default=0.9, type=float, metavar='M', help='momentum')
+    parser.add_argument(
+        '--wd',
+        '--weight-decay',
+        default=1e-4,
+        type=float,
+        metavar='W',
+        help='weight decay (default: 1e-4)',
+        dest='weight_decay')
+    parser.add_argument(
+        '--lr-step-size',
+        default=30,
+        type=int,
+        help='decrease lr every step-size epochs')
+    parser.add_argument(
+        '--lr-gamma',
+        default=0.1,
+        type=float,
+        help='decrease lr by a factor of lr-gamma')
+    parser.add_argument(
+        '--print-freq', default=10, type=int, help='print frequency')
+    parser.add_argument('--output-dir', default='.', help='path where to save')
+    parser.add_argument('--resume', default='', help='resume from checkpoint')
+    parser.add_argument(
+        '--start-epoch', default=0, type=int, metavar='N', help='start epoch')
+    parser.add_argument(
+        "--sync-bn",
+        dest="sync_bn",
+        help="Use sync batch norm",
+        action="store_true", )
+    parser.add_argument(
+        "--test-only",
+        dest="test_only",
+        help="Only test the model",
+        action="store_true", )
+    # NOTE: no ``action`` is set, so this option takes a value and defaults
+    # to None; that value is what gets passed as ``pretrained=`` to the model.
+    parser.add_argument(
+        "--pretrained",
+        dest="pretrained",
+        help="Use pre-trained models from the modelzoo")
+    parser.add_argument(
+        '--auto-augment',
+        default=None,
+        help='auto augment policy (default: None)')
+    parser.add_argument(
+        '--random-erase',
+        default=0.0,
+        type=float,
+        help='random erasing probability (default: 0.0)')
+
+    # Mixed precision training parameters
+    parser.add_argument(
+        '--apex',
+        action='store_true',
+        help='Use apex for mixed precision training')
+    # Adjacent string literals are concatenated verbatim, so each fragment
+    # carries its own separating space.
+    parser.add_argument(
+        '--apex-opt-level',
+        default='O1',
+        type=str,
+        help='For apex mixed precision training. '
+        'O0 for FP32 training, O1 for mixed precision training. '
+        'For further detail, see https://github.com/NVIDIA/apex/tree/master/examples/imagenet'
+    )
+
+    return parser
+
+
+if __name__ == "__main__":
+    # Script entry point: parse the command line, run main(), then record the
+    # returned top-1 value with ReprodLogger.
+    args = get_args_parser().parse_args()
+    top1 = main(args)
+    # Only rank 0 writes the result file.
+    if paddle.distributed.get_rank() == 0:
+        reprod_logger = ReprodLogger()
+        reprod_logger.add("top1", np.array([top1]))
+        reprod_logger.save("train_align_paddle.npy")
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_paddle/utils.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_paddle/utils.py
+from collections import defaultdict, deque, OrderedDict
+import copy
+import datetime
+import hashlib
+import time
+import paddle
+import paddle.distributed as dist
+
+import errno
+import os
+
+
+class SmoothedValue(object):
+    """Running statistic over a bounded window plus a global weighted average.
+
+    The last ``window_size`` values are kept in a deque for ``median``,
+    ``avg``, ``max`` and ``value``; ``total`` and ``count`` accumulate over
+    the whole series for ``global_avg``.
+    """
+
+    def __init__(self, window_size=20, fmt=None):
+        self.fmt = "{median:.4f} ({global_avg:.4f})" if fmt is None else fmt
+        self.deque = deque(maxlen=window_size)
+        self.count = 0
+        self.total = 0.0
+
+    def update(self, value, n=1):
+        """Record ``value``, weighting it by ``n`` in the global totals."""
+        self.deque.append(value)
+        self.total += value * n
+        self.count += n
+
+    def synchronize_between_processes(self):
+        """Round-trip ``count`` and ``total`` through a float64 tensor.
+
+        No collective operation is issued here, and the deque is left
+        untouched.
+        """
+        packed = paddle.to_tensor(
+            [self.count, self.total], dtype='float64').numpy().tolist()
+        self.count = int(packed[0])
+        self.total = packed[1]
+
+    @property
+    def median(self):
+        """Median of the values currently in the window."""
+        window = paddle.to_tensor(list(self.deque))
+        return window.median().numpy().item()
+
+    @property
+    def avg(self):
+        """Mean of the values currently in the window (computed in float32)."""
+        window = paddle.to_tensor(list(self.deque), dtype='float32')
+        return window.mean().numpy().item()
+
+    @property
+    def global_avg(self):
+        """Weighted average over every update: ``total / count``."""
+        return self.total / self.count
+
+    @property
+    def max(self):
+        """Largest value currently in the window."""
+        return max(self.deque)
+
+    @property
+    def value(self):
+        """Most recently recorded value."""
+        return self.deque[-1]
+
+    def __str__(self):
+        stats = dict(
+            median=self.median,
+            avg=self.avg,
+            global_avg=self.global_avg,
+            max=self.max,
+            value=self.value)
+        return self.fmt.format(**stats)
+
+
+class MetricLogger(object):
+    """Collection of named ``SmoothedValue`` meters with periodic logging.
+
+    Meters are created on first use and can be read as attributes
+    (``logger.loss``) as well as through ``logger.meters['loss']``.
+    """
+
+    def __init__(self, delimiter="\t"):
+        self.meters = defaultdict(SmoothedValue)
+        self.delimiter = delimiter
+
+    def update(self, **kwargs):
+        """Record one value per keyword; tensors are converted with ``item()``."""
+        for k, v in kwargs.items():
+            if isinstance(v, paddle.Tensor):
+                v = v.item()
+            assert isinstance(v, (float, int))
+            self.meters[k].update(v)
+
+    def __getattr__(self, attr):
+        """Expose meters as attributes.
+
+        ``__getattr__`` only runs after normal attribute lookup has failed.
+        ``meters`` is read directly from ``__dict__`` so that an instance
+        without it (e.g. one created by ``copy``/``pickle`` or ``__new__``)
+        raises ``AttributeError`` instead of recursing without end.
+        """
+        meters = self.__dict__.get('meters')
+        if meters is not None and attr in meters:
+            return meters[attr]
+        raise AttributeError("'{}' object has no attribute '{}'".format(
+            type(self).__name__, attr))
+
+    def __str__(self):
+        loss_str = []
+        for name, meter in self.meters.items():
+            loss_str.append("{}: {}".format(name, str(meter)))
+        return self.delimiter.join(loss_str)
+
+    def synchronize_between_processes(self):
+        """Call ``synchronize_between_processes`` on every meter."""
+        for meter in self.meters.values():
+            meter.synchronize_between_processes()
+
+    def add_meter(self, name, meter):
+        """Register ``meter`` under ``name``, replacing any existing one."""
+        self.meters[name] = meter
+
+    def log_every(self, iterable, print_freq, header=None):
+        """Yield items from ``iterable`` and print progress periodically.
+
+        Args:
+            iterable: Sized iterable (``len()`` is used for the progress
+                counter and the ETA).
+            print_freq (int): A status line is printed every ``print_freq``
+                items, starting with item 0.
+            header (str, optional): Prefix for every printed line.
+
+        Yields:
+            The items of ``iterable``, unchanged.
+        """
+        i = 0
+        if not header:
+            header = ''
+        start_time = time.time()
+        end = time.time()
+        iter_time = SmoothedValue(fmt='{avg:.4f}')
+        data_time = SmoothedValue(fmt='{avg:.4f}')
+        # Pad the item counter to the width of the total count.
+        space_fmt = ':' + str(len(str(len(iterable)))) + 'd'
+        log_msg = self.delimiter.join([
+            header, '[{0' + space_fmt + '}/{1}]', 'eta: {eta}', '{meters}',
+            'time: {time}', 'data: {data}'
+        ])
+        for obj in iterable:
+            # Time spent obtaining the item vs. the full iteration time
+            # (which includes the consumer's work during the yield).
+            data_time.update(time.time() - end)
+            yield obj
+            iter_time.update(time.time() - end)
+            if i % print_freq == 0:
+                eta_seconds = iter_time.global_avg * (len(iterable) - i)
+                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
+                print(
+                    log_msg.format(
+                        i,
+                        len(iterable),
+                        eta=eta_string,
+                        meters=str(self),
+                        time=str(iter_time),
+                        data=str(data_time)))
+            i += 1
+            end = time.time()
+        total_time = time.time() - start_time
+        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
+        print('{} Total time: {}'.format(header, total_time_str))
+
+
+def accuracy(output, target, topk=(1, )):
+    """Compute top-k accuracy for each k in ``topk``.
+
+    Args:
+        output: Model scores; the top ``max(topk)`` entries along axis 1
+            are compared with ``target``.
+        target: Ground-truth labels; ``target.shape[0]`` is used as the
+            batch size.
+        topk (tuple[int]): Values of k to evaluate.
+
+    Returns:
+        list: One float32 tensor per k holding the number of hits divided by
+        the batch size (a fraction, not a percentage).
+    """
+    with paddle.no_grad():
+        largest_k = max(topk)
+        n_samples = target.shape[0]
+
+        _, top_idx = output.topk(largest_k, 1, True, True)
+        hits = top_idx.t().equal(target)
+
+        return [
+            hits.astype(paddle.int32)[:k].flatten().sum(dtype='float32') /
+            n_samples for k in topk
+        ]
+
+
+def get_world_size():
+    """Return ``paddle.distributed.get_world_size()``."""
+    return dist.get_world_size()
+
+
+def mkdir(path):
+    """Create ``path`` and any missing parents; do nothing if it exists.
+
+    Any ``OSError`` other than "already exists" propagates to the caller.
+    """
+    try:
+        os.makedirs(path)
+    except FileExistsError:
+        # FileExistsError is the OSError subclass raised for errno EEXIST.
+        pass
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/__init__.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/__init__.py
+from .metric import accuracy_torch
+from .presets import *
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/checkpoint.pth
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/checkpoint.pth
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/metric.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/metric.py
+import torch
+
+
+def accuracy_torch(output, target, topk=(1, )):
+    """Compute top-k accuracy, in percent, for each k in ``topk``.
+
+    Args:
+        output: Score tensor; the top ``max(topk)`` entries along dim 1 are
+            compared with ``target``.
+        target: Label tensor; ``target.size(0)`` is used as the batch size.
+        topk (tuple[int]): Values of k to evaluate.
+
+    Returns:
+        list: One float32 tensor per k holding ``hits * 100 / batch_size``.
+    """
+    with torch.no_grad():
+        largest_k = max(topk)
+        n_samples = target.size(0)
+
+        _, top_idx = output.topk(largest_k, 1, True, True)
+        hits = top_idx.t().eq(target[None])
+
+        percent_per_hit = 100.0 / n_samples
+        return [
+            hits[:k].flatten().sum(dtype=torch.float32) * percent_per_hit
+            for k in topk
+        ]
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/presets.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/presets.py
+from torchvision.transforms import autoaugment, transforms
+
+
+class ClassificationPresetTrain:
+    """Training-time preprocessing pipeline for classification.
+
+    The pipeline is: ``RandomResizedCrop(crop_size)``, an optional
+    ``AutoAugment`` stage, ``ToTensor`` and ``Normalize(mean, std)``.
+
+    NOTE: ``hflip_prob`` and ``random_erase_prob`` are accepted for interface
+    compatibility but are not applied; no flip or erasing transform is added.
+    """
+
+    def __init__(self,
+                 crop_size,
+                 mean=(0.485, 0.456, 0.406),
+                 std=(0.229, 0.224, 0.225),
+                 hflip_prob=0.5,
+                 auto_augment_policy=None,
+                 random_erase_prob=0.0):
+        pipeline = [transforms.RandomResizedCrop(crop_size)]
+
+        # AutoAugment is only inserted when a policy name was given.
+        if auto_augment_policy is not None:
+            policy = autoaugment.AutoAugmentPolicy(auto_augment_policy)
+            pipeline.append(autoaugment.AutoAugment(policy=policy))
+
+        pipeline.append(transforms.ToTensor())
+        pipeline.append(transforms.Normalize(mean=mean, std=std))
+
+        self.transforms = transforms.Compose(pipeline)
+
+    def __call__(self, img):
+        """Apply the composed transforms to ``img`` and return the result."""
+        return self.transforms(img)
+
+
+class ClassificationPresetEval:
+    """Evaluation-time preprocessing pipeline for classification.
+
+    The pipeline is: ``Resize(resize_size)``, ``CenterCrop(crop_size)``,
+    ``ToTensor`` and ``Normalize(mean, std)``.
+    """
+
+    def __init__(self,
+                 crop_size,
+                 resize_size=256,
+                 mean=(0.485, 0.456, 0.406),
+                 std=(0.229, 0.224, 0.225)):
+        steps = [
+            transforms.Resize(resize_size),
+            transforms.CenterCrop(crop_size),
+            transforms.ToTensor(),
+            transforms.Normalize(
+                mean=mean, std=std),
+        ]
+        self.transforms = transforms.Compose(steps)
+
+    def __call__(self, img):
+        """Apply the composed transforms to ``img`` and return the result."""
+        return self.transforms(img)
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/torchvision/__init__.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/torchvision/__init__.py
+from . import datasets
+from . import models
+from . import transforms
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/torchvision/_internally_replaced_utils.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/torchvision/_internally_replaced_utils.py
+import os
+import importlib.machinery
+
+
+def _download_file_from_remote_location(fpath: str, url: str) -> None:
+    """Placeholder that does nothing; both arguments are ignored."""
+    pass
+
+
+def _is_remote_location_available() -> bool:
+    """Always return ``False``."""
+    return False
+
+
+try:
+    from torch.hub import load_state_dict_from_url
+except ImportError:
+    from torch.utils.model_zoo import load_url as load_state_dict_from_url
+
+
+def _get_extension_path(lib_name):
+    """Locate the compiled extension ``lib_name`` next to this file.
+
+    On Windows the directory of this file is first registered as a DLL
+    search directory.
+
+    Args:
+        lib_name (str): Module name of the extension, without suffix.
+
+    Returns:
+        str: ``origin`` of the spec found for the extension.
+
+    Raises:
+        ImportError: If no extension module named ``lib_name`` is found.
+        OSError: On Windows, if the DLL directory cannot be registered.
+    """
+    lib_dir = os.path.dirname(__file__)
+    if os.name == 'nt':
+        # Register the main torchvision library location on the default DLL path
+        import ctypes
+        import sys
+
+        kernel32 = ctypes.WinDLL('kernel32.dll', use_last_error=True)
+        with_load_library_flags = hasattr(kernel32, 'AddDllDirectory')
+        # 0x0001 is SEM_FAILCRITICALERRORS in the Win32 API; the previous
+        # mode is restored in ``finally`` even when registration fails.
+        prev_error_mode = kernel32.SetErrorMode(0x0001)
+        try:
+            if with_load_library_flags:
+                kernel32.AddDllDirectory.restype = ctypes.c_void_p
+
+            if sys.version_info >= (3, 8):
+                os.add_dll_directory(lib_dir)
+            elif with_load_library_flags:
+                res = kernel32.AddDllDirectory(lib_dir)
+                if res is None:
+                    err = ctypes.WinError(ctypes.get_last_error())
+                    err.strerror += f' Error adding "{lib_dir}" to the DLL directories.'
+                    raise err
+        finally:
+            kernel32.SetErrorMode(prev_error_mode)
+
+    loader_details = (importlib.machinery.ExtensionFileLoader,
+                      importlib.machinery.EXTENSION_SUFFIXES)
+
+    extfinder = importlib.machinery.FileFinder(lib_dir, loader_details)
+    ext_specs = extfinder.find_spec(lib_name)
+    if ext_specs is None:
+        raise ImportError(
+            f"Could not find extension module '{lib_name}' in '{lib_dir}'.",
+            name=lib_name)
+
+    return ext_specs.origin
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/torchvision/datasets/__init__.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/torchvision/datasets/__init__.py
+from .folder import ImageFolder, DatasetFolder
+
+from .vision import VisionDataset
+
+__all__ = ('ImageFolder', 'DatasetFolder', 'VisionDataset')
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/torchvision/datasets/folder.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/torchvision/datasets/folder.py
+from .vision import VisionDataset
+
+from PIL import Image
+
+import os
+import os.path
+from typing import Any, Callable, cast, Dict, List, Optional, Tuple
+
+
+def has_file_allowed_extension(filename: str,
+                               extensions: Tuple[str, ...]) -> bool:
+    """Checks if a file is an allowed extension.
+
+    The comparison is done on the lower-cased file name, so ``extensions``
+    should be given in lower case.
+
+    Args:
+        filename (string): path to a file
+        extensions (tuple of strings): extensions to consider (lowercase).
+            A single string or any other iterable of strings (e.g. a list)
+            is accepted as well.
+
+    Returns:
+        bool: True if the filename ends with one of given extensions
+    """
+    # str.endswith only accepts a str or a tuple of str; convert lists and
+    # other iterables so they do not raise TypeError.
+    if not isinstance(extensions, (str, tuple)):
+        extensions = tuple(extensions)
+    return filename.lower().endswith(extensions)
+
+
+def is_image_file(filename: str) -> bool:
+    """Checks if a file is an allowed image extension.
+
+    Args:
+        filename (string): path to a file
+
+    Returns:
+        bool: True if the filename ends with a known image extension
+    """
+    # NOTE(review): IMG_EXTENSIONS is a module-level name that is not defined
+    # above this function; confirm it is defined elsewhere in the module
+    # before this is called.
+    return has_file_allowed_extension(filename, IMG_EXTENSIONS)
+
+
+def find_classes(directory: str) -> Tuple[List[str], Dict[str, int]]:
+    """List the sub-directories of ``directory`` as classes.
+
+    Args:
+        directory (str): Root directory whose immediate sub-directories are
+            the class folders.
+
+    Returns:
+        Tuple[List[str], Dict[str, int]]: The sorted class names and a
+        mapping from each class name to its index in that sorted list.
+
+    Raises:
+        FileNotFoundError: If ``directory`` contains no sub-directory.
+    """
+    with os.scandir(directory) as entries:
+        classes = [entry.name for entry in entries if entry.is_dir()]
+    classes.sort()
+
+    if len(classes) == 0:
+        raise FileNotFoundError(
+            f"Couldn't find any class folder in {directory}.")
+
+    class_to_idx = dict(zip(classes, range(len(classes))))
+    return classes, class_to_idx
+
+
+def make_dataset(
+        directory: str,
+        class_to_idx: Optional[Dict[str, int]]=None,
+        extensions: Optional[Tuple[str, ...]]=None,
+        is_valid_file: Optional[Callable[[str], bool]]=None, ) -> List[Tuple[
+            str, int]]:
+    """Generates a list of samples of a form (path_to_sample, class).
+
+    See :class:`DatasetFolder` for details.
+
+    Note: The class_to_idx parameter is here optional and will use the logic of the ``find_classes`` function
+    by default.
+
+    Args:
+        directory (str): Root dataset directory (``~`` is expanded).
+        class_to_idx (optional): Mapping from class folder name to index.
+        extensions (optional): Allowed file extensions. Exactly one of
+            ``extensions`` and ``is_valid_file`` must be given.
+        is_valid_file (optional): Predicate called with the full path of
+            each candidate file.
+
+    Returns:
+        List[Tuple[str, int]]: ``(path, class_index)`` pairs, ordered by
+        class name, then directory, then file name. Class folders that do
+        not exist under ``directory`` are skipped.
+
+    Raises:
+        ValueError: If ``class_to_idx`` is empty, or if ``extensions`` and
+            ``is_valid_file`` are both given or both omitted.
+    """
+    directory = os.path.expanduser(directory)
+
+    if class_to_idx is None:
+        _, class_to_idx = find_classes(directory)
+    elif not class_to_idx:
+        raise ValueError(
+            "'class_to_index' must have at least one entry to collect any samples."
+        )
+
+    both_none = extensions is None and is_valid_file is None
+    both_something = extensions is not None and is_valid_file is not None
+    if both_none or both_something:
+        raise ValueError(
+            "Both extensions and is_valid_file cannot be None or not None at the same time"
+        )
+
+    if extensions is not None:
+
+        def is_valid_file(x: str) -> bool:
+            return has_file_allowed_extension(
+                x, cast(Tuple[str, ...], extensions))
+
+    is_valid_file = cast(Callable[[str], bool], is_valid_file)
+
+    instances = []
+    for target_class in sorted(class_to_idx.keys()):
+        class_index = class_to_idx[target_class]
+        target_dir = os.path.join(directory, target_class)
+        if not os.path.isdir(target_dir):
+            continue
+        for root, _, fnames in sorted(os.walk(target_dir, followlinks=True)):
+            for fname in sorted(fnames):
+                # The predicate is documented to receive the file's path, so
+                # that it can e.g. open the file; a bare name would be
+                # resolved against the current working directory.
+                path = os.path.join(root, fname)
+                if is_valid_file(path):
+                    instances.append((path, class_index))
+
+    return instances
+
+
+class DatasetFolder(VisionDataset):
+    """A generic data loader.
+
+    This default directory structure can be customized by overriding the
+    :meth:`find_classes` method.
+
+    Args:
+        root (string): Root directory path.
+        loader (callable): A function to load a sample given its path.
+        extensions (tuple[string]): A list of allowed extensions.
+            both extensions and is_valid_file should not be passed.
+        transform (callable, optional): A function/transform that takes in
+            a sample and returns a transformed version.
+            E.g, ``transforms.RandomCrop`` for images.
+        target_transform (callable, optional): A function/transform that takes
+            in the target and transforms it.
+        is_valid_file (callable, optional): A function that takes path of a file
+            and check if the file is a valid file (used to check of corrupt files)
+            both extensions and is_valid_file should not be passed.
+
+     Attributes:
+        classes (list): List of the class names sorted alphabetically.
+        class_to_idx (dict): Dict with items (class_name, class_index).
+        samples (list): List of (sample path, class_index) tuples
+        targets (list): The class_index value for each image in the dataset
+    """
+
+    def __init__(
+            self,
+            root: str,
+            loader: Callable[[str], Any],
+            extensions: Optional[Tuple[str, ...]]=None,
+            transform: Optional[Callable]=None,
+            target_transform: Optional[Callable]=None,
+            is_valid_file: Optional[Callable[[str], bool]]=None, ) -> None:
+        """Index the dataset under ``root``.
+
+        Classes are discovered with ``self.find_classes`` and samples with
+        ``self.make_dataset``; ``targets`` holds the class index of every
+        sample in the same order as ``samples``.
+        """
+        super(DatasetFolder, self).__init__(
+            root, transform=transform, target_transform=target_transform)
+        found_classes, name_to_index = self.find_classes(self.root)
+        found_samples = self.make_dataset(self.root, name_to_index,
+                                          extensions, is_valid_file)
+
+        self.loader = loader
+        self.extensions = extensions
+
+        self.classes = found_classes
+        self.class_to_idx = name_to_index
+        self.samples = found_samples
+        self.targets = [sample[1] for sample in found_samples]
+
+    @staticmethod
+    def make_dataset(
+            directory: str,
+            class_to_idx: Dict[str, int],
+            extensions: Optional[Tuple[str, ...]]=None,
+            is_valid_file: Optional[Callable[[str], bool]]=None, ) -> List[
+                Tuple[str, int]]:
+        """Generates a list of samples of a form (path_to_sample, class).
+
+        Delegates to the module-level ``make_dataset`` after checking that
+        ``class_to_idx`` was supplied. This can be overridden to e.g. read
+        files from a compressed zip file instead of from the disk.
+
+        Args:
+            directory (str): root dataset directory, corresponding to ``self.root``.
+            class_to_idx (Dict[str, int]): Dictionary mapping class name to class index.
+            extensions (optional): Allowed extensions. Exactly one of
+                ``extensions`` and ``is_valid_file`` should be passed.
+                Defaults to None.
+            is_valid_file (optional): A function that decides whether a
+                file belongs to the dataset. Defaults to None.
+
+        Raises:
+            ValueError: In case ``class_to_idx`` is None or empty.
+            ValueError: In case ``extensions`` and ``is_valid_file`` are None or both are not None.
+
+        Returns:
+            List[Tuple[str, int]]: samples of a form (path_to_sample, class)
+        """
+        if class_to_idx is not None:
+            return make_dataset(
+                directory,
+                class_to_idx,
+                extensions=extensions,
+                is_valid_file=is_valid_file)
+
+        # A None mapping would make the module-level function fall back to
+        # the module-level find_classes(), bypassing a possibly overridden
+        # find_classes() method, so it is rejected here.
+        raise ValueError("The class_to_idx parameter cannot be None.")
+
+    def find_classes(self, directory: str) -> Tuple[List[str], Dict[str, int]]:
+        """Find the class folders in a dataset structured as follows::
+
+            directory/
+            ├── class_x
+            │   ├── xxx.ext
+            │   ├── xxy.ext
+            │   └── ...
+            │       └── xxz.ext
+            └── class_y
+                ├── 123.ext
+                ├── nsdf3.ext
+                └── ...
+                └── asd932_.ext
+
+        This method can be overridden to only consider
+        a subset of classes, or to adapt to a different dataset directory structure.
+        The default implementation delegates to the module-level ``find_classes`` helper.
+
+        Args:
+            directory(str): Root directory path, corresponding to ``self.root``
+
+        Raises:
+            FileNotFoundError: If ``directory`` has no class folders.
+
+        Returns:
+            (Tuple[List[str], Dict[str, int]]): List of all classes and dictionary mapping each class to an index.
+        """
+        return find_classes(directory)
+
+    def __getitem__(self, index: int) -> Tuple[Any, Any]:
+        """Load the sample at ``index`` and apply the configured transforms.
+
+        Args:
+            index (int): Position in ``self.samples``.
+
+        Returns:
+            tuple: ``(sample, target)`` where ``sample`` is the output of
+            ``self.loader`` and ``target`` is the class index stored with the
+            sample, each passed through its transform when one is set.
+        """
+        sample_path, label = self.samples[index]
+        loaded = self.loader(sample_path)
+        loaded = loaded if self.transform is None else self.transform(loaded)
+        label = (label if self.target_transform is None else
+                 self.target_transform(label))
+        return loaded, label
+
+    def __len__(self) -> int:
+        """Return the number of entries in ``self.samples``."""
+        return len(self.samples)
+
+
+# File suffixes that ``ImageFolder`` passes as ``extensions`` when the caller
+# does not supply an ``is_valid_file`` predicate.
+IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif',
+                  '.tiff', '.webp')
+
+
+def pil_loader(path: str) -> Image.Image:
+    """Open the file at ``path`` with PIL and return it converted to RGB."""
+    # The file is opened explicitly (rather than handing the path to PIL) to
+    # avoid a ResourceWarning, see
+    # https://github.com/python-pillow/Pillow/issues/835
+    with open(path, 'rb') as handle:
+        return Image.open(handle).convert('RGB')
+
+
+# TODO: specify the return type
+def accimage_loader(path: str) -> Any:
+    """Load ``path`` with ``accimage``, falling back to :func:`pil_loader`.
+
+    ``accimage`` is imported lazily, so it is only required when this loader
+    is actually called.
+    """
+    import accimage
+    try:
+        image = accimage.Image(path)
+    except IOError:
+        # Potentially a decoding problem: let PIL try instead.
+        return pil_loader(path)
+    return image
+
+
+def default_loader(path: str) -> Any:
+    """Load the image at ``path``; this version always uses :func:`pil_loader`."""
+    return pil_loader(path)
+
+
+class ImageFolder(DatasetFolder):
+    """A generic image dataset whose files are, by default, laid out as: ::
+
+        root/dog/xxx.png
+        root/dog/xxy.png
+        root/dog/[...]/xxz.png
+
+        root/cat/123.png
+        root/cat/nsdf3.png
+        root/cat/[...]/asd932_.png
+
+    This class inherits from :class:`~torchvision.datasets.DatasetFolder` so
+    the same methods can be overridden to customize the dataset.
+
+    Args:
+        root (string): Root directory path.
+        transform (callable, optional): A function/transform that takes in a PIL image
+            and returns a transformed version. E.g, ``transforms.RandomCrop``
+        target_transform (callable, optional): A function/transform that takes in the
+            target and transforms it.
+        loader (callable, optional): A function to load an image given its path.
+        is_valid_file (callable, optional): A function that takes the path of an image file
+            and checks whether the file is valid (used to check for corrupt files).
+            When given, no extension filter is passed to the parent class.
+
+    Attributes:
+        classes (list): List of the class names sorted alphabetically.
+        class_to_idx (dict): Dict with items (class_name, class_index).
+        imgs (list): List of (image path, class_index) tuples
+    """
+
+    def __init__(
+            self,
+            root: str,
+            transform: Optional[Callable]=None,
+            target_transform: Optional[Callable]=None,
+            loader: Callable[[str], Any]=default_loader,
+            is_valid_file: Optional[Callable[[str], bool]]=None, ):
+        # The extension filter and a custom validity predicate are
+        # alternatives: only fall back to IMG_EXTENSIONS without a predicate.
+        allowed_suffixes = None if is_valid_file is not None else IMG_EXTENSIONS
+        super().__init__(
+            root,
+            loader,
+            allowed_suffixes,
+            transform=transform,
+            target_transform=target_transform,
+            is_valid_file=is_valid_file)
+        # ``imgs`` is an alias of ``samples`` (same list object).
+        self.imgs = self.samples
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/torchvision/datasets/vision.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/torchvision/datasets/vision.py
+import os
+import torch
+import torch.utils.data as data
+from typing import Any, Callable, List, Optional, Tuple
+
+
+class VisionDataset(data.Dataset):
+    """
+    Base Class For making datasets which are compatible with torchvision.
+    It is necessary to override the ``__getitem__`` and ``__len__`` method.
+
+    Args:
+        root (string): Root directory of dataset. A leading ``~`` is expanded
+            when ``root`` is a ``str`` or ``bytes`` path; any other value is
+            stored unchanged.
+        transforms (callable, optional): A function/transforms that takes in
+            an image and a label and returns the transformed versions of both.
+        transform (callable, optional): A function/transform that takes in a PIL image
+            and returns a transformed version. E.g, ``transforms.RandomCrop``
+        target_transform (callable, optional): A function/transform that takes in the
+            target and transforms it.
+
+    Raises:
+        ValueError: If ``transforms`` is given together with ``transform``
+            and/or ``target_transform``.
+
+    .. note::
+
+        :attr:`transforms` and the combination of :attr:`transform` and :attr:`target_transform` are mutually exclusive.
+    """
+    # Number of spaces used to indent the body lines of ``__repr__``.
+    _repr_indent = 4
+
+    def __init__(
+            self,
+            root: str,
+            transforms: Optional[Callable]=None,
+            transform: Optional[Callable]=None,
+            target_transform: Optional[Callable]=None, ) -> None:
+        # The builtin types are checked directly instead of the private
+        # ``torch._six.string_classes`` tuple, which recent PyTorch releases
+        # no longer provide; ``(str, bytes)`` is what that tuple contained.
+        if isinstance(root, (str, bytes)):
+            root = os.path.expanduser(root)
+        self.root = root
+
+        has_transforms = transforms is not None
+        has_separate_transform = transform is not None or target_transform is not None
+        if has_transforms and has_separate_transform:
+            raise ValueError(
+                "Only transforms or transform/target_transform can "
+                "be passed as argument")
+
+        # for backwards-compatibility
+        self.transform = transform
+        self.target_transform = target_transform
+
+        # Separate transforms are wrapped into one joint (input, target)
+        # callable so that ``self.transforms`` always has the same call shape.
+        if has_separate_transform:
+            transforms = StandardTransform(transform, target_transform)
+        self.transforms = transforms
+
+    def __getitem__(self, index: int) -> Any:
+        """
+        Args:
+            index (int): Index
+
+        Returns:
+            (Any): Sample and meta data, optionally transformed by the respective transforms.
+
+        Raises:
+            NotImplementedError: Always; subclasses must override this method.
+        """
+        raise NotImplementedError
+
+    def __len__(self) -> int:
+        """Return the dataset size; subclasses must override this method."""
+        raise NotImplementedError
+
+    def __repr__(self) -> str:
+        """Return a multi-line summary: class name, size, root, extras, transforms."""
+        head = "Dataset " + self.__class__.__name__
+        body = ["Number of datapoints: {}".format(self.__len__())]
+        if self.root is not None:
+            body.append("Root location: {}".format(self.root))
+        body += self.extra_repr().splitlines()
+        # ``hasattr`` guards against instances whose ``__init__`` was bypassed.
+        if hasattr(self, "transforms") and self.transforms is not None:
+            body += [repr(self.transforms)]
+        lines = [head] + [" " * self._repr_indent + line for line in body]
+        return '\n'.join(lines)
+
+    def _format_transform_repr(self, transform: Callable,
+                               head: str) -> List[str]:
+        """Prefix the first repr line of ``transform`` with ``head`` and align the rest under it."""
+        lines = transform.__repr__().splitlines()
+        return (["{}{}".format(head, lines[0])] +
+                ["{}{}".format(" " * len(head), line) for line in lines[1:]])
+
+    def extra_repr(self) -> str:
+        """Return extra lines for ``__repr__``; empty by default, meant to be overridden."""
+        return ""
+
+
+class StandardTransform(object):
+    """Apply an optional input transform and an optional target transform as one callable."""
+
+    def __init__(self,
+                 transform: Optional[Callable]=None,
+                 target_transform: Optional[Callable]=None) -> None:
+        self.transform = transform
+        self.target_transform = target_transform
+
+    def __call__(self, input: Any, target: Any) -> Tuple[Any, Any]:
+        """Return ``(input, target)`` with each element passed through its transform, if set."""
+        new_input = input if self.transform is None else self.transform(input)
+        new_target = (target if self.target_transform is None else
+                      self.target_transform(target))
+        return new_input, new_target
+
+    def _format_transform_repr(self, transform: Callable,
+                               head: str) -> List[str]:
+        """Prefix the first repr line of ``transform`` with ``head`` and align the rest under it."""
+        rendered = transform.__repr__().splitlines()
+        formatted = [f"{head}{rendered[0]}"]
+        # Continuation lines are padded by the width of ``head``.
+        padding = " " * len(head)
+        formatted.extend(f"{padding}{line}" for line in rendered[1:])
+        return formatted
+
+    def __repr__(self) -> str:
+        """Return the class name followed by one section per configured transform."""
+        sections = [self.__class__.__name__]
+        labelled = (
+            (self.transform, "Transform: "),
+            (self.target_transform, "Target transform: "), )
+        for candidate, label in labelled:
+            if candidate is not None:
+                sections.extend(self._format_transform_repr(candidate, label))
+
+        return '\n'.join(sections)
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/torchvision/models/__init__.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/torchvision/models/__init__.py
+from .mobilenet_v3_torch import mobilenet_v3_large, mobilenet_v3_small
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/torchvision/models/_utils.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/torchvision/models/_utils.py
+from collections import OrderedDict
+from typing import Dict, Optional
+
+from torch import nn
+
+
+class IntermediateLayerGetter(nn.ModuleDict):
+    """
+    Module wrapper that returns intermediate layers from a model.
+
+    It strongly assumes that the modules have been registered into the model
+    in the same order as they are used. This means that one should **not**
+    reuse the same nn.Module twice in the forward if you want this to work.
+
+    Additionally, it is only able to query submodules that are directly
+    assigned to the model. So if `model` is passed, `model.feature1` can
+    be returned, but not `model.feature1.layer2`.
+
+    Args:
+        model (nn.Module): model on which we will extract the features
+        return_layers (Dict[name, new_name]): a dict whose keys are the names
+            of the modules for which the activations will be returned, and
+            whose values are the names under which those activations are
+            returned (which the user can specify).
+
+    Raises:
+        ValueError: If a key of ``return_layers`` is not a direct child of ``model``.
+
+    Examples::
+
+        >>> m = torchvision.models.resnet18(pretrained=True)
+        >>> # extract layer1 and layer3, giving as names `feat1` and feat2`
+        >>> new_m = torchvision.models._utils.IntermediateLayerGetter(m,
+        >>>     {'layer1': 'feat1', 'layer3': 'feat2'})
+        >>> out = new_m(torch.rand(1, 3, 224, 224))
+        >>> print([(k, v.shape) for k, v in out.items()])
+        >>>     [('feat1', torch.Size([1, 64, 56, 56])),
+        >>>      ('feat2', torch.Size([1, 256, 14, 14]))]
+    """
+
+    _version = 2
+    __annotations__ = {"return_layers": Dict[str, str], }
+
+    def __init__(self, model: nn.Module,
+                 return_layers: Dict[str, str]) -> None:
+        child_names = [child_name for child_name, _ in model.named_children()]
+        if not set(return_layers).issubset(child_names):
+            raise ValueError("return_layers are not present in model")
+
+        # Track the still-unseen layers on a stringified copy so that the
+        # caller's mapping, stored below, is left untouched.
+        pending = {str(k): str(v) for k, v in return_layers.items()}
+        kept = OrderedDict()
+        for child_name, child in model.named_children():
+            kept[child_name] = child
+            pending.pop(child_name, None)
+            # Children after the last requested layer are not needed.
+            if not pending:
+                break
+
+        super().__init__(kept)
+        self.return_layers = return_layers
+
+    def forward(self, x):
+        """Run the kept children in order and collect the requested activations."""
+        collected = OrderedDict()
+        for layer_name, layer in self.items():
+            x = layer(x)
+            if layer_name not in self.return_layers:
+                continue
+            collected[self.return_layers[layer_name]] = x
+        return collected
+
+
+def _make_divisible(v: float, divisor: int,
+                    min_value: Optional[int]=None) -> int:
+    """
+    Round ``v`` to a nearby multiple of ``divisor``, never below ``min_value``.
+
+    This function is taken from the original tf repo, where it ensures that
+    all layers have a channel number that is divisible by 8:
+    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
+
+    Args:
+        v (float): Value to round.
+        divisor (int): The result is built from multiples of this number.
+        min_value (int, optional): Lower bound for the result. Defaults to ``divisor``.
+
+    Returns:
+        int: The rounded value, bumped up by one ``divisor`` if rounding lost more than 10% of ``v``.
+    """
+    lower_bound = divisor if min_value is None else min_value
+    nearest_multiple = int(v + divisor / 2) // divisor * divisor
+    rounded = max(lower_bound, nearest_multiple)
+    # Make sure that round down does not go down by more than 10%.
+    return rounded + divisor if rounded < 0.9 * v else rounded
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/torchvision/models/misc_torch.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/torchvision/models/misc_torch.py
+import warnings
+from typing import Callable, List, Optional
+
+import torch
+from torch import Tensor
+
+
+class Conv2d(torch.nn.Conv2d):
+    """Deprecated subclass of ``torch.nn.Conv2d`` that emits a ``FutureWarning`` when constructed."""
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        deprecation_message = (
+            "torchvision.ops.misc.Conv2d is deprecated and will be "
+            "removed in future versions, use torch.nn.Conv2d instead.")
+        warnings.warn(deprecation_message, FutureWarning)
+
+
+class ConvTranspose2d(torch.nn.ConvTranspose2d):
+    """Deprecated subclass of ``torch.nn.ConvTranspose2d`` that emits a ``FutureWarning`` when constructed."""
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        deprecation_message = (
+            "torchvision.ops.misc.ConvTranspose2d is deprecated and will be "
+            "removed in future versions, use torch.nn.ConvTranspose2d instead.")
+        warnings.warn(deprecation_message, FutureWarning)
+
+
+class BatchNorm2d(torch.nn.BatchNorm2d):
+    """Deprecated subclass of ``torch.nn.BatchNorm2d`` that emits a ``FutureWarning`` when constructed."""
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        deprecation_message = (
+            "torchvision.ops.misc.BatchNorm2d is deprecated and will be "
+            "removed in future versions, use torch.nn.BatchNorm2d instead.")
+        warnings.warn(deprecation_message, FutureWarning)
+
+
+# Module-level alias of ``torch.nn.functional.interpolate``.
+interpolate = torch.nn.functional.interpolate
+
+
+# This is not in nn
+class FrozenBatchNorm2d(torch.nn.Module):
+    """
+    BatchNorm2d where the batch statistics and the affine parameters are fixed.
+
+    All four tensors (``weight``, ``bias``, ``running_mean``, ``running_var``)
+    are registered as buffers rather than parameters.
+
+    Args:
+        num_features (int): Number of features ``C`` from an expected input of size ``(N, C, H, W)``
+        eps (float): a value added to the denominator for numerical stability. Default: 1e-5
+        n (int, optional): Deprecated former name of ``num_features``; when
+            given it overrides ``num_features`` and a ``DeprecationWarning`` is issued.
+    """
+
+    def __init__(
+            self,
+            num_features: int,
+            eps: float=1e-5,
+            n: Optional[int]=None, ):
+        # n=None for backward-compatibility
+        if n is not None:
+            warnings.warn(
+                "`n` argument is deprecated and has been renamed `num_features`",
+                DeprecationWarning)
+            num_features = n
+        super().__init__()
+        self.eps = eps
+        # Identity affine and unit statistics until a state dict is loaded.
+        buffer_factories = (
+            ("weight", torch.ones),
+            ("bias", torch.zeros),
+            ("running_mean", torch.zeros),
+            ("running_var", torch.ones), )
+        for buffer_name, make_tensor in buffer_factories:
+            self.register_buffer(buffer_name, make_tensor(num_features))
+
+    def _load_from_state_dict(
+            self,
+            state_dict: dict,
+            prefix: str,
+            local_metadata: dict,
+            strict: bool,
+            missing_keys: List[str],
+            unexpected_keys: List[str],
+            error_msgs: List[str], ):
+        """Drop this module's ``num_batches_tracked`` entry, for which no buffer exists, then defer to the parent."""
+        state_dict.pop(prefix + "num_batches_tracked", None)
+
+        super()._load_from_state_dict(state_dict, prefix, local_metadata,
+                                      strict, missing_keys, unexpected_keys,
+                                      error_msgs)
+
+    def forward(self, x: Tensor) -> Tensor:
+        """Apply the fixed normalization ``x * scale + shift`` per channel."""
+        # move reshapes to the beginning
+        # to make it fuser-friendly
+        per_channel = (1, -1, 1, 1)
+        weight = self.weight.reshape(per_channel)
+        offset = self.bias.reshape(per_channel)
+        variance = self.running_var.reshape(per_channel)
+        mean = self.running_mean.reshape(per_channel)
+        scale = weight * (variance + self.eps).rsqrt()
+        shift = offset - mean * scale
+        return x * scale + shift
+
+    def __repr__(self) -> str:
+        return "{}({}, eps={})".format(self.__class__.__name__,
+                                       self.weight.shape[0], self.eps)
+
+
+class ConvNormActivation(torch.nn.Sequential):
+    """
+    Configurable block used for Convolution-Normalization-Activation blocks.
+
+    Args:
+        in_channels (int): Number of channels in the input image
+        out_channels (int): Number of channels produced by the Convolution-Normalization-Activation block
+        kernel_size: (int, optional): Size of the convolving kernel. Default: 3
+        stride (int, optional): Stride of the convolution. Default: 1
+        padding (int, tuple or str, optional): Padding added to all four sides of the input. Default: None, in which case it will be calculated as ``padding = (kernel_size - 1) // 2 * dilation``
+        groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
+        norm_layer (Callable[..., torch.nn.Module], optional): Norm layer that will be stacked on top of the convolution layer. If ``None`` this layer won't be used. Default: ``torch.nn.BatchNorm2d``
+        activation_layer (Callable[..., torch.nn.Module], optional): Activation function which will be stacked on top of the normalization layer (if not None), otherwise on top of the conv layer. If ``None`` this layer won't be used. Default: ``torch.nn.ReLU``
+        dilation (int): Spacing between kernel elements. Default: 1
+        inplace (bool): Parameter for the activation layer, which can optionally do the operation in-place. Default ``True``
+        bias (bool, optional): Whether to use bias in the convolution layer. By default, biases are included if ``norm_layer is None``.
+
+    """
+
+    def __init__(
+            self,
+            in_channels: int,
+            out_channels: int,
+            kernel_size: int=3,
+            stride: int=1,
+            padding: Optional[int]=None,
+            groups: int=1,
+            norm_layer: Optional[Callable[
+                ..., torch.nn.Module]]=torch.nn.BatchNorm2d,
+            activation_layer: Optional[Callable[
+                ..., torch.nn.Module]]=torch.nn.ReLU,
+            dilation: int=1,
+            inplace: bool=True,
+            bias: Optional[bool]=None, ) -> None:
+        # Resolve the two "derive it for me" defaults first.
+        if padding is None:
+            padding = (kernel_size - 1) // 2 * dilation
+        if bias is None:
+            bias = norm_layer is None
+
+        convolution = torch.nn.Conv2d(
+            in_channels,
+            out_channels,
+            kernel_size,
+            stride,
+            padding,
+            dilation=dilation,
+            groups=groups,
+            bias=bias, )
+        stages = [convolution]
+        if norm_layer is not None:
+            stages.append(norm_layer(out_channels))
+        if activation_layer is not None:
+            stages.append(activation_layer(inplace=inplace))
+
+        super().__init__(*stages)
+        self.out_channels = out_channels
+
+
+class SqueezeExcitation(torch.nn.Module):
+    """
+    This block implements the Squeeze-and-Excitation block from https://arxiv.org/abs/1709.01507 (see Fig. 1).
+    Parameters ``activation``, and ``scale_activation`` correspond to ``delta`` and ``sigma`` in eq. 3.
+
+    Args:
+        input_channels (int): Number of channels in the input image
+        squeeze_channels (int): Number of squeeze channels
+        activation (Callable[..., torch.nn.Module], optional): ``delta`` activation. Default: ``torch.nn.ReLU``
+        scale_activation (Callable[..., torch.nn.Module]): ``sigma`` activation. Default: ``torch.nn.Sigmoid``
+    """
+
+    def __init__(
+            self,
+            input_channels: int,
+            squeeze_channels: int,
+            activation: Callable[..., torch.nn.Module]=torch.nn.ReLU,
+            scale_activation: Callable[..., torch.nn.Module]=torch.nn.Sigmoid,
+    ) -> None:
+        super().__init__()
+        # Global pooling, then a 1x1-conv bottleneck (squeeze) and expansion.
+        self.avgpool = torch.nn.AdaptiveAvgPool2d(1)
+        self.fc1 = torch.nn.Conv2d(input_channels, squeeze_channels, 1)
+        self.fc2 = torch.nn.Conv2d(squeeze_channels, input_channels, 1)
+        self.activation = activation()
+        self.scale_activation = scale_activation()
+
+    def _scale(self, input: Tensor) -> Tensor:
+        """Compute the per-channel gate that ``forward`` multiplies into ``input``."""
+        pooled = self.avgpool(input)
+        squeezed = self.activation(self.fc1(pooled))
+        return self.scale_activation(self.fc2(squeezed))
+
+    def forward(self, input: Tensor) -> Tensor:
+        """Return ``input`` multiplied by its gate from :meth:`_scale`."""
+        return self._scale(input) * input
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/torchvision/models/mobilenet_v3_torch.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/torchvision/models/mobilenet_v3_torch.py
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/torchvision/transforms/__init__.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/torchvision/transforms/__init__.py
+from .transforms import *
+from .autoaugment import *
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/torchvision/transforms/autoaugment.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/torchvision/transforms/autoaugment.py
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/torchvision/transforms/functional.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/torchvision/transforms/functional.py
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/torchvision/transforms/functional_pil.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/torchvision/transforms/functional_pil.py
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/torchvision/transforms/functional_tensor.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/torchvision/transforms/functional_tensor.py
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/torchvision/transforms/transforms.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/torchvision/transforms/transforms.py
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/train.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/train.py
--- a/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/utils.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/mobilenetv3_ref/utils.py
--- a/tutorials/mobilenetv3_prod/Step1-5/requirements.txt
+++ b/tutorials/mobilenetv3_prod/Step1-5/requirements.txt
+reprod-log
\ No newline at end of file
--- a/tutorials/mobilenetv3_prod/Step1-5/result/data_paddle.npy
+++ b/tutorials/mobilenetv3_prod/Step1-5/result/data_paddle.npy
--- a/tutorials/mobilenetv3_prod/Step1-5/result/data_ref.npy
+++ b/tutorials/mobilenetv3_prod/Step1-5/result/data_ref.npy
--- a/tutorials/mobilenetv3_prod/Step1-5/result/forward_paddle.npy
+++ b/tutorials/mobilenetv3_prod/Step1-5/result/forward_paddle.npy
--- a/tutorials/mobilenetv3_prod/Step1-5/result/forward_ref.npy
+++ b/tutorials/mobilenetv3_prod/Step1-5/result/forward_ref.npy
--- a/tutorials/mobilenetv3_prod/Step1-5/result/log/acc_diff.log
+++ b/tutorials/mobilenetv3_prod/Step1-5/result/log/acc_diff.log
+[2021/12/22 20:08:46] root INFO: acc_top1: 
+[2021/12/22 20:08:46] root INFO: 	mean diff: check passed: True, value: 0.0
+[2021/12/22 20:08:46] root INFO: acc_top5: 
+[2021/12/22 20:08:46] root INFO: 	mean diff: check passed: True, value: 0.0
+[2021/12/22 20:08:46] root INFO: diff check passed
--- a/tutorials/mobilenetv3_prod/Step1-5/result/log/backward_diff.log
+++ b/tutorials/mobilenetv3_prod/Step1-5/result/log/backward_diff.log
+[2021/12/23 17:49:27] root INFO: loss_0: 
+[2021/12/23 17:49:27] root INFO: 	mean diff: check passed: False, value: 1.9073486328125e-06
+[2021/12/23 17:49:27] root INFO: lr_0: 
+[2021/12/23 17:49:27] root INFO: 	mean diff: check passed: True, value: 0.0
+[2021/12/23 17:49:27] root INFO: loss_1: 
+[2021/12/23 17:49:27] root INFO: 	mean diff: check passed: False, value: 2.384185791015625e-06
+[2021/12/23 17:49:27] root INFO: lr_1: 
+[2021/12/23 17:49:27] root INFO: 	mean diff: check passed: True, value: 0.0
+[2021/12/23 17:49:27] root INFO: loss_2: 
+[2021/12/23 17:49:27] root INFO: 	mean diff: check passed: False, value: 7.62939453125e-06
+[2021/12/23 17:49:27] root INFO: lr_2: 
+[2021/12/23 17:49:27] root INFO: 	mean diff: check passed: True, value: 0.0
+[2021/12/23 17:49:27] root INFO: loss_3: 
+[2021/12/23 17:49:27] root INFO: 	mean diff: check passed: False, value: 0.002070903778076172
+[2021/12/23 17:49:27] root INFO: lr_3: 
+[2021/12/23 17:49:27] root INFO: 	mean diff: check passed: True, value: 0.0
+[2021/12/23 17:49:27] root INFO: loss_4: 
+[2021/12/23 17:49:27] root INFO: 	mean diff: check passed: False, value: 0.002232074737548828
+[2021/12/23 17:49:27] root INFO: lr_4: 
+[2021/12/23 17:49:27] root INFO: 	mean diff: check passed: True, value: 0.0
+[2021/12/23 17:49:27] root INFO: loss_5: 
+[2021/12/23 17:49:27] root INFO: 	mean diff: check passed: False, value: 0.03954291343688965
+[2021/12/23 17:49:27] root INFO: lr_5: 
+[2021/12/23 17:49:27] root INFO: 	mean diff: check passed: True, value: 0.0
+[2021/12/23 17:49:27] root INFO: diff check failed
--- a/tutorials/mobilenetv3_prod/Step1-5/result/log/data_diff.log
+++ b/tutorials/mobilenetv3_prod/Step1-5/result/log/data_diff.log
+[2021/12/23 17:21:22] root INFO: length: 
+[2021/12/23 17:21:22] root INFO: 	mean diff: check passed: True, value: 0.0
+[2021/12/23 17:21:22] root INFO: dataloader_0: 
+[2021/12/23 17:21:22] root INFO: 	mean diff: check passed: True, value: 0.0
+[2021/12/23 17:21:22] root INFO: dataloader_1: 
+[2021/12/23 17:21:22] root INFO: 	mean diff: check passed: True, value: 0.0
+[2021/12/23 17:21:22] root INFO: dataloader_2: 
+[2021/12/23 17:21:22] root INFO: 	mean diff: check passed: True, value: 0.0
+[2021/12/23 17:21:22] root INFO: dataloader_3: 
+[2021/12/23 17:21:22] root INFO: 	mean diff: check passed: True, value: 0.0
+[2021/12/23 17:21:22] root INFO: diff check passed
--- a/tutorials/mobilenetv3_prod/Step1-5/result/log/forward_diff.log
+++ b/tutorials/mobilenetv3_prod/Step1-5/result/log/forward_diff.log
+[2021/12/23 17:44:09] root INFO: logits: 
+[2021/12/23 17:44:09] root INFO: 	mean diff: check passed: False, value: 2.308018565599923e-06
+[2021/12/23 17:44:09] root INFO: diff check failed
--- a/tutorials/mobilenetv3_prod/Step1-5/result/log/loss_diff.log
+++ b/tutorials/mobilenetv3_prod/Step1-5/result/log/loss_diff.log
+[2021/12/23 17:46:12] root INFO: loss: 
+[2021/12/23 17:46:12] root INFO: 	mean diff: check passed: True, value: 0.0
+[2021/12/23 17:46:12] root INFO: diff check passed
--- a/tutorials/mobilenetv3_prod/Step1-5/result/log/metric_diff.log
+++ b/tutorials/mobilenetv3_prod/Step1-5/result/log/metric_diff.log
+[2021/12/23 17:45:32] root INFO: acc_top1: 
+[2021/12/23 17:45:32] root INFO: 	mean diff: check passed: True, value: 0.0
+[2021/12/23 17:45:32] root INFO: acc_top5: 
+[2021/12/23 17:45:32] root INFO: 	mean diff: check passed: True, value: 0.0
+[2021/12/23 17:45:32] root INFO: diff check passed
--- a/tutorials/mobilenetv3_prod/Step1-5/result/loss_paddle.npy
+++ b/tutorials/mobilenetv3_prod/Step1-5/result/loss_paddle.npy
--- a/tutorials/mobilenetv3_prod/Step1-5/result/loss_ref.npy
+++ b/tutorials/mobilenetv3_prod/Step1-5/result/loss_ref.npy
--- a/tutorials/mobilenetv3_prod/Step1-5/result/losses_paddle.npy
+++ b/tutorials/mobilenetv3_prod/Step1-5/result/losses_paddle.npy
--- a/tutorials/mobilenetv3_prod/Step1-5/result/losses_ref.npy
+++ b/tutorials/mobilenetv3_prod/Step1-5/result/losses_ref.npy
--- a/tutorials/mobilenetv3_prod/Step1-5/result/metric_paddle.npy
+++ b/tutorials/mobilenetv3_prod/Step1-5/result/metric_paddle.npy
--- a/tutorials/mobilenetv3_prod/Step1-5/result/metric_ref.npy
+++ b/tutorials/mobilenetv3_prod/Step1-5/result/metric_ref.npy
--- a/tutorials/mobilenetv3_prod/Step1-5/utilities.py
+++ b/tutorials/mobilenetv3_prod/Step1-5/utilities.py
+import numpy as np
+
+
+def gen_fake_data():
+    """Write one random fake input batch and its label to the current directory.
+
+    ``fake_data.npy`` holds a float32 array of shape ``(1, 3, 224, 224)`` drawn
+    with ``np.random.rand`` and shifted by ``-0.5``; ``fake_label.npy`` holds
+    the int64 array ``[0]``.
+    """
+    outputs = {
+        "fake_data.npy":
+        np.random.rand(1, 3, 224, 224).astype(np.float32) - 0.5,
+        "fake_label.npy": np.arange(1).astype(np.int64),
+    }
+    for filename, array in outputs.items():
+        np.save(filename, array)