未验证 提交 c7ba8c44 编写于 作者: S shiyutang 提交者: GitHub

[Feature] Add module test process of mobilenetv3 (#5442)

* add_readme

* update

* update_dir

* add_falsely_delete_README

* update
上级 fbccf996
import torch
import paddle
import numpy as np
from reprod_log import ReprodDiffHelper
from reprod_log import ReprodLogger
from mobilenetv3_paddle.paddlevision.models import mobilenet_v3_small as mv3_small_paddle
from mobilenetv3_ref.torchvision.models import mobilenet_v3_small as mv3_small_torch
def test_forward():
    """Run the Paddle and torch MobileNetV3-small models on one fake input.

    Both models are put in eval mode and restored from the weight files under
    ``./data``. The logits of each model are stored under the key ``"logits"``
    and written to ``./result/forward_paddle.npy`` and
    ``./result/forward_ref.npy`` respectively.
    """
    # Paddle side: build the network, switch to eval, then restore weights.
    net_paddle = mv3_small_paddle()
    net_paddle.eval()
    net_paddle.set_dict(paddle.load("./data/mv3_small_paddle.pdparams"))

    # Torch reference side, prepared in the same order.
    net_torch = mv3_small_torch()
    net_torch.eval()
    net_torch.load_state_dict(
        torch.load("./data/mobilenet_v3_small-047dcff4.pth"))

    fake_input = np.load("./data/fake_data.npy")
    recorder = ReprodLogger()

    # Paddle logits are recorded and saved first.
    logits_paddle = net_paddle(paddle.to_tensor(fake_input, dtype="float32"))
    recorder.add("logits", logits_paddle.cpu().detach().numpy())
    recorder.save("./result/forward_paddle.npy")

    # The same logger instance and key are reused for the torch logits.
    logits_torch = net_torch(torch.tensor(fake_input, dtype=torch.float32))
    recorder.add("logits", logits_torch.cpu().detach().numpy())
    recorder.save("./result/forward_ref.npy")
if __name__ == "__main__":
    # Produce both dumps, then compare them and write the diff report.
    test_forward()

    checker = ReprodDiffHelper()
    # The reference (torch) dump is loaded before the Paddle dump.
    reference_info = checker.load_info("./result/forward_ref.npy")
    candidate_info = checker.load_info("./result/forward_paddle.npy")

    checker.compare_info(reference_info, candidate_info)
    checker.report(path="./result/log/forward_diff.log")
import os
import sys
import torch
import paddle
import numpy as np
from PIL import Image
from reprod_log import ReprodLogger, ReprodDiffHelper
import mobilenetv3_paddle.presets as presets_paddle
import mobilenetv3_paddle.paddlevision as paddlevision
import mobilenetv3_ref.presets as presets_torch
import mobilenetv3_ref.torchvision as torchvision
def build_paddle_data_pipeline():
    """Create the Paddle evaluation dataset and a sequential loader over it.

    Returns:
        tuple: ``(dataset, data_loader)`` built from ``./lite_data/val/`` with
        the evaluation preset (crop 224, resize 256) and batches of 4.
    """
    eval_preset = presets_paddle.ClassificationPresetEval(
        crop_size=224, resize_size=256)
    val_dataset = paddlevision.datasets.ImageFolder("./lite_data/val/",
                                                    eval_preset)

    # Samples are visited in order so batches line up with the torch loader.
    ordered_sampler = paddle.io.SequenceSampler(val_dataset)
    batches = paddle.io.BatchSampler(sampler=ordered_sampler, batch_size=4)

    val_loader = paddle.io.DataLoader(
        val_dataset, batch_sampler=batches, num_workers=0)
    return val_dataset, val_loader
def build_torch_data_pipeline():
    """Create the torch reference evaluation dataset and a sequential loader.

    Returns:
        tuple: ``(dataset, data_loader)`` built from ``./lite_data/val/`` with
        the evaluation preset (crop 224, resize 256) and batches of 4.
    """
    eval_preset = presets_torch.ClassificationPresetEval(
        crop_size=224, resize_size=256)
    val_dataset = torchvision.datasets.ImageFolder(
        "./lite_data/val/", eval_preset, is_valid_file=None)

    # Sequential order keeps the batches comparable with the Paddle loader.
    ordered_sampler = torch.utils.data.SequentialSampler(val_dataset)
    loader_options = dict(
        batch_size=4,
        sampler=ordered_sampler,
        num_workers=0,
        pin_memory=True)
    val_loader = torch.utils.data.DataLoader(val_dataset, **loader_options)
    return val_dataset, val_loader
def test_data_pipeline():
    """Record dataset length and the first batches from both data pipelines.

    The dataset lengths are stored under ``"length"`` and the image tensor of
    each of the first five batches under ``"dataloader_<idx>"``. Results are
    written to ``./result/data_paddle.npy`` and ``./result/data_ref.npy``.
    """
    paddle_dataset, paddle_dataloader = build_paddle_data_pipeline()
    torch_dataset, torch_dataloader = build_torch_data_pipeline()

    logger_paddle_data = ReprodLogger()
    logger_torch_data = ReprodLogger()

    logger_paddle_data.add("length", np.array(len(paddle_dataset)))
    logger_torch_data.add("length", np.array(len(torch_dataset)))

    paired_batches = zip(paddle_dataloader, torch_dataloader)
    for idx, batch_pair in enumerate(paired_batches):
        # Only batches 0..4 are recorded.
        if idx >= 5:
            break
        paddle_batch, torch_batch = batch_pair
        key = f"dataloader_{idx}"
        # Element 0 of each batch is the one being compared.
        logger_paddle_data.add(key, paddle_batch[0].numpy())
        logger_torch_data.add(key, torch_batch[0].detach().cpu().numpy())

    logger_paddle_data.save("./result/data_paddle.npy")
    logger_torch_data.save("./result/data_ref.npy")
if __name__ == "__main__":
    # Dump both pipelines' outputs, then compare them and write the report.
    test_data_pipeline()

    checker = ReprodDiffHelper()
    # The reference (torch) dump is loaded before the Paddle dump.
    reference_info = checker.load_info("./result/data_ref.npy")
    candidate_info = checker.load_info("./result/data_paddle.npy")

    checker.compare_info(reference_info, candidate_info)
    checker.report(path="./result/log/data_diff.log")
# add test metric code paddle vs torch
import torch
import paddle
import numpy as np
from reprod_log import ReprodLogger
from reprod_log import ReprodDiffHelper
from mobilenetv3_paddle.paddlevision.models import mobilenet_v3_small as mv3_small_paddle
from mobilenetv3_ref.torchvision.models import mobilenet_v3_small as mv3_small_torch
from mobilenetv3_ref import accuracy_torch
from mobilenetv3_paddle import accuracy_paddle
def evaluate(image, labels, model, acc, tag, reprod_logger):
    """Run ``model`` on ``image`` and record its top-1 / top-5 accuracy.

    Args:
        image: Input batch passed straight to ``model``.
        labels: Ground-truth labels passed to ``acc``.
        model: Network to evaluate; it is switched to eval mode first.
        acc: Accuracy callable invoked as ``acc(output, labels, topk=(1, 5))``.
        tag: Suffix used in the output file name ``./result/metric_<tag>.npy``.
        reprod_logger: Logger receiving the ``acc_top1`` / ``acc_top5`` entries.
    """
    model.eval()
    logits = model(image)
    topk_scores = acc(logits, labels, topk=(1, 5))

    # Entry 0 is recorded as top-1 and entry 1 as top-5, in that order.
    for position, key in enumerate(("acc_top1", "acc_top5")):
        reprod_logger.add(key, np.array(topk_scores[position]))

    reprod_logger.save("./result/metric_{}.npy".format(tag))
def test_forward():
    """Compute accuracy with both frameworks on the same fake batch.

    Both models are put in eval mode and restored from the weight files under
    ``./data``; ``evaluate`` then writes ``./result/metric_paddle.npy`` and
    ``./result/metric_ref.npy``.
    """
    # load paddle model
    paddle_model = mv3_small_paddle()
    paddle_model.eval()
    paddle_state_dict = paddle.load("./data/mv3_small_paddle.pdparams")
    paddle_model.set_dict(paddle_state_dict)

    # load torch model
    torch_model = mv3_small_torch()
    torch_model.eval()
    torch_state_dict = torch.load("./data/mobilenet_v3_small-047dcff4.pth")
    torch_model.load_state_dict(torch_state_dict)

    # prepare logger & load data
    reprod_logger = ReprodLogger()
    inputs = np.load("./data/fake_data.npy")
    labels = np.load("./data/fake_label.npy")

    # The tensors are built directly in the calls below; the previously
    # unused ``image`` / ``target`` duplicates were removed.
    evaluate(
        paddle.to_tensor(inputs, dtype="float32"),
        paddle.to_tensor(labels, dtype="int64"),
        paddle_model,
        accuracy_paddle,
        'paddle',
        reprod_logger)

    # The same logger instance is reused for the torch reference run.
    evaluate(
        torch.tensor(inputs, dtype=torch.float32),
        torch.tensor(labels, dtype=torch.int64),
        torch_model,
        accuracy_torch,
        'ref',
        reprod_logger)
if __name__ == "__main__":
    # Dump both metric results, then compare them and write the report.
    test_forward()

    checker = ReprodDiffHelper()
    # The reference (torch) dump is loaded before the Paddle dump.
    reference_info = checker.load_info("./result/metric_ref.npy")
    candidate_info = checker.load_info("./result/metric_paddle.npy")

    checker.compare_info(reference_info, candidate_info)
    checker.report(path="./result/log/metric_diff.log")
# add loss comparing code
import torch
import paddle
import numpy as np
from reprod_log import ReprodLogger
from reprod_log import ReprodDiffHelper
from mobilenetv3_paddle.paddlevision.models import mobilenet_v3_small as mv3_small_paddle
from mobilenetv3_ref.torchvision.models import mobilenet_v3_small as mv3_small_torch
def test_forward():
    """Compute the cross-entropy loss with both frameworks on one fake batch.

    Both models are put in eval mode and restored from the weight files under
    ``./data``. Each loss is stored under the key ``"loss"`` and written to
    ``./result/loss_paddle.npy`` and ``./result/loss_ref.npy`` respectively.
    """
    # Loss functions are created before the models, as in the original flow.
    loss_fn_paddle = paddle.nn.CrossEntropyLoss()
    loss_fn_torch = torch.nn.CrossEntropyLoss()

    # Paddle side: build, switch to eval, restore weights.
    net_paddle = mv3_small_paddle()
    net_paddle.eval()
    net_paddle.set_dict(paddle.load("./data/mv3_small_paddle.pdparams"))

    # Torch reference side.
    net_torch = mv3_small_torch()
    net_torch.eval()
    net_torch.load_state_dict(
        torch.load("./data/mobilenet_v3_small-047dcff4.pth"))

    recorder = ReprodLogger()
    fake_input = np.load("./data/fake_data.npy")
    fake_label = np.load("./data/fake_label.npy")

    # Paddle loss is recorded and saved first.
    logits_paddle = net_paddle(paddle.to_tensor(fake_input, dtype="float32"))
    target_paddle = paddle.to_tensor(fake_label, dtype="int64")
    value_paddle = loss_fn_paddle(logits_paddle, target_paddle)
    recorder.add("loss", value_paddle.cpu().detach().numpy())
    recorder.save("./result/loss_paddle.npy")

    # The same logger instance and key are reused for the torch loss.
    logits_torch = net_torch(torch.tensor(fake_input, dtype=torch.float32))
    target_torch = torch.tensor(fake_label, dtype=torch.int64)
    value_torch = loss_fn_torch(logits_torch, target_torch)
    recorder.add("loss", value_torch.cpu().detach().numpy())
    recorder.save("./result/loss_ref.npy")
if __name__ == "__main__":
    # Dump both loss values, then compare them and write the report.
    test_forward()

    checker = ReprodDiffHelper()
    # The reference (torch) dump is loaded before the Paddle dump.
    reference_info = checker.load_info("./result/loss_ref.npy")
    candidate_info = checker.load_info("./result/loss_paddle.npy")

    checker.compare_info(reference_info, candidate_info)
    checker.report(path="./result/log/loss_diff.log")
import paddle
import numpy as np
import torch
import torch.optim.lr_scheduler as lr_scheduler
from reprod_log import ReprodLogger
from reprod_log import ReprodDiffHelper
from mobilenetv3_paddle.paddlevision.models import mobilenet_v3_small as mv3_small_paddle
from mobilenetv3_ref.torchvision.models import mobilenet_v3_small as mv3_small_torch
def train_one_epoch_paddle(inputs, labels, model, criterion, optimizer,
                           lr_scheduler, max_iter, reprod_logger):
    """Run ``max_iter`` optimisation steps in Paddle on one fixed batch.

    Args:
        inputs: Array converted to a float32 Paddle tensor and fed to ``model``.
        labels: Array converted to an int64 Paddle tensor used as the target.
        model: Paddle network being optimised.
        criterion: Loss callable invoked as ``criterion(output, target)``.
        optimizer: Paddle optimizer that updates ``model``.
        lr_scheduler: Scheduler whose ``get_lr()`` value is logged each step.
        max_iter: Number of iterations to run.
        reprod_logger: Logger receiving ``loss_<idx>`` and ``lr_<idx>`` entries;
            it is saved to ``./result/losses_paddle.npy`` at the end.
    """
    # The same batch is used for every iteration, so convert it only once.
    image = paddle.to_tensor(inputs, dtype="float32")
    target = paddle.to_tensor(labels, dtype="int64")

    for idx in range(max_iter):
        output = model(image)
        loss = criterion(output, target)

        # Log before the update so entry ``idx`` reflects the weights that
        # produced this forward pass.
        reprod_logger.add("loss_{}".format(idx), loss.cpu().detach().numpy())
        reprod_logger.add("lr_{}".format(idx), np.array(lr_scheduler.get_lr()))

        optimizer.clear_grad()
        loss.backward()
        optimizer.step()
        # NOTE: the scheduler is deliberately not stepped here; the original
        # code had ``lr_scheduler.step()`` disabled.

    reprod_logger.save("./result/losses_paddle.npy")
def train_one_epoch_torch(inputs, labels, model, criterion, optimizer,
                          lr_scheduler, max_iter, reprod_logger):
    """Run ``max_iter`` optimisation steps in torch on one fixed batch.

    Args:
        inputs: Array converted to a float32 CUDA tensor and fed to ``model``.
        labels: Array converted to an int64 CUDA tensor used as the target.
        model: torch network being optimised; it is moved to CUDA.
        criterion: Loss callable invoked as ``criterion(output, target)``.
        optimizer: torch optimizer that updates ``model``.
        lr_scheduler: Scheduler whose ``get_last_lr()`` value is logged each
            step.
        max_iter: Number of iterations to run.
        reprod_logger: Logger receiving ``loss_<idx>`` and ``lr_<idx>`` entries;
            it is saved to ``./result/losses_ref.npy`` at the end.
    """
    # The batch and the device placement never change between iterations, so
    # do the conversions and the transfer once instead of on every step.
    image = torch.tensor(inputs, dtype=torch.float32).cuda()
    target = torch.tensor(labels, dtype=torch.int64).cuda()
    model = model.cuda()

    for idx in range(max_iter):
        output = model(image)
        loss = criterion(output, target)

        # Log before the update so entry ``idx`` reflects the weights that
        # produced this forward pass.
        reprod_logger.add("loss_{}".format(idx), loss.cpu().detach().numpy())
        reprod_logger.add("lr_{}".format(idx),
                          np.array(lr_scheduler.get_last_lr()))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # NOTE: the scheduler is deliberately not stepped here; the original
        # code had ``lr_scheduler.step()`` disabled.

    reprod_logger.save("./result/losses_ref.npy")
def test_backward():
    """Train both models for a few steps on one fake batch and log the losses.

    The Paddle and torch models are restored from the weight files under
    ``./data``, given matching momentum-SGD optimizers and step-decay
    schedulers, and each is run for ``max_iter`` iterations. The per-step loss
    and learning rate end up in ``./result/losses_paddle.npy`` and
    ``./result/losses_ref.npy``.
    """
    max_iter = 3
    lr = 1e-3
    momentum = 0.9
    lr_gamma = 0.1

    # set deterministic flags
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # A bare ``FLAGS_cudnn_deterministic = True`` only binds a local name and
    # has no effect on Paddle; the flag has to go through paddle.set_flags.
    paddle.set_flags({"FLAGS_cudnn_deterministic": True})

    # load paddle model
    paddle.set_device("gpu")
    paddle_model = mv3_small_paddle(dropout=0.0)
    paddle_model.eval()
    paddle_state_dict = paddle.load("./data/mv3_small_paddle.pdparams")
    paddle_model.set_dict(paddle_state_dict)

    # load torch model
    torch_model = mv3_small_torch(dropout=0.0)
    torch_model.eval()
    torch_state_dict = torch.load("./data/mobilenet_v3_small-047dcff4.pth")
    torch_model.load_state_dict(torch_state_dict, strict=False)

    # init loss
    criterion_paddle = paddle.nn.CrossEntropyLoss()
    criterion_torch = torch.nn.CrossEntropyLoss()

    # init optimizer
    # The Paddle scheduler is only passed to the training loop for logging;
    # the optimizer itself is given the constant ``lr``.
    lr_scheduler_paddle = paddle.optimizer.lr.StepDecay(
        lr, step_size=max_iter // 3, gamma=lr_gamma)
    opt_paddle = paddle.optimizer.Momentum(
        learning_rate=lr,
        momentum=momentum,
        parameters=paddle_model.parameters())

    opt_torch = torch.optim.SGD(torch_model.parameters(),
                                lr=lr,
                                momentum=momentum)
    lr_scheduler_torch = lr_scheduler.StepLR(
        opt_torch, step_size=max_iter // 3, gamma=lr_gamma)

    # prepare logger & load data
    reprod_logger = ReprodLogger()
    inputs = np.load("./data/fake_data.npy")
    labels = np.load("./data/fake_label.npy")

    train_one_epoch_paddle(inputs, labels, paddle_model, criterion_paddle,
                           opt_paddle, lr_scheduler_paddle, max_iter,
                           reprod_logger)

    # The same logger instance is reused for the torch reference run.
    train_one_epoch_torch(inputs, labels, torch_model, criterion_torch,
                          opt_torch, lr_scheduler_torch, max_iter,
                          reprod_logger)
if __name__ == "__main__":
    # Dump both training traces, then compare them and write the report.
    test_backward()

    checker = ReprodDiffHelper()
    # The reference (torch) dump is loaded before the Paddle dump.
    reference_info = checker.load_info("./result/losses_ref.npy")
    candidate_info = checker.load_info("./result/losses_paddle.npy")

    checker.compare_info(reference_info, candidate_info)
    checker.report(path="./result/log/backward_diff.log")
# MobileNetV3
## 目录
- [1. 简介](#1)
- [2. 复现流程](#2)
- [2.1 reprod_log简介](#2.1)
- [3. 准备数据与环境](#3)
- [3.1 准备环境](#3.1)
- [3.2 生成伪数据](#3.2)
- [3.3 准备模型](#3.3)
- [4. 开始使用](#4)
- [4.1 模型前向对齐](#4.1)
- [4.2 数据加载对齐](#4.2)
- [4.3 评估指标对齐](#4.3)
- [4.4 损失对齐](#4.4)
- [4.5 反向梯度对齐](#4.5)
- [4.6 训练对齐](#4.6)
<a name="1"></a>
## 1. 简介
本部分内容包含基于 [MobileNetV3](https://arxiv.org/abs/1905.02244) 的复现对齐过程,可以结合[论文复现指南]()进行学习。
<a name="2"></a>
## 2. 复现流程
在论文复现中我们可以根据网络训练的流程,将对齐流程划分为数据加载对齐、模型前向对齐、评估指标对齐、反向梯度对齐和训练对齐。其中不同对齐部分我们会在下方详细介绍。
在对齐验证的流程中,我们依靠 reprod_log 日志工具查看 paddle 和官方同样输入下的输出是否相同,这样的查看方式具有标准统一,比较过程方便等优势。
<a name="2.1"></a>
### 2.1 reprod_log 简介
Reprod_log 是一个用于 numpy 数据记录和对比的工具,通过传入需要对比的两个 numpy 数组,就可以在指定的规则下得到数据之差是否满足期望的结论。其主要接口的说明可以参考它的 [github 主页](https://github.com/WenmuZhou/reprod_log)。
<a name="3"></a>
## 3. 准备数据和环境
在进行我们的对齐验证之前,我们需要准备运行环境、用于输入的伪数据、paddle 模型参数和官方模型权重参数。
<a name="3.1"></a>
### 3.1 准备环境
* 克隆本项目
```bash
git clone https://github.com/PaddlePaddle/models.git
cd models/tutorials/mobilenetv3_prod/
```
* 安装paddlepaddle
```bash
# 需要安装2.2及以上版本的Paddle
# 安装GPU版本的Paddle
pip install paddlepaddle-gpu==2.2.0
# 安装CPU版本的Paddle
pip install paddlepaddle==2.2.0
```
更多版本或者环境下的安装可以参考:[Paddle安装指南](https://www.paddlepaddle.org.cn/)
* 安装requirements
```bash
pip install -r requirements.txt
```
<a name="3.2"></a>
### 3.2 生成伪数据
为了保证模型对齐不会受到输入数据的影响,我们生成一组数据作为两个模型的输入。
伪数据可以通过如下代码生成,我们在本地目录下也提供了生成好的伪数据(./data/fake_*.npy)。
```python
def gen_fake_data():
fake_data = np.random.rand(1, 3, 224, 224).astype(np.float32) - 0.5
fake_label = np.arange(1).astype(np.int64)
np.save("fake_data.npy", fake_data)
np.save("fake_label.npy", fake_label)
```
<a name="3.3"></a>
### 3.3 准备模型
为了保证模型前向对齐不受到模型参数不一致的影响,我们使用相同的权重参数对模型进行初始化。
生成相同权重参数分为以下 2 步:
1. 随机初始化官方模型参数并保存成 mobilenet_v3_small-047dcff4.pth
2. 将 mobilenet_v3_small-047dcff4.pth 通过 ./torch2paddle.py 转换生成 mv3_small_paddle.pdparams
转换模型时,torch 和 paddle 存在参数需要转换的部分,主要是bn层、全连接层、num_batches_tracked等,可以参见转换脚本(./torch2paddle.py)。
<a name="4"></a>
## 4. 开始使用
准备好数据之后,我们通过下面对应训练流程的拆解步骤进行复现对齐。
<a name="4.1"></a>
### 4.1 模型前向对齐
论文复现中,最重要的一步是前向对齐的验证,验证流程如下图所示:
<div align="center">
<img src="./images/forward.png" width="500">
</div>
这里,为了判断模型组网部分能获得和原论文同样的输出,我们将两个模型参数固定,并输入相同伪数据,观察 paddle 模型产出的 logit 是否和官方模型一致。
我们的示例代码如下所示:
```python
def test_forward():
# load paddle model
paddle_model = mv3_small_paddle()
paddle_model.eval()
paddle_state_dict = paddle.load("./data/mv3_small_paddle.pdparams")
paddle_model.set_dict(paddle_state_dict)
# load torch model
torch_model = mv3_small_torch()
torch_model.eval()
torch_state_dict = torch.load("./data/mobilenet_v3_small-047dcff4.pth")
torch_model.load_state_dict(torch_state_dict)
# load data
inputs = np.load("./data/fake_data.npy")
# save the paddle output
reprod_logger = ReprodLogger()
paddle_out = paddle_model(paddle.to_tensor(inputs, dtype="float32"))
reprod_logger.add("logits", paddle_out.cpu().detach().numpy())
reprod_logger.save("./result/forward_paddle.npy")
# save the torch output
torch_out = torch_model(torch.tensor(inputs, dtype=torch.float32))
reprod_logger.add("logits", torch_out.cpu().detach().numpy())
reprod_logger.save("./result/forward_ref.npy")
```
可以看到,我们在代码中加载准备的相同的模型参数、并固定输入,从而获得两个模型的输出。输出结果使用相同的 key 值存到 numpy 文件中,随后使用下列代码加载并比较:
```python
# load data
diff_helper = ReprodDiffHelper()
torch_info = diff_helper.load_info("./result/forward_ref.npy")
paddle_info = diff_helper.load_info("./result/forward_paddle.npy")
# compare result and produce log
diff_helper.compare_info(torch_info, paddle_info)
diff_helper.report(path="./result/log/forward_diff.log")
```
在代码示例中也可以学习到 reprod_log 的主要接口,包含 add、save、load_info、compare_info、report 的用法。
**运行文件**
通过运行以下代码,我们验证前向对齐效果。
```bash
cd models/tutorials/mobilenetv3_prod/
python 01_test_forward.py
```
**获得结果**
根据示例代码可以看到,我们将结果保存在`result/log/forward_diff.log`中,打开对应文件或者直接观察命令行输出,就会有下列结果:
```bash
[2021/12/21 15:00:38] root INFO: logits:
[2021/12/21 15:00:38] root INFO: mean diff: check passed: False, value: 2.308018565599923e-06
[2021/12/21 15:00:38] root INFO: diff check failed
```
这里我们发现在`reprod_log`默认的平均差异小于1e-6的标准下,当前前向对齐是不符合条件的,但是这是由于前向 op 计算导致的微小的差异。
一般说来前向误差在 1e-5 左右都是可以接受的,到这里我们就验证了网络的前向是对齐的,完成了第一个打卡点。
<a name="4.2"></a>
### 4.2 数据加载对齐
在验证了模型的前向对齐之后,我们验证数据读取部分,这一部分,我们比较从数据读取到模型传入之间我们进行的操作是否和参考操作一致。
主要代码如下所示,我们读取相同的输入,比较数据增强后输出之间的差异,即可知道我们的数据增强是否和参考实现保持一致:
```python
def build_torch_data_pipeline():
dataset_test = torchvision.datasets.ImageFolder(
"./lite_data/val/",
presets_torch.ClassificationPresetEval(
crop_size=224, resize_size=256), is_valid_file=None)
test_sampler = torch.utils.data.SequentialSampler(dataset_test)
data_loader_test = torch.utils.data.DataLoader(
dataset_test,
batch_size=4,
sampler=test_sampler,
num_workers=0,
pin_memory=True)
return dataset_test, data_loader_test
def test_data_pipeline():
paddle_dataset, paddle_dataloader = build_paddle_data_pipeline()
torch_dataset, torch_dataloader = build_torch_data_pipeline()
logger_paddle_data = ReprodLogger()
logger_torch_data = ReprodLogger()
logger_paddle_data.add("length", np.array(len(paddle_dataset)))
logger_torch_data.add("length", np.array(len(torch_dataset)))
for idx, (paddle_batch, torch_batch
) in enumerate(zip(paddle_dataloader, torch_dataloader)):
if idx >= 5:
break
logger_paddle_data.add(f"dataloader_{idx}", paddle_batch[0].numpy())
logger_torch_data.add(f"dataloader_{idx}",
torch_batch[0].detach().cpu().numpy())
logger_paddle_data.save("./result/data_paddle.npy")
logger_torch_data.save("./result/data_ref.npy")
```
**运行文件**
通过运行以下指令,我们进行测试,测试数据可以解压我们准备的 [lite_data.tar](https://github.com/PaddlePaddle/models/blob/release%2F2.2/tutorials/mobilenetv3_prod/Step6/test_images/lite_data.tar) 获得,对于自身的数据,也可以抽取几张 validationset 的图片用作验证。
```bash
cd models/tutorials/mobilenetv3_prod/
tar -xvf lite_data.tar
python 02_test_data.py
```
**获得结果**
运行文件之后,我们获得以下命令行输出,可以发现我们的验证结果满足预期,数据加载部分验证通过:
```bash
[2021/12/23 17:21:22] root INFO: length:
[2021/12/23 17:21:22] root INFO: mean diff: check passed: True, value: 0.0
[2021/12/23 17:21:22] root INFO: dataloader_0:
[2021/12/23 17:21:22] root INFO: mean diff: check passed: True, value: 0.0
[2021/12/23 17:21:22] root INFO: dataloader_1:
[2021/12/23 17:21:22] root INFO: mean diff: check passed: True, value: 0.0
[2021/12/23 17:21:22] root INFO: dataloader_2:
[2021/12/23 17:21:22] root INFO: mean diff: check passed: True, value: 0.0
[2021/12/23 17:21:22] root INFO: dataloader_3:
[2021/12/23 17:21:22] root INFO: mean diff: check passed: True, value: 0.0
[2021/12/23 17:21:22] root INFO: diff check passed
```
<a name="4.3"></a>
### 4.3 评估指标对齐
随后我们来到评估指标对齐,对齐流程如图所示:
<div align="center">
<img src="./images/metric.png" width="500">
</div>
这部分的对齐流程主要差异在于我们在模型基础上添加了对应参考代码实现 metric,并导入到测试文件中。在论文复现中,我们尽量将模型的不同部分封装起来,之后就可以通过我们这样导入的方式进行验证。
这部分的参考代码如下:
```python
def evaluate(image, labels, model, acc, tag, reprod_logger):
model.eval()
output = model(image)
accracy = acc(output, labels, topk=(1, 5))
reprod_logger.add("acc_top1", np.array(accracy[0]))
reprod_logger.add("acc_top5", np.array(accracy[1]))
reprod_logger.save("./result/metric_{}.npy".format(tag))
def test_forward():
# load model & data
evaluate(
paddle.to_tensor(
inputs, dtype="float32"),
paddle.to_tensor(
labels, dtype="int64"),
paddle_model,
accuracy_paddle,
'paddle', reprod_logger)
evaluate(
torch.tensor(
inputs, dtype=torch.float32),
torch.tensor(
labels, dtype=torch.int64),
torch_model,
accuracy_torch,
'ref', reprod_logger)
```
这部分模型和输入的导入均和之前一致,只是在之前的基础上增加了模型计算评估指标的部分。
由于我们之前验证了模型的输出一致。那么也就是评估指标的输入相同,我们只需要对比输出是否一致,即可确定评估指标的实现是否正确。
**运行文件**
通过运行以下代码,我们验证评估指标对齐效果。
```bash
cd models/tutorials/mobilenetv3_prod/
python 03_test_metric.py
```
**获得结果**
进入`result/log/metric_diff.log`中,就会有下列结果,而结果说明我们评估指标的实现正确, 从而完成第二个打卡点:
```bash
[2021/12/21 19:28:49] root INFO: acc_top1:
[2021/12/21 19:28:49] root INFO: mean diff: check passed: True, value: 0.0
[2021/12/21 19:28:49] root INFO: acc_top5:
[2021/12/21 19:28:49] root INFO: mean diff: check passed: True, value: 0.0
[2021/12/21 19:28:49] root INFO: diff check passed
```
<a name="4.4"></a>
### 4.4 损失对齐
进一步,我们验证损失实现的正确性,验证流程如下:
<div align="center">
<img src="./images/losses.png" width="500">
</div>
这部分的对齐流程主要差异在于我们在模型基础上添加了对应参考代码实现的 loss。这部分的参考代码如下:
```python
def test_forward():
# init loss
criterion_paddle = paddle.nn.CrossEntropyLoss()
criterion_torch = torch.nn.CrossEntropyLoss()
# load model & data
# save the paddle output
paddle_out = paddle_model(paddle.to_tensor(inputs, dtype="float32"))
loss_paddle = criterion_paddle(
paddle_out, paddle.to_tensor(
labels, dtype="int64"))
reprod_logger.add("loss", loss_paddle.cpu().detach().numpy())
reprod_logger.save("./result/loss_paddle.npy")
# save the torch output
torch_out = torch_model(torch.tensor(inputs, dtype=torch.float32))
loss_torch = criterion_torch(
torch_out, torch.tensor(
labels, dtype=torch.int64))
reprod_logger.add("loss", loss_torch.cpu().detach().numpy())
reprod_logger.save("./result/loss_ref.npy")
```
这部分代码进一步增加损失导入的部分,由于我们之前验证了模型的输出一致,也就是损失的输入相同,我们只需要对比输出是否一致,即可确定损失的实现是否正确。
**运行文件**
通过运行以下代码,我们验证损失对齐效果。
```bash
cd models/tutorials/mobilenetv3_prod/
python 04_test_loss.py
```
**获得结果**
进入`result/log/loss_diff.log`中,就会有下列结果,而结果说明我们损失的实现正确,完成第三个打卡点:
```bash
[2021/12/22 20:13:41] root INFO: loss:
[2021/12/22 20:13:41] root INFO: mean diff: check passed: True, value: 0.0
[2021/12/22 20:13:41] root INFO: diff check passed
```
<a name="4.5"></a>
### 4.5 反向梯度对齐
结合模型和损失,我们就可以验证反向过程,反向梯度传导的是否正确包含了优化器,学习率以及梯度的计算,而验证过程只需要多观察几轮损失即可明确反向是否正确传导,主要验证流程如下所示:
<div align="center">
<img src="./images/backward.png" width="500">
</div>
以上参考流程可以使用以下代码实现:
```python
def train_one_epoch_torch(inputs, labels, model, criterion, optimizer,
max_iter, reprod_logger):
for idx in range(max_iter):
image = torch.tensor(inputs, dtype=torch.float32).cuda()
target = torch.tensor(labels, dtype=torch.int64).cuda()
model = model.cuda()
output = model(image)
loss = criterion(output, target)
reprod_logger.add("loss_{}".format(idx), loss.cpu().detach().numpy())
optimizer.zero_grad()
loss.backward()
optimizer.step()
reprod_logger.save("./result/losses_ref.npy")
def test_backward():
max_iter = 3
lr = 1e-3
momentum = 0.9
# load model, loss, data
# init optimizer
opt_paddle = paddle.optimizer.Momentum(
learning_rate=lr,
momentum=momentum,
parameters=paddle_model.parameters())
opt_torch = torch.optim.SGD(torch_model.parameters(), lr=lr, momentum=momentum)
train_one_epoch_paddle(inputs, labels, paddle_model, criterion_paddle,
opt_paddle, max_iter, reprod_logger)
train_one_epoch_torch(inputs, labels, torch_model, criterion_torch,
opt_torch, max_iter, reprod_logger)
```
代码中增加了optimizer用于迭代网络参数,其他则基本一致。
**运行文件**
通过运行以下代码,我们验证反向传播对齐效果。
```bash
cd models/tutorials/mobilenetv3_prod/
python 05_test_backward.py
```
**获得结果**
进入`result/log/backward_diff.log`中,就会有下列结果,结果表示三轮损失的差异在 1e-6 附近,说明我们反向传播的实现对齐,完成第四个打卡点:
```bash
[2021/12/23 15:51:16] root INFO: loss_0:
[2021/12/23 15:51:16] root INFO: mean diff: check passed: False, value: 1.9073486328125e-06
[2021/12/23 15:51:16] root INFO: lr_0:
[2021/12/23 15:51:16] root INFO: mean diff: check passed: True, value: 0.0
[2021/12/23 15:51:16] root INFO: loss_1:
[2021/12/23 15:51:16] root INFO: mean diff: check passed: False, value: 2.384185791015625e-06
[2021/12/23 15:51:16] root INFO: lr_1:
[2021/12/23 15:51:16] root INFO: mean diff: check passed: True, value: 0.0
[2021/12/23 15:51:16] root INFO: loss_2:
[2021/12/23 15:51:16] root INFO: mean diff: check passed: False, value: 1.1920928955078125e-05
[2021/12/23 15:51:16] root INFO: lr_2:
[2021/12/23 15:51:16] root INFO: mean diff: check passed: True, value: 0.0
[2021/12/23 15:51:16] root INFO: diff check failed
```
<a name="4.6"></a>
### 4.6 训练对齐
通过以上步骤,我们验证了模型、数据、评估指标、损失、反向传播的正确性,也就为我们的训练对齐打下了良好的基础。
接下来,我们按照以下流程验证训练对齐结果,即对网络进行训练,并在训练后验证精度是否达到指标:
<div align="center">
<img src="./images/train.png" width="500">
</div>
我们可以使用 reprod_log 对比精度,也可以直接肉眼观察结果对比:
```python
if paddle.distributed.get_rank() == 0:
reprod_logger = ReprodLogger()
reprod_logger.add("top1", np.array([top1]))
reprod_logger.save("train_align_paddle.npy")
```
**运行文件**
```bash
cd models/tutorials/mobilenetv3_prod/Checkpoint6
python train.py
```
**获得结果**
最终训练精度超过原模型精度,我们的复现到这里就圆满结束,如果还有任何问题,欢迎随时向我们[提问](https://github.com/PaddlePaddle/Paddle/issues)
from .metric import accuracy_paddle
from .presets import *
\ No newline at end of file
import paddle
def accuracy_paddle(output, target, topk=(1, )):
    """Compute top-k accuracy for every ``k`` listed in ``topk``.

    Args:
        output: Model scores; its ``topk`` along axis 1 supplies the predicted
            indices.
        target: Ground-truth labels; ``target.shape[0]`` is used as the batch
            size.
        topk: Iterable of ``k`` values to evaluate.

    Returns:
        list: One entry per ``k``, equal to the number of matches found in the
        first ``k`` rows of the transposed top-k index tensor divided by the
        batch size.
    """
    with paddle.no_grad():
        num_samples = target.shape[0]
        largest_k = max(topk)

        # Keep only the indices (second result) of the sorted top-k call and
        # transpose them so that slicing ``[:k]`` selects the k best guesses.
        top_indices = output.topk(largest_k, 1, True, True)[1].t()

        # The match mask does not depend on k, so it is converted once.
        hits = top_indices.equal(target).astype(paddle.int32)

        return [
            hits[:k].flatten().sum(dtype='float32') / num_samples
            for k in topk
        ]
from .datasets import *
from .models import *
from .transforms import *
\ No newline at end of file
from .folder import ImageFolder, DatasetFolder
from .vision import VisionDataset
__all__ = ('ImageFolder', 'DatasetFolder', 'VisionDataset')
\ No newline at end of file
from .vision import VisionDataset
from PIL import Image
import os
import os.path
from typing import Any, Callable, cast, Dict, List, Optional, Tuple
def has_file_allowed_extension(filename: str,
                               extensions: Tuple[str, ...]) -> bool:
    """Check whether a file name ends with one of the allowed extensions.

    The comparison is done on the lower-cased file name, so the extensions
    are expected in lowercase.

    Args:
        filename (string): path to a file
        extensions (tuple of strings): extensions to consider (lowercase).
            A single string or any other iterable of strings (e.g. a list) is
            accepted as well.

    Returns:
        bool: True if the filename ends with one of the given extensions
    """
    # ``str.endswith`` only accepts a str or a tuple; normalise lists and
    # other iterables so they do not raise TypeError.
    suffixes = extensions if isinstance(extensions, str) else tuple(extensions)
    return filename.lower().endswith(suffixes)
def is_image_file(filename: str) -> bool:
    """Tell whether ``filename`` carries one of the ``IMG_EXTENSIONS`` suffixes.

    Args:
        filename (string): path to a file

    Returns:
        bool: result of ``has_file_allowed_extension`` for ``IMG_EXTENSIONS``
    """
    image_suffixes = IMG_EXTENSIONS
    return has_file_allowed_extension(filename, image_suffixes)
def find_classes(directory: str) -> Tuple[List[str], Dict[str, int]]:
    """List the class sub-directories of ``directory`` and number them.

    Args:
        directory (str): Root directory whose immediate sub-directories are
            the classes.

    Raises:
        FileNotFoundError: If ``directory`` contains no sub-directory.

    Returns:
        Tuple[List[str], Dict[str, int]]: The sorted class names and a mapping
        from each name to its position in that sorted list.
    """
    class_names = [
        entry.name for entry in os.scandir(directory) if entry.is_dir()
    ]
    class_names.sort()

    if len(class_names) == 0:
        raise FileNotFoundError(
            f"Couldn't find any class folder in {directory}.")

    index_by_name = dict(zip(class_names, range(len(class_names))))
    return class_names, index_by_name
def make_dataset(
        directory: str,
        class_to_idx: Optional[Dict[str, int]]=None,
        extensions: Optional[Tuple[str, ...]]=None,
        is_valid_file: Optional[Callable[[str], bool]]=None, ) -> List[Tuple[
            str, int]]:
    """Generate a list of samples of the form ``(path_to_sample, class_index)``.

    Args:
        directory (str): Root dataset directory; ``~`` is expanded.
        class_to_idx (dict, optional): Mapping from class name to class index.
            When None, it is computed with :func:`find_classes`.
        extensions (tuple of strings, optional): Allowed file extensions.
        is_valid_file (callable, optional): Predicate that receives the full
            path of a candidate file and returns whether to keep it.

    Exactly one of ``extensions`` and ``is_valid_file`` must be given.

    Raises:
        ValueError: If ``class_to_idx`` is given but empty, or if
            ``extensions`` and ``is_valid_file`` are both None or both set.

    Returns:
        List[Tuple[str, int]]: Samples ordered by class name, then by walked
        directory, then by file name.
    """
    directory = os.path.expanduser(directory)

    if class_to_idx is None:
        _, class_to_idx = find_classes(directory)
    elif not class_to_idx:
        raise ValueError(
            "'class_to_index' must have at least one entry to collect any samples."
        )

    # Exactly one of the two filters has to be supplied.
    if (extensions is None) == (is_valid_file is None):
        raise ValueError(
            "Both extensions and is_valid_file cannot be None or not None at the same time"
        )

    if extensions is not None:

        def is_valid_file(x: str) -> bool:
            return has_file_allowed_extension(
                x, cast(Tuple[str, ...], extensions))

    is_valid_file = cast(Callable[[str], bool], is_valid_file)

    instances = []
    for target_class in sorted(class_to_idx.keys()):
        class_index = class_to_idx[target_class]
        target_dir = os.path.join(directory, target_class)
        # Classes without a directory are skipped silently. No error is
        # raised for classes that end up with zero samples, so a partial
        # dataset still loads.
        if not os.path.isdir(target_dir):
            continue
        for root, _, fnames in sorted(os.walk(target_dir, followlinks=True)):
            for fname in sorted(fnames):
                path = os.path.join(root, fname)
                # Pass the full path (not just the bare file name) so that a
                # user-supplied predicate can actually open/inspect the file,
                # as the ``is_valid_file`` contract describes.
                if is_valid_file(path):
                    instances.append((path, class_index))
    return instances
class DatasetFolder(VisionDataset):
    """Generic dataset over a directory tree with one sub-directory per class.

    How classes are discovered can be customised by overriding
    :meth:`find_classes`; how samples are collected can be customised by
    overriding :meth:`make_dataset`.

    Args:
        root (string): Root directory path.
        loader (callable): Function that loads a sample given its path.
        extensions (tuple[string], optional): Allowed file extensions. Must
            not be combined with ``is_valid_file``.
        transform (callable, optional): Function applied to each loaded
            sample.
        target_transform (callable, optional): Function applied to each
            target.
        is_valid_file (callable, optional): Predicate deciding whether a file
            is kept. Must not be combined with ``extensions``.

    Attributes:
        classes (list): Class names as returned by :meth:`find_classes`.
        class_to_idx (dict): Mapping from class name to class index.
        samples (list): ``(sample path, class_index)`` tuples.
        targets (list): The class index of every entry in ``samples``.
    """

    def __init__(
            self,
            root: str,
            loader: Callable[[str], Any],
            extensions: Optional[Tuple[str, ...]]=None,
            transform: Optional[Callable]=None,
            target_transform: Optional[Callable]=None,
            is_valid_file: Optional[Callable[[str], bool]]=None, ) -> None:
        super().__init__(
            root, transform=transform, target_transform=target_transform)

        # Discover everything first, then publish it on the instance.
        found_classes, found_mapping = self.find_classes(self.root)
        found_samples = self.make_dataset(self.root, found_mapping,
                                          extensions, is_valid_file)

        self.loader = loader
        self.extensions = extensions

        self.classes = found_classes
        self.class_to_idx = found_mapping
        self.samples = found_samples
        self.targets = [class_index for _, class_index in found_samples]

    @staticmethod
    def make_dataset(
            directory: str,
            class_to_idx: Dict[str, int],
            extensions: Optional[Tuple[str, ...]]=None,
            is_valid_file: Optional[Callable[[str], bool]]=None, ) -> List[
                Tuple[str, int]]:
        """Collect ``(path_to_sample, class_index)`` pairs for the dataset.

        Subclasses may override this, e.g. to read samples from somewhere
        other than the file system. This implementation delegates to the
        module-level ``make_dataset`` function.

        Args:
            directory (str): Root dataset directory, i.e. ``self.root``.
            class_to_idx (Dict[str, int]): Mapping from class name to index;
                must not be None.
            extensions (optional): Allowed file extensions. Defaults to None.
            is_valid_file (optional): Predicate deciding whether a file is
                kept. Defaults to None.

        Raises:
            ValueError: If ``class_to_idx`` is None, plus whatever the
                module-level ``make_dataset`` raises for invalid arguments.

        Returns:
            List[Tuple[str, int]]: samples of a form (path_to_sample, class)
        """
        # Refuse None here: the module-level function would otherwise fall
        # back to the module-level find_classes() and bypass a potentially
        # overridden find_classes() method.
        if class_to_idx is None:
            raise ValueError("The class_to_idx parameter cannot be None.")

        collected = make_dataset(
            directory,
            class_to_idx,
            extensions=extensions,
            is_valid_file=is_valid_file)
        return collected

    def find_classes(self, directory: str) -> Tuple[List[str], Dict[str, int]]:
        """Find the class folders below ``directory``.

        The expected layout is one sub-directory per class::

            directory/
            |-- class_x/
            |   |-- xxx.ext
            |   `-- ...
            `-- class_y/
                |-- 123.ext
                `-- ...

        Override this method to consider only a subset of classes or to
        support a different directory structure. This implementation
        delegates to the module-level ``find_classes`` function.

        Args:
            directory(str): Root directory path, i.e. ``self.root``.

        Returns:
            (Tuple[List[str], Dict[str, int]]): All classes and a dictionary
            mapping each class to an index.
        """
        result = find_classes(directory)
        return result

    def __getitem__(self, index: int) -> Tuple[Any, Any]:
        """Load and return the ``index``-th ``(sample, target)`` pair.

        Args:
            index (int): Position in ``self.samples``.

        Returns:
            tuple: ``(sample, target)`` after the optional transforms.
        """
        sample_path, target = self.samples[index]
        sample = self.loader(sample_path)

        if self.transform is not None:
            sample = self.transform(sample)
        if self.target_transform is not None:
            target = self.target_transform(target)

        return sample, target

    def __len__(self) -> int:
        """Return the number of collected samples."""
        total = len(self.samples)
        return total
# Lowercase file-name suffixes treated as images. is_image_file() checks
# against this tuple, and ImageFolder passes it as ``extensions`` when no
# ``is_valid_file`` callable is supplied.
IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif',
'.tiff', '.webp')
def pil_loader(path: str) -> Image.Image:
    """Open the image stored at ``path`` with PIL and return it as RGB."""
    # The file is opened explicitly (rather than handing the path to PIL) to
    # avoid a ResourceWarning, see
    # https://github.com/python-pillow/Pillow/issues/835
    with open(path, 'rb') as handle:
        rgb_image = Image.open(handle).convert('RGB')
    return rgb_image
def default_loader(path: str) -> Any:
    """Load the image at ``path`` with the default (PIL based) loader."""
    image = pil_loader(path)
    return image
class ImageFolder(DatasetFolder):
    """Generic image dataset whose files are arranged like this by default: ::

        root/dog/xxx.png
        root/dog/xxy.png
        root/dog/[...]/xxz.png

        root/cat/123.png
        root/cat/nsdf3.png
        root/cat/[...]/asd932_.png

    Args:
        root (string): Root directory path.
        transform (callable, optional): A function/transform that takes in a
            PIL image and returns a transformed version.
            E.g, ``transforms.RandomCrop``
        target_transform (callable, optional): A function/transform that takes
            in the target and transforms it.
        loader (callable, optional): A function to load an image given its path.
        is_valid_file (callable, optional): A function that takes the path of
            an image file and checks whether it is a valid file (used to check
            for corrupt files).

    Attributes:
        classes (list): List of the class names sorted alphabetically.
        class_to_idx (dict): Dict with items (class_name, class_index).
        imgs (list): List of (image path, class_index) tuples
    """

    def __init__(
            self,
            root: str,
            transform: Optional[Callable]=None,
            target_transform: Optional[Callable]=None,
            loader: Callable[[str], Any]=default_loader,
            is_valid_file: Optional[Callable[[str], bool]]=None, ):
        # The extension filter is only passed when the caller did not provide
        # a custom ``is_valid_file`` predicate.
        extensions = IMG_EXTENSIONS if is_valid_file is None else None
        super().__init__(
            root,
            loader,
            extensions,
            transform=transform,
            target_transform=target_transform,
            is_valid_file=is_valid_file)
        # ``imgs`` is an alias of the ``samples`` list built by the base class.
        self.imgs = self.samples
import os
import paddle
from typing import Any, Callable, List, Optional, Tuple
class VisionDataset(paddle.io.Dataset):
    """
    Base class for making datasets which are compatible with our model.

    It is necessary to override the ``__getitem__`` and ``__len__`` methods.

    Args:
        root (string): Root directory of dataset.
        transforms (callable, optional): A function/transforms that takes in
            an image and a label and returns the transformed versions of both.
        transform (callable, optional): A function/transform that takes in a
            PIL image and returns a transformed version.
            E.g, ``transforms.RandomCrop``
        target_transform (callable, optional): A function/transform that takes
            in the target and transforms it.

    Raises:
        ValueError: If ``transforms`` is passed together with ``transform``
            and/or ``target_transform``.

    .. note::
        :attr:`transforms` and the combination of :attr:`transform` and
        :attr:`target_transform` are mutually exclusive.
    """
    # Number of spaces used to indent the body lines produced by __repr__.
    _repr_indent = 4

    def __init__(
            self,
            root: str,
            transforms: Optional[Callable]=None,
            transform: Optional[Callable]=None,
            target_transform: Optional[Callable]=None, ) -> None:
        # The second entry must be the ``bytes`` *type*. An instance such as
        # ``bytes()`` makes isinstance() raise TypeError as soon as ``root``
        # is anything other than a str (e.g. None or a bytes path).
        if isinstance(root, (str, bytes)):
            root = os.path.expanduser(root)
        self.root = root

        has_transforms = transforms is not None
        has_separate_transform = (transform is not None or
                                  target_transform is not None)
        if has_transforms and has_separate_transform:
            raise ValueError(
                "Only transforms or transform/target_transform can "
                "be passed as argument")

        # for backwards-compatibility
        self.transform = transform
        self.target_transform = target_transform

        # Separate transforms are wrapped so that ``self.transforms`` can be
        # called with (input, target) in both configurations.
        if has_separate_transform:
            transforms = StandardTransform(transform, target_transform)
        self.transforms = transforms

    def __getitem__(self, index: int) -> Any:
        """
        Args:
            index (int): Index

        Returns:
            (Any): Sample and meta data, optionally transformed by the
                respective transforms.
        """
        raise NotImplementedError

    def __len__(self) -> int:
        """Return the number of samples; must be provided by subclasses."""
        raise NotImplementedError

    def __repr__(self) -> str:
        """Build a multi-line summary: class name, size, root, extras, transforms."""
        head = "Dataset " + self.__class__.__name__
        body = ["Number of datapoints: {}".format(self.__len__())]
        if self.root is not None:
            body.append("Root location: {}".format(self.root))
        body += self.extra_repr().splitlines()
        if hasattr(self, "transforms") and self.transforms is not None:
            body += [repr(self.transforms)]
        lines = [head] + [" " * self._repr_indent + line for line in body]
        return '\n'.join(lines)

    def _format_transform_repr(self, transform: Callable,
                               head: str) -> List[str]:
        """Prefix the first repr line of ``transform`` with ``head`` and
        indent the remaining lines by ``len(head)`` spaces."""
        lines = transform.__repr__().splitlines()
        return (["{}{}".format(head, lines[0])] +
                ["{}{}".format(" " * len(head), line) for line in lines[1:]])

    def extra_repr(self) -> str:
        """Hook for subclasses: extra lines to include in ``__repr__``."""
        return ""
class StandardTransform(object):
    """Apply an optional input transform and an optional target transform.

    Args:
        transform (callable, optional): Applied to the input when not None.
        target_transform (callable, optional): Applied to the target when
            not None.
    """

    def __init__(self,
                 transform: Optional[Callable]=None,
                 target_transform: Optional[Callable]=None) -> None:
        self.transform = transform
        self.target_transform = target_transform

    def __call__(self, input: Any, target: Any) -> Tuple[Any, Any]:
        """Return ``(input, target)`` after running the configured transforms."""
        new_input = input if self.transform is None else self.transform(input)
        new_target = (target if self.target_transform is None else
                      self.target_transform(target))
        return new_input, new_target

    def _format_transform_repr(self, transform: Callable,
                               head: str) -> List[str]:
        """Prefix the first repr line with ``head`` and align the others."""
        repr_lines = transform.__repr__().splitlines()
        padding = " " * len(head)
        formatted = [f"{head}{repr_lines[0]}"]
        formatted.extend(f"{padding}{line}" for line in repr_lines[1:])
        return formatted

    def __repr__(self) -> str:
        """Class name followed by one labelled section per transform that is set."""
        parts = [self.__class__.__name__]
        labelled = ((self.transform, "Transform: "),
                    (self.target_transform, "Target transform: "))
        for candidate, label in labelled:
            if candidate is not None:
                parts += self._format_transform_repr(candidate, label)
        return '\n'.join(parts)
from .mobilenet_v3_paddle import mobilenet_v3_large, mobilenet_v3_small
from typing import Any, Callable, List, Optional, Sequence
import paddle
import paddle.nn as nn
class ConvNormActivation(nn.Sequential):
    """``nn.Conv2D`` followed by an optional norm layer and an optional activation.

    Args:
        in_channels (int): Input channels of the convolution.
        out_channels (int): Output channels of the convolution; also stored
            as ``self.out_channels``.
        kernel_size (int): Convolution kernel size.
        stride (int): Convolution stride.
        padding (int, optional): Convolution padding. When None it is set to
            ``(kernel_size - 1) // 2 * dilation``.
        groups (int): Convolution groups.
        norm_layer (callable, optional): Called with ``out_channels`` to build
            the layer placed after the convolution; None skips it.
        activation_layer (callable, optional): Called without arguments to
            build the final layer; None skips it.
        dilation (int): Convolution dilation.
        bias (bool, optional): Passed to the convolution as ``bias_attr``.
            When None it is enabled only if ``norm_layer`` is None.
    """

    def __init__(
            self,
            in_channels: int,
            out_channels: int,
            kernel_size: int=3,
            stride: int=1,
            padding: Optional[int]=None,
            groups: int=1,
            norm_layer: Optional[Callable[..., nn.Layer]]=nn.BatchNorm2D,
            activation_layer: Optional[Callable[..., nn.Layer]]=nn.ReLU,
            dilation: int=1,
            bias: Optional[bool]=None, ) -> None:
        if padding is None:
            padding = (kernel_size - 1) // 2 * dilation
        if bias is None:
            # No explicit choice: use a conv bias only when no norm follows.
            bias = norm_layer is None

        conv = nn.Conv2D(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            dilation=dilation,
            groups=groups,
            bias_attr=bias)
        stages = [conv]
        if norm_layer is not None:
            stages.append(norm_layer(out_channels))
        if activation_layer is not None:
            stages.append(activation_layer())

        super().__init__(*stages)
        self.out_channels = out_channels
class SqueezeExcitation(nn.Layer):
    """Squeeze-and-excitation block that rescales its input channel-wise.

    Args:
        input_channels (int): Channels of the input (and of the output scale).
        squeeze_channels (int): Channels of the intermediate 1x1 convolution.
        activation (callable): Builds the activation applied after ``fc1``.
        scale_activation (callable): Builds the activation applied after
            ``fc2`` to produce the scale.
    """

    def __init__(
            self,
            input_channels: int,
            squeeze_channels: int,
            activation: Callable[..., nn.Layer]=nn.ReLU,
            scale_activation: Callable[..., nn.Layer]=nn.Sigmoid, ) -> None:
        super().__init__()
        self.avgpool = nn.AdaptiveAvgPool2D(1)
        # Two 1x1 convolutions: reduce to squeeze_channels, then expand back.
        self.fc1 = nn.Conv2D(input_channels, squeeze_channels, 1)
        self.fc2 = nn.Conv2D(squeeze_channels, input_channels, 1)
        self.activation = activation()
        self.scale_activation = scale_activation()

    def _scale(self, input: paddle.Tensor) -> paddle.Tensor:
        """Compute the scale: avgpool -> fc1 -> activation -> fc2 -> scale_activation."""
        squeezed = self.fc1(self.avgpool(input))
        expanded = self.fc2(self.activation(squeezed))
        return self.scale_activation(expanded)

    def forward(self, input: paddle.Tensor) -> paddle.Tensor:
        """Return ``input`` multiplied by the scale computed from it."""
        return self._scale(input) * input
import warnings
from functools import partial
from typing import Any, Callable, List, Optional, Sequence
import paddle
import paddle.nn as nn
from .misc_paddle import ConvNormActivation, SqueezeExcitation as SElayer
__all__ = ["MobileNetV3", "mobilenet_v3_large", "mobilenet_v3_small"]
def _make_divisible(v: float, divisor: int,
min_value: Optional[int]=None) -> int:
"""
This function is taken from the original tf repo.
It ensures that all layers have a channel number that is divisible by 8
It can be seen here:
https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
"""
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * v:
new_v += divisor
return new_v
class SqueezeExcitation(SElayer):
    """SE block variant whose first activation is exposed as ``relu``.

    The squeeze width is derived from ``input_channels // squeeze_factor``
    (rounded with ``_make_divisible`` to a multiple of 8) and the scale
    activation is ``nn.Hardsigmoid``.

    Args:
        input_channels (int): Channels of the input.
        squeeze_factor (int): Divisor used to derive the squeeze channels.
    """

    def __init__(self, input_channels: int, squeeze_factor: int=4):
        squeeze_channels = _make_divisible(input_channels // squeeze_factor, 8)
        super().__init__(
            input_channels, squeeze_channels, scale_activation=nn.Hardsigmoid)
        # Rename the sub-layer: it is registered as ``relu`` and the original
        # ``activation`` attribute is removed.
        self.relu = self.activation
        delattr(self, "activation")

    def _scale(self, input: paddle.Tensor) -> paddle.Tensor:
        """Compute the channel scale using the renamed ``relu`` activation.

        The parent implementation reads ``self.activation``, which
        ``__init__`` deletes, so without this override every forward pass
        fails with AttributeError.
        """
        scale = self.avgpool(input)
        scale = self.fc1(scale)
        scale = self.relu(scale)
        scale = self.fc2(scale)
        return self.scale_activation(scale)
class InvertedResidualConfig:
    """Settings of one inverted-residual block.

    Stores the information listed at Tables 1 and 2 of the MobileNetV3 paper.
    The three channel counts are scaled by ``width_mult`` and rounded to a
    multiple of 8 via :meth:`adjust_channels`.

    Args:
        input_channels (int): Unscaled input channels.
        kernel (int): Kernel size, stored as given.
        expanded_channels (int): Unscaled expanded channels.
        out_channels (int): Unscaled output channels.
        use_se (bool): Stored as given.
        activation (str): ``"HS"`` sets ``use_hs`` to True; any other value
            sets it to False.
        stride (int): Stored as given.
        dilation (int): Stored as given.
        width_mult (float): Multiplier applied to the channel counts.
    """

    def __init__(
            self,
            input_channels: int,
            kernel: int,
            expanded_channels: int,
            out_channels: int,
            use_se: bool,
            activation: str,
            stride: int,
            dilation: int,
            width_mult: float, ):
        adjust = self.adjust_channels
        self.input_channels = adjust(input_channels, width_mult)
        self.kernel = kernel
        self.expanded_channels = adjust(expanded_channels, width_mult)
        self.out_channels = adjust(out_channels, width_mult)
        self.use_se = use_se
        self.use_hs = activation == "HS"
        self.stride = stride
        self.dilation = dilation

    @staticmethod
    def adjust_channels(channels: int, width_mult: float):
        """Scale ``channels`` by ``width_mult`` and round to a multiple of 8."""
        scaled = channels * width_mult
        return _make_divisible(scaled, 8)
class InvertedResidual(nn.Layer):
    """Inverted residual block, as described at section 5 of the MobileNetV3 paper.

    The block is: optional 1x1 expansion -> depthwise conv -> optional SE
    layer -> 1x1 projection (no activation). A residual connection is used
    when the stride is 1 and input/output channels match.

    Args:
        cnf (InvertedResidualConfig): Block settings.
        norm_layer (callable): Builds the normalization layers.
        se_layer (callable): Called with ``(expanded_channels,
            squeeze_channels)`` to build the SE layer when ``cnf.use_se``.

    Raises:
        ValueError: If ``cnf.stride`` is not 1 or 2.
    """

    def __init__(
            self,
            cnf: InvertedResidualConfig,
            norm_layer: Callable[..., nn.Layer],
            se_layer: Callable[..., nn.Layer]=partial(
                SElayer, scale_activation=nn.Hardsigmoid), ):
        super().__init__()
        if cnf.stride < 1 or cnf.stride > 2:
            raise ValueError("illegal stride value")

        self.use_res_connect = (cnf.stride == 1 and
                                cnf.input_channels == cnf.out_channels)

        act = nn.Hardswish if cnf.use_hs else nn.ReLU
        stages: List[nn.Layer] = []

        # expand (skipped when the expansion would not change the width)
        if cnf.expanded_channels != cnf.input_channels:
            expand = ConvNormActivation(
                cnf.input_channels,
                cnf.expanded_channels,
                kernel_size=1,
                norm_layer=norm_layer,
                activation_layer=act)
            stages.append(expand)

        # depthwise; a dilated block always uses stride 1
        depthwise_stride = 1 if cnf.dilation > 1 else cnf.stride
        depthwise = ConvNormActivation(
            cnf.expanded_channels,
            cnf.expanded_channels,
            kernel_size=cnf.kernel,
            stride=depthwise_stride,
            dilation=cnf.dilation,
            groups=cnf.expanded_channels,
            norm_layer=norm_layer,
            activation_layer=act)
        stages.append(depthwise)

        if cnf.use_se:
            squeeze_channels = _make_divisible(cnf.expanded_channels // 4, 8)
            stages.append(se_layer(cnf.expanded_channels, squeeze_channels))

        # project
        project = ConvNormActivation(
            cnf.expanded_channels,
            cnf.out_channels,
            kernel_size=1,
            norm_layer=norm_layer,
            activation_layer=None)
        stages.append(project)

        self.block = nn.Sequential(*stages)
        self.out_channels = cnf.out_channels
        self._is_cn = cnf.stride > 1

    def forward(self, input: paddle.Tensor) -> paddle.Tensor:
        """Run the block and add ``input`` when the residual path is enabled."""
        out = self.block(input)
        if not self.use_res_connect:
            return out
        out += input
        return out
class MobileNetV3(nn.Layer):
    """MobileNet V3 network: feature extractor, global pooling and classifier."""

    def __init__(
            self,
            inverted_residual_setting: List[InvertedResidualConfig],
            last_channel: int,
            num_classes: int=1000,
            block: Optional[Callable[..., nn.Layer]]=None,
            norm_layer: Optional[Callable[..., nn.Layer]]=None,
            dropout: float=0.2,
            **kwargs: Any, ) -> None:
        """
        MobileNet V3 main class

        Args:
            inverted_residual_setting (List[InvertedResidualConfig]): Network structure
            last_channel (int): The number of channels on the penultimate layer
            num_classes (int): Number of classes
            block (Optional[Callable[..., nn.Layer]]): Module specifying inverted residual building block for mobilenet.
                Defaults to ``InvertedResidual``.
            norm_layer (Optional[Callable[..., nn.Layer]]): Module specifying the normalization layer to use.
                Defaults to ``nn.BatchNorm2D`` with ``epsilon=0.001`` and ``momentum=0.99``.
            dropout (float): The dropout probability
            **kwargs: Accepted and ignored.

        Raises:
            ValueError: If ``inverted_residual_setting`` is empty.
            TypeError: If ``inverted_residual_setting`` is not a sequence of
                ``InvertedResidualConfig``.
        """
        super().__init__()

        if not inverted_residual_setting:
            raise ValueError(
                "The inverted_residual_setting should not be empty")
        elif not (isinstance(inverted_residual_setting, Sequence) and all(
                isinstance(s, InvertedResidualConfig)
                for s in inverted_residual_setting)):
            raise TypeError(
                "The inverted_residual_setting should be List[InvertedResidualConfig]"
            )

        if block is None:
            block = InvertedResidual

        if norm_layer is None:
            # Paddle's ``momentum`` is the weight of the *running* statistic
            # (moving = moving * momentum + batch * (1 - momentum)), the
            # opposite of the PyTorch convention. The reference model's
            # PyTorch momentum of 0.01 therefore corresponds to 0.99 here;
            # 0.01 would make the running statistics follow each batch.
            norm_layer = partial(nn.BatchNorm2D, epsilon=0.001, momentum=0.99)

        layers: List[nn.Layer] = []

        # building first layer
        firstconv_output_channels = inverted_residual_setting[0].input_channels
        layers.append(
            ConvNormActivation(
                3,
                firstconv_output_channels,
                kernel_size=3,
                stride=2,
                norm_layer=norm_layer,
                activation_layer=nn.Hardswish, ))

        # building inverted residual blocks
        for cnf in inverted_residual_setting:
            layers.append(block(cnf, norm_layer))

        # building last several layers
        lastconv_input_channels = inverted_residual_setting[-1].out_channels
        lastconv_output_channels = 6 * lastconv_input_channels
        layers.append(
            ConvNormActivation(
                lastconv_input_channels,
                lastconv_output_channels,
                kernel_size=1,
                norm_layer=norm_layer,
                activation_layer=nn.Hardswish, ))

        self.features = nn.Sequential(*layers)
        self.avgpool = nn.AdaptiveAvgPool2D(1)
        self.classifier = nn.Sequential(
            nn.Linear(lastconv_output_channels, last_channel),
            nn.Hardswish(),
            nn.Dropout(p=dropout),
            nn.Linear(last_channel, num_classes), )

    def forward(self, x: paddle.Tensor) -> paddle.Tensor:
        """Run features -> global average pool -> flatten -> classifier."""
        x = self.features(x)
        x = self.avgpool(x)
        # Flatten everything after the first axis before the linear layers.
        x = paddle.flatten(x, 1)
        x = self.classifier(x)
        return x
def _mobilenet_v3_conf(arch: str,
                       width_mult: float=1.0,
                       reduced_tail: bool=False,
                       dilated: bool=False,
                       **kwargs: Any):
    """Build the block settings and last channel count for an architecture.

    Args:
        arch (str): ``"mobilenet_v3_large"`` or ``"mobilenet_v3_small"``.
        width_mult (float): Channel multiplier given to every block config
            and used for the last channel count.
        reduced_tail (bool): When True the channel counts of the tail (C4
            blocks and the last channel) are halved.
        dilated (bool): When True the tail (C4) blocks use dilation 2.
        **kwargs: Accepted and ignored.

    Raises:
        ValueError: If ``arch`` is not one of the supported names.

    Returns:
        tuple: ``(inverted_residual_setting, last_channel)``.
    """
    tail_divider = 2 if reduced_tail else 1
    tail_dilation = 2 if dilated else 1

    make_block = partial(InvertedResidualConfig, width_mult=width_mult)
    scale_channels = partial(
        InvertedResidualConfig.adjust_channels, width_mult=width_mult)

    # Each row: (input_channels, kernel, expanded_channels, out_channels,
    #            use_se, activation, stride, dilation)
    if arch == "mobilenet_v3_large":
        tail_out = 160 // tail_divider
        tail_expanded = 960 // tail_divider
        rows = [
            (16, 3, 16, 16, False, "RE", 1, 1),
            (16, 3, 64, 24, False, "RE", 2, 1),  # C1
            (24, 3, 72, 24, False, "RE", 1, 1),
            (24, 5, 72, 40, True, "RE", 2, 1),  # C2
            (40, 5, 120, 40, True, "RE", 1, 1),
            (40, 5, 120, 40, True, "RE", 1, 1),
            (40, 3, 240, 80, False, "HS", 2, 1),  # C3
            (80, 3, 200, 80, False, "HS", 1, 1),
            (80, 3, 184, 80, False, "HS", 1, 1),
            (80, 3, 184, 80, False, "HS", 1, 1),
            (80, 3, 480, 112, True, "HS", 1, 1),
            (112, 3, 672, 112, True, "HS", 1, 1),
            (112, 5, 672, tail_out, True, "HS", 2, tail_dilation),  # C4
            (tail_out, 5, tail_expanded, tail_out, True, "HS", 1,
             tail_dilation),
            (tail_out, 5, tail_expanded, tail_out, True, "HS", 1,
             tail_dilation),
        ]
        head_channels = 1280 // tail_divider  # C5
    elif arch == "mobilenet_v3_small":
        tail_out = 96 // tail_divider
        tail_expanded = 576 // tail_divider
        rows = [
            (16, 3, 16, 16, True, "RE", 2, 1),  # C1
            (16, 3, 72, 24, False, "RE", 2, 1),  # C2
            (24, 3, 88, 24, False, "RE", 1, 1),
            (24, 5, 96, 40, True, "HS", 2, 1),  # C3
            (40, 5, 240, 40, True, "HS", 1, 1),
            (40, 5, 240, 40, True, "HS", 1, 1),
            (40, 5, 120, 48, True, "HS", 1, 1),
            (48, 5, 144, 48, True, "HS", 1, 1),
            (48, 5, 288, tail_out, True, "HS", 2, tail_dilation),  # C4
            (tail_out, 5, tail_expanded, tail_out, True, "HS", 1,
             tail_dilation),
            (tail_out, 5, tail_expanded, tail_out, True, "HS", 1,
             tail_dilation),
        ]
        head_channels = 1024 // tail_divider  # C5
    else:
        raise ValueError(f"Unsupported model type {arch}")

    inverted_residual_setting = [make_block(*row) for row in rows]
    last_channel = scale_channels(head_channels)
    return inverted_residual_setting, last_channel
def _mobilenet_v3(
        arch: str,
        inverted_residual_setting: List[InvertedResidualConfig],
        last_channel: int,
        pretrained: bool,
        progress: bool,
        **kwargs: Any, ):
    """Instantiate a ``MobileNetV3`` and optionally load saved weights.

    Args:
        arch (str): Architecture name; not used by this function.
        inverted_residual_setting (List[InvertedResidualConfig]): Network
            structure passed to ``MobileNetV3``.
        last_channel (int): Passed to ``MobileNetV3``.
        pretrained: When truthy, the value itself is handed to
            ``paddle.load`` and the result is applied with ``set_dict``. So
            despite the ``bool`` annotation it has to be something
            ``paddle.load`` accepts (presumably a path to a saved state dict).
        progress (bool): Not used by this function.
        **kwargs: Forwarded to the ``MobileNetV3`` constructor.

    Returns:
        MobileNetV3: The constructed model.
    """
    net = MobileNetV3(inverted_residual_setting, last_channel, **kwargs)
    if not pretrained:
        return net
    weights = paddle.load(pretrained)
    net.set_dict(weights)
    return net
def mobilenet_v3_large(pretrained: bool=False,
                       progress: bool=True,
                       **kwargs: Any) -> MobileNetV3:
    """
    Constructs a large MobileNetV3 architecture from
    `"Searching for MobileNetV3" <https://arxiv.org/abs/1905.02244>`_.

    Args:
        pretrained: Forwarded to ``_mobilenet_v3``, which loads weights with
            ``paddle.load(pretrained)`` when the value is truthy; a falsy
            value leaves the model with its freshly initialized weights.
        progress (bool): Forwarded to ``_mobilenet_v3``.
        **kwargs: Passed both to ``_mobilenet_v3_conf`` and to
            ``_mobilenet_v3``.
    """
    arch_name = "mobilenet_v3_large"
    settings, last_channel = _mobilenet_v3_conf(arch_name, **kwargs)
    model = _mobilenet_v3(arch_name, settings, last_channel, pretrained,
                          progress, **kwargs)
    return model
def mobilenet_v3_small(pretrained: bool=False,
                       progress: bool=True,
                       **kwargs: Any) -> MobileNetV3:
    """
    Constructs a small MobileNetV3 architecture from
    `"Searching for MobileNetV3" <https://arxiv.org/abs/1905.02244>`_.

    Args:
        pretrained: Forwarded to ``_mobilenet_v3``, which loads weights with
            ``paddle.load(pretrained)`` when the value is truthy; a falsy
            value leaves the model with its freshly initialized weights.
        progress (bool): Forwarded to ``_mobilenet_v3``.
        **kwargs: Passed both to ``_mobilenet_v3_conf`` and to
            ``_mobilenet_v3``.
    """
    arch_name = "mobilenet_v3_small"
    settings, last_channel = _mobilenet_v3_conf(arch_name, **kwargs)
    model = _mobilenet_v3(arch_name, settings, last_channel, pretrained,
                          progress, **kwargs)
    return model
import math
import paddle
from enum import Enum
from paddle import Tensor
from typing import List, Tuple, Optional
from . import functional as f
from .functional import InterpolationMode
__all__ = ["AutoAugmentPolicy", "AutoAugment"]
class AutoAugmentPolicy(Enum):
    """Names of the AutoAugment policies, one per dataset they were learned on.

    The available members are IMAGENET, CIFAR10 and SVHN.
    """

    IMAGENET = 'imagenet'
    CIFAR10 = 'cifar10'
    SVHN = 'svhn'
def _get_transforms(policy: AutoAugmentPolicy):
    """Return the sub-policy table of an AutoAugment policy.

    Each sub-policy is a pair of operations and each operation is a tuple
    ``(op_name, probability, magnitude_id)``; ``magnitude_id`` is None for
    operations that take no magnitude.

    Args:
        policy (AutoAugmentPolicy): Policy whose table is requested.

    Returns:
        list or None: The 25 sub-policies of ``policy``, or None when
            ``policy`` equals none of the known members.
    """
    if policy == AutoAugmentPolicy.IMAGENET:
        return [
            (("Posterize", 0.4, 8), ("Rotate", 0.6, 9)),
            (("Solarize", 0.6, 5), ("AutoContrast", 0.6, None)),
            (("Equalize", 0.8, None), ("Equalize", 0.6, None)),
            (("Posterize", 0.6, 7), ("Posterize", 0.6, 6)),
            (("Equalize", 0.4, None), ("Solarize", 0.2, 4)),
            (("Equalize", 0.4, None), ("Rotate", 0.8, 8)),
            (("Solarize", 0.6, 3), ("Equalize", 0.6, None)),
            (("Posterize", 0.8, 5), ("Equalize", 1.0, None)),
            (("Rotate", 0.2, 3), ("Solarize", 0.6, 8)),
            (("Equalize", 0.6, None), ("Posterize", 0.4, 6)),
            (("Rotate", 0.8, 8), ("Color", 0.4, 0)),
            (("Rotate", 0.4, 9), ("Equalize", 0.6, None)),
            (("Equalize", 0.0, None), ("Equalize", 0.8, None)),
            (("Invert", 0.6, None), ("Equalize", 1.0, None)),
            (("Color", 0.6, 4), ("Contrast", 1.0, 8)),
            (("Rotate", 0.8, 8), ("Color", 1.0, 2)),
            (("Color", 0.8, 8), ("Solarize", 0.8, 7)),
            (("Sharpness", 0.4, 7), ("Invert", 0.6, None)),
            (("ShearX", 0.6, 5), ("Equalize", 1.0, None)),
            (("Color", 0.4, 0), ("Equalize", 0.6, None)),
            (("Equalize", 0.4, None), ("Solarize", 0.2, 4)),
            (("Solarize", 0.6, 5), ("AutoContrast", 0.6, None)),
            (("Invert", 0.6, None), ("Equalize", 1.0, None)),
            (("Color", 0.6, 4), ("Contrast", 1.0, 8)),
            (("Equalize", 0.8, None), ("Equalize", 0.6, None)),
        ]

    if policy == AutoAugmentPolicy.CIFAR10:
        return [
            (("Invert", 0.1, None), ("Contrast", 0.2, 6)),
            (("Rotate", 0.7, 2), ("TranslateX", 0.3, 9)),
            (("Sharpness", 0.8, 1), ("Sharpness", 0.9, 3)),
            (("ShearY", 0.5, 8), ("TranslateY", 0.7, 9)),
            (("AutoContrast", 0.5, None), ("Equalize", 0.9, None)),
            (("ShearY", 0.2, 7), ("Posterize", 0.3, 7)),
            (("Color", 0.4, 3), ("Brightness", 0.6, 7)),
            (("Sharpness", 0.3, 9), ("Brightness", 0.7, 9)),
            (("Equalize", 0.6, None), ("Equalize", 0.5, None)),
            (("Contrast", 0.6, 7), ("Sharpness", 0.6, 5)),
            (("Color", 0.7, 7), ("TranslateX", 0.5, 8)),
            (("Equalize", 0.3, None), ("AutoContrast", 0.4, None)),
            (("TranslateY", 0.4, 3), ("Sharpness", 0.2, 6)),
            (("Brightness", 0.9, 6), ("Color", 0.2, 8)),
            (("Solarize", 0.5, 2), ("Invert", 0.0, None)),
            (("Equalize", 0.2, None), ("AutoContrast", 0.6, None)),
            (("Equalize", 0.2, None), ("Equalize", 0.6, None)),
            (("Color", 0.9, 9), ("Equalize", 0.6, None)),
            (("AutoContrast", 0.8, None), ("Solarize", 0.2, 8)),
            (("Brightness", 0.1, 3), ("Color", 0.7, 0)),
            (("Solarize", 0.4, 5), ("AutoContrast", 0.9, None)),
            (("TranslateY", 0.9, 9), ("TranslateY", 0.7, 9)),
            (("AutoContrast", 0.9, None), ("Solarize", 0.8, 3)),
            (("Equalize", 0.8, None), ("Invert", 0.1, None)),
            (("TranslateY", 0.7, 9), ("AutoContrast", 0.9, None)),
        ]

    if policy == AutoAugmentPolicy.SVHN:
        return [
            (("ShearX", 0.9, 4), ("Invert", 0.2, None)),
            (("ShearY", 0.9, 8), ("Invert", 0.7, None)),
            (("Equalize", 0.6, None), ("Solarize", 0.6, 6)),
            (("Invert", 0.9, None), ("Equalize", 0.6, None)),
            (("Equalize", 0.6, None), ("Rotate", 0.9, 3)),
            (("ShearX", 0.9, 4), ("AutoContrast", 0.8, None)),
            (("ShearY", 0.9, 8), ("Invert", 0.4, None)),
            (("ShearY", 0.9, 5), ("Solarize", 0.2, 6)),
            (("Invert", 0.9, None), ("AutoContrast", 0.8, None)),
            (("Equalize", 0.6, None), ("Rotate", 0.9, 3)),
            (("ShearX", 0.9, 4), ("Solarize", 0.3, 3)),
            (("ShearY", 0.8, 8), ("Invert", 0.7, None)),
            (("Equalize", 0.9, None), ("TranslateY", 0.6, 6)),
            (("Invert", 0.9, None), ("Equalize", 0.6, None)),
            (("Contrast", 0.3, 3), ("Rotate", 0.8, 4)),
            (("Invert", 0.8, None), ("TranslateY", 0.0, 2)),
            (("ShearY", 0.7, 6), ("Solarize", 0.4, 8)),
            (("Invert", 0.6, None), ("Rotate", 0.8, 4)),
            (("ShearY", 0.3, 7), ("TranslateX", 0.9, 3)),
            (("ShearX", 0.1, 6), ("Invert", 0.6, None)),
            (("Solarize", 0.7, 2), ("TranslateY", 0.6, 7)),
            (("ShearY", 0.8, 4), ("Invert", 0.8, None)),
            (("ShearX", 0.7, 9), ("TranslateY", 0.8, 3)),
            (("ShearY", 0.8, 5), ("AutoContrast", 0.7, None)),
            (("ShearX", 0.7, 2), ("Invert", 0.1, None)),
        ]

    # Unknown policy: the caller is expected to handle the None result.
    return None
def _get_magnitudes():
    """Return the magnitude table used by the AutoAugment operations.

    Returns:
        dict: Maps each operation name to ``(magnitudes, signed)``.
            ``magnitudes`` is a tensor with one entry per magnitude bin
            (10 bins) and ``signed`` tells whether the magnitude may be
            negated. Operations without a magnitude map to ``(None, None)``.
    """
    _BINS = 10
    return {
        # name: (magnitudes, signed)
        "ShearX": (paddle.linspace(0.0, 0.3, _BINS), True),
        "ShearY": (paddle.linspace(0.0, 0.3, _BINS), True),
        "TranslateX": (paddle.linspace(0.0, 150.0 / 331.0, _BINS), True),
        "TranslateY": (paddle.linspace(0.0, 150.0 / 331.0, _BINS), True),
        "Rotate": (paddle.linspace(0.0, 30.0, _BINS), True),
        "Brightness": (paddle.linspace(0.0, 0.9, _BINS), True),
        "Color": (paddle.linspace(0.0, 0.9, _BINS), True),
        "Contrast": (paddle.linspace(0.0, 0.9, _BINS), True),
        "Sharpness": (paddle.linspace(0.0, 0.9, _BINS), True),
        # ``paddle.tensor`` is a submodule, not a constructor; tensors are
        # created from Python data with ``paddle.to_tensor``.
        "Posterize": (paddle.to_tensor([8, 8, 7, 7, 6, 6, 5, 5, 4, 4]),
                      False),
        "Solarize": (paddle.linspace(256.0, 0.0, _BINS), False),
        "AutoContrast": (None, None),
        "Equalize": (None, None),
        "Invert": (None, None),
    }
class AutoAugment(paddle.nn.Layer):
    r"""AutoAugment data augmentation method based on
    `"AutoAugment: Learning Augmentation Strategies from Data" <https://arxiv.org/pdf/1805.09501.pdf>`_.
    If the image is paddle Tensor, it should be of type paddle.uint8, and it is expected
    to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
    If img is PIL Image, it is expected to be in mode "L" or "RGB".

    Args:
        policy (AutoAugmentPolicy): Default is ``AutoAugmentPolicy.IMAGENET``.
        interpolation (InterpolationMode): Default is ``InterpolationMode.NEAREST``.
            If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
        fill (sequence or number, optional): Pixel fill value for the area outside the transformed
            image. If given a number, the value is used for all bands respectively.

    Raises:
        ValueError: If ``policy`` has no transform table.
    """

    def __init__(self,
                 policy: AutoAugmentPolicy=AutoAugmentPolicy.IMAGENET,
                 interpolation: InterpolationMode=InterpolationMode.NEAREST,
                 fill: Optional[List[float]]=None):
        super().__init__()
        self.policy = policy
        self.interpolation = interpolation
        self.fill = fill

        self.transforms = _get_transforms(policy)
        if self.transforms is None:
            raise ValueError(
                "The provided policy {} is not recognized.".format(policy))
        self._op_meta = _get_magnitudes()

    @staticmethod
    def get_params(transform_num: int) -> Tuple[int, Tensor, Tensor]:
        """Get parameters for autoaugment transformation

        Args:
            transform_num (int): Number of available sub-policies.

        Returns:
            tuple: ``(policy_id, probs, signs)`` - the index of the randomly
                chosen sub-policy, two random probabilities and two random
                0/1 sign flags (one per operation of the sub-policy).
        """
        policy_id = int(paddle.randint(low=0, high=transform_num, shape=(1, )))
        probs = paddle.rand((2, ))
        signs = paddle.randint(low=0, high=2, shape=(2, ))

        return policy_id, probs, signs

    def _get_op_meta(self,
                     name: str) -> Tuple[Optional[Tensor], Optional[bool]]:
        """Return ``(magnitudes, signed)`` for the operation called ``name``."""
        return self._op_meta[name]

    def forward(self, img: Tensor):
        """
        Args:
            img (PIL Image or Tensor): Image to be transformed.

        Returns:
            PIL Image or Tensor: AutoAugmented image.

        Raises:
            ValueError: If the chosen sub-policy names an unknown operation.
        """
        # The body uses the functional module under the name ``F``, which the
        # module-level imports do not provide; import it locally so the name
        # is always bound.
        from . import functional as F

        fill = self.fill
        if isinstance(img, Tensor):
            # Tensor images need one float fill value per channel.
            if isinstance(fill, (int, float)):
                fill = [float(fill)] * F._get_image_num_channels(img)
            elif fill is not None:
                fill = [float(value) for value in fill]

        transform_id, probs, signs = self.get_params(len(self.transforms))

        for i, (op_name, p,
                magnitude_id) in enumerate(self.transforms[transform_id]):
            # Each operation is applied only when its random draw is <= p.
            if probs[i] <= p:
                magnitudes, signed = self._get_op_meta(op_name)
                magnitude = float(magnitudes[magnitude_id].item()) \
                    if magnitudes is not None and magnitude_id is not None else 0.0
                # Signed operations are negated when the sign draw is 0.
                if signed is not None and signed and signs[i] == 0:
                    magnitude *= -1.0

                if op_name == "ShearX":
                    img = F.affine(
                        img,
                        angle=0.0,
                        translate=[0, 0],
                        scale=1.0,
                        shear=[math.degrees(magnitude), 0.0],
                        interpolation=self.interpolation,
                        fill=fill)
                elif op_name == "ShearY":
                    img = F.affine(
                        img,
                        angle=0.0,
                        translate=[0, 0],
                        scale=1.0,
                        shear=[0.0, math.degrees(magnitude)],
                        interpolation=self.interpolation,
                        fill=fill)
                elif op_name == "TranslateX":
                    img = F.affine(
                        img,
                        angle=0.0,
                        translate=[
                            int(F._get_image_size(img)[0] * magnitude), 0
                        ],
                        scale=1.0,
                        interpolation=self.interpolation,
                        shear=[0.0, 0.0],
                        fill=fill)
                elif op_name == "TranslateY":
                    img = F.affine(
                        img,
                        angle=0.0,
                        translate=[
                            0, int(F._get_image_size(img)[1] * magnitude)
                        ],
                        scale=1.0,
                        interpolation=self.interpolation,
                        shear=[0.0, 0.0],
                        fill=fill)
                elif op_name == "Rotate":
                    img = F.rotate(
                        img,
                        magnitude,
                        interpolation=self.interpolation,
                        fill=fill)
                elif op_name == "Brightness":
                    img = F.adjust_brightness(img, 1.0 + magnitude)
                elif op_name == "Color":
                    img = F.adjust_saturation(img, 1.0 + magnitude)
                elif op_name == "Contrast":
                    img = F.adjust_contrast(img, 1.0 + magnitude)
                elif op_name == "Sharpness":
                    img = F.adjust_sharpness(img, 1.0 + magnitude)
                elif op_name == "Posterize":
                    img = F.posterize(img, int(magnitude))
                elif op_name == "Solarize":
                    img = F.solarize(img, magnitude)
                elif op_name == "AutoContrast":
                    img = F.autocontrast(img)
                elif op_name == "Equalize":
                    img = F.equalize(img)
                elif op_name == "Invert":
                    img = F.invert(img)
                else:
                    raise ValueError(
                        "The provided operator {} is not recognized.".format(
                            op_name))

        return img

    def __repr__(self):
        """Return the class name with the configured policy and fill."""
        return self.__class__.__name__ + '(policy={}, fill={})'.format(
            self.policy, self.fill)
import numbers
import warnings
from enum import Enum
import numpy as np
import paddle
from paddle import Tensor
from typing import List, Tuple, Any, Optional
try:
import accimage
except ImportError:
accimage = None
from . import functional_pil as F_pil
from . import functional_tensor as F_t
class InterpolationMode(Enum):
    """Supported interpolation modes.

    The available methods are ``nearest``, ``bilinear``, ``bicubic``, ``box``,
    ``hamming``, and ``lanczos``.
    """

    NEAREST = 'nearest'
    BILINEAR = 'bilinear'
    BICUBIC = 'bicubic'
    # The remaining members exist for PIL compatibility.
    BOX = 'box'
    HAMMING = 'hamming'
    LANCZOS = 'lanczos'
def _interpolation_modes_from_int(i: int) -> InterpolationMode:
    """Translate an integer interpolation code into an ``InterpolationMode``.

    Args:
        i (int): One of the integer codes 0-5.

    Raises:
        KeyError: If ``i`` is not one of the known codes.

    Returns:
        InterpolationMode: The member associated with ``i``.
    """
    mode_by_code = {
        0: InterpolationMode.NEAREST,
        2: InterpolationMode.BILINEAR,
        3: InterpolationMode.BICUBIC,
        4: InterpolationMode.BOX,
        5: InterpolationMode.HAMMING,
        1: InterpolationMode.LANCZOS,
    }
    mode = mode_by_code[i]
    return mode
# Integer code for each InterpolationMode member (presumably PIL's resampling
# filter constants - confirm against PIL). ``resize`` looks the code up here
# before delegating non-tensor images to ``F_pil.resize``.
pil_modes_mapping = {
    InterpolationMode.NEAREST: 0,
    InterpolationMode.BILINEAR: 2,
    InterpolationMode.BICUBIC: 3,
    InterpolationMode.BOX: 4,
    InterpolationMode.HAMMING: 5,
    InterpolationMode.LANCZOS: 1,
}
def _is_numpy(img: Any) -> bool:
return isinstance(img, np.ndarray)
def _is_numpy_image(img: Any) -> bool:
return img.ndim in {2, 3}
def to_tensor(pic):
    """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.

    See :class:`~paddlevision.transforms.ToTensor` for more details.

    Args:
        pic (PIL Image or numpy.ndarray): Image to be converted to tensor.

    Raises:
        TypeError: If ``pic`` is neither a PIL Image nor an ndarray.
        ValueError: If ``pic`` is an ndarray that is not 2 or 3 dimensional.

    Returns:
        Tensor: Converted image in CHW layout.
    """
    if not F_pil._is_pil_image(pic) and not _is_numpy(pic):
        raise TypeError(f'pic should be PIL Image or ndarray. Got {type(pic)}')

    if _is_numpy(pic) and not _is_numpy_image(pic):
        raise ValueError(
            f'pic should be 2/3 dimensional. Got {pic.ndim} dimensions.')

    float_dtype = paddle.get_default_dtype()
    # NOTE(review): ``float_dtype`` comes from paddle.get_default_dtype() and
    # is compared with ``Tensor.dtype`` below; confirm that this comparison
    # behaves as intended on the Paddle version in use, since it decides
    # whether the values are cast and divided by 255.

    if isinstance(pic, np.ndarray):
        # handle numpy array: HW gets a trailing channel axis, then HWC -> CHW
        hwc = pic[:, :, None] if pic.ndim == 2 else pic
        chw = paddle.to_tensor(hwc.transpose((2, 0, 1)))
        # backward compatibility
        if chw.dtype == float_dtype:
            return chw
        chw = chw.astype(dtype=float_dtype)
        return chw.divide(paddle.full_like(chw, 255))

    if accimage is not None and isinstance(pic, accimage.Image):
        buffer = np.zeros(
            [pic.channels, pic.height, pic.width], dtype=np.float32)
        pic.copyto(buffer)
        return paddle.to_tensor(buffer).astype(dtype=float_dtype)

    # handle PIL Image
    np_dtype_by_mode = {'I': np.int32, 'I;16': np.int16, 'F': np.float32}
    np_dtype = np_dtype_by_mode.get(pic.mode, np.uint8)
    img = paddle.to_tensor(np.array(pic, np_dtype, copy=True))

    if pic.mode == '1':
        img = 255 * img
    width, height = pic.size[0], pic.size[1]
    img = img.reshape([height, width, len(pic.getbands())])

    if img.dtype == float_dtype:
        # put it from HWC to CHW format
        return img.transpose((2, 0, 1))

    img = img.astype(dtype=float_dtype)
    # put it from HWC to CHW format
    img = img.transpose((2, 0, 1))
    return img.divide(paddle.full_like(img, 255))
def normalize(tensor: Tensor,
              mean: List[float],
              std: List[float],
              inplace: bool=False) -> Tensor:
    """Normalize a float tensor image with mean and standard deviation.

    This transform does not support PIL Image.

    .. note::
        This transform acts out of place by default, i.e., it does not mutate
        the input tensor.

    See :class:`~paddlevision.transforms.Normalize` for more details.

    Args:
        tensor (Tensor): Float tensor image of size (C, H, W) or (B, C, H, W) to be normalized.
        mean (sequence): Sequence of means for each channel.
        std (sequence): Sequence of standard deviations for each channel.
        inplace(bool,optional): When False the input is cloned before the
            computation. NOTE(review): ``subtract``/``divide`` look like the
            out-of-place variants, so ``inplace=True`` may not actually
            modify the input - confirm.

    Raises:
        TypeError: If ``tensor`` is not a paddle tensor or not a float tensor.
        ValueError: If ``tensor`` has fewer than 3 dimensions or ``std``
            contains a zero.

    Returns:
        Tensor: Normalized Tensor image.
    """
    if not isinstance(tensor, paddle.Tensor):
        raise TypeError('Input tensor should be a paddle tensor. Got {}.'.
                        format(type(tensor)))

    float_dtypes = (paddle.float16, paddle.float32, paddle.float64)
    if tensor.dtype not in float_dtypes:
        raise TypeError('Input tensor should be a float tensor. Got {}.'.
                        format(tensor.dtype))

    if tensor.ndim < 3:
        raise ValueError(
            'Expected tensor to be a tensor image of size (..., C, H, W). Got tensor.shape() = '
            '{}.'.format(tensor.shape))

    work = tensor if inplace else tensor.clone()

    # Build mean/std with the dtype and place of the image tensor.
    mean_t = paddle.to_tensor(mean, dtype=work.dtype, place=work.place)
    std_t = paddle.to_tensor(std, dtype=work.dtype, place=work.place)
    if (std_t == 0).any():
        raise ValueError('std evaluated to zero, leading to division by zero.')

    # One value per channel: reshape to (C, 1, 1) so it broadcasts over H, W.
    if mean_t.ndim == 1:
        mean_t = mean_t.reshape((-1, 1, 1))
    if std_t.ndim == 1:
        std_t = std_t.reshape((-1, 1, 1))

    return work.subtract(mean_t).divide(std_t)
def resize(img: Tensor,
           size: List[int],
           interpolation: InterpolationMode=InterpolationMode.BILINEAR,
           max_size: Optional[int]=None,
           antialias: Optional[bool]=None) -> Tensor:
    r"""Resize the input image to the given size.
    If the image is paddle Tensor, it is expected
    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions

    .. warning::
        The output image might be different depending on its type: when downsampling, the interpolation of PIL images
        and tensors is slightly different, because PIL applies antialiasing. This may lead to significant differences
        in the performance of a network. Therefore, it is preferable to train and serve a model with the same input
        types. See also below the ``antialias`` parameter, which can help making the output of PIL images and tensors
        closer.

    Args:
        img (PIL Image or Tensor): Image to be resized.
        size (sequence or int): Desired output size. If size is a sequence like
            (h, w), the output size will be matched to this. If size is an int,
            the smaller edge of the image will be matched to this number maintaining
            the aspect ratio. i.e, if height > width, then image will be rescaled to
            :math:`\left(\text{size} \times \frac{\text{height}}{\text{width}}, \text{size}\right)`.
        interpolation (InterpolationMode): Desired interpolation enum defined by
            :class:`paddlevision.transforms.InterpolationMode`.
            Default is ``InterpolationMode.BILINEAR``. If input is Tensor, only ``InterpolationMode.NEAREST``,
            ``InterpolationMode.BILINEAR`` and ``InterpolationMode.BICUBIC`` are supported.
            For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable.
        max_size (int, optional): The maximum allowed for the longer edge of
            the resized image: if the longer edge of the image is greater
            than ``max_size`` after being resized according to ``size``, then
            the image is resized again so that the longer edge is equal to
            ``max_size``. As a result, ``size`` might be overruled, i.e the
            smaller edge may be shorter than ``size``.
        antialias (bool, optional): antialias flag. If ``img`` is PIL Image, the flag is ignored and anti-alias
            is always used. If ``img`` is Tensor, the flag is False by default and can be set to True for
            ``InterpolationMode.BILINEAR`` only mode. This can help making the output for PIL images and tensors
            closer.

            .. warning::
                There is no autodiff support for ``antialias=True`` option with input ``img`` as Tensor.

    Raises:
        TypeError: If ``interpolation`` is neither an int nor an ``InterpolationMode``.

    Returns:
        PIL Image or Tensor: Resized image.
    """
    # Backward compatibility with integer value
    if isinstance(interpolation, int):
        warnings.warn(
            "Argument interpolation should be of type InterpolationMode instead of int. "
            "Please, use InterpolationMode enum.")
        interpolation = _interpolation_modes_from_int(interpolation)

    if not isinstance(interpolation, InterpolationMode):
        raise TypeError("Argument interpolation should be a InterpolationMode")

    # Tensor images go to the tensor backend with the mode's string value.
    if isinstance(img, paddle.Tensor):
        return F_t.resize(
            img,
            size=size,
            interpolation=interpolation.value,
            max_size=max_size,
            antialias=antialias)

    # Everything else is handled by the PIL backend, which ignores antialias.
    if antialias is not None and not antialias:
        warnings.warn(
            "Anti-alias option is always applied for PIL Image input. Argument antialias is ignored."
        )
    pil_code = pil_modes_mapping[interpolation]
    return F_pil.resize(
        img, size=size, interpolation=pil_code, max_size=max_size)
def _get_image_size(img: Tensor) -> List[int]:
    """Return the size of ``img`` as ``[w, h]``.

    ``paddle.Tensor`` inputs are handled by the tensor backend (``F_t``);
    every other input is forwarded to the PIL backend (``F_pil``).
    """
    backend = F_t if isinstance(img, paddle.Tensor) else F_pil
    return backend._get_image_size(img)
def pad(img: Tensor,
        padding: List[int],
        fill: int=0,
        padding_mode: str="constant") -> Tensor:
    r"""Pad an image on all sides, dispatching on the input type.

    A ``paddle.Tensor`` is padded by the tensor backend (``F_t.pad``); any
    other input is handed to the PIL backend (``F_pil.pad``). Tensor images
    are expected to have shape [..., H, W]: at most 2 leading dimensions for
    the reflect and symmetric modes, at most 3 for edge, and an arbitrary
    number for constant.

    Args:
        img (PIL Image or Tensor): Image to be padded.
        padding (int or sequence): Padding on each border. A single int pads
            every border by that amount; a sequence of length 2 gives the
            left/right and top/bottom padding; a sequence of length 4 gives
            the left, top, right and bottom padding respectively.
        fill (number or str or tuple): Pixel fill value, used only when
            ``padding_mode`` is constant. Default is 0. A tuple of length 3
            fills the R, G, B channels respectively. Only a number is
            supported for Tensor input; only int, str or tuple for PIL input.
        padding_mode (str): One of constant, edge, reflect or symmetric.
            Default is constant.

            - constant: pads with the constant value given by ``fill``.
            - edge: pads with the last value at the edge of the image. For a
              5D Tensor the last 3 dimensions are padded instead of the
              last 2.
            - reflect: reflects the image without repeating the edge value,
              e.g. padding [1, 2, 3, 4] by 2 on both sides gives
              [3, 2, 1, 2, 3, 4, 3, 2].
            - symmetric: reflects the image repeating the edge value, e.g.
              padding [1, 2, 3, 4] by 2 on both sides gives
              [2, 1, 1, 2, 3, 4, 4, 3].

    Returns:
        PIL Image or Tensor: Padded image.
    """
    if isinstance(img, paddle.Tensor):
        return F_t.pad(
            img, padding=padding, fill=fill, padding_mode=padding_mode)
    return F_pil.pad(
        img, padding=padding, fill=fill, padding_mode=padding_mode)
def crop(img: Tensor, top: int, left: int, height: int, width: int) -> Tensor:
    """Cut a ``height`` x ``width`` box out of ``img`` at (``top``, ``left``).

    Tensor images are expected to have shape [..., H, W] with an arbitrary
    number of leading dimensions. If the image is smaller than the output
    size along any edge, it is padded with 0 and then cropped.

    Args:
        img (PIL Image or Tensor): Image to be cropped. (0,0) denotes the
            top left corner of the image.
        top (int): Vertical component of the top left corner of the crop box.
        left (int): Horizontal component of the top left corner of the crop box.
        height (int): Height of the crop box.
        width (int): Width of the crop box.

    Returns:
        PIL Image or Tensor: Cropped image.
    """
    # Tensors go to the tensor backend, everything else to the PIL backend.
    backend = F_t if isinstance(img, paddle.Tensor) else F_pil
    return backend.crop(img, top, left, height, width)
def center_crop(img: Tensor, output_size: List[int]) -> Tensor:
    """Crop ``img`` around its center to ``output_size``.

    Tensor images are expected to have shape [..., H, W] with an arbitrary
    number of leading dimensions. If the image is smaller than the output
    size along any edge, it is first padded with 0 and then center cropped.

    Args:
        img (PIL Image or Tensor): Image to be cropped.
        output_size (sequence or int): (height, width) of the crop box. An
            int, or a sequence holding a single value, is used for both
            directions.

    Returns:
        PIL Image or Tensor: Cropped image.
    """
    # Normalise the requested size to a (height, width) pair.
    if isinstance(output_size, numbers.Number):
        side = int(output_size)
        output_size = (side, side)
    elif isinstance(output_size, (tuple, list)) and len(output_size) == 1:
        output_size = (output_size[0], output_size[0])

    image_width, image_height = _get_image_size(img)
    crop_height, crop_width = output_size

    too_narrow = crop_width > image_width
    too_short = crop_height > image_height
    if too_narrow or too_short:
        extra_w = crop_width - image_width
        extra_h = crop_height - image_height
        # Left/top get the floor half of the deficit, right/bottom the
        # ceiling half, so an odd deficit is still fully covered.
        padding_ltrb = [
            extra_w // 2 if too_narrow else 0,
            extra_h // 2 if too_short else 0,
            (extra_w + 1) // 2 if too_narrow else 0,
            (extra_h + 1) // 2 if too_short else 0,
        ]
        img = pad(img, padding_ltrb, fill=0)  # PIL uses fill value 0
        image_width, image_height = _get_image_size(img)
        if crop_width == image_width and crop_height == image_height:
            # Padding alone produced exactly the requested size.
            return img

    crop_top = int(round((image_height - crop_height) / 2.))
    crop_left = int(round((image_width - crop_width) / 2.))
    return crop(img, crop_top, crop_left, crop_height, crop_width)
def resized_crop(
        img: Tensor,
        top: int,
        left: int,
        height: int,
        width: int,
        size: List[int],
        interpolation: InterpolationMode=InterpolationMode.BILINEAR) -> Tensor:
    """Crop a box out of ``img`` and resize the result to ``size``.

    Tensor images are expected to have shape [..., H, W] with an arbitrary
    number of leading dimensions.

    Args:
        img (PIL Image or Tensor): Image to be cropped. (0,0) denotes the
            top left corner of the image.
        top (int): Vertical component of the top left corner of the crop box.
        left (int): Horizontal component of the top left corner of the crop box.
        height (int): Height of the crop box.
        width (int): Width of the crop box.
        size (sequence or int): Desired output size. Same semantics as
            ``resize``.
        interpolation (InterpolationMode): Interpolation enum defined by
            :class:`paddlevision.transforms.InterpolationMode`. Default is
            ``InterpolationMode.BILINEAR``. For Tensor input only
            ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` and
            ``InterpolationMode.BICUBIC`` are supported. Integer values
            (e.g. ``PIL.Image.NEAREST``) are still accepted for backward
            compatibility.

    Returns:
        PIL Image or Tensor: Cropped image.
    """
    cropped = crop(img, top, left, height, width)
    return resize(cropped, size, interpolation)
import numbers
from typing import Any, List, Sequence
import numpy as np
from PIL import Image, ImageOps, ImageEnhance
try:
import accimage
except ImportError:
accimage = None
def _is_pil_image(img: Any) -> bool:
    """Tell whether ``img`` is a PIL image (or an accimage image, if available)."""
    # ``accimage`` is None when the optional import at module top failed.
    accepted = Image.Image if accimage is None else (Image.Image,
                                                     accimage.Image)
    return isinstance(img, accepted)
def _get_image_size(img: Any) -> List[int]:
    """Return ``img.size`` for a PIL image.

    Raises:
        TypeError: If ``img`` is not a PIL image.
    """
    if not _is_pil_image(img):
        raise TypeError("Unexpected type {}".format(type(img)))
    return img.size
def _get_image_num_channels(img: Any) -> int:
    """Return the number of channels (bands) of a PIL image.

    The count is taken from ``img.getbands()`` rather than inferred from the
    mode string, so modes other than 'L' and 'RGB' (e.g. 'RGBA', 'LA', 'P',
    '1', 'CMYK') are reported with their real band count instead of a
    hard-coded 3. 'L' still yields 1 and 'RGB' still yields 3.

    Raises:
        TypeError: If ``img`` is not a PIL image.
    """
    if not _is_pil_image(img):
        raise TypeError("Unexpected type {}".format(type(img)))
    return len(img.getbands())
def hflip(img):
    """Mirror a PIL image left-to-right.

    Raises:
        TypeError: If ``img`` is not a PIL image.
    """
    if _is_pil_image(img):
        return img.transpose(Image.FLIP_LEFT_RIGHT)
    raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
def vflip(img):
    """Mirror a PIL image top-to-bottom.

    Raises:
        TypeError: If ``img`` is not a PIL image.
    """
    if _is_pil_image(img):
        return img.transpose(Image.FLIP_TOP_BOTTOM)
    raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
def adjust_brightness(img, brightness_factor):
    """Apply ``ImageEnhance.Brightness`` to a PIL image with the given factor.

    Raises:
        TypeError: If ``img`` is not a PIL image.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
    return ImageEnhance.Brightness(img).enhance(brightness_factor)
def adjust_contrast(img, contrast_factor):
    """Apply ``ImageEnhance.Contrast`` to a PIL image with the given factor.

    Raises:
        TypeError: If ``img`` is not a PIL image.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
    return ImageEnhance.Contrast(img).enhance(contrast_factor)
def adjust_saturation(img, saturation_factor):
    """Apply ``ImageEnhance.Color`` to a PIL image with the given factor.

    Raises:
        TypeError: If ``img`` is not a PIL image.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
    return ImageEnhance.Color(img).enhance(saturation_factor)
def adjust_hue(img, hue_factor):
    """Shift the hue channel of a PIL image.

    The image is converted to HSV, the H channel is shifted cyclically by
    ``hue_factor * 255`` (truncated toward zero), and the result is converted
    back to the original mode. Images in modes 'L', '1', 'I' and 'F' are
    returned unchanged.

    Args:
        img (PIL Image): Image to adjust.
        hue_factor (float): Amount of shift, must lie in [-0.5, 0.5].

    Returns:
        PIL Image: Hue-adjusted image.

    Raises:
        ValueError: If ``hue_factor`` is outside [-0.5, 0.5].
        TypeError: If ``img`` is not a PIL image.
    """
    if not (-0.5 <= hue_factor <= 0.5):
        raise ValueError('hue_factor ({}) is not in [-0.5, 0.5].'.format(
            hue_factor))
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    input_mode = img.mode
    if input_mode in {'L', '1', 'I', 'F'}:
        return img

    h, s, v = img.convert('HSV').split()
    np_h = np.array(h, dtype=np.uint8)
    # Wrap the shift into [0, 255] with integer arithmetic before building the
    # uint8 scalar. Casting a negative float straight to uint8 is an
    # out-of-range conversion whose result is platform-dependent, so negative
    # hue factors would otherwise not be reliable.
    shift = np.uint8(int(hue_factor * 255) % 256)
    # uint8 addition takes care of rotation across boundaries
    with np.errstate(over='ignore'):
        np_h += shift
    h = Image.fromarray(np_h, 'L')
    return Image.merge('HSV', (h, s, v)).convert(input_mode)
def adjust_gamma(img, gamma, gain=1):
    """Apply gamma correction to a PIL image via a 256-entry lookup table.

    The image is converted to RGB, mapped through the table with PIL's
    ``point`` and converted back to its original mode.

    Args:
        img (PIL Image): Image to adjust.
        gamma (float): Non-negative exponent applied to the normalised
            intensity.
        gain (float): Multiplier applied to the table values. Default is 1.

    Raises:
        TypeError: If ``img`` is not a PIL image.
        ValueError: If ``gamma`` is negative.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
    if gamma < 0:
        raise ValueError('Gamma should be a non-negative real number')

    input_mode = img.mode
    rgb = img.convert('RGB')
    # One 256-entry table, repeated for each of the three RGB bands.
    channel_table = [(255 + 1 - 1e-3) * gain * pow(level / 255., gamma)
                     for level in range(256)]
    gamma_map = channel_table * 3
    # use PIL's point-function to accelerate this part
    corrected = rgb.point(gamma_map)
    return corrected.convert(input_mode)
def pad(img, padding, fill=0, padding_mode="constant"):
    """Pad a PIL image on all sides.

    Constant padding is delegated to ``ImageOps.expand``; the edge, reflect
    and symmetric modes go through ``np.pad``. In the non-constant modes,
    negative padding values crop the corresponding border instead. For
    palette ('P') images the original palette is re-attached to the result.

    Args:
        img (PIL Image): Image to be padded.
        padding (number or tuple or list): A single value for all borders, a
            pair (left/right, top/bottom), or four values
            (left, top, right, bottom). A 1-element sequence is treated like
            a single value.
        fill (number or str or tuple): Fill value, used for constant mode.
        padding_mode (str): One of constant, edge, reflect or symmetric.

    Returns:
        PIL Image: Padded image.

    Raises:
        TypeError: If any argument has an inappropriate type.
        ValueError: If ``padding`` has an unsupported length or
            ``padding_mode`` is not one of the supported modes.
    """
    if not _is_pil_image(img):
        raise TypeError("img should be PIL Image. Got {}".format(type(img)))
    if not isinstance(padding, (numbers.Number, tuple, list)):
        raise TypeError("Got inappropriate padding arg")
    if not isinstance(fill, (numbers.Number, str, tuple)):
        raise TypeError("Got inappropriate fill arg")
    if not isinstance(padding_mode, str):
        raise TypeError("Got inappropriate padding_mode arg")

    if isinstance(padding, list):
        padding = tuple(padding)
    if isinstance(padding, tuple):
        if len(padding) not in [1, 2, 4]:
            raise ValueError(
                "Padding must be an int or a 1, 2, or 4 element tuple, not a "
                + "{} element tuple".format(len(padding)))
        if len(padding) == 1:
            # Compatibility with `functional_tensor.pad`
            padding = padding[0]

    if padding_mode not in ["constant", "edge", "reflect", "symmetric"]:
        raise ValueError(
            "Padding mode should be either constant, edge, reflect or symmetric"
        )

    if padding_mode == "constant":
        opts = _parse_fill(fill, img, name="fill")
        is_palette = img.mode == "P"
        palette = img.getpalette() if is_palette else None
        expanded = ImageOps.expand(img, border=padding, **opts)
        if is_palette:
            expanded.putpalette(palette)
        return expanded

    # Non-constant modes: resolve the per-border amounts first.
    if isinstance(padding, int):
        pad_left = pad_right = pad_top = pad_bottom = padding
    if isinstance(padding, tuple) and len(padding) == 2:
        pad_left = pad_right = padding[0]
        pad_top = pad_bottom = padding[1]
    if isinstance(padding, tuple) and len(padding) == 4:
        pad_left, pad_top, pad_right, pad_bottom = (padding[0], padding[1],
                                                    padding[2], padding[3])

    requested = [pad_left, pad_top, pad_right, pad_bottom]
    # Negative entries mean "crop this border by that many pixels".
    cropping = -np.minimum(requested, 0)
    if cropping.any():
        crop_left, crop_top, crop_right, crop_bottom = cropping
        img = img.crop((crop_left, crop_top, img.width - crop_right,
                        img.height - crop_bottom))
    pad_left, pad_top, pad_right, pad_bottom = np.maximum(requested, 0)
    spatial_widths = ((pad_top, pad_bottom), (pad_left, pad_right))

    if img.mode == 'P':
        palette = img.getpalette()
        padded = Image.fromarray(
            np.pad(np.asarray(img), spatial_widths, padding_mode))
        padded.putpalette(palette)
        return padded

    pixels = np.asarray(img)
    if len(pixels.shape) == 3:
        # RGB image: leave the channel axis unpadded
        pixels = np.pad(pixels, spatial_widths + ((0, 0), ), padding_mode)
    elif len(pixels.shape) == 2:
        # Grayscale image
        pixels = np.pad(pixels, spatial_widths, padding_mode)
    return Image.fromarray(pixels)
def crop(img: Image.Image, top: int, left: int, height: int,
         width: int) -> Image.Image:
    """Crop a PIL image to the box starting at (``top``, ``left``).

    Raises:
        TypeError: If ``img`` is not a PIL image.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
    # PIL expects the box as (left, upper, right, lower).
    box = (left, top, left + width, top + height)
    return img.crop(box)
def resize(img, size, interpolation=Image.BILINEAR, max_size=None):
    """Resize a PIL image.

    Args:
        img (PIL Image): Image to resize.
        size (int or sequence): An int (or 1-element sequence) gives the
            target length of the shorter edge, keeping the aspect ratio; a
            2-element sequence is taken as (h, w).
        interpolation: PIL resampling filter passed to ``img.resize``.
        max_size (int, optional): Upper bound for the longer edge. Only
            allowed when ``size`` describes the shorter edge.

    Returns:
        PIL Image: Resized image. The input itself is returned when its
        shorter edge already equals the requested size.

    Raises:
        TypeError: If ``img`` is not a PIL image or ``size`` is inappropriate.
        ValueError: If ``max_size`` is not strictly greater than the
            requested shorter edge, or is given together with a 2-element
            ``size``.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    size_is_sequence = isinstance(size, Sequence)
    if not (isinstance(size, int) or
            (size_is_sequence and len(size) in (1, 2))):
        raise TypeError('Got inappropriate size arg: {}'.format(size))
    if size_is_sequence and len(size) == 1:
        size = size[0]

    if not isinstance(size, int):
        # Explicit (h, w): PIL wants (w, h).
        if max_size is not None:
            raise ValueError(
                "max_size should only be passed if size specifies the length of the smaller edge, "
                "i.e. size should be an int or a sequence of length 1 in deploy mode."
            )
        return img.resize(size[::-1], interpolation)

    w, h = img.size
    portrait = w <= h
    short, long = (w, h) if portrait else (h, w)
    if short == size:
        return img

    new_short = size
    new_long = int(size * long / short)
    if max_size is not None:
        if max_size <= size:
            raise ValueError(
                f"max_size = {max_size} must be strictly greater than the requested "
                f"size for the smaller edge size = {size}")
        if new_long > max_size:
            # Shrink again so that the longer edge lands on max_size.
            new_short, new_long = int(max_size * new_short /
                                      new_long), max_size

    if portrait:
        new_w, new_h = new_short, new_long
    else:
        new_w, new_h = new_long, new_short
    return img.resize((new_w, new_h), interpolation)
def _parse_fill(fill, img, name="fillcolor"):
# Process fill color for affine transforms
num_bands = len(img.getbands())
if fill is None:
fill = 0
if isinstance(fill, (int, float)) and num_bands > 1:
fill = tuple([fill] * num_bands)
if isinstance(fill, (list, tuple)):
if len(fill) != num_bands:
msg = (
"The number of elements in 'fill' does not match the number of "
"bands of the image ({} != {})")
raise ValueError(msg.format(len(fill), num_bands))
fill = tuple(fill)
return {name: fill}
def affine(img, matrix, interpolation=0, fill=None):
    """Apply an affine transform to a PIL image, keeping its size.

    ``matrix`` and ``interpolation`` are passed to ``img.transform`` with
    ``Image.AFFINE``; ``fill`` is normalised by ``_parse_fill``.

    Raises:
        TypeError: If ``img`` is not a PIL image.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
    fill_kwargs = _parse_fill(fill, img)
    return img.transform(img.size, Image.AFFINE, matrix, interpolation,
                         **fill_kwargs)
def rotate(img, angle, interpolation=0, expand=False, center=None, fill=None):
    """Rotate a PIL image by ``angle`` using ``img.rotate``.

    ``interpolation``, ``expand`` and ``center`` are forwarded positionally;
    ``fill`` is normalised by ``_parse_fill``.

    Raises:
        TypeError: If ``img`` is not a PIL image.
    """
    if not _is_pil_image(img):
        raise TypeError("img should be PIL Image. Got {}".format(type(img)))
    fill_kwargs = _parse_fill(fill, img)
    return img.rotate(angle, interpolation, expand, center, **fill_kwargs)
def perspective(img,
                perspective_coeffs,
                interpolation=Image.BICUBIC,
                fill=None):
    """Apply a perspective transform to a PIL image, keeping its size.

    ``perspective_coeffs`` and ``interpolation`` are passed to
    ``img.transform`` with ``Image.PERSPECTIVE``; ``fill`` is normalised by
    ``_parse_fill``.

    Raises:
        TypeError: If ``img`` is not a PIL image.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
    fill_kwargs = _parse_fill(fill, img)
    output_size = img.size
    return img.transform(output_size, Image.PERSPECTIVE, perspective_coeffs,
                         interpolation, **fill_kwargs)
def to_grayscale(img, num_output_channels):
    """Convert a PIL image to grayscale with 1 or 3 output channels.

    With 1 channel the result is an 'L' image. With 3 channels the 'L' image
    is stacked three times along the last axis and returned as 'RGB'.

    Raises:
        TypeError: If ``img`` is not a PIL image.
        ValueError: If ``num_output_channels`` is neither 1 nor 3.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
    if num_output_channels not in (1, 3):
        raise ValueError('num_output_channels should be either 1 or 3')

    gray = img.convert('L')
    if num_output_channels == 1:
        return gray

    plane = np.array(gray, dtype=np.uint8)
    stacked = np.dstack([plane, plane, plane])
    return Image.fromarray(stacked, 'RGB')
def invert(img):
    """Return ``ImageOps.invert(img)`` for a PIL image.

    Raises:
        TypeError: If ``img`` is not a PIL image.
    """
    if _is_pil_image(img):
        return ImageOps.invert(img)
    raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
def posterize(img, bits):
    """Return ``ImageOps.posterize(img, bits)`` for a PIL image.

    Raises:
        TypeError: If ``img`` is not a PIL image.
    """
    if _is_pil_image(img):
        return ImageOps.posterize(img, bits)
    raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
def solarize(img, threshold):
    """Return ``ImageOps.solarize(img, threshold)`` for a PIL image.

    Raises:
        TypeError: If ``img`` is not a PIL image.
    """
    if _is_pil_image(img):
        return ImageOps.solarize(img, threshold)
    raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
def adjust_sharpness(img, sharpness_factor):
    """Apply ``ImageEnhance.Sharpness`` to a PIL image with the given factor.

    Raises:
        TypeError: If ``img`` is not a PIL image.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
    return ImageEnhance.Sharpness(img).enhance(sharpness_factor)
def autocontrast(img):
    """Return ``ImageOps.autocontrast(img)`` for a PIL image.

    Raises:
        TypeError: If ``img`` is not a PIL image.
    """
    if _is_pil_image(img):
        return ImageOps.autocontrast(img)
    raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
def equalize(img):
    """Return ``ImageOps.equalize(img)`` for a PIL image.

    Raises:
        TypeError: If ``img`` is not a PIL image.
    """
    if _is_pil_image(img):
        return ImageOps.equalize(img)
    raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
import warnings
import paddle
from paddle import Tensor
from paddle.nn.functional import grid_sample, conv2d, interpolate, pad as paddle_pad
from typing import Optional, Tuple, List
def _is_tensor_a_paddle_image(x: Tensor) -> bool:
    """Tell whether ``x`` has enough dimensions (at least 2) to be an image."""
    min_image_rank = 2
    return x.ndim >= min_image_rank
def _assert_image_tensor(img):
    """Raise ``TypeError`` unless ``img`` passes ``_is_tensor_a_paddle_image``."""
    if _is_tensor_a_paddle_image(img):
        return
    raise TypeError("Tensor is not a paddle image.")
def _get_image_size(img: Tensor) -> List[int]:
    """Return ``[w, h]`` taken from the last two dimensions of ``img``.

    Raises:
        TypeError: If ``img`` does not pass ``_assert_image_tensor``.
    """
    _assert_image_tensor(img)
    shape = img.shape
    width, height = shape[-1], shape[-2]
    return [width, height]
def _cast_squeeze_in(img: Tensor, req_dtypes: List[paddle.dtype]) -> Tuple[
        Tensor, bool, bool, paddle.dtype]:
    """Prepare ``img`` for an op that needs a 4-D input of a given dtype.

    A leading axis is added when ``img`` has fewer than 4 dimensions, and the
    tensor is cast to ``req_dtypes[0]`` when its dtype is not in
    ``req_dtypes``. The returned flags and original dtype let
    ``_cast_squeeze_out`` undo both steps.

    Args:
        img (Tensor): Input image tensor.
        req_dtypes (list): Accepted dtypes; the first one is the cast target.

    Returns:
        tuple: ``(img, need_cast, need_squeeze, out_dtype)`` where
        ``out_dtype`` is the dtype ``img`` had on entry.
    """
    need_squeeze = False
    # make image NCHW
    if img.ndim < 4:
        # Paddle names this argument ``axis`` (``dim`` is the torch spelling
        # and is rejected as an unexpected keyword).
        img = img.unsqueeze(axis=0)
        need_squeeze = True

    out_dtype = img.dtype
    need_cast = False
    if out_dtype not in req_dtypes:
        need_cast = True
        req_dtype = req_dtypes[0]
        # Paddle tensors expose ``astype``; there is no ``as_type`` method.
        img = img.astype(req_dtype)
    return img, need_cast, need_squeeze, out_dtype
def _cast_squeeze_out(img: Tensor,
                      need_cast: bool,
                      need_squeeze: bool,
                      out_dtype: paddle.dtype):
    """Undo the reshaping/casting performed by ``_cast_squeeze_in``.

    Args:
        img (Tensor): Result tensor to restore.
        need_cast (bool): Whether to cast back to ``out_dtype``.
        need_squeeze (bool): Whether to drop the leading axis.
        out_dtype (paddle.dtype): dtype to restore when ``need_cast`` is set.

    Returns:
        Tensor: ``img`` with the leading axis removed and/or cast back.
    """
    if need_squeeze:
        # Paddle names this argument ``axis`` (``dim`` is the torch spelling).
        img = img.squeeze(axis=0)

    if need_cast:
        if out_dtype in (paddle.uint8, paddle.int8, paddle.int16, paddle.int32,
                         paddle.int64):
            # it is better to round before cast
            img = paddle.round(img)
        # Paddle tensors expose ``astype``; there is no ``as_type`` method.
        img = img.astype(out_dtype)
    return img
def _pad_symmetric(img: Tensor, padding: List[int]) -> Tensor:
    """Pad a 3-D or 4-D image tensor symmetrically (edge value repeated).

    The result is built by gathering rows and columns through index lists:
    the mirrored leading indices, then the original range, then negative
    indices counting back from the end.

    Args:
        img (Tensor): Image tensor whose last two dimensions are H and W.
        padding (list): ``[left, right, top, bottom]``. Negative entries crop
            the corresponding border instead of padding it.

    Returns:
        Tensor: Padded (and possibly cropped) tensor.

    Raises:
        RuntimeError: If ``img`` is neither 3-D nor 4-D.
    """
    # padding is left, right, top, bottom
    # crop if needed
    if padding[0] < 0 or padding[1] < 0 or padding[2] < 0 or padding[3] < 0:
        crop_left, crop_right, crop_top, crop_bottom = [
            -min(x, 0) for x in padding
        ]
        img = img[..., crop_top:img.shape[-2] - crop_bottom, crop_left:
                  img.shape[-1] - crop_right]
        padding = [max(x, 0) for x in padding]

    # ``Tensor.size`` in Paddle is the element count, not a callable that
    # returns the dimensions, so the extents are read from ``shape``.
    in_sizes = img.shape

    x_indices = list(range(in_sizes[-1]))  # [0, 1, 2, 3, ...]
    left_indices = list(range(padding[0] - 1, -1, -1))  # e.g. [3, 2, 1, 0]
    right_indices = [-(i + 1) for i in range(padding[1])]  # e.g. [-1, -2, -3]
    # ``paddle.to_tensor`` takes ``place`` (not ``device``), and tensors
    # expose their location as ``.place``.
    x_indices = paddle.to_tensor(
        left_indices + x_indices + right_indices, place=img.place)

    y_indices = list(range(in_sizes[-2]))
    top_indices = list(range(padding[2] - 1, -1, -1))
    bottom_indices = [-(i + 1) for i in range(padding[3])]
    y_indices = paddle.to_tensor(
        top_indices + y_indices + bottom_indices, place=img.place)

    # NOTE(review): the gathers below rely on Paddle supporting combined
    # tensor indexing with broadcasting and negative indices - confirm on the
    # targeted Paddle version.
    ndim = img.ndim
    if ndim == 3:
        return img[:, y_indices[:, None], x_indices[None, :]]
    elif ndim == 4:
        return img[:, :, y_indices[:, None], x_indices[None, :]]
    else:
        raise RuntimeError(
            "Symmetric padding of N-D tensors are not supported yet")
def crop(img: Tensor, top: int, left: int, height: int, width: int) -> Tensor:
    """Slice a ``height`` x ``width`` box out of the last two dims of ``img``.

    When the box reaches outside the image, the part inside the image is
    sliced out and the missing border is filled with 0 through ``pad``.

    Raises:
        TypeError: If ``img`` does not pass ``_assert_image_tensor``.
    """
    _assert_image_tensor(img)

    w, h = _get_image_size(img)
    right, bottom = left + width, top + height

    fits_inside = left >= 0 and top >= 0 and right <= w and bottom <= h
    if fits_inside:
        return img[..., top:bottom, left:right]

    # Amount by which the box sticks out on each side, as left/top/right/bottom.
    padding_ltrb = [
        max(-left, 0), max(-top, 0), max(right - w, 0), max(bottom - h, 0)
    ]
    visible = img[..., max(top, 0):bottom, max(left, 0):right]
    return pad(visible, padding_ltrb, fill=0)
def pad(img: Tensor,
        padding: List[int],
        fill: int=0,
        padding_mode: str="constant") -> Tensor:
    """Pad the last two dimensions of an image tensor.

    Symmetric padding is routed to ``_pad_symmetric``. The other modes go
    through ``paddle.nn.functional.pad`` (imported as ``paddle_pad``), with
    "edge" remapped to "replicate". Inputs with fewer than 4 dimensions get a
    temporary leading axis, and for non-constant modes non-float inputs are
    temporarily cast to float32.

    Args:
        img (Tensor): Image tensor.
        padding (int or tuple or list): A single value for all borders, a
            pair (left/right, top/bottom), or four values
            (left, top, right, bottom).
        fill (int or float): Fill value, passed to the backend as a float.
        padding_mode (str): One of constant, edge, reflect or symmetric.

    Returns:
        Tensor: Padded tensor with the input's dtype.

    Raises:
        TypeError: If an argument has an inappropriate type or ``img`` does
            not pass ``_assert_image_tensor``.
        ValueError: If ``padding`` has an unsupported length or
            ``padding_mode`` is not one of the supported modes.
    """
    _assert_image_tensor(img)

    if not isinstance(padding, (int, tuple, list)):
        raise TypeError("Got inappropriate padding arg")
    if not isinstance(fill, (int, float)):
        raise TypeError("Got inappropriate fill arg")
    if not isinstance(padding_mode, str):
        raise TypeError("Got inappropriate padding_mode arg")

    if isinstance(padding, tuple):
        padding = list(padding)

    if isinstance(padding, list) and len(padding) not in [1, 2, 4]:
        raise ValueError(
            "Padding must be an int or a 1, 2, or 4 element tuple, not a " +
            "{} element tuple".format(len(padding)))

    if padding_mode not in ["constant", "edge", "reflect", "symmetric"]:
        raise ValueError(
            "Padding mode should be either constant, edge, reflect or symmetric"
        )

    if isinstance(padding, int):
        pad_left = pad_right = pad_top = pad_bottom = padding
    elif len(padding) == 1:
        pad_left = pad_right = pad_top = pad_bottom = padding[0]
    elif len(padding) == 2:
        pad_left = pad_right = padding[0]
        pad_top = pad_bottom = padding[1]
    else:
        pad_left = padding[0]
        pad_top = padding[1]
        pad_right = padding[2]
        pad_bottom = padding[3]

    # The backends take the order left, right, top, bottom.
    p = [pad_left, pad_right, pad_top, pad_bottom]

    if padding_mode == "edge":
        # remap padding_mode str
        padding_mode = "replicate"
    elif padding_mode == "symmetric":
        # route to another implementation
        return _pad_symmetric(img, p)

    need_squeeze = False
    if img.ndim < 4:
        # Paddle names this argument ``axis`` (``dim`` is the torch spelling).
        img = img.unsqueeze(axis=0)
        need_squeeze = True

    out_dtype = img.dtype
    need_cast = False
    if (padding_mode != "constant") and img.dtype not in (paddle.float32,
                                                          paddle.float64):
        # Here we temporary cast input tensor to float
        need_cast = True
        # Paddle tensors expose ``astype``; there is no ``as_type`` method.
        img = img.astype(paddle.float32)

    img = paddle_pad(img, p, mode=padding_mode, value=float(fill))

    if need_squeeze:
        img = img.squeeze(axis=0)

    if need_cast:
        img = img.astype(out_dtype)

    return img
def resize(img: Tensor,
           size: List[int],
           interpolation: str="bilinear",
           max_size: Optional[int]=None,
           antialias: Optional[bool]=None) -> Tensor:
    """Resize an image tensor with ``paddle.nn.functional.interpolate``.

    Args:
        img (Tensor): Image tensor whose last two dimensions are H and W.
        size (int or tuple or list): An int (or 1-element sequence) gives the
            target length of the shorter edge, keeping the aspect ratio; a
            2-element sequence is taken as (h, w).
        interpolation (str): "nearest", "bilinear" or "bicubic".
        max_size (int, optional): Upper bound for the longer edge. Only
            allowed when ``size`` describes the shorter edge.
        antialias (bool, optional): Validated against ``interpolation`` but
            not forwarded to ``interpolate``, so it currently has no effect
            on the output.

    Returns:
        Tensor: Resized tensor with the input's dtype. The input itself is
        returned when its shorter edge already equals the requested size.

    Raises:
        TypeError: If an argument has an inappropriate type or ``img`` does
            not pass ``_assert_image_tensor``.
        ValueError: For an unsupported interpolation mode, a ``size`` of
            unsupported length, an invalid ``max_size``, or ``antialias``
            combined with a mode other than bilinear/bicubic.
    """
    _assert_image_tensor(img)

    if not isinstance(size, (int, tuple, list)):
        raise TypeError("Got inappropriate size arg")
    if not isinstance(interpolation, str):
        raise TypeError("Got inappropriate interpolation arg")

    if interpolation not in ["nearest", "bilinear", "bicubic"]:
        raise ValueError(
            "This interpolation mode is unsupported with Tensor input")

    if isinstance(size, tuple):
        size = list(size)

    if isinstance(size, list):
        if len(size) not in [1, 2]:
            raise ValueError(
                "Size must be an int or a 1 or 2 element tuple/list, not a "
                "{} element tuple/list".format(len(size)))
        if max_size is not None and len(size) != 1:
            raise ValueError(
                "max_size should only be passed if size specifies the length of the smaller edge."
            )

    if antialias is None:
        antialias = False

    if antialias and interpolation not in ["bilinear", "bicubic"]:
        raise ValueError(
            "Antialias option is supported for bilinear and bicubic interpolation modes only"
        )

    w, h = _get_image_size(img)

    if isinstance(size, int) or len(
            size) == 1:  # specified size only for the smallest edge
        short, long = (w, h) if w <= h else (h, w)
        requested_new_short = size if isinstance(size, int) else size[0]

        if short == requested_new_short:
            return img

        new_short, new_long = requested_new_short, int(requested_new_short *
                                                       long / short)

        if max_size is not None:
            if max_size <= requested_new_short:
                raise ValueError(
                    f"max_size = {max_size} must be strictly greater than the requested "
                    f"size for the smaller edge size = {size}")
            if new_long > max_size:
                new_short, new_long = int(max_size * new_short /
                                          new_long), max_size

        new_w, new_h = (new_short, new_long) if w <= h else (new_long,
                                                             new_short)

    else:  # specified both h and w
        new_w, new_h = size[1], size[0]

    img, need_cast, need_squeeze, out_dtype = _cast_squeeze_in(
        img, [paddle.float32, paddle.float64])

    # Paddle's ``align_corners`` is a plain bool, and a non-zero value is
    # rejected for nearest mode. ``None`` compares unequal to 0, so it is
    # never passed; False is used for every mode.
    img = interpolate(
        img, size=[new_h, new_w], mode=interpolation, align_corners=False)

    if interpolation == "bicubic" and out_dtype == paddle.uint8:
        # Bicubic can overshoot the uint8 range; Paddle's clamping op is
        # ``clip`` (``clamp`` is the torch name).
        img = paddle.clip(img, min=0, max=255)

    img = _cast_squeeze_out(
        img,
        need_cast=need_cast,
        need_squeeze=need_squeeze,
        out_dtype=out_dtype)

    return img
import math
import numbers
import warnings
from collections.abc import Sequence
from typing import Tuple, List
import paddle
from paddle import Tensor
try:
import accimage
except ImportError:
accimage = None
from . import functional as F
from .functional import InterpolationMode, _interpolation_modes_from_int
# Names exported from this module by ``from ... import *``.
__all__ = [
    "Compose", "ToTensor", "Normalize", "Resize", "CenterCrop",
    "RandomResizedCrop"
]
class Compose:
    """Chain several transforms into a single callable.

    Calling the instance feeds the input through every transform in order,
    each one receiving the previous one's output.

    Args:
        transforms (list of ``Transform`` objects): list of transforms to compose.

    Example:
        >>> transforms.Compose([
        >>>     transforms.CenterCrop(10),
        >>>     transforms.ToTensor(),
        >>> ])
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img):
        result = img
        for transform in self.transforms:
            result = transform(result)
        return result

    def __repr__(self):
        # One line per transform, wrapped in "ClassName(" ... ")".
        pieces = [self.__class__.__name__ + '(']
        for transform in self.transforms:
            pieces.append('\n')
            pieces.append(' {0}'.format(transform))
        pieces.append('\n)')
        return ''.join(pieces)
class ToTensor:
    """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.

    A PIL Image or numpy.ndarray (H x W x C) in the range [0, 255] becomes a
    paddle tensor of shape (C x H x W) in the range [0.0, 1.0] when the PIL
    Image has one of the modes (L, LA, P, I, F, RGB, YCbCr, RGBA, CMYK, 1) or
    the numpy.ndarray has dtype = np.uint8. In the other cases, tensors are
    returned without scaling. The conversion itself is done by
    ``F.to_tensor``.

    .. note::
        Because the input image is scaled to [0.0, 1.0], this transformation
        should not be used when transforming target image masks.
    """

    def __call__(self, pic):
        """Convert ``pic`` with ``F.to_tensor``.

        Args:
            pic (PIL Image or numpy.ndarray): Image to be converted to tensor.

        Returns:
            Tensor: Converted image.
        """
        converted = F.to_tensor(pic)
        return converted

    def __repr__(self):
        return '{0}()'.format(self.__class__.__name__)
class Normalize(paddle.nn.Layer):
    """Normalize a tensor image with mean and standard deviation.

    PIL Images are not supported. Given mean ``(mean[1],...,mean[n])`` and
    std ``(std[1],..,std[n])`` for ``n`` channels, each channel of the input
    is normalized as
    ``output[channel] = (input[channel] - mean[channel]) / std[channel]``.
    The computation is delegated to ``F.normalize``.

    .. note::
        This transform acts out of place, i.e., it does not mutate the input tensor.

    Args:
        mean (sequence): Sequence of means for each channel.
        std (sequence): Sequence of standard deviations for each channel.
        inplace(bool,optional): Bool to make this operation in-place.
    """

    def __init__(self, mean, std, inplace=False):
        super().__init__()
        self.mean, self.std = mean, std
        self.inplace = inplace

    def forward(self, tensor: Tensor) -> Tensor:
        """Normalize ``tensor`` with the stored mean and std.

        Args:
            tensor (Tensor): Tensor image to be normalized.

        Returns:
            Tensor: Normalized Tensor image.
        """
        normalized = F.normalize(tensor, self.mean, self.std, self.inplace)
        return normalized

    def __repr__(self):
        details = '(mean={0}, std={1})'.format(self.mean, self.std)
        return self.__class__.__name__ + details
class Resize(paddle.nn.Layer):
    """Resize the input image to the given size.

    Tensor images are expected to have shape [..., H, W] with an arbitrary
    number of leading dimensions.

    .. warning::
        The output may differ depending on the input type: when downsampling,
        PIL images and tensors are interpolated slightly differently because
        PIL applies antialiasing. This may lead to significant differences in
        the performance of a network, so it is preferable to train and serve
        a model with the same input types. See also the ``antialias``
        parameter below, which can help bring the two outputs closer.

    Args:
        size (sequence or int): Desired output size. A sequence like (h, w)
            is matched exactly. An int is matched to the smaller edge of the
            image, i.e. if height > width the image is rescaled to
            (size * height / width, size).
        interpolation (InterpolationMode): Interpolation enum defined by
            :class:`paddlevision.transforms.InterpolationMode`. Default is
            ``InterpolationMode.BILINEAR``. For Tensor input only
            ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` and
            ``InterpolationMode.BICUBIC`` are supported. Integer values
            (e.g. ``PIL.Image.NEAREST``) are still accepted for backward
            compatibility.
        max_size (int, optional): The maximum allowed for the longer edge of
            the resized image: if the longer edge is greater than
            ``max_size`` after resizing according to ``size``, the image is
            resized again so that the longer edge equals ``max_size``. As a
            result ``size`` might be overruled, i.e. the smaller edge may be
            shorter than ``size``.
        antialias (bool, optional): antialias flag. For PIL Images the flag
            is ignored and anti-alias is always used. For Tensors the flag is
            False by default and can be set to True for
            ``InterpolationMode.BILINEAR`` only mode.

            .. warning::
                There is no autodiff support for ``antialias=True`` option
                with input ``img`` as Tensor.
    """

    def __init__(self,
                 size,
                 interpolation=InterpolationMode.BILINEAR,
                 max_size=None,
                 antialias=None):
        super().__init__()

        if not isinstance(size, (int, Sequence)):
            raise TypeError("Size should be int or sequence. Got {}".format(
                type(size)))
        if isinstance(size, Sequence) and len(size) not in (1, 2):
            raise ValueError(
                "If size is a sequence, it should have 1 or 2 values")

        # Backward compatibility with integer value
        if isinstance(interpolation, int):
            warnings.warn(
                "Argument interpolation should be of type InterpolationMode instead of int. "
                "Please, use InterpolationMode enum.")
            interpolation = _interpolation_modes_from_int(interpolation)

        self.size = size
        self.max_size = max_size
        self.interpolation = interpolation
        self.antialias = antialias

    def forward(self, img):
        """Resize ``img`` with the stored settings via ``F.resize``.

        Args:
            img (PIL Image or Tensor): Image to be scaled.

        Returns:
            PIL Image or Tensor: Rescaled image.
        """
        return F.resize(img, self.size, self.interpolation, self.max_size,
                        self.antialias)

    def __repr__(self):
        template = '(size={0}, interpolation={1}, max_size={2}, antialias={3})'
        details = template.format(self.size, self.interpolation.value,
                                  self.max_size, self.antialias)
        return self.__class__.__name__ + details
class CenterCrop(paddle.nn.Layer):
    """Crops the given image at the center.

    Tensor images are expected to have shape [..., H, W] with an arbitrary
    number of leading dimensions. If the image is smaller than the output
    size along any edge, it is padded with 0 and then center cropped.

    Args:
        size (sequence or int): Desired output size of the crop. An int gives
            a square crop (size, size) instead of a sequence like (h, w). A
            sequence of length 1 is interpreted as (size[0], size[0]).
    """

    def __init__(self, size):
        super().__init__()
        size_error = "Please provide only two dimensions (h, w) for size."
        self.size = _setup_size(size, error_msg=size_error)

    def forward(self, img):
        """Center-crop ``img`` to the stored size via ``F.center_crop``.

        Args:
            img (PIL Image or Tensor): Image to be cropped.

        Returns:
            PIL Image or Tensor: Cropped image.
        """
        cropped = F.center_crop(img, self.size)
        return cropped

    def __repr__(self):
        return self.__class__.__name__ + '(size={0})'.format(self.size)
class RandomResizedCrop(paddle.nn.Layer):
"""Crop a random portion of image and resize it to a given size.
If the image is paddle Tensor, it is expected
to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
A crop of the original image is made: the crop has a random area (H * W)
and a random aspect ratio. This crop is finally resized to the given
size. This is popularly used to train the Inception networks.
Args:
size (int or sequence): expected output size of the crop, for each edge. If size is an
int instead of sequence like (h, w), a square output size ``(size, size)`` is
made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]).
scale (tuple of float): Specifies the lower and upper bounds for the random area of the crop,
before resizing. The scale is defined with respect to the area of the original image.
ratio (tuple of float): lower and upper bounds for the random aspect ratio of the crop, before
resizing.
interpolation (InterpolationMode): Desired interpolation enum defined by
:class:`paddlevision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``.
If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` and
``InterpolationMode.BICUBIC`` are supported.
For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable.
"""
def __init__(self,
size,
scale=(0.08, 1.0),
ratio=(3. / 4., 4. / 3.),
interpolation=InterpolationMode.BILINEAR):
super().__init__()
self.size = _setup_size(
size,
error_msg="Please provide only two dimensions (h, w) for size.")
if not isinstance(scale, Sequence):
raise TypeError("Scale should be a sequence")
if not isinstance(ratio, Sequence):
raise TypeError("Ratio should be a sequence")
if (scale[0] > scale[1]) or (ratio[0] > ratio[1]):
warnings.warn("Scale and ratio should be of kind (min, max)")
# Backward compatibility with integer value
if isinstance(interpolation, int):
warnings.warn(
"Argument interpolation should be of type InterpolationMode instead of int. "
"Please, use InterpolationMode enum.")
interpolation = _interpolation_modes_from_int(interpolation)
self.interpolation = interpolation
self.scale = scale
self.ratio = ratio
@staticmethod
def get_params(img: Tensor, scale: List[float],
               ratio: List[float]) -> Tuple[int, int, int, int]:
    """Sample the crop box used by a random resized crop.

    Up to ten candidate boxes are drawn; the first one that fits inside the
    image is returned. If none fits, a centred crop is returned instead.

    Args:
        img (PIL Image or Tensor): Input image; only its size is read.
        scale (list): ``[min, max]`` fraction of the image area to crop.
        ratio (list): ``[min, max]`` aspect ratio (w / h) of the crop.

    Returns:
        tuple: ``(i, j, h, w)`` = (top, left, height, width) to be passed
        to ``crop``.
    """
    width, height = F._get_image_size(img)
    image_area = height * width
    log_ratio = paddle.log(paddle.to_tensor(ratio))

    attempts_left = 10
    while attempts_left > 0:
        attempts_left -= 1

        # Area is sampled uniformly, aspect ratio log-uniformly.
        area_fraction = paddle.uniform(
            shape=[1], min=scale[0], max=scale[1]).numpy().item()
        target_area = image_area * area_fraction
        log_aspect = paddle.uniform(
            shape=[1], min=log_ratio[0], max=log_ratio[1])
        aspect_ratio = paddle.exp(log_aspect).numpy().item()

        w = int(round(math.sqrt(target_area * aspect_ratio)))
        h = int(round(math.sqrt(target_area / aspect_ratio)))
        if not (0 < w <= width and 0 < h <= height):
            continue

        # The candidate fits: pick its top-left corner at random.
        i = paddle.randint(0, height - h + 1, shape=(1, )).numpy().item()
        j = paddle.randint(0, width - w + 1, shape=(1, )).numpy().item()
        return i, j, h, w

    # Fallback: centred crop whose aspect ratio is clamped to ``ratio``.
    in_ratio = float(width) / float(height)
    if in_ratio < min(ratio):
        w = width
        h = int(round(w / min(ratio)))
    elif in_ratio > max(ratio):
        h = height
        w = int(round(h * max(ratio)))
    else:
        # The image already satisfies the ratio bounds: keep all of it.
        w, h = width, height
    return (height - h) // 2, (width - w) // 2, h, w
def forward(self, img):
    """Apply a freshly sampled random resized crop to ``img``.

    Args:
        img (PIL Image or Tensor): Image to be cropped and resized.

    Returns:
        PIL Image or Tensor: Result of ``F.resized_crop`` for the sampled
        box, resized to ``self.size``.
    """
    top, left, crop_h, crop_w = self.get_params(img, self.scale, self.ratio)
    return F.resized_crop(img, top, left, crop_h, crop_w, self.size,
                          self.interpolation)
def __repr__(self):
interpolate_str = self.interpolation.value
format_string = self.__class__.__name__ + '(size={0}'.format(self.size)
format_string += ', scale={0}'.format(
tuple(round(s, 4) for s in self.scale))
format_string += ', ratio={0}'.format(
tuple(round(r, 4) for r in self.ratio))
format_string += ', interpolation={0})'.format(interpolate_str)
return format_string
def _setup_size(size, error_msg):
if isinstance(size, numbers.Number):
return int(size), int(size)
if isinstance(size, Sequence) and len(size) == 1:
return size[0], size[0]
if len(size) != 2:
raise ValueError(error_msg)
return size
import os
import sys
sys.path.append(os.path.abspath(os.path.join(__file__, '../')))
from paddlevision.transforms import autoaugment, transforms
class ClassificationPresetTrain:
    """Training-time preprocessing pipeline for classification images.

    The pipeline is: ``RandomResizedCrop(crop_size)``, optionally
    ``AutoAugment`` (when ``auto_augment_policy`` is given), ``ToTensor``
    and ``Normalize(mean, std)``.

    Args:
        crop_size: Output size handed to ``RandomResizedCrop``.
        mean: Per-channel mean for ``Normalize``.
        std: Per-channel standard deviation for ``Normalize``.
        hflip_prob: Accepted but currently unused (the horizontal-flip step
            is disabled in this preset).
        auto_augment_policy: Value used to build an ``AutoAugmentPolicy``;
            ``None`` skips auto-augmentation.
        random_erase_prob: Accepted but currently unused (the random-erasing
            step is disabled in this preset).
    """

    def __init__(self,
                 crop_size,
                 mean=(0.485, 0.456, 0.406),
                 std=(0.229, 0.224, 0.225),
                 hflip_prob=0.5,
                 auto_augment_policy=None,
                 random_erase_prob=0.0):
        pipeline = [transforms.RandomResizedCrop(crop_size)]
        # NOTE: RandomHorizontalFlip(hflip_prob) is deliberately left out
        # here; it was disabled in the original preset.
        if auto_augment_policy is not None:
            policy = autoaugment.AutoAugmentPolicy(auto_augment_policy)
            pipeline.append(autoaugment.AutoAugment(policy=policy))
        pipeline += [
            transforms.ToTensor(),
            transforms.Normalize(mean=mean, std=std),
        ]
        # NOTE: RandomErasing(p=random_erase_prob) is likewise disabled.
        self.transforms = transforms.Compose(pipeline)

    def __call__(self, img):
        """Run ``img`` through the composed training transforms."""
        return self.transforms(img)
class ClassificationPresetEval:
    """Evaluation-time preprocessing pipeline for classification images.

    The pipeline is ``Resize(resize_size)``, ``CenterCrop(crop_size)``,
    ``ToTensor`` and ``Normalize(mean, std)``, in that order.

    Args:
        crop_size: Size handed to ``CenterCrop``.
        resize_size: Size handed to ``Resize``.
        mean: Per-channel mean for ``Normalize``.
        std: Per-channel standard deviation for ``Normalize``.
    """

    def __init__(self,
                 crop_size,
                 resize_size=256,
                 mean=(0.485, 0.456, 0.406),
                 std=(0.229, 0.224, 0.225)):
        steps = [
            transforms.Resize(resize_size),
            transforms.CenterCrop(crop_size),
            transforms.ToTensor(),
            transforms.Normalize(mean=mean, std=std),
        ]
        self.transforms = transforms.Compose(steps)

    def __call__(self, img):
        """Run ``img`` through the composed evaluation transforms."""
        return self.transforms(img)
import datetime
import os
import sys
import time
import paddle
from paddle import nn
import paddlevision
import presets
import utils
import numpy as np
import random
apex = None
import numpy as np
from reprod_log import ReprodLogger
def train_one_epoch(
        model,
        criterion,
        optimizer,
        data_loader,
        device,
        epoch,
        print_freq, ):
    """Train ``model`` for one pass over ``data_loader`` and log progress.

    Every ``print_freq`` iterations (except iteration 0) a summary line with
    the running top-1/top-5 accuracy, learning rate, last loss and timing
    statistics is built; it is printed only by ranks <= 0. The running
    statistics are reset after each summary.

    Args:
        model: Network to train; switched to train mode.
        criterion: Callable computing the loss from ``(output, target)``.
        optimizer: Optimizer providing ``step``, ``clear_grad`` and ``get_lr``.
        data_loader: Iterable yielding ``(image, target)`` batches.
        device: Unused in this function.
        epoch: Epoch index, only used in the log line.
        print_freq (int): Number of iterations between log lines.
    """
    model.train()

    # Running statistics since the last log line.
    reader_cost = 0.0
    run_cost = 0.0
    sample_count = 0
    top1_sum = 0.0
    top5_sum = 0.0
    batches_seen = 0

    reader_start = time.time()
    for batch_idx, (image, target) in enumerate(data_loader):
        # Time spent waiting for the data loader.
        reader_cost += time.time() - reader_start

        step_start = time.time()
        output = model(image)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        optimizer.clear_grad()
        run_cost += time.time() - step_start

        top1, top5 = utils.accuracy(output, target, topk=(1, 5))[:2]
        top1_sum += top1.item()
        top5_sum += top5.item()
        sample_count += image.shape[0]
        batches_seen += 1

        if batch_idx == 0 or batch_idx % print_freq != 0:
            reader_start = time.time()
            continue

        elapsed = reader_cost + run_cost
        msg = "[Epoch {}, iter: {}] top1: {:.5f}, top5: {:.5f}, lr: {:.5f}, loss: {:.5f}, avg_reader_cost: {:.5f} sec, avg_batch_cost: {:.5f} sec, avg_samples: {}, avg_ips: {:.5f} images/sec.".format(
            epoch, batch_idx, top1_sum / batches_seen,
            top5_sum / batches_seen,
            optimizer.get_lr(),
            loss.item(), reader_cost / batches_seen,
            elapsed / batches_seen,
            sample_count / batches_seen,
            sample_count / elapsed)
        if paddle.distributed.get_rank() <= 0:
            print(msg)
        sys.stdout.flush()

        # Start a fresh statistics window.
        reader_cost = 0.0
        run_cost = 0.0
        sample_count = 0
        top1_sum = 0.0
        top5_sum = 0.0
        batches_seen = 0
        reader_start = time.time()
def evaluate(model, criterion, data_loader, device, print_freq=100):
    """Evaluate ``model`` on ``data_loader`` and return the average top-1.

    Args:
        model: Network to evaluate; switched to eval mode.
        criterion: Callable computing the loss from ``(output, target)``.
        data_loader: Iterable yielding ``(image, target)`` batches.
        device: Unused in this function.
        print_freq (int): Logging interval handed to ``log_every``.

    Returns:
        The ``global_avg`` of the ``acc1`` meter.
    """
    model.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    batches = metric_logger.log_every(data_loader, print_freq, 'Test:')
    with paddle.no_grad():
        for image, target in batches:
            output = model(image)
            loss = criterion(output, target)
            acc1, acc5 = utils.accuracy(output, target, topk=(1, 5))
            # FIXME need to take into account that the datasets
            # could have been padded in distributed setup
            batch_size = image.shape[0]
            metric_logger.update(loss=loss.item())
            for meter_name, meter_value in (('acc1', acc1), ('acc5', acc5)):
                metric_logger.meters[meter_name].update(
                    meter_value.item(), n=batch_size)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()

    summary = ' * Acc@1 {top1.global_avg:.3f} Acc@5 {top5.global_avg:.3f}'
    print(summary.format(top1=metric_logger.acc1, top5=metric_logger.acc5))
    return metric_logger.acc1.global_avg
def load_data(traindir, valdir, args):
    """Build the training/validation datasets and their samplers.

    Args:
        traindir: Root folder of the training images.
        valdir: Root folder of the validation images.
        args: Options object; reads ``model``, ``batch_size`` and, when
            present, ``auto_augment`` and ``random_erase``.

    Returns:
        tuple: ``(dataset, dataset_test, train_sampler, test_sampler)``.
    """
    print("Loading data")
    # inception_v3 uses a larger input resolution than the other models.
    if args.model == 'inception_v3':
        resize_size, crop_size = 342, 299
    else:
        resize_size, crop_size = 256, 224

    print("Loading training data")
    started = time.time()
    train_preset = presets.ClassificationPresetTrain(
        crop_size=crop_size,
        auto_augment_policy=getattr(args, "auto_augment", None),
        random_erase_prob=getattr(args, "random_erase", 0.0))
    dataset = paddlevision.datasets.ImageFolder(traindir, train_preset)
    print("Took", time.time() - started)

    print("Loading validation data")
    eval_preset = presets.ClassificationPresetEval(
        crop_size=crop_size, resize_size=resize_size)
    dataset_test = paddlevision.datasets.ImageFolder(valdir, eval_preset)

    print("Creating data loaders")
    train_sampler = paddle.io.DistributedBatchSampler(
        dataset=dataset,
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=False)
    test_sampler = paddle.io.SequenceSampler(dataset_test)
    return dataset, dataset_test, train_sampler, test_sampler
def _resolve_checkpoint_file(resume, suffix):
    """Map the ``--resume`` value onto a checkpoint file ending in ``suffix``.

    ``main`` saves checkpoints as ``<prefix>.pdparams`` / ``<prefix>.pdopt``
    (e.g. ``latest.pdparams``), so ``resume`` is treated as that prefix. A
    trailing ``.pdparams`` / ``.pdopt`` on ``resume`` is tolerated. For
    backward compatibility the previous lookup ``<resume>/<suffix>`` is
    still honoured when such a file exists.
    """
    legacy_path = os.path.join(resume, suffix)
    if os.path.isfile(legacy_path):
        return legacy_path
    prefix, ext = os.path.splitext(resume)
    if ext not in ('.pdparams', '.pdopt'):
        prefix = resume
    return prefix + suffix


def main(args):
    """Train (or only evaluate) a classification model.

    Args:
        args: Parsed options from ``get_args_parser``.

    Returns:
        With ``args.test_only``: the top-1 value from ``evaluate`` on rank 0
        and ``None`` on every other rank. Otherwise the best top-1 value
        seen over the epochs (only updated on rank 0).

    Raises:
        RuntimeError: If ``args.opt`` is neither ``sgd`` nor ``rmsprop``.
    """
    if args.output_dir:
        utils.mkdir(args.output_dir)
    print(args)
    device = paddle.set_device(args.device)

    # multi cards
    if paddle.distributed.get_world_size() > 1:
        paddle.distributed.init_parallel_env()

    train_dir = os.path.join(args.data_path, 'train')
    val_dir = os.path.join(args.data_path, 'val')
    dataset, dataset_test, train_sampler, test_sampler = load_data(
        train_dir, val_dir, args)

    train_batch_sampler = train_sampler
    data_loader = paddle.io.DataLoader(
        dataset=dataset,
        num_workers=args.workers,
        return_list=True,
        batch_sampler=train_batch_sampler)
    test_batch_sampler = paddle.io.BatchSampler(
        sampler=test_sampler, batch_size=args.batch_size)
    data_loader_test = paddle.io.DataLoader(
        dataset_test,
        batch_sampler=test_batch_sampler,
        num_workers=args.workers)

    print("Creating model")
    model = paddlevision.models.__dict__[args.model](
        pretrained=args.pretrained)

    criterion = nn.CrossEntropyLoss()
    lr_scheduler = paddle.optimizer.lr.StepDecay(
        args.lr, step_size=args.lr_step_size, gamma=args.lr_gamma)

    opt_name = args.opt.lower()
    if opt_name == 'sgd':
        optimizer = paddle.optimizer.Momentum(
            learning_rate=lr_scheduler,
            momentum=args.momentum,
            parameters=model.parameters(),
            weight_decay=args.weight_decay)
    elif opt_name == 'rmsprop':
        optimizer = paddle.optimizer.RMSprop(
            learning_rate=lr_scheduler,
            momentum=args.momentum,
            parameters=model.parameters(),
            weight_decay=args.weight_decay,
            eps=0.0316,
            alpha=0.9)
    else:
        raise RuntimeError(
            "Invalid optimizer {}. Only SGD and RMSprop are supported.".format(
                args.opt))

    if args.resume:
        # The checkpoint files are "<resume>.pdparams" / "<resume>.pdopt",
        # matching the names written at the end of every epoch below.
        layer_state_dict = paddle.load(
            _resolve_checkpoint_file(args.resume, '.pdparams'))
        model.set_state_dict(layer_state_dict)
        opt_state_dict = paddle.load(
            _resolve_checkpoint_file(args.resume, '.pdopt'))
        # Paddle optimizers restore their state through set_state_dict.
        optimizer.set_state_dict(opt_state_dict)

    # multi cards
    if paddle.distributed.get_world_size() > 1:
        model = paddle.DataParallel(model)

    if args.test_only:
        # Only rank 0 evaluates; the other ranks must stop here as well
        # instead of falling through into the training loop.
        if paddle.distributed.get_rank() != 0:
            return None
        top1 = evaluate(model, criterion, data_loader_test, device=device)
        return top1

    print("Start training")
    start_time = time.time()
    best_top1 = 0.0
    for epoch in range(args.start_epoch, args.epochs):
        train_one_epoch(model, criterion, optimizer, data_loader, device,
                        epoch, args.print_freq)
        lr_scheduler.step()
        if paddle.distributed.get_rank() == 0:
            top1 = evaluate(model, criterion, data_loader_test, device=device)
            best_top1 = max(best_top1, top1)
            if args.output_dir:
                # Per-epoch snapshot plus a rolling "latest" checkpoint.
                paddle.save(model.state_dict(),
                            os.path.join(args.output_dir,
                                         'model_{}.pdparams'.format(epoch)))
                paddle.save(optimizer.state_dict(),
                            os.path.join(args.output_dir,
                                         'model_{}.pdopt'.format(epoch)))
                paddle.save(model.state_dict(),
                            os.path.join(args.output_dir, 'latest.pdparams'))
                paddle.save(optimizer.state_dict(),
                            os.path.join(args.output_dir, 'latest.pdopt'))

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))
    return best_top1
def get_args_parser(add_help=True):
    """Build the command-line parser for the classification training script.

    Args:
        add_help (bool): Forwarded to ``argparse.ArgumentParser``.

    Returns:
        argparse.ArgumentParser: Parser with all training options registered.
    """
    import argparse
    parser = argparse.ArgumentParser(
        description='PaddlePaddle Classification Training', add_help=add_help)

    # Data, model and runtime.
    parser.add_argument('--data-path', default='../data', help='dataset')
    parser.add_argument('--model', default='alexnet', help='model')
    parser.add_argument('--device', default='gpu', help='device')
    parser.add_argument('-b', '--batch-size', default=32, type=int)
    parser.add_argument(
        '--epochs',
        default=90,
        type=int,
        metavar='N',
        help='number of total epochs to run')
    parser.add_argument(
        '-j',
        '--workers',
        default=8,
        type=int,
        metavar='N',
        # The help text now states the actual default.
        help='number of data loading workers (default: 8)')

    # Optimizer and learning-rate schedule.
    parser.add_argument('--opt', default='sgd', type=str, help='optimizer')
    parser.add_argument(
        '--lr', default=0.00125, type=float, help='initial learning rate')
    parser.add_argument(
        '--momentum', default=0.9, type=float, metavar='M', help='momentum')
    parser.add_argument(
        '--wd',
        '--weight-decay',
        default=1e-4,
        type=float,
        metavar='W',
        help='weight decay (default: 1e-4)',
        dest='weight_decay')
    parser.add_argument(
        '--lr-step-size',
        default=30,
        type=int,
        help='decrease lr every step-size epochs')
    parser.add_argument(
        '--lr-gamma',
        default=0.1,
        type=float,
        help='decrease lr by a factor of lr-gamma')

    # Logging, checkpoints and run mode.
    parser.add_argument(
        '--print-freq', default=10, type=int, help='print frequency')
    parser.add_argument('--output-dir', default='.', help='path where to save')
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    parser.add_argument(
        '--start-epoch', default=0, type=int, metavar='N', help='start epoch')
    parser.add_argument(
        "--sync-bn",
        dest="sync_bn",
        help="Use sync batch norm",
        action="store_true", )
    parser.add_argument(
        "--test-only",
        dest="test_only",
        help="Only test the model",
        action="store_true", )
    # NOTE: --pretrained takes a value (it is not a flag); it is forwarded
    # unchanged to the model constructor by main().
    parser.add_argument(
        "--pretrained",
        dest="pretrained",
        help="Use pre-trained models from the modelzoo")

    # Augmentation.
    parser.add_argument(
        '--auto-augment',
        default=None,
        help='auto augment policy (default: None)')
    parser.add_argument(
        '--random-erase',
        default=0.0,
        type=float,
        help='random erasing probability (default: 0.0)')

    # Mixed precision training parameters
    parser.add_argument(
        '--apex',
        action='store_true',
        help='Use apex for mixed precision training')
    parser.add_argument(
        '--apex-opt-level',
        default='O1',
        type=str,
        # The fragments are now separated so the rendered help is readable.
        help='For apex mixed precision training. '
        'O0 for FP32 training, O1 for mixed precision training. '
        'For further detail, see https://github.com/NVIDIA/apex/tree/master/examples/imagenet'
    )
    return parser
# Script entry point: parse the command-line options, run main() and store
# the returned top-1 value with ReprodLogger.
if __name__ == "__main__":
    args = get_args_parser().parse_args()
    top1 = main(args)
    # Only rank 0 writes the result file.
    if paddle.distributed.get_rank() == 0:
        reprod_logger = ReprodLogger()
        reprod_logger.add("top1", np.array([top1]))
        reprod_logger.save("train_align_paddle.npy")
from collections import defaultdict, deque, OrderedDict
import copy
import datetime
import hashlib
import time
import paddle
import paddle.distributed as dist
import errno
import os
class SmoothedValue(object):
    """Running statistics over a stream of values.

    Keeps the last ``window_size`` values in a deque (for ``median``,
    ``avg``, ``max`` and ``value``) together with a weighted running total
    and count (for ``global_avg``).
    """

    def __init__(self, window_size=20, fmt=None):
        """
        Args:
            window_size (int): Maximum number of recent values retained.
            fmt (str, optional): Format string used by ``__str__``; may
                reference ``median``, ``avg``, ``global_avg``, ``max`` and
                ``value``.
        """
        self.fmt = "{median:.4f} ({global_avg:.4f})" if fmt is None else fmt
        self.deque = deque(maxlen=window_size)
        self.count = 0
        self.total = 0.0

    def update(self, value, n=1):
        """Record ``value``, counted ``n`` times in the global average."""
        self.deque.append(value)
        self.count += n
        self.total += value * n

    def synchronize_between_processes(self):
        """Round-trip ``count`` and ``total`` through a float64 tensor.

        Warning: does not synchronize the deque! As written, no collective
        communication is performed here; the values are only converted.
        """
        packed = paddle.to_tensor([self.count, self.total], dtype='float64')
        count_value, total_value = packed.numpy().tolist()[:2]
        self.count = int(count_value)
        self.total = total_value

    @property
    def median(self):
        """Median of the values currently in the window."""
        window = paddle.to_tensor(list(self.deque))
        return window.median().numpy().item()

    @property
    def avg(self):
        """Mean of the values currently in the window (float32)."""
        window = paddle.to_tensor(list(self.deque), dtype='float32')
        return window.mean().numpy().item()

    @property
    def global_avg(self):
        """Weighted mean over everything recorded so far."""
        return self.total / self.count

    @property
    def max(self):
        """Largest value currently in the window."""
        return max(self.deque)

    @property
    def value(self):
        """Most recently recorded value."""
        return self.deque[-1]

    def __str__(self):
        stats = dict(
            median=self.median,
            avg=self.avg,
            global_avg=self.global_avg,
            max=self.max,
            value=self.value)
        return self.fmt.format(**stats)
class MetricLogger(object):
    """Collection of named ``SmoothedValue`` meters with periodic logging."""

    def __init__(self, delimiter="\t"):
        """
        Args:
            delimiter (str): Separator placed between fields of a log line.
        """
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        """Feed one value per keyword into the meter of the same name.

        Tensors are converted with ``item()``; values must be int or float.
        """
        for name, value in kwargs.items():
            if isinstance(value, paddle.Tensor):
                value = value.item()
            assert isinstance(value, (float, int))
            self.meters[name].update(value)

    def __getattr__(self, attr):
        # Only reached when normal attribute lookup fails: expose meters as
        # attributes (e.g. ``logger.acc1``).
        if attr in self.meters:
            return self.meters[attr]
        if attr in self.__dict__:
            return self.__dict__[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        fields = [
            "{}: {}".format(name, str(meter))
            for name, meter in self.meters.items()
        ]
        return self.delimiter.join(fields)

    def synchronize_between_processes(self):
        """Call ``synchronize_between_processes`` on every meter."""
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        """Register ``meter`` under ``name``, replacing any existing one."""
        self.meters[name] = meter

    def log_every(self, iterable, print_freq, header=None):
        """Yield the items of ``iterable`` while printing progress lines.

        A line with the iteration index, an ETA estimate, all meters and
        timing information is printed every ``print_freq`` iterations; the
        total time is printed once the iterable is exhausted.

        Args:
            iterable: Sized iterable to walk through (``len`` is used).
            print_freq (int): Number of iterations between progress lines.
            header (str, optional): Prefix of every printed line.
        """
        if not header:
            header = ''
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt='{avg:.4f}')
        data_time = SmoothedValue(fmt='{avg:.4f}')
        # Pad the iteration index to the width of the total count.
        space_fmt = ':' + str(len(str(len(iterable)))) + 'd'
        log_msg = self.delimiter.join([
            header, '[{0' + space_fmt + '}/{1}]', 'eta: {eta}', '{meters}',
            'time: {time}', 'data: {data}'
        ])
        for step, obj in enumerate(iterable):
            data_time.update(time.time() - end)
            yield obj
            iter_time.update(time.time() - end)
            if step % print_freq == 0:
                remaining = len(iterable) - step
                eta_seconds = iter_time.global_avg * remaining
                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
                line = log_msg.format(
                    step,
                    len(iterable),
                    eta=eta_string,
                    meters=str(self),
                    time=str(iter_time),
                    data=str(data_time))
                print(line)
            end = time.time()
        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        print('{} Total time: {}'.format(header, total_time_str))
def accuracy(output, target, topk=(1, )):
    """Computes the accuracy over the k top predictions for the specified values of k

    Args:
        output: Model scores; assumed to be (batch, classes) - TODO confirm.
        target: Ground-truth labels compared against the top-k indices.
        topk (tuple of int): Values of k to evaluate.

    Returns:
        list: One tensor per k holding ``correct / batch_size`` (a fraction,
        not a percentage).
    """
    with paddle.no_grad():
        largest_k = max(topk)
        batch_size = target.shape[0]
        top_indices = output.topk(largest_k, 1, True, True)[1]
        hits = top_indices.t().equal(target)
        results = []
        for k in topk:
            hit_count = hits.astype(paddle.int32)[:k].flatten().sum(
                dtype='float32')
            results.append(hit_count / batch_size)
        return results
def get_world_size():
    """Return the value of ``paddle.distributed.get_world_size()``."""
    world_size = dist.get_world_size()
    return world_size
def mkdir(path):
    """Create ``path`` including missing parents; an existing path is fine.

    Any ``OSError`` other than "already exists" (``errno.EEXIST``) is
    re-raised.
    """
    try:
        os.makedirs(path)
    except OSError as exc:
        already_exists = exc.errno == errno.EEXIST
        if not already_exists:
            raise
from .metric import accuracy_torch
from .presets import *
import torch
def accuracy_torch(output, target, topk=(1, )):
    """Computes the accuracy over the k top predictions for the specified values of k

    Args:
        output (torch.Tensor): Scores indexed as (sample, class).
        target (torch.Tensor): Ground-truth class index per sample.
        topk (tuple of int): Values of k to evaluate.

    Returns:
        list: One float32 tensor per k with the accuracy in percent.
    """
    with torch.no_grad():
        largest_k = max(topk)
        batch_size = target.size(0)
        top_indices = output.topk(largest_k, 1, True, True)[1]
        # Row r of ``hits`` marks the samples whose r-th best guess is right.
        hits = top_indices.t().eq(target[None])
        return [
            hits[:k].flatten().sum(dtype=torch.float32) *
            (100.0 / batch_size) for k in topk
        ]
from torchvision.transforms import autoaugment, transforms
class ClassificationPresetTrain:
    """Training-time preprocessing pipeline built from torchvision transforms.

    The pipeline is: ``RandomResizedCrop(crop_size)``, optionally
    ``AutoAugment`` (when ``auto_augment_policy`` is given), ``ToTensor``
    and ``Normalize(mean, std)``.

    Args:
        crop_size: Output size handed to ``RandomResizedCrop``.
        mean: Per-channel mean for ``Normalize``.
        std: Per-channel standard deviation for ``Normalize``.
        hflip_prob: Accepted but currently unused (the horizontal-flip step
            is disabled in this preset).
        auto_augment_policy: Value used to build an ``AutoAugmentPolicy``;
            ``None`` skips auto-augmentation.
        random_erase_prob: Accepted but currently unused (the random-erasing
            step is disabled in this preset).
    """

    def __init__(self,
                 crop_size,
                 mean=(0.485, 0.456, 0.406),
                 std=(0.229, 0.224, 0.225),
                 hflip_prob=0.5,
                 auto_augment_policy=None,
                 random_erase_prob=0.0):
        pipeline = [transforms.RandomResizedCrop(crop_size)]
        # NOTE: RandomHorizontalFlip(hflip_prob) is deliberately left out
        # here; it was disabled in the original preset.
        if auto_augment_policy is not None:
            policy = autoaugment.AutoAugmentPolicy(auto_augment_policy)
            pipeline.append(autoaugment.AutoAugment(policy=policy))
        pipeline += [
            transforms.ToTensor(),
            transforms.Normalize(mean=mean, std=std),
        ]
        # NOTE: RandomErasing(p=random_erase_prob) is likewise disabled.
        self.transforms = transforms.Compose(pipeline)

    def __call__(self, img):
        """Run ``img`` through the composed training transforms."""
        return self.transforms(img)
class ClassificationPresetEval:
    """Evaluation-time preprocessing pipeline built from torchvision transforms.

    The pipeline is ``Resize(resize_size)``, ``CenterCrop(crop_size)``,
    ``ToTensor`` and ``Normalize(mean, std)``, in that order.

    Args:
        crop_size: Size handed to ``CenterCrop``.
        resize_size: Size handed to ``Resize``.
        mean: Per-channel mean for ``Normalize``.
        std: Per-channel standard deviation for ``Normalize``.
    """

    def __init__(self,
                 crop_size,
                 resize_size=256,
                 mean=(0.485, 0.456, 0.406),
                 std=(0.229, 0.224, 0.225)):
        steps = [
            transforms.Resize(resize_size),
            transforms.CenterCrop(crop_size),
            transforms.ToTensor(),
            transforms.Normalize(mean=mean, std=std),
        ]
        self.transforms = transforms.Compose(steps)

    def __call__(self, img):
        """Run ``img`` through the composed evaluation transforms."""
        return self.transforms(img)
from . import datasets
from . import models
from . import transforms
import os
import importlib.machinery
def _download_file_from_remote_location(fpath: str, url: str) -> None:
pass
def _is_remote_location_available() -> bool:
return False
try:
from torch.hub import load_state_dict_from_url
except ImportError:
from torch.utils.model_zoo import load_url as load_state_dict_from_url
def _get_extension_path(lib_name):
lib_dir = os.path.dirname(__file__)
if os.name == 'nt':
# Register the main torchvision library location on the default DLL path
import ctypes
import sys
kernel32 = ctypes.WinDLL('kernel32.dll', use_last_error=True)
with_load_library_flags = hasattr(kernel32, 'AddDllDirectory')
prev_error_mode = kernel32.SetErrorMode(0x0001)
if with_load_library_flags:
kernel32.AddDllDirectory.restype = ctypes.c_void_p
if sys.version_info >= (3, 8):
os.add_dll_directory(lib_dir)
elif with_load_library_flags:
res = kernel32.AddDllDirectory(lib_dir)
if res is None:
err = ctypes.WinError(ctypes.get_last_error())
err.strerror += f' Error adding "{lib_dir}" to the DLL directories.'
raise err
kernel32.SetErrorMode(prev_error_mode)
loader_details = (importlib.machinery.ExtensionFileLoader,
importlib.machinery.EXTENSION_SUFFIXES)
extfinder = importlib.machinery.FileFinder(lib_dir, loader_details)
ext_specs = extfinder.find_spec(lib_name)
if ext_specs is None:
raise ImportError
return ext_specs.origin
from .folder import ImageFolder, DatasetFolder
from .vision import VisionDataset
__all__ = ('ImageFolder', 'DatasetFolder', 'VisionDataset')
from .vision import VisionDataset
from PIL import Image
import os
import os.path
from typing import Any, Callable, cast, Dict, List, Optional, Tuple
def has_file_allowed_extension(filename: str,
                               extensions: Tuple[str, ...]) -> bool:
    """Tell whether ``filename`` ends with one of the allowed extensions.

    The comparison is case-insensitive on the file name side only.

    Args:
        filename (string): path to a file
        extensions (tuple of strings): extensions to consider (lowercase)

    Returns:
        bool: True if the filename ends with one of given extensions
    """
    lowered = filename.lower()
    return lowered.endswith(extensions)
def is_image_file(filename: str) -> bool:
    """Tell whether ``filename`` has one of the ``IMG_EXTENSIONS``.

    Args:
        filename (string): path to a file

    Returns:
        bool: True if the filename ends with a known image extension
    """
    allowed = IMG_EXTENSIONS
    return has_file_allowed_extension(filename, allowed)
def find_classes(directory: str) -> Tuple[List[str], Dict[str, int]]:
    """List the class folders directly inside ``directory``.

    See :class:`DatasetFolder` for details.

    Returns:
        The sorted sub-directory names and a dict mapping each name to its
        position in that sorted list.

    Raises:
        FileNotFoundError: If ``directory`` contains no sub-directory.
    """
    with os.scandir(directory) as entries:
        classes = sorted(
            entry.name for entry in entries if entry.is_dir())
    if len(classes) == 0:
        raise FileNotFoundError(
            f"Couldn't find any class folder in {directory}.")
    class_to_idx = dict(zip(classes, range(len(classes))))
    return classes, class_to_idx
def make_dataset(
        directory: str,
        class_to_idx: Optional[Dict[str, int]]=None,
        extensions: Optional[Tuple[str, ...]]=None,
        is_valid_file: Optional[Callable[[str], bool]]=None, ) -> List[Tuple[
            str, int]]:
    """Generates a list of samples of a form (path_to_sample, class).

    See :class:`DatasetFolder` for details.

    Note: The class_to_idx parameter is here optional and will use the logic of the ``find_classes`` function
    by default.

    Args:
        directory (str): Root dataset directory (``~`` is expanded).
        class_to_idx (dict, optional): Mapping of class folder name to class
            index; class folders missing on disk are skipped.
        extensions (tuple of str, optional): Allowed file extensions.
        is_valid_file (callable, optional): Predicate receiving the path of
            each candidate file. Exactly one of ``extensions`` and
            ``is_valid_file`` must be given.

    Returns:
        List of ``(path, class_index)`` tuples, ordered by class name and
        then by walk order / file name.

    Raises:
        ValueError: If ``class_to_idx`` is empty, or if ``extensions`` and
            ``is_valid_file`` are both ``None`` or both set.
    """
    directory = os.path.expanduser(directory)

    if class_to_idx is None:
        _, class_to_idx = find_classes(directory)
    elif not class_to_idx:
        raise ValueError(
            "'class_to_index' must have at least one entry to collect any samples."
        )

    both_none = extensions is None and is_valid_file is None
    both_something = extensions is not None and is_valid_file is not None
    if both_none or both_something:
        raise ValueError(
            "Both extensions and is_valid_file cannot be None or not None at the same time"
        )

    if extensions is not None:

        def is_valid_file(x: str) -> bool:
            return has_file_allowed_extension(
                x, cast(Tuple[str, ...], extensions))

    is_valid_file = cast(Callable[[str], bool], is_valid_file)

    instances = []
    for target_class in sorted(class_to_idx.keys()):
        class_index = class_to_idx[target_class]
        target_dir = os.path.join(directory, target_class)
        if not os.path.isdir(target_dir):
            continue
        for root, _, fnames in sorted(os.walk(target_dir, followlinks=True)):
            for fname in sorted(fnames):
                # The predicate gets the full path (not just the file name)
                # so that validators which open the file can find it.
                path = os.path.join(root, fname)
                if is_valid_file(path):
                    instances.append((path, class_index))
    return instances
class DatasetFolder(VisionDataset):
    """Generic dataset that reads samples from one sub-folder per class.

    The directory layout that is expected by default can be changed by
    overriding :meth:`find_classes`.

    Args:
        root (string): Root directory path.
        loader (callable): A function to load a sample given its path.
        extensions (tuple[string]): Allowed file extensions. Must not be
            combined with ``is_valid_file``.
        transform (callable, optional): Applied to every loaded sample,
            e.g. ``transforms.RandomCrop`` for images.
        target_transform (callable, optional): Applied to every target.
        is_valid_file (callable, optional): Predicate deciding whether a
            file is used (e.g. to skip corrupt files). Must not be combined
            with ``extensions``.

    Attributes:
        classes (list): List of the class names sorted alphabetically.
        class_to_idx (dict): Dict with items (class_name, class_index).
        samples (list): List of (sample path, class_index) tuples
        targets (list): The class_index value for each image in the dataset
    """

    def __init__(
            self,
            root: str,
            loader: Callable[[str], Any],
            extensions: Optional[Tuple[str, ...]]=None,
            transform: Optional[Callable]=None,
            target_transform: Optional[Callable]=None,
            is_valid_file: Optional[Callable[[str], bool]]=None, ) -> None:
        super().__init__(
            root, transform=transform, target_transform=target_transform)
        # Discover the classes first, then index every sample below them.
        self.classes, self.class_to_idx = self.find_classes(self.root)
        self.samples = self.make_dataset(self.root, self.class_to_idx,
                                         extensions, is_valid_file)
        self.targets = [class_index for _, class_index in self.samples]
        self.loader = loader
        self.extensions = extensions

    @staticmethod
    def make_dataset(
            directory: str,
            class_to_idx: Dict[str, int],
            extensions: Optional[Tuple[str, ...]]=None,
            is_valid_file: Optional[Callable[[str], bool]]=None, ) -> List[
                Tuple[str, int]]:
        """Build the list of ``(path_to_sample, class)`` samples.

        Override this to read samples from somewhere other than the disk,
        e.g. from a compressed archive.

        Args:
            directory (str): root dataset directory, corresponding to ``self.root``.
            class_to_idx (Dict[str, int]): Dictionary mapping class name to class index.
            extensions (optional): Allowed extensions. Either ``extensions``
                or ``is_valid_file`` should be passed. Defaults to None.
            is_valid_file (optional): Predicate deciding whether a file is
                used; must not be combined with ``extensions``. Defaults to
                None.

        Raises:
            ValueError: In case ``class_to_idx`` is ``None`` or empty.
            ValueError: In case ``extensions`` and ``is_valid_file`` are None or both are not None.

        Returns:
            List[Tuple[str, int]]: samples of a form (path_to_sample, class)
        """
        if class_to_idx is not None:
            return make_dataset(
                directory,
                class_to_idx,
                extensions=extensions,
                is_valid_file=is_valid_file)
        # Passing None would make the module-level make_dataset() fall back
        # to the module-level find_classes(), bypassing a possibly
        # overridden find_classes() method - refuse instead.
        raise ValueError("The class_to_idx parameter cannot be None.")

    def find_classes(self, directory: str) -> Tuple[List[str], Dict[str, int]]:
        """Find the class folders of a dataset laid out as::

            directory/
                class_x/
                    xxx.ext
                    ...
                class_y/
                    123.ext
                    ...

        Override this method to use only a subset of the classes or a
        different directory structure. The default delegates to the
        module-level ``find_classes``.

        Args:
            directory(str): Root directory path, corresponding to ``self.root``

        Raises:
            FileNotFoundError: If ``directory`` has no class folders.

        Returns:
            (Tuple[List[str], Dict[str, int]]): List of all classes and dictionary mapping each class to an index.
        """
        return find_classes(directory)

    def __getitem__(self, index: int) -> Tuple[Any, Any]:
        """
        Args:
            index (int): Index

        Returns:
            tuple: (sample, target) where target is class_index of the target class.
        """
        sample_path, target = self.samples[index]
        sample = self.loader(sample_path)
        if self.transform is not None:
            sample = self.transform(sample)
        if self.target_transform is not None:
            target = self.target_transform(target)
        return sample, target

    def __len__(self) -> int:
        """Number of indexed samples."""
        return len(self.samples)
# Lowercase file extensions treated as images by is_image_file() and used as
# the default extension filter of ImageFolder.
IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif',
                  '.tiff', '.webp')
def pil_loader(path: str) -> Image.Image:
    """Load the image at ``path`` with PIL and convert it to RGB."""
    # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
    with open(path, 'rb') as handle:
        return Image.open(handle).convert('RGB')
# TODO: specify the return type
def accimage_loader(path: str) -> Any:
    """Load ``path`` with ``accimage``, falling back to ``pil_loader``.

    The fallback is taken when ``accimage.Image`` raises ``IOError``.
    """
    import accimage
    try:
        image = accimage.Image(path)
    except IOError:
        # Potentially a decoding problem, fall back to PIL.Image
        image = pil_loader(path)
    return image
def default_loader(path: str) -> Any:
    """Default image loader: delegates to ``pil_loader``."""
    image = pil_loader(path)
    return image
class ImageFolder(DatasetFolder):
    """Image dataset read from one folder per class, e.g.::

        root/dog/xxx.png
        root/dog/[...]/xxz.png
        root/cat/123.png
        root/cat/[...]/asd932_.png

    This class inherits from :class:`DatasetFolder`, so the same methods can
    be overridden to customize the dataset.

    Args:
        root (string): Root directory path.
        transform (callable, optional): A function/transform that takes in an PIL image
            and returns a transformed version. E.g, ``transforms.RandomCrop``
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        loader (callable, optional): A function to load an image given its path.
        is_valid_file (callable, optional): Predicate deciding whether an
            image file is used (e.g. to skip corrupt files). When omitted,
            files are filtered by ``IMG_EXTENSIONS`` instead.

    Attributes:
        classes (list): List of the class names sorted alphabetically.
        class_to_idx (dict): Dict with items (class_name, class_index).
        imgs (list): List of (image path, class_index) tuples
    """

    def __init__(
            self,
            root: str,
            transform: Optional[Callable]=None,
            target_transform: Optional[Callable]=None,
            loader: Callable[[str], Any]=default_loader,
            is_valid_file: Optional[Callable[[str], bool]]=None, ):
        # The extension filter and is_valid_file are mutually exclusive.
        if is_valid_file is None:
            extensions = IMG_EXTENSIONS
        else:
            extensions = None
        super().__init__(
            root,
            loader,
            extensions,
            transform=transform,
            target_transform=target_transform,
            is_valid_file=is_valid_file)
        # ``imgs`` is an alias of ``samples``.
        self.imgs = self.samples
import os
import torch
import torch.utils.data as data
from typing import Any, Callable, List, Optional, Tuple
class VisionDataset(data.Dataset):
    """
    Base Class For making datasets which are compatible with torchvision.
    It is necessary to override the ``__getitem__`` and ``__len__`` method.

    Args:
        root (string): Root directory of dataset.
        transforms (callable, optional): A function/transforms that takes in
            an image and a label and returns the transformed versions of both.
        transform (callable, optional): A function/transform that takes in an PIL image
            and returns a transformed version. E.g, ``transforms.RandomCrop``
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.

    .. note::

        :attr:`transforms` and the combination of :attr:`transform` and :attr:`target_transform` are mutually exclusive.
    """
    # Number of spaces used to indent the body lines of ``__repr__``.
    _repr_indent = 4

    def __init__(
            self,
            root: str,
            transforms: Optional[Callable]=None,
            transform: Optional[Callable]=None,
            target_transform: Optional[Callable]=None, ) -> None:
        """Store the root and the transforms.

        Raises:
            ValueError: If ``transforms`` is combined with ``transform`` or
                ``target_transform``.
        """
        # Use the builtin string types instead of the private
        # ``torch._six.string_classes``, which newer PyTorch no longer ships.
        if isinstance(root, (str, bytes)):
            root = os.path.expanduser(root)
        self.root = root

        has_transforms = transforms is not None
        has_separate_transform = transform is not None or target_transform is not None
        if has_transforms and has_separate_transform:
            raise ValueError(
                "Only transforms or transform/target_transform can "
                "be passed as argument")

        # for backwards-compatibility
        self.transform = transform
        self.target_transform = target_transform

        # Separate transforms are wrapped so ``self.transforms`` always takes
        # (input, target).
        if has_separate_transform:
            transforms = StandardTransform(transform, target_transform)
        self.transforms = transforms

    def __getitem__(self, index: int) -> Any:
        """
        Args:
            index (int): Index

        Returns:
            (Any): Sample and meta data, optionally transformed by the respective transforms.
        """
        raise NotImplementedError

    def __len__(self) -> int:
        """Number of samples; must be provided by subclasses."""
        raise NotImplementedError

    def __repr__(self) -> str:
        head = "Dataset " + self.__class__.__name__
        body = ["Number of datapoints: {}".format(self.__len__())]
        if self.root is not None:
            body.append("Root location: {}".format(self.root))
        body += self.extra_repr().splitlines()
        if hasattr(self, "transforms") and self.transforms is not None:
            body += [repr(self.transforms)]
        lines = [head] + [" " * self._repr_indent + line for line in body]
        return '\n'.join(lines)

    def _format_transform_repr(self, transform: Callable,
                               head: str) -> List[str]:
        """Prefix the first repr line of ``transform`` with ``head`` and
        align the remaining lines underneath it."""
        lines = transform.__repr__().splitlines()
        return (["{}{}".format(head, lines[0])] +
                ["{}{}".format(" " * len(head), line) for line in lines[1:]])

    def extra_repr(self) -> str:
        """Extra lines for ``__repr__``; empty by default."""
        return ""
class StandardTransform(object):
    """Pair of optional callables applied to ``(input, target)``.

    Args:
        transform (callable, optional): Applied to the input.
        target_transform (callable, optional): Applied to the target.
    """

    def __init__(self,
                 transform: Optional[Callable]=None,
                 target_transform: Optional[Callable]=None) -> None:
        self.transform = transform
        self.target_transform = target_transform

    def __call__(self, input: Any, target: Any) -> Tuple[Any, Any]:
        """Return ``(input, target)`` after applying the set transforms."""
        new_input = input if self.transform is None else self.transform(input)
        if self.target_transform is None:
            new_target = target
        else:
            new_target = self.target_transform(target)
        return new_input, new_target

    def _format_transform_repr(self, transform: Callable,
                               head: str) -> List[str]:
        """Prefix the first repr line of ``transform`` with ``head`` and
        align the remaining lines underneath it."""
        lines = transform.__repr__().splitlines()
        padding = " " * len(head)
        formatted = ["{}{}".format(head, lines[0])]
        formatted.extend("{}{}".format(padding, line) for line in lines[1:])
        return formatted

    def __repr__(self) -> str:
        body = [self.__class__.__name__]
        labelled = (
            (self.transform, "Transform: "),
            (self.target_transform, "Target transform: "), )
        for callable_, label in labelled:
            if callable_ is not None:
                body += self._format_transform_repr(callable_, label)
        return '\n'.join(body)
from .mobilenet_v3_torch import mobilenet_v3_large, mobilenet_v3_small
from collections import OrderedDict
from typing import Dict, Optional
from torch import nn
class IntermediateLayerGetter(nn.ModuleDict):
    """
    Module wrapper that returns the activations of selected direct children
    of a model.

    It strongly assumes that the modules were registered into the model in
    the same order as they are used. Consequently the same nn.Module must
    **not** be reused twice in the forward pass if this is to work.

    Only submodules that are directly assigned to the model can be queried:
    for a given `model`, `model.feature1` can be returned, but
    `model.feature1.layer2` cannot.

    Args:
        model (nn.Module): model on which we will extract the features
        return_layers (Dict[name, new_name]): a dict whose keys are the names
            of the modules for which the activations will be returned, and
            whose values are the names under which those activations are
            returned (chosen by the user).

    Examples::

        >>> m = torchvision.models.resnet18(pretrained=True)
        >>> # extract layer1 and layer3, giving as names `feat1` and `feat2`
        >>> new_m = torchvision.models._utils.IntermediateLayerGetter(m,
        >>>     {'layer1': 'feat1', 'layer3': 'feat2'})
        >>> out = new_m(torch.rand(1, 3, 224, 224))
        >>> print([(k, v.shape) for k, v in out.items()])
        >>>     [('feat1', torch.Size([1, 64, 56, 56])),
        >>>      ('feat2', torch.Size([1, 256, 14, 14]))]
    """
    _version = 2
    __annotations__ = {"return_layers": Dict[str, str], }

    def __init__(self, model: nn.Module,
                 return_layers: Dict[str, str]) -> None:
        child_names = [name for name, _ in model.named_children()]
        if not set(return_layers).issubset(child_names):
            raise ValueError("return_layers are not present in model")

        # Work on a stringified copy so the caller's mapping stays intact and
        # can be stored unchanged for use in forward().
        remaining = {str(k): str(v) for k, v in return_layers.items()}
        kept = OrderedDict()
        for name, module in model.named_children():
            kept[name] = module
            remaining.pop(name, None)
            # Children registered after the last requested layer are dropped.
            if not remaining:
                break

        super().__init__(kept)
        self.return_layers = return_layers

    def forward(self, x):
        """Run the kept children in order and collect the requested outputs."""
        collected = OrderedDict()
        for name, module in self.items():
            x = module(x)
            if name in self.return_layers:
                collected[self.return_layers[name]] = x
        return collected
def _make_divisible(v: float, divisor: int,
                    min_value: Optional[int] = None) -> int:
    """
    Round ``v`` to a multiple of ``divisor``, subject to a lower bound.

    This function is taken from the original tf repo, where it ensures that
    all layers have a channel number that is divisible by 8. It can be seen
    here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py

    Args:
        v (float): Value to round.
        divisor (int): The result is a multiple of this number whenever the
            lower bound is.
        min_value (int, optional): Lower bound for the result. Defaults to
            ``divisor`` when ``None``.

    Returns:
        int: The adjusted value.
    """
    lower_bound = divisor if min_value is None else min_value
    # Adding half a divisor before flooring rounds to the nearest multiple.
    nearest_multiple = int(v + divisor / 2) // divisor * divisor
    result = max(lower_bound, nearest_multiple)
    # Make sure that rounding down does not go down by more than 10%.
    return result + divisor if result < 0.9 * v else result
import warnings
from typing import Callable, List, Optional
import torch
from torch import Tensor
class Conv2d(torch.nn.Conv2d):
    """
    Deprecated subclass of ``torch.nn.Conv2d``.

    Construction forwards every argument to ``torch.nn.Conv2d`` and then
    emits a ``FutureWarning`` that names the replacement.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        message = (
            "torchvision.ops.misc.Conv2d is deprecated and will be "
            "removed in future versions, use torch.nn.Conv2d instead.")
        warnings.warn(message, FutureWarning)
class ConvTranspose2d(torch.nn.ConvTranspose2d):
    """
    Deprecated subclass of ``torch.nn.ConvTranspose2d``.

    Construction forwards every argument to ``torch.nn.ConvTranspose2d`` and
    then emits a ``FutureWarning`` that names the replacement.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        message = (
            "torchvision.ops.misc.ConvTranspose2d is deprecated and will be "
            "removed in future versions, use torch.nn.ConvTranspose2d instead.")
        warnings.warn(message, FutureWarning)
class BatchNorm2d(torch.nn.BatchNorm2d):
    """
    Deprecated subclass of ``torch.nn.BatchNorm2d``.

    Construction forwards every argument to ``torch.nn.BatchNorm2d`` and then
    emits a ``FutureWarning`` that names the replacement.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        message = (
            "torchvision.ops.misc.BatchNorm2d is deprecated and will be "
            "removed in future versions, use torch.nn.BatchNorm2d instead.")
        warnings.warn(message, FutureWarning)
# Module-level alias: binds the name ``interpolate`` to
# ``torch.nn.functional.interpolate`` so it can be imported from this module.
interpolate = torch.nn.functional.interpolate
# This is not in nn
class FrozenBatchNorm2d(torch.nn.Module):
    """
    BatchNorm2d where the batch statistics and the affine parameters are fixed.

    ``weight``, ``bias``, ``running_mean`` and ``running_var`` are registered
    as buffers, so the module has no trainable parameters.

    Args:
        num_features (int): Number of features ``C`` from an expected input of size ``(N, C, H, W)``
        eps (float): a value added to the denominator for numerical stability. Default: 1e-5
        n (int, optional): Deprecated name of ``num_features``. When given it
            takes precedence and a ``DeprecationWarning`` is emitted.
    """

    def __init__(
            self,
            num_features: int,
            eps: float = 1e-5,
            n: Optional[int] = None, ):
        # n=None for backward-compatibility
        if n is not None:
            warnings.warn(
                "`n` argument is deprecated and has been renamed `num_features`",
                DeprecationWarning)
            num_features = n
        super().__init__()
        self.eps = eps
        # Identity affine transform and unit statistics; registration order
        # determines the key order of the state dict.
        initial_buffers = (
            ("weight", torch.ones),
            ("bias", torch.zeros),
            ("running_mean", torch.zeros),
            ("running_var", torch.ones), )
        for buffer_name, factory in initial_buffers:
            self.register_buffer(buffer_name, factory(num_features))

    def _load_from_state_dict(
            self,
            state_dict: dict,
            prefix: str,
            local_metadata: dict,
            strict: bool,
            missing_keys: List[str],
            unexpected_keys: List[str],
            error_msgs: List[str], ):
        """
        Remove this module's ``num_batches_tracked`` entry from ``state_dict``
        (if present) and then delegate to the parent implementation.
        """
        state_dict.pop(prefix + "num_batches_tracked", None)
        super()._load_from_state_dict(state_dict, prefix, local_metadata,
                                      strict, missing_keys, unexpected_keys,
                                      error_msgs)

    def forward(self, x: Tensor) -> Tensor:
        """Apply ``x * scale + shift`` with per-channel ``scale`` and ``shift``."""
        # move reshapes to the beginning
        # to make it fuser-friendly
        weight = self.weight.reshape(1, -1, 1, 1)
        offset = self.bias.reshape(1, -1, 1, 1)
        variance = self.running_var.reshape(1, -1, 1, 1)
        mean = self.running_mean.reshape(1, -1, 1, 1)
        scale = weight * (variance + self.eps).rsqrt()
        shift = offset - mean * scale
        return x * scale + shift

    def __repr__(self) -> str:
        """Return ``ClassName(num_features, eps=...)``."""
        channels = self.weight.shape[0]
        return f"{self.__class__.__name__}({channels}, eps={self.eps})"
class ConvNormActivation(torch.nn.Sequential):
    """
    Configurable block used for Convolution-Normalization-Activation blocks.

    Args:
        in_channels (int): Number of channels in the input image
        out_channels (int): Number of channels produced by the Convolution-Normalization-Activation block
        kernel_size: (int, optional): Size of the convolving kernel. Default: 3
        stride (int, optional): Stride of the convolution. Default: 1
        padding (int, tuple or str, optional): Padding added to all four sides of the input. Default: None, in which case it will be calculated as ``padding = (kernel_size - 1) // 2 * dilation``
        groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
        norm_layer (Callable[..., torch.nn.Module], optional): Norm layer that will be stacked on top of the convolution layer. If ``None`` this layer won't be used. Default: ``torch.nn.BatchNorm2d``
        activation_layer (Callable[..., torch.nn.Module], optional): Activation function which will be stacked on top of the normalization layer (if not None), otherwise on top of the conv layer. If ``None`` this layer won't be used. Default: ``torch.nn.ReLU``
        dilation (int): Spacing between kernel elements. Default: 1
        inplace (bool, optional): Parameter for the activation layer, which can optionally do the operation in-place. Pass ``None`` to construct the activation without an ``inplace`` keyword, which is required for activation classes that do not accept one. Default ``True``
        bias (bool, optional): Whether to use bias in the convolution layer. By default, biases are included if ``norm_layer is None``.
    """

    def __init__(
            self,
            in_channels: int,
            out_channels: int,
            kernel_size: int = 3,
            stride: int = 1,
            padding: Optional[int] = None,
            groups: int = 1,
            norm_layer: Optional[Callable[
                ..., torch.nn.Module]] = torch.nn.BatchNorm2d,
            activation_layer: Optional[Callable[
                ..., torch.nn.Module]] = torch.nn.ReLU,
            dilation: int = 1,
            inplace: Optional[bool] = True,
            bias: Optional[bool] = None, ) -> None:
        if padding is None:
            # Default padding derived from kernel size and dilation.
            padding = (kernel_size - 1) // 2 * dilation
        if bias is None:
            # By default the conv only carries a bias when no norm layer follows.
            bias = norm_layer is None
        layers = [
            torch.nn.Conv2d(
                in_channels,
                out_channels,
                kernel_size,
                stride,
                padding,
                dilation=dilation,
                groups=groups,
                bias=bias, )
        ]
        if norm_layer is not None:
            layers.append(norm_layer(out_channels))
        if activation_layer is not None:
            # Only forward `inplace` when the caller supplied a value, so that
            # activations without such a parameter can still be constructed.
            activation_kwargs = {} if inplace is None else {"inplace": inplace}
            layers.append(activation_layer(**activation_kwargs))
        super().__init__(*layers)
        self.out_channels = out_channels
class SqueezeExcitation(torch.nn.Module):
    """
    This block implements the Squeeze-and-Excitation block from https://arxiv.org/abs/1709.01507 (see Fig. 1).
    Parameters ``activation`` and ``scale_activation`` correspond to ``delta`` and ``sigma`` in eq. 3.

    Args:
        input_channels (int): Number of channels in the input image
        squeeze_channels (int): Number of squeeze channels
        activation (Callable[..., torch.nn.Module], optional): ``delta`` activation. Default: ``torch.nn.ReLU``
        scale_activation (Callable[..., torch.nn.Module]): ``sigma`` activation. Default: ``torch.nn.Sigmoid``
    """

    def __init__(
            self,
            input_channels: int,
            squeeze_channels: int,
            activation: Callable[..., torch.nn.Module] = torch.nn.ReLU,
            scale_activation: Callable[..., torch.nn.Module] = torch.nn.Sigmoid,
    ) -> None:
        super().__init__()
        # Global average pool to a 1x1 map, then two 1x1 convolutions that
        # reduce to `squeeze_channels` and expand back to `input_channels`.
        self.avgpool = torch.nn.AdaptiveAvgPool2d(1)
        self.fc1 = torch.nn.Conv2d(
            in_channels=input_channels,
            out_channels=squeeze_channels,
            kernel_size=1)
        self.fc2 = torch.nn.Conv2d(
            in_channels=squeeze_channels,
            out_channels=input_channels,
            kernel_size=1)
        self.activation = activation()
        self.scale_activation = scale_activation()

    def _scale(self, input: Tensor) -> Tensor:
        """Compute the scaling tensor: pool -> fc1 -> activation -> fc2 -> scale_activation."""
        pooled = self.avgpool(input)
        squeezed = self.activation(self.fc1(pooled))
        return self.scale_activation(self.fc2(squeezed))

    def forward(self, input: Tensor) -> Tensor:
        """Return ``input`` multiplied by the scaling tensor from ``_scale``."""
        return self._scale(input) * input
reprod-log
\ No newline at end of file
[2021/12/22 20:08:46] root INFO: acc_top1:
[2021/12/22 20:08:46] root INFO: mean diff: check passed: True, value: 0.0
[2021/12/22 20:08:46] root INFO: acc_top5:
[2021/12/22 20:08:46] root INFO: mean diff: check passed: True, value: 0.0
[2021/12/22 20:08:46] root INFO: diff check passed
[2021/12/23 17:49:27] root INFO: loss_0:
[2021/12/23 17:49:27] root INFO: mean diff: check passed: False, value: 1.9073486328125e-06
[2021/12/23 17:49:27] root INFO: lr_0:
[2021/12/23 17:49:27] root INFO: mean diff: check passed: True, value: 0.0
[2021/12/23 17:49:27] root INFO: loss_1:
[2021/12/23 17:49:27] root INFO: mean diff: check passed: False, value: 2.384185791015625e-06
[2021/12/23 17:49:27] root INFO: lr_1:
[2021/12/23 17:49:27] root INFO: mean diff: check passed: True, value: 0.0
[2021/12/23 17:49:27] root INFO: loss_2:
[2021/12/23 17:49:27] root INFO: mean diff: check passed: False, value: 7.62939453125e-06
[2021/12/23 17:49:27] root INFO: lr_2:
[2021/12/23 17:49:27] root INFO: mean diff: check passed: True, value: 0.0
[2021/12/23 17:49:27] root INFO: loss_3:
[2021/12/23 17:49:27] root INFO: mean diff: check passed: False, value: 0.002070903778076172
[2021/12/23 17:49:27] root INFO: lr_3:
[2021/12/23 17:49:27] root INFO: mean diff: check passed: True, value: 0.0
[2021/12/23 17:49:27] root INFO: loss_4:
[2021/12/23 17:49:27] root INFO: mean diff: check passed: False, value: 0.002232074737548828
[2021/12/23 17:49:27] root INFO: lr_4:
[2021/12/23 17:49:27] root INFO: mean diff: check passed: True, value: 0.0
[2021/12/23 17:49:27] root INFO: loss_5:
[2021/12/23 17:49:27] root INFO: mean diff: check passed: False, value: 0.03954291343688965
[2021/12/23 17:49:27] root INFO: lr_5:
[2021/12/23 17:49:27] root INFO: mean diff: check passed: True, value: 0.0
[2021/12/23 17:49:27] root INFO: diff check failed
[2021/12/23 17:21:22] root INFO: length:
[2021/12/23 17:21:22] root INFO: mean diff: check passed: True, value: 0.0
[2021/12/23 17:21:22] root INFO: dataloader_0:
[2021/12/23 17:21:22] root INFO: mean diff: check passed: True, value: 0.0
[2021/12/23 17:21:22] root INFO: dataloader_1:
[2021/12/23 17:21:22] root INFO: mean diff: check passed: True, value: 0.0
[2021/12/23 17:21:22] root INFO: dataloader_2:
[2021/12/23 17:21:22] root INFO: mean diff: check passed: True, value: 0.0
[2021/12/23 17:21:22] root INFO: dataloader_3:
[2021/12/23 17:21:22] root INFO: mean diff: check passed: True, value: 0.0
[2021/12/23 17:21:22] root INFO: diff check passed
[2021/12/23 17:44:09] root INFO: logits:
[2021/12/23 17:44:09] root INFO: mean diff: check passed: False, value: 2.308018565599923e-06
[2021/12/23 17:44:09] root INFO: diff check failed
[2021/12/23 17:46:12] root INFO: loss:
[2021/12/23 17:46:12] root INFO: mean diff: check passed: True, value: 0.0
[2021/12/23 17:46:12] root INFO: diff check passed
[2021/12/23 17:45:32] root INFO: acc_top1:
[2021/12/23 17:45:32] root INFO: mean diff: check passed: True, value: 0.0
[2021/12/23 17:45:32] root INFO: acc_top5:
[2021/12/23 17:45:32] root INFO: mean diff: check passed: True, value: 0.0
[2021/12/23 17:45:32] root INFO: diff check passed
import numpy as np
def gen_fake_data():
    """
    Write a random input array and a label array to the current directory.

    ``fake_data.npy`` receives a float32 array of shape ``(1, 3, 224, 224)``
    drawn from ``np.random.rand`` and shifted down by 0.5;
    ``fake_label.npy`` receives the int64 array ``[0]``.
    """
    image_batch = np.random.rand(1, 3, 224, 224).astype(np.float32) - 0.5
    labels = np.arange(1).astype(np.int64)
    outputs = (("fake_data.npy", image_batch), ("fake_label.npy", labels))
    for filename, array in outputs:
        np.save(filename, array)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册