提交 2b7cbf12 编写于 作者: A Aston Zhang

num_workers=4 for windows

上级 9e8ce8ba
......@@ -10,7 +10,6 @@ import gluonbook as gb
from mxnet import gluon, image, init, nd
from mxnet.gluon import data as gdata, loss as gloss, model_zoo, nn
import numpy as np
import sys
```
## 转置卷积层
......@@ -161,13 +160,12 @@ for i, cm in enumerate(gb.VOC_COLORMAP):
colormap2label[(cm[0] * 256 + cm[1]) * 256 + cm[2]] = i
voc_dir = gb.download_voc_pascal(data_dir='../data')
num_workers = 0 if sys.platform.startswith('win32') else 4
train_iter = gdata.DataLoader(
gb.VOCSegDataset(True, crop_size, voc_dir, colormap2label), batch_size,
shuffle=True, last_batch='discard', num_workers=num_workers)
shuffle=True, last_batch='discard', num_workers=4)
test_iter = gdata.DataLoader(
gb.VOCSegDataset(False, crop_size, voc_dir, colormap2label), batch_size,
last_batch='discard', num_workers=num_workers)
last_batch='discard', num_workers=4)
```
## 训练
......
......@@ -10,7 +10,6 @@ import gluonbook as gb
import mxnet as mx
from mxnet import autograd, gluon, image, init, nd
from mxnet.gluon import data as gdata, loss as gloss, utils as gutils
import sys
from time import time
```
......@@ -125,11 +124,10 @@ no_aug = gdata.vision.transforms.Compose([
接下来我们定义一个辅助函数来方便读取图像并应用图像增广。Gluon的数据集提供的`transform_first`函数将图像增广应用在每个训练样本(图像和标签)的第一个元素,即图像之上。有关`DataLoader`的详细介绍,可参考更早的[“图像分类数据集(Fashion-MNIST)”](../chapter_deep-learning-basics/fashion-mnist.md)一节。
```{.python .input n=34}
num_workers = 0 if sys.platform.startswith('win32') else 4
def load_cifar10(is_train, augs, batch_size):
return gdata.DataLoader(
gdata.vision.CIFAR10(train=is_train).transform_first(augs),
batch_size=batch_size, shuffle=is_train, num_workers=num_workers)
batch_size=batch_size, shuffle=is_train, num_workers=4)
```
### 使用多GPU训练模型
......
......@@ -23,7 +23,6 @@ import gluonbook as gb
from mxnet import gluon, image, nd
from mxnet.gluon import data as gdata, utils as gutils
import os
import sys
import tarfile
```
......@@ -177,11 +176,10 @@ voc_test = VOCSegDataset(False, crop_size, voc_dir, colormap2label)
```{.python .input n=11}
batch_size = 64
num_workers = 0 if sys.platform.startswith('win32') else 4
train_iter = gdata.DataLoader(voc_train, batch_size, shuffle=True,
last_batch='discard', num_workers=num_workers)
last_batch='discard', num_workers=4)
test_iter = gdata.DataLoader(voc_test, batch_size, last_batch='discard',
num_workers=num_workers)
num_workers=4)
```
打印第一个小批量的形状。不同于图像分类和目标识别,这里的标签是一个三维的数组。
......
......@@ -57,7 +57,6 @@ import gluonbook as gb
from mxnet import gluon, init, nd
from mxnet.gluon import data as gdata, nn
import os
import sys
net = nn.Sequential()
# 使用较大的 11 x 11 窗口来捕获物体。同时使用步幅 4 来较大减小输出高和宽。
......@@ -106,13 +105,10 @@ def load_data_fashion_mnist(batch_size, resize=None, root=os.path.join(
transformer = gdata.vision.transforms.Compose(transformer)
mnist_train = gdata.vision.FashionMNIST(root=root, train=True)
mnist_test = gdata.vision.FashionMNIST(root=root, train=False)
num_workers = 0 if sys.platform.startswith('win32') else 4
train_iter = gdata.DataLoader(
mnist_train.transform_first(transformer), batch_size, shuffle=True,
num_workers=num_workers)
test_iter = gdata.DataLoader(
mnist_test.transform_first(transformer), batch_size, shuffle=False,
num_workers=num_workers)
train_iter = gdata.DataLoader(mnist_train.transform_first(transformer),
batch_size, shuffle=True, num_workers=4)
test_iter = gdata.DataLoader(mnist_test.transform_first(transformer),
batch_size, shuffle=False, num_workers=4)
return train_iter, test_iter
batch_size = 128
......
......@@ -10,7 +10,6 @@
%matplotlib inline
import gluonbook as gb
from mxnet.gluon import data as gdata
import sys
import time
```
......@@ -81,24 +80,17 @@ show_fashion_mnist(X, get_fashion_mnist_labels(y))
我们将在训练数据集上训练模型,并将训练好的模型在测试数据集上评价模型的表现。虽然我们可以像[“线性回归的从零开始实现”](linear-regression-scratch.md)一节中那样通过`yield`来定义读取小批量数据样本的函数,但为了代码简洁,这里我们直接创建`DataLoader`实例。该实例每次读取一个样本数为`batch_size`的小批量数据。这里的批量大小`batch_size`是一个超参数。
在实践中,数据读取经常是训练的性能瓶颈,特别当模型较简单或者计算硬件性能较高时。Gluon的`DataLoader`中一个很方便的功能是允许使用多进程来加速数据读取(暂不支持Windows操作系统)。这里我们通过参数`num_workers`来设置4个进程读取数据。
在实践中,数据读取经常是训练的性能瓶颈,特别当模型较简单或者计算硬件性能较高时。Gluon的`DataLoader`中一个很方便的功能是允许使用多进程来加速数据读取。这里我们通过参数`num_workers`来设置4个进程读取数据。
此外,我们通过`ToTensor`类将图像数据从uint8格式变换成32位浮点数格式,并除以255使得所有像素的数值均在0到1之间。`ToTensor`类还将图像通道从最后一维移到最前一维来方便之后介绍的卷积神经网络计算。通过数据集的`transform_first`函数,我们将`ToTensor`的变换应用在每个数据样本(图像和标签)的第一个元素,即图像之上。
```{.python .input n=28}
batch_size = 256
transformer = gdata.vision.transforms.ToTensor()
if sys.platform.startswith('win'):
num_workers = 0 # 0 表示不用额外的进程来加速读取数据。
else:
num_workers = 4
train_iter = gdata.DataLoader(mnist_train.transform_first(transformer),
batch_size, shuffle=True,
num_workers=num_workers)
batch_size, shuffle=True, num_workers=4)
test_iter = gdata.DataLoader(mnist_test.transform_first(transformer),
batch_size, shuffle=False,
num_workers=num_workers)
batch_size, shuffle=False, num_workers=4)
```
我们将获取并读取Fashion-MNIST数据集的逻辑封装在`gluonbook.load_data_fashion_mnist`函数中供后面章节调用。该函数将返回`train_iter`和`test_iter`两个变量。随着本书内容的不断深入,我们会进一步改进该函数。它的完整实现将在[“深度卷积神经网络(AlexNet)”](../chapter_convolutional-neural-networks/alexnet.md)一节中描述。
......@@ -120,7 +112,7 @@ for X, y in train_iter:
## 练习
* 减小`batch_size`(例如到1)会影响读取性能吗?
* 非Windows用户请尝试修改`num_workers`查看它对读取性能的影响。
* 修改`num_workers`查看它对读取性能的影响。
* 查看MXNet文档,`gdata.vision`里还提供了哪些别的数据集?
* 查看MXNet文档,`gdata.vision.transforms`还提供了哪些别的变换方法?
......
......@@ -182,10 +182,9 @@ def batchify(data):
```{.python .input n=14}
batch_size = 512
num_workers = 0 if sys.platform.startswith('win32') else 4
dataset = gdata.ArrayDataset(all_centers, all_contexts, all_negatives)
data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True,
batchify_fn=batchify, num_workers=num_workers)
batchify_fn=batchify, num_workers=4)
for batch in data_iter:
for name, data in zip(['centers', 'contexts_negatives', 'masks',
'labels'], batch):
......
......@@ -2,7 +2,6 @@ import collections
import math
import os
import random
import sys
import tarfile
import time
import zipfile
......@@ -235,14 +234,11 @@ def load_data_fashion_mnist(batch_size, resize=None, root=os.path.join(
mnist_train = gdata.vision.FashionMNIST(root=root, train=True)
mnist_test = gdata.vision.FashionMNIST(root=root, train=False)
num_workers = 0 if sys.platform.startswith('win32') else 4
train_iter = gdata.DataLoader(mnist_train.transform_first(transformer),
batch_size, shuffle=True,
num_workers=num_workers)
batch_size, shuffle=True, num_workers=4)
test_iter = gdata.DataLoader(mnist_test.transform_first(transformer),
batch_size, shuffle=False,
num_workers=num_workers)
batch_size, shuffle=False, num_workers=4)
return train_iter, test_iter
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册