update utils

3d677233 · Aston Zhang · 59d002f6 · 3d677233 · 3d677233
隐藏空白更改
内联并排

Showing with 21 addition and 21 deletion

chapter_computer-vision/image-augmentation.md chapter_computer-vision/image-augmentation.md +20 -20

gluonbook/utils.py gluonbook/utils.py +1 -1

未找到文件。
--- a/chapter_computer-vision/image-augmentation.md
+++ b/chapter_computer-vision/image-augmentation.md
@@ -4,7 +4,7 @@

 首先，导入本节实验所需的包或模块。

-```{.python .input  n=1}
+```{.python .input  n=21}
 %matplotlib inline
 import sys
 sys.path.insert(0, '..')
@@ -19,7 +19,7 @@ from time import time

 我们先读取一张$400\times 500$的图片作为样例。

-```{.python .input  n=2}
+```{.python .input  n=22}
 gb.set_figsize()
 img = image.imread('../img/cat1.jpg')
 gb.plt.imshow(img.asnumpy())
@@ -27,7 +27,7 @@ gb.plt.imshow(img.asnumpy())

 下面定义绘图函数`show_images`。该函数也被定义在`gluonbook`包中供后面章节调用。

-```{.python .input}
+```{.python .input  n=23}
 def show_images(imgs, num_rows, num_cols, scale=2):                                                                              
    """Plot a list of images."""
    figsize = (num_cols * scale, num_rows * scale)
@@ -42,7 +42,7 @@ def show_images(imgs, num_rows, num_cols, scale=2):

 大部分的增广方法都有一定的随机性，因此我们接下来定义一个辅助函数，它对输入图片`img`多次运行增广方法`aug`并显示所有结果。

-```{.python .input  n=3}
+```{.python .input  n=24}
 def apply(img, aug, num_rows=2, num_cols=4, scale=1.5):
    Y = [aug(img) for _ in range(num_rows * num_cols)]
    show_images(Y, num_rows, num_cols, scale)
@@ -52,13 +52,13 @@ def apply(img, aug, num_rows=2, num_cols=4, scale=1.5):

 左右翻转图片通常不改变物体的类别，它是最早也是最广泛使用的一种增广。下面我们使用`transforms`模块里的`RandomFlipLeftRight`类来实现按0.5的概率左右翻转图片：

-```{.python .input  n=4}
+```{.python .input  n=25}
 apply(img, gdata.vision.transforms.RandomFlipLeftRight())
 ```

 上下翻转不如水平翻转通用，但是至少对于样例图片，上下翻转不会造成识别障碍。

-```{.python .input  n=5}
+```{.python .input  n=26}
 apply(img, gdata.vision.transforms.RandomFlipTopBottom())
 ```

@@ -66,7 +66,7 @@ apply(img, gdata.vision.transforms.RandomFlipTopBottom())

 下面代码里我们每次随机裁剪一片面积为原面积10%到100%的区域，其宽和高的比例在0.5和2之间，然后再将高宽缩放到200像素大小。

-```{.python .input  n=6}
+```{.python .input  n=27}
 shape_aug = gdata.vision.transforms.RandomResizedCrop(
    (200, 200), scale=(0.1, 1), ratio=(0.5, 2))
 apply(img, shape_aug)
@@ -76,19 +76,19 @@ apply(img, shape_aug)

 另一类增广方法是变化颜色。我们可以从四个维度改变图片的颜色：亮度、对比度、饱和度和色相。在下面的例子里，我们将图片的亮度随机变化为原图的50%到150%。

-```{.python .input  n=7}
+```{.python .input  n=28}
 apply(img, gdata.vision.transforms.RandomBrightness(0.5))
 ```

 类似的，我们可以修改色相。

-```{.python .input  n=8}
+```{.python .input  n=29}
 apply(img, gdata.vision.transforms.RandomHue(0.5))
 ```

 我们也可以使用`RandomColorJitter`来同时随机改变图片的亮度、对比度、饱和度和色相。

-```{.python .input  n=9}
+```{.python .input  n=30}
 color_aug = gdata.vision.transforms.RandomColorJitter(
    brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5)
 apply(img, color_aug)
@@ -98,7 +98,7 @@ apply(img, color_aug)

 实际应用中我们会将多个增广叠加使用。`Compose`类可以将多个增广串联起来。

-```{.python .input  n=10}
+```{.python .input  n=31}
 augs = gdata.vision.transforms.Compose([
    gdata.vision.transforms.RandomFlipLeftRight(), color_aug, shape_aug])
 apply(img, augs)
@@ -108,13 +108,13 @@ apply(img, augs)

 接下来我们来看一个将图片增广应用在实际训练中的例子，并比较其与不使用时的区别。这里我们使用CIFAR-10数据集，而不是之前我们一直使用的Fashion-MNIST。原因在于Fashion-MNIST中物体位置和尺寸都已经归一化了，而CIFAR-10中物体颜色和大小区别更加显著。下面我们展示CIFAR-10中的前32张训练图片。

-```{.python .input  n=11}
+```{.python .input  n=32}
 show_images(gdata.vision.CIFAR10(train=True)[0:32][0], 4, 8, scale=0.8);
 ```

 我们通常将图片增广用在训练样本上，但是在预测的时候并不使用随机增广。这里我们仅仅使用最简单的随机水平翻转。此外，我们使用`ToTensor`变换来将图片转成MXNet需要的格式，即格式为（批量，通道，高，宽）以及类型为32位浮点数。

-```{.python .input  n=12}
+```{.python .input  n=33}
 train_augs = gdata.vision.transforms.Compose([
    gdata.vision.transforms.RandomFlipLeftRight(),
    gdata.vision.transforms.ToTensor(),
@@ -127,7 +127,7 @@ test_augs = gdata.vision.transforms.Compose([

 接下来我们定义一个辅助函数来方便读取图片并应用增广。Gluon的数据集提供`transform_first`函数来对数据里面的第一项（数据一般有图片和标签两项）应用增广。另外图片增广将增加计算复杂度，我们使用两个额外CPU进程来加速计算。

-```{.python .input  n=13}
+```{.python .input  n=34}
 def load_cifar10(is_train, augs, batch_size):
    return gdata.DataLoader(
        gdata.vision.CIFAR10(train=is_train).transform_first(augs),
@@ -140,7 +140,7 @@ def load_cifar10(is_train, augs, batch_size):

 首先，我们定义`try_all_gpus`函数，从而能够使用所有可用的GPU。

-```{.python .input  n=14}
+```{.python .input  n=35}
 def try_all_gpus():
    ctxes = []
    try:
@@ -157,7 +157,7 @@ def try_all_gpus():

 然后，我们定义`evaluate_accuracy`函数评价模型的分类准确率。与[“Softmax回归的从零开始实现”](../chapter_deep-learning-basics/softmax-regression-scratch.md)和[“卷积神经网络（LeNet）”](../chapter_convolutional-neural-networks/lenet.md)两节中描述的`evaluate_accuracy`函数不同，当`ctx`包含多个GPU时，这里定义的函数通过辅助函数`_get_batch`将小批量数据样本划分并复制到各个GPU上。

-```{.python .input  n=15}
+```{.python .input  n=36}
 def _get_batch(batch, ctx):
    features, labels = batch
    if labels.dtype != features.dtype:
@@ -184,7 +184,7 @@ def evaluate_accuracy(data_iter, net, ctx=[mx.cpu()]):

 接下来，我们定义`train`函数使用多GPU训练并评价模型。

-```{.python .input  n=16}
+```{.python .input  n=37}
 def train(train_iter, test_iter, net, loss, trainer, ctx, num_epochs):
    print('training on', ctx)
    if isinstance(ctx, mx.Context):
@@ -215,7 +215,7 @@ def train(train_iter, test_iter, net, loss, trainer, ctx, num_epochs):

 现在，我们可以定义函数使用图片增广来训练模型了。

-```{.python .input  n=17}
+```{.python .input  n=38}
 def train_with_data_aug(train_augs, test_augs, lr=0.01):
    batch_size = 256
    ctx = try_all_gpus()
@@ -231,13 +231,13 @@ def train_with_data_aug(train_augs, test_augs, lr=0.01):

 我们先观察使用了图片增广的结果。

-```{.python .input  n=18}
+```{.python .input  n=39}
 train_with_data_aug(train_augs, test_augs)
 ```

 作为对比，我们尝试只对训练数据做中间剪裁。

-```{.python .input  n=19}
+```{.python .input  n=40}
 train_with_data_aug(test_augs, test_augs)
 ```


--- a/gluonbook/utils.py
+++ b/gluonbook/utils.py
 import random
 import os
+import sys
 import tarfile
 from time import time

@@ -9,7 +10,6 @@ import mxnet as mx
 from mxnet import autograd, gluon, image, nd
 from mxnet.gluon import nn, data as gdata, loss as gloss, utils as gutils
 import numpy as np
-import sys


 voc_classes = ['background', 'aeroplane', 'bicycle', 'bird', 'boat',