mxnet.py 80.6 KB
Newer Older
X
xiaotinghe 已提交
1 2
#################   WARNING   ################
# The below part is generated automatically through:
#    d2lbook build lib
# Don't edit it directly

import collections
A
Aston Zhang 已提交
7
import hashlib
A
Aston Zhang 已提交
8 9 10 11 12 13 14 15
import math
import os
import random
import re
import shutil
import sys
import tarfile
import time
X
xiaotinghe 已提交
16
import zipfile
A
Aston Zhang 已提交
17 18 19 20 21 22
from collections import defaultdict
import pandas as pd
import requests
from IPython import display
from matplotlib import pyplot as plt

A
Aston Zhang 已提交
23 24 25 26 27
# Alias this very module as `d2l`, so the definitions below can refer to
# each other through the same `d2l.<name>` prefix.
d2l = sys.modules[__name__]

from mxnet import autograd, context, gluon, image, init, np, npx
from mxnet.gluon import nn, rnn

A
Aston Zhang 已提交
28
def use_svg_display():
    """Use the svg format to display plots in Jupyter.

    Defined in :numref:`sec_calculus`"""
    # NOTE(review): newer IPython releases appear to deprecate this helper
    # in favour of the `matplotlib_inline` package -- confirm before upgrading.
    figure_format = 'svg'
    display.set_matplotlib_formats(figure_format)
A
Aston Zhang 已提交
33

A
Aston Zhang 已提交
34
def set_figsize(figsize=(3.5, 2.5)):
    """Set the default figure size for matplotlib.

    Switches the display format to svg first, then stores `figsize` under
    the `figure.figsize` rcParam.

    Defined in :numref:`sec_calculus`"""
    use_svg_display()
    rc_params = d2l.plt.rcParams
    rc_params['figure.figsize'] = figsize

def set_axes(axes, xlabel, ylabel, xlim, ylim, xscale, yscale, legend):
    """Configure the labels, scales, limits, legend and grid of `axes`.

    The legend is only drawn when `legend` is truthy; the grid is always
    switched on.

    Defined in :numref:`sec_calculus`"""
    # Applied in this exact order: labels, then scales, then limits.
    settings = (
        ('set_xlabel', xlabel),
        ('set_ylabel', ylabel),
        ('set_xscale', xscale),
        ('set_yscale', yscale),
        ('set_xlim', xlim),
        ('set_ylim', ylim),
    )
    for setter_name, value in settings:
        getattr(axes, setter_name)(value)
    if legend:
        axes.legend(legend)
    axes.grid()

def plot(X, Y=None, xlabel=None, ylabel=None, legend=None, xlim=None,
         ylim=None, xscale='linear', yscale='linear',
         fmts=('-', 'm--', 'g-.', 'r:'), figsize=(3.5, 2.5), axes=None):
    """Plot data points.

    `X` and `Y` may each be a single series or a collection of series.
    When `Y` is omitted, `X` is treated as the y-values and plotted against
    its index. Curves are drawn on `axes`, or on the current matplotlib
    axes when `axes` is falsy.

    Defined in :numref:`sec_calculus`"""
    legend = [] if legend is None else legend

    set_figsize(figsize)
    if not axes:
        axes = d2l.plt.gca()

    def is_single_series(data):
        # True for a 1-D array-like, or for a list whose first element is a
        # scalar (i.e. has no length).
        if hasattr(data, "ndim") and data.ndim == 1:
            return True
        return isinstance(data, list) and not hasattr(data[0], "__len__")

    if is_single_series(X):
        X = [X]
    if Y is None:
        # Only one argument was given: it holds the y-values.
        X, Y = [[]] * len(X), X
    elif is_single_series(Y):
        Y = [Y]
    if len(X) != len(Y):
        # Repeat the x-values so that there is one per y-series.
        X = X * len(Y)
    axes.cla()
    for x_values, y_values, fmt in zip(X, Y, fmts):
        if len(x_values):
            axes.plot(x_values, y_values, fmt)
        else:
            axes.plot(y_values, fmt)
    set_axes(axes, xlabel, ylabel, xlim, ylim, xscale, yscale, legend)

A
Aston Zhang 已提交
88
class Timer:
    """Record multiple running times."""
    def __init__(self):
        """Defined in :numref:`subsec_linear_model`"""
        self.times = []
        self.start()

    def start(self):
        """Start the timer."""
        self.tik = time.time()

    def stop(self):
        """Stop the timer, record the elapsed time in the list and return it."""
        elapsed = time.time() - self.tik
        self.times.append(elapsed)
        return elapsed

    def avg(self):
        """Return the average recorded time."""
        total = sum(self.times)
        return total / len(self.times)

    def sum(self):
        """Return the sum of the recorded times."""
        # `sum` here resolves to the builtin, not to this method.
        return sum(self.times)

    def cumsum(self):
        """Return the accumulated recorded times as a list."""
        recorded = np.array(self.times)
        return recorded.cumsum().tolist()

A
Aston Zhang 已提交
116
def synthetic_data(w, b, num_examples):
    """Generate y = Xw + b + noise.

    Features are drawn from `d2l.normal(0, 1, ...)` and the noise from
    `d2l.normal(0, 0.01, ...)`. Returns the features and the labels
    reshaped to a single column.

    Defined in :numref:`sec_linear_scratch`"""
    features = d2l.normal(0, 1, (num_examples, len(w)))
    labels = d2l.matmul(features, w) + b
    noise = d2l.normal(0, 0.01, labels.shape)
    labels += noise
    return features, d2l.reshape(labels, (-1, 1))

A
Aston Zhang 已提交
125
def linreg(X, w, b):
    """The linear regression model: `matmul(X, w) + b`.

    Defined in :numref:`sec_linear_scratch`"""
    weighted = d2l.matmul(X, w)
    return weighted + b
A
Aston Zhang 已提交
130

A
Aston Zhang 已提交
131
def squared_loss(y_hat, y):
    """Squared loss, halved, computed element-wise.

    `y` is reshaped to the shape of `y_hat` before subtracting.

    Defined in :numref:`sec_linear_scratch`"""
    residual = y_hat - d2l.reshape(y, y_hat.shape)
    return residual ** 2 / 2
A
Aston Zhang 已提交
136

A
Aston Zhang 已提交
137
def sgd(params, lr, batch_size):
    """Minibatch stochastic gradient descent.

    Updates every entry of `params` in place using its `.grad`, scaled by
    `lr / batch_size`.

    Defined in :numref:`sec_linear_scratch`"""
    for param in params:
        step = lr * param.grad / batch_size
        # Slice assignment writes into the existing array.
        param[:] = param - step

A
Aston Zhang 已提交
144
def load_array(data_arrays, batch_size, is_train=True):
    """Construct a Gluon data iterator.

    The arrays in `data_arrays` are wrapped in an `ArrayDataset`; batches
    are shuffled only when `is_train` is true.

    Defined in :numref:`sec_linear_concise`"""
    dataset = gluon.data.ArrayDataset(*data_arrays)
    loader = gluon.data.DataLoader(dataset, batch_size, shuffle=is_train)
    return loader

A
Aston Zhang 已提交
151
def get_fashion_mnist_labels(labels):
    """Return the text labels for the Fashion-MNIST dataset.

    Each entry of `labels` is converted with `int()` and used as an index
    into the ten class names.

    Defined in :numref:`sec_fashion_mnist`"""
    class_names = ('t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot')
    names = []
    for label in labels:
        names.append(class_names[int(label)])
    return names
A
Aston Zhang 已提交
158

A
Aston Zhang 已提交
159
def show_images(imgs, num_rows, num_cols, titles=None, scale=1.5):
    """Plot a list of images on a `num_rows` x `num_cols` grid.

    Axis ticks are hidden for every drawn image. Returns the flattened
    array of axes.

    Defined in :numref:`sec_fashion_mnist`"""
    width, height = num_cols * scale, num_rows * scale
    _, axes = d2l.plt.subplots(num_rows, num_cols, figsize=(width, height))
    axes = axes.flatten()
    for idx, (ax, img) in enumerate(zip(axes, imgs)):
        ax.imshow(d2l.numpy(img))
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        if titles:
            ax.set_title(titles[idx])
    return axes

A
Aston Zhang 已提交
174
def get_dataloader_workers():
    """Use 4 processes to read the data, except on Windows where 0 is used.

    Defined in :numref:`sec_fashion_mnist`"""
    if sys.platform.startswith('win'):
        return 0
    return 4
A
Aston Zhang 已提交
179

A
Aston Zhang 已提交
180
def load_data_fashion_mnist(batch_size, resize=None):
    """Download the Fashion-MNIST dataset and then load it into memory.

    When `resize` is truthy, a `Resize(resize)` transform is applied before
    `ToTensor`. Returns `(train_loader, test_loader)`; only the training
    loader shuffles.

    Defined in :numref:`sec_fashion_mnist`"""
    vision = gluon.data.vision
    steps = [vision.transforms.ToTensor()]
    if resize:
        # Resizing has to come first in the pipeline.
        steps.insert(0, vision.transforms.Resize(resize))
    transform = vision.transforms.Compose(steps)
    train_set = vision.FashionMNIST(train=True).transform_first(transform)
    test_set = vision.FashionMNIST(train=False).transform_first(transform)
    train_loader = gluon.data.DataLoader(
        train_set, batch_size, shuffle=True,
        num_workers=get_dataloader_workers())
    test_loader = gluon.data.DataLoader(
        test_set, batch_size, shuffle=False,
        num_workers=get_dataloader_workers())
    return (train_loader, test_loader)

A
Aston Zhang 已提交
196
def accuracy(y_hat, y):
    """Compute the number of correct predictions.

    If `y_hat` has more than one axis and more than one column, it is first
    reduced with `argmax` along axis 1. The count is returned as a float.

    Defined in :numref:`sec_softmax_scratch`"""
    has_class_columns = len(y_hat.shape) > 1 and y_hat.shape[1] > 1
    if has_class_columns:
        y_hat = d2l.argmax(y_hat, axis=1)
    # Compare in the dtype of `y` so that `==` sees matching types.
    matches = d2l.astype(y_hat, y.dtype) == y
    num_correct = d2l.reduce_sum(d2l.astype(matches, y.dtype))
    return float(num_correct)

A
Aston Zhang 已提交
205
def evaluate_accuracy(net, data_iter):
    """Compute the accuracy of a model on the given dataset.

    Defined in :numref:`sec_softmax_scratch`"""
    # Slot 0: number of correct predictions, slot 1: number of predictions.
    metric = Accumulator(2)
    for X, y in data_iter:
        num_correct = accuracy(net(X), y)
        metric.add(num_correct, d2l.size(y))
    return metric[0] / metric[1]

A
Aston Zhang 已提交
214
class Accumulator:
    """Accumulate sums over `n` variables."""
    def __init__(self, n):
        """Defined in :numref:`sec_softmax_scratch`"""
        self.data = [0.0 for _ in range(n)]

    def add(self, *args):
        # Each argument is converted to float and added to the slot at the
        # same position; `zip` stops at the shorter of the two sequences.
        updated = []
        for running_total, increment in zip(self.data, args):
            updated.append(running_total + float(increment))
        self.data = updated

    def reset(self):
        # Back to zeros, keeping the current number of slots.
        self.data = [0.0 for _ in self.data]

    def __getitem__(self, idx):
        return self.data[idx]

A
Aston Zhang 已提交
229
def train_epoch_ch3(net, train_iter, loss, updater):
    """Train a model for one epoch (defined in Chapter 3).

    `updater` is either a `gluon.Trainer` (its `step` method is used) or a
    callable that takes the batch size. Returns `(average training loss,
    training accuracy)`.

    Defined in :numref:`sec_softmax_scratch`"""
    # Slots: sum of training loss, number of correct predictions,
    # number of examples.
    metric = Accumulator(3)
    if isinstance(updater, gluon.Trainer):
        updater = updater.step
    for X, y in train_iter:
        # Compute gradients and update the parameters.
        with autograd.record():
            y_hat = net(X)
            batch_loss = loss(y_hat, y)
        batch_loss.backward()
        updater(X.shape[0])
        metric.add(float(batch_loss.sum()), accuracy(y_hat, y), y.size)
    # Training loss and training accuracy.
    avg_loss = metric[0] / metric[2]
    avg_acc = metric[1] / metric[2]
    return avg_loss, avg_acc

A
Aston Zhang 已提交
248
class Animator:
    """Plot data in an animation, redrawing the figure on every `add`."""
    def __init__(self, xlabel=None, ylabel=None, legend=None, xlim=None,
                 ylim=None, xscale='linear', yscale='linear',
                 fmts=('-', 'm--', 'g-.', 'r:'), nrows=1, ncols=1,
                 figsize=(3.5, 2.5)):
        """Defined in :numref:`sec_softmax_scratch`"""
        # Lines are drawn incrementally, one point at a time.
        legend = [] if legend is None else legend
        d2l.use_svg_display()
        self.fig, self.axes = d2l.plt.subplots(nrows, ncols, figsize=figsize)
        if nrows * ncols == 1:
            # Wrap the single axes object so that indexing always works.
            self.axes = [self.axes]

        def configure_axes():
            # Closure capturing the axis settings passed to the constructor.
            return d2l.set_axes(
                self.axes[0], xlabel, ylabel, xlim, ylim, xscale, yscale,
                legend)

        self.config_axes = configure_axes
        self.X, self.Y, self.fmts = None, None, fmts

    def add(self, x, y):
        """Append data point(s) to the figure and redraw it.

        Scalars are promoted: a scalar `y` becomes a one-element list and a
        scalar `x` is repeated once per entry of `y`. Pairs where either
        value is `None` are skipped.
        """
        if not hasattr(y, "__len__"):
            y = [y]
        num_series = len(y)
        if not hasattr(x, "__len__"):
            x = [x] * num_series
        if not self.X:
            self.X = [[] for _ in range(num_series)]
        if not self.Y:
            self.Y = [[] for _ in range(num_series)]
        for idx, (x_value, y_value) in enumerate(zip(x, y)):
            if x_value is None or y_value is None:
                continue
            self.X[idx].append(x_value)
            self.Y[idx].append(y_value)
        main_axes = self.axes[0]
        main_axes.cla()
        for xs, ys, fmt in zip(self.X, self.Y, self.fmts):
            main_axes.plot(xs, ys, fmt)
        self.config_axes()
        display.display(self.fig)
        display.clear_output(wait=True)

A
Aston Zhang 已提交
289
def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):
    """Train a model (defined in Chapter 3).

    Plots training loss, training accuracy and test accuracy after every
    epoch, then asserts that the final values fall in the expected ranges.

    Defined in :numref:`sec_softmax_scratch`"""
    curve_names = ['train loss', 'train acc', 'test acc']
    animator = Animator(xlabel='epoch', xlim=[1, num_epochs], ylim=[0.3, 0.9],
                        legend=curve_names)
    for epoch in range(num_epochs):
        train_metrics = train_epoch_ch3(net, train_iter, loss, updater)
        test_acc = evaluate_accuracy(net, test_iter)
        animator.add(epoch + 1, train_metrics + (test_acc,))
    train_loss, train_acc = train_metrics
    # Sanity checks on the outcome of the last epoch.
    assert train_loss < 0.5, train_loss
    assert 0.7 < train_acc <= 1, train_acc
    assert 0.7 < test_acc <= 1, test_acc

A
Aston Zhang 已提交
304
def predict_ch3(net, test_iter, n=6):
    """Predict labels (defined in Chapter 3).

    Takes the first batch of `test_iter` and shows its first `n` images,
    each titled with the true label followed by the predicted label.

    Defined in :numref:`sec_softmax_scratch`"""
    # Only the first batch is needed.
    for X, y in test_iter:
        break
    true_names = d2l.get_fashion_mnist_labels(y)
    predicted = d2l.argmax(net(X), axis=1)
    pred_names = d2l.get_fashion_mnist_labels(predicted)
    titles = []
    for true_name, pred_name in zip(true_names, pred_names):
        titles.append(true_name + '\n' + pred_name)
    images = d2l.reshape(X[0:n], (n, 28, 28))
    d2l.show_images(images, 1, n, titles=titles[0:n])
A
Aston Zhang 已提交
315

A
Aston Zhang 已提交
316
def evaluate_loss(net, data_iter, loss):
    """Evaluate the loss of a model on the given dataset.

    Returns the summed loss divided by the number of loss elements.

    Defined in :numref:`sec_model_selection`"""
    # Slot 0: sum of losses, slot 1: number of loss elements.
    metric = d2l.Accumulator(2)
    for X, y in data_iter:
        batch_loss = loss(net(X), y)
        metric.add(d2l.reduce_sum(batch_loss), d2l.size(batch_loss))
    total_loss, count = metric[0], metric[1]
    return total_loss / count

A
Aston Zhang 已提交
326 327
# Registry of downloadable datasets: name -> (url, sha1_hash).
DATA_HUB = dict()
# Common URL prefix for the datasets registered below.
DATA_URL = 'http://d2l-data.s3-accelerate.amazonaws.com/'
A
Aston Zhang 已提交
328

A
Aston Zhang 已提交
329
def download(name, cache_dir=os.path.join('..', 'data')):
    """Download a file registered in DATA_HUB and return the local filename.

    If a file with the expected name already exists in `cache_dir` and its
    SHA-1 digest matches the registered one, it is returned without
    touching the network. The digest is only checked for an already-cached
    file, not for a fresh download.

    Args:
        name: Key of the dataset in `DATA_HUB`.
        cache_dir: Directory in which the file is stored; created if missing.

    Returns:
        Path of the local file.

    Raises:
        AssertionError: If `name` is not registered in `DATA_HUB`.
        requests.HTTPError: If the server answers with an error status.

    Defined in :numref:`sec_kaggle_house`"""
    assert name in DATA_HUB, f"{name} 不存在于 {DATA_HUB}."
    url, sha1_hash = DATA_HUB[name]
    os.makedirs(cache_dir, exist_ok=True)
    fname = os.path.join(cache_dir, url.split('/')[-1])
    chunk_size = 1048576  # 1 MiB
    if os.path.exists(fname):
        sha1 = hashlib.sha1()
        with open(fname, 'rb') as f:
            # Hash in chunks so large files are never fully held in memory.
            for data in iter(lambda: f.read(chunk_size), b''):
                sha1.update(data)
        if sha1.hexdigest() == sha1_hash:
            return fname  # Hit cache
    print(f'正在从{url}下载{fname}...')
    with requests.get(url, stream=True, verify=True) as r:
        # Fail loudly instead of caching an HTTP error page as the dataset.
        r.raise_for_status()
        with open(fname, 'wb') as f:
            # Actually stream the body to disk rather than buffering it all.
            for chunk in r.iter_content(chunk_size=chunk_size):
                f.write(chunk)
    return fname

A
Aston Zhang 已提交
353
def download_extract(name, folder=None):
    """Download and extract a zip/tar file.

    The archive is extracted into the directory that contains the
    downloaded file.

    Args:
        name: Key of the dataset in `DATA_HUB`.
        folder: Optional sub-directory name to return instead of the
            archive path without its extension.

    Returns:
        `<download dir>/<folder>` when `folder` is given, otherwise the
        archive path with its last extension removed.

    Raises:
        AssertionError: If the file extension is not .zip, .tar or .gz.

    Defined in :numref:`sec_kaggle_house`"""
    fname = download(name)
    base_dir = os.path.dirname(fname)
    data_dir, ext = os.path.splitext(fname)
    if ext == '.zip':
        fp = zipfile.ZipFile(fname, 'r')
    elif ext in ('.tar', '.gz'):
        fp = tarfile.open(fname, 'r')
    else:
        # Raised explicitly (rather than via `assert`) so the check also
        # holds when Python runs with -O.
        raise AssertionError('只有zip/tar文件可以被解压缩。')
    # The context manager closes the archive even if extraction fails.
    with fp:
        # NOTE(review): `extractall` trusts the member paths inside the
        # archive; this presumably only ever sees DATA_HUB archives --
        # confirm before using it on untrusted files.
        fp.extractall(base_dir)
    return os.path.join(base_dir, folder) if folder else data_dir

A
Aston Zhang 已提交
369
def download_all():
    """Download all files registered in DATA_HUB.

    Defined in :numref:`sec_kaggle_house`"""
    for dataset_name in DATA_HUB:
        download(dataset_name)

X
xiaotinghe 已提交
376 377 378
# Register the Kaggle house-price training CSV as (url, sha1_hash).
DATA_HUB['kaggle_house_train'] = (
    DATA_URL + 'kaggle_house_pred_train.csv',
    '585e9cc93e70b39160e7921475f9bcd7d31219ce')
A
Aston Zhang 已提交
379

X
xiaotinghe 已提交
380 381 382
# Register the Kaggle house-price test CSV as (url, sha1_hash).
DATA_HUB['kaggle_house_test'] = (
    DATA_URL + 'kaggle_house_pred_test.csv',
    'fa19780a7b011d9b009e8bff8e99922a8ee2eb90')
A
Aston Zhang 已提交
383

A
Aston Zhang 已提交
384
def try_gpu(i=0):
    """Return gpu(i) if it exists, otherwise return cpu().

    Defined in :numref:`sec_use_gpu`"""
    if npx.num_gpus() >= i + 1:
        return npx.gpu(i)
    return npx.cpu()
A
Aston Zhang 已提交
389

A
Aston Zhang 已提交
390
def try_all_gpus():
    """Return all available GPUs, or [cpu()] if no GPU exists.

    Defined in :numref:`sec_use_gpu`"""
    gpus = []
    for index in range(npx.num_gpus()):
        gpus.append(npx.gpu(index))
    if gpus:
        return gpus
    return [npx.cpu()]

A
Aston Zhang 已提交
397
def corr2d(X, K):
    """Compute 2D cross-correlation of `X` with kernel `K`.

    The output has shape
    `(X.shape[0] - K.shape[0] + 1, X.shape[1] - K.shape[1] + 1)`.

    Defined in :numref:`sec_conv_layer`"""
    kernel_h, kernel_w = K.shape
    out_shape = (X.shape[0] - kernel_h + 1, X.shape[1] - kernel_w + 1)
    Y = d2l.zeros(out_shape)
    for row in range(Y.shape[0]):
        for col in range(Y.shape[1]):
            # Element-wise product of the kernel with the window whose
            # top-left corner is (row, col), summed to a scalar.
            window = X[row: row + kernel_h, col: col + kernel_w]
            Y[row, col] = d2l.reduce_sum((window * K))
    return Y

A
Aston Zhang 已提交
408
def evaluate_accuracy_gpu(net, data_iter, device=None):
    """Compute the accuracy of a model on a dataset using a given device.

    When `device` is falsy, the first context of the network's first
    parameter is used.

    Defined in :numref:`sec_lenet`"""
    if not device:
        first_param = list(net.collect_params().values())[0]
        device = first_param.list_ctx()[0]
    # Slot 0: number of correct predictions, slot 1: number of predictions.
    metric = d2l.Accumulator(2)
    for X, y in data_iter:
        X, y = X.as_in_ctx(device), y.as_in_ctx(device)
        num_correct = d2l.accuracy(net(X), y)
        metric.add(num_correct, d2l.size(y))
    return metric[0] / metric[1]
A
Aston Zhang 已提交
419

A
Aston Zhang 已提交
420
def train_ch6(net, train_iter, test_iter, num_epochs, lr, device):
    """Train a model on `device` (defined in Chapter 6).

    Re-initializes `net` with Xavier initialization on `device`, trains it
    with SGD and softmax cross-entropy loss for `num_epochs` epochs, plots
    the progress with `d2l.Animator`, and finally prints the loss, the
    accuracies and the throughput.

    Args:
        net: Gluon network to train; its parameters are re-initialized.
        train_iter: Iterable of `(X, y)` training batches; must support
            `len()`.
        test_iter: Iterable of `(X, y)` test batches.
        num_epochs: Number of passes over `train_iter`.
        lr: Learning rate for SGD.
        device: Context the parameters and every batch are placed on.

    Defined in :numref:`sec_lenet`"""
    net.initialize(force_reinit=True, ctx=device, init=init.Xavier())
    loss = gluon.loss.SoftmaxCrossEntropyLoss()
    trainer = gluon.Trainer(net.collect_params(),
                            'sgd', {'learning_rate': lr})
    animator = d2l.Animator(xlabel='epoch', xlim=[1, num_epochs],
                            legend=['train loss', 'train acc', 'test acc'])
    timer, num_batches = d2l.Timer(), len(train_iter)
    # Refresh the plot about five times per epoch. Clamp to at least 1 so
    # that iterators with fewer than five batches do not trigger a
    # modulo-by-zero below.
    plot_every = max(1, num_batches // 5)
    for epoch in range(num_epochs):
        # Sum of training loss, sum of training accuracy, no. of examples
        metric = d2l.Accumulator(3)
        for i, (X, y) in enumerate(train_iter):
            timer.start()
            # Here is the major difference from `d2l.train_epoch_ch3`
            X, y = X.as_in_ctx(device), y.as_in_ctx(device)
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y)
            l.backward()
            trainer.step(X.shape[0])
            metric.add(l.sum(), d2l.accuracy(y_hat, y), X.shape[0])
            timer.stop()
            train_l = metric[0] / metric[2]
            train_acc = metric[1] / metric[2]
            if (i + 1) % plot_every == 0 or i == num_batches - 1:
                animator.add(epoch + (i + 1) / num_batches,
                             (train_l, train_acc, None))
        test_acc = evaluate_accuracy_gpu(net, test_iter)
        animator.add(epoch + 1, (None, None, test_acc))
    print(f'loss {train_l:.3f}, train acc {train_acc:.3f}, '
          f'test acc {test_acc:.3f}')
    print(f'{metric[2] * num_epochs / timer.sum():.1f} examples/sec '
          f'on {str(device)}')

A
Aston Zhang 已提交
456
class Residual(nn.Block):
    """Residual block: two 3x3 convolutions with batch normalization, plus
    a skip connection that is optionally passed through a 1x1 convolution."""
    def __init__(self, num_channels, use_1x1conv=False, strides=1, **kwargs):
        super().__init__(**kwargs)
        self.conv1 = nn.Conv2D(num_channels, kernel_size=3, padding=1,
                               strides=strides)
        self.conv2 = nn.Conv2D(num_channels, kernel_size=3, padding=1)
        # Optional 1x1 convolution applied to the skip path; it uses the
        # same channel count and stride as the main path.
        self.conv3 = (nn.Conv2D(num_channels, kernel_size=1, strides=strides)
                      if use_1x1conv else None)
        self.bn1 = nn.BatchNorm()
        self.bn2 = nn.BatchNorm()

    def forward(self, X):
        hidden = npx.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(hidden))
        if self.conv3:
            X = self.conv3(X)
        return npx.relu(Y + X)

# Register the text file read by `read_time_machine` as (url, sha1_hash).
d2l.DATA_HUB['time_machine'] = (d2l.DATA_URL + 'timemachine.txt',
                                '090b5e7e70c295757f55df93cb0a180b9691891a')

A
Aston Zhang 已提交
480
def read_time_machine():
    """Load the time machine dataset into a list of text lines.

    Every run of non-letter characters is replaced by a single space, and
    each line is stripped and lower-cased.

    Defined in :numref:`sec_text_preprocessing`"""
    path = d2l.download('time_machine')
    with open(path, 'r') as f:
        raw_lines = f.readlines()
    cleaned = []
    for line in raw_lines:
        letters_only = re.sub('[^A-Za-z]+', ' ', line)
        cleaned.append(letters_only.strip().lower())
    return cleaned
A
Aston Zhang 已提交
487

A
Aston Zhang 已提交
488
def tokenize(lines, token='word'):
    """Split text lines into word or character tokens.

    For any other value of `token` an error message is printed and `None`
    is returned.

    Defined in :numref:`sec_text_preprocessing`"""
    if token == 'word':
        return [line.split() for line in lines]
    if token == 'char':
        return [list(line) for line in lines]
    print('错误:未知词元类型:' + token)
A
Aston Zhang 已提交
498

A
Aston Zhang 已提交
499
class Vocab:
    """Vocabulary for text: maps tokens to indices and back."""
    def __init__(self, tokens=None, min_freq=0, reserved_tokens=None):
        """Defined in :numref:`sec_text_preprocessing`"""
        tokens = [] if tokens is None else tokens
        reserved_tokens = [] if reserved_tokens is None else reserved_tokens
        # Sort by frequency, most frequent first.
        counter = count_corpus(tokens)
        self.token_freqs = sorted(counter.items(), key=lambda pair: pair[1],
                                  reverse=True)
        # The unknown token always gets index 0, followed by the reserved
        # tokens and then the sufficiently frequent corpus tokens.
        self.unk = 0
        uniq_tokens = ['<unk>'] + reserved_tokens
        frequent_tokens = [
            token for token, freq in self.token_freqs
            if freq >= min_freq and token not in uniq_tokens]
        uniq_tokens += frequent_tokens
        self.idx_to_token = list(uniq_tokens)
        self.token_to_idx = {
            token: idx for idx, token in enumerate(self.idx_to_token)}

    def __len__(self):
        return len(self.idx_to_token)

    def __getitem__(self, tokens):
        # Lists and tuples are mapped recursively; tokens missing from the
        # vocabulary map to the unknown index.
        if isinstance(tokens, (list, tuple)):
            return [self.__getitem__(token) for token in tokens]
        return self.token_to_idx.get(tokens, self.unk)

    def to_tokens(self, indices):
        if isinstance(indices, (list, tuple)):
            return [self.idx_to_token[index] for index in indices]
        return self.idx_to_token[indices]

A
Aston Zhang 已提交
533
def count_corpus(tokens):
    """Count token frequencies and return them as a `collections.Counter`.

    `tokens` is either a 1D list of tokens or a 2D list of token lists.

    Defined in :numref:`sec_text_preprocessing`"""
    is_nested = len(tokens) == 0 or isinstance(tokens[0], list)
    if not is_nested:
        return collections.Counter(tokens)
    # Flatten the list of token lines into one flat list of tokens.
    flat_tokens = []
    for line in tokens:
        flat_tokens.extend(line)
    return collections.Counter(flat_tokens)

A
Aston Zhang 已提交
543
def load_corpus_time_machine(max_tokens=-1):
    """Return token indices and the vocabulary of the time machine dataset.

    The text is tokenized by character. When `max_tokens` is positive the
    corpus is truncated to that many tokens.

    Defined in :numref:`sec_text_preprocessing`"""
    lines = read_time_machine()
    tokens = tokenize(lines, 'char')
    vocab = Vocab(tokens)
    # A text line in this dataset is not necessarily a sentence or a
    # paragraph, so all lines are flattened into a single list.
    corpus = []
    for line in tokens:
        for token in line:
            corpus.append(vocab[token])
    if max_tokens > 0:
        corpus = corpus[:max_tokens]
    return corpus, vocab

A
Aston Zhang 已提交
557
def seq_data_iter_random(corpus, batch_size, num_steps):
    """Generate minibatches of subsequences using random sampling.

    Yields `(X, Y)` pairs where `Y` holds the same subsequences as `X`
    shifted by one token.

    Defined in :numref:`sec_language_model`"""
    # Start with a random offset (up to and including `num_steps - 1`) to
    # partition the sequence.
    start = random.randint(0, num_steps - 1)
    corpus = corpus[start:]
    # Subtract 1 because the labels need one extra token.
    num_subseqs = (len(corpus) - 1) // num_steps
    # Starting indices of the subsequences of length `num_steps`.
    initial_indices = list(range(0, num_subseqs * num_steps, num_steps))
    # With random sampling, subsequences from two adjacent minibatches are
    # not necessarily adjacent in the original sequence.
    random.shuffle(initial_indices)

    num_batches = num_subseqs // batch_size
    for batch_idx in range(num_batches):
        first = batch_idx * batch_size
        # Random starting indices of the subsequences in this batch.
        batch_starts = initial_indices[first: first + batch_size]
        X = [corpus[pos: pos + num_steps] for pos in batch_starts]
        Y = [corpus[pos + 1: pos + 1 + num_steps] for pos in batch_starts]
        yield d2l.tensor(X), d2l.tensor(Y)

A
Aston Zhang 已提交
583
def seq_data_iter_sequential(corpus, batch_size, num_steps):
    """Generate minibatches of subsequences using sequential partitioning.

    Yields `(X, Y)` pairs of width `num_steps`, where `Y` is `X` shifted by
    one token.

    Defined in :numref:`sec_language_model`"""
    # Start with a random offset to partition the sequence
    # (`random.randint` includes both end points).
    offset = random.randint(0, num_steps)
    usable = len(corpus) - offset - 1
    num_tokens = (usable // batch_size) * batch_size
    inputs = d2l.tensor(corpus[offset: offset + num_tokens])
    targets = d2l.tensor(corpus[offset + 1: offset + 1 + num_tokens])
    inputs = inputs.reshape(batch_size, -1)
    targets = targets.reshape(batch_size, -1)
    num_batches = inputs.shape[1] // num_steps
    for batch_idx in range(num_batches):
        left = batch_idx * num_steps
        right = left + num_steps
        X = inputs[:, left: right]
        Y = targets[:, left: right]
        yield X, Y

A
Aston Zhang 已提交
599
class SeqDataLoader:
    """An iterator to load sequence data."""
    def __init__(self, batch_size, num_steps, use_random_iter, max_tokens):
        """Defined in :numref:`sec_language_model`"""
        # Pick the sampling strategy once; `__iter__` just invokes it.
        self.data_iter_fn = (d2l.seq_data_iter_random if use_random_iter
                             else d2l.seq_data_iter_sequential)
        self.corpus, self.vocab = d2l.load_corpus_time_machine(max_tokens)
        self.batch_size = batch_size
        self.num_steps = num_steps

    def __iter__(self):
        make_iter = self.data_iter_fn
        return make_iter(self.corpus, self.batch_size, self.num_steps)

X
xiaotinghe 已提交
613 614
def load_data_time_machine(batch_size, num_steps,
                           use_random_iter=False, max_tokens=10000):
    """Return the iterator and the vocabulary of the time machine dataset.

    Defined in :numref:`sec_language_model`"""
    loader = SeqDataLoader(
        batch_size, num_steps, use_random_iter, max_tokens)
    vocab = loader.vocab
    return loader, vocab

A
Aston Zhang 已提交
622
class RNNModelScratch:
    """An RNN model implemented from scratch."""
    def __init__(self, vocab_size, num_hiddens, device, get_params,
                 init_state, forward_fn):
        """Defined in :numref:`sec_rnn_scratch`"""
        self.vocab_size = vocab_size
        self.num_hiddens = num_hiddens
        self.params = get_params(vocab_size, num_hiddens, device)
        self.init_state = init_state
        self.forward_fn = forward_fn

    def __call__(self, X, state):
        # One-hot encode the transposed input before the forward pass.
        one_hot_inputs = npx.one_hot(X.T, self.vocab_size)
        return self.forward_fn(one_hot_inputs, state, self.params)

    def begin_state(self, batch_size, ctx):
        make_state = self.init_state
        return make_state(batch_size, self.num_hiddens, ctx)

M
Mu Li 已提交
638
def predict_ch8(prefix, num_preds, net, vocab, device):
    """Generate new characters following the `prefix`.

    Returns the prefix followed by `num_preds` predicted tokens, joined
    into a single string.

    Defined in :numref:`sec_rnn_scratch`"""
    state = net.begin_state(batch_size=1, ctx=device)
    outputs = [vocab[prefix[0]]]

    def get_input():
        # The most recent output, shaped (1, 1), is the next input.
        latest = d2l.tensor([outputs[-1]], ctx=device)
        return d2l.reshape(latest, (1, 1))

    # Warm-up period: feed the prefix to update the state only.
    for token in prefix[1:]:
        _, state = net(get_input(), state)
        outputs.append(vocab[token])
    # Predict `num_preds` steps.
    for _ in range(num_preds):
        y, state = net(get_input(), state)
        outputs.append(int(y.argmax(axis=1).reshape(1)))
    return ''.join([vocab.idx_to_token[idx] for idx in outputs])

M
Mu Li 已提交
654
def grad_clipping(net, theta):
    """Clip the gradient.

    When the norm of all gradients taken together exceeds `theta`, every
    gradient is rescaled in place by `theta / norm`.

    Defined in :numref:`sec_rnn_scratch`"""
    if isinstance(net, gluon.Block):
        params = [p.data() for p in net.collect_params().values()]
    else:
        params = net.params
    squared_total = sum((p.grad ** 2).sum() for p in params)
    norm = math.sqrt(squared_total)
    if norm > theta:
        scale = theta / norm
        for param in params:
            param.grad[:] *= scale

M
Mu Li 已提交
667
def train_epoch_ch8(net, train_iter, loss, updater, device, use_random_iter):
    """Train a model for one epoch (defined in Chapter 8).

    Returns `(perplexity, tokens per second)`.

    Defined in :numref:`sec_rnn_scratch`"""
    state = None
    timer = d2l.Timer()
    # Slot 0: sum of training loss, slot 1: number of tokens.
    metric = d2l.Accumulator(2)
    for X, Y in train_iter:
        needs_fresh_state = state is None or use_random_iter
        if needs_fresh_state:
            # Initialize `state` on the first iteration or when using
            # random sampling.
            state = net.begin_state(batch_size=X.shape[0], ctx=device)
        else:
            # NOTE(review): in MXNet `detach()` appears to return a new
            # array rather than detaching in place, so discarding the
            # result here may have no effect -- confirm.
            for s in state:
                s.detach()
        y = Y.T.reshape(-1)
        X, y = X.as_in_ctx(device), y.as_in_ctx(device)
        with autograd.record():
            y_hat, state = net(X, state)
            l = loss(y_hat, y).mean()
        l.backward()
        grad_clipping(net, 1)
        # Batch size 1 because the loss has already been averaged.
        updater(batch_size=1)
        num_tokens = d2l.size(y)
        metric.add(l * num_tokens, num_tokens)
    perplexity = math.exp(metric[0] / metric[1])
    tokens_per_sec = metric[1] / timer.stop()
    return perplexity, tokens_per_sec
A
Aston Zhang 已提交
690

M
Mu Li 已提交
691
def train_ch8(net, train_iter, vocab, lr, num_epochs, device,
              use_random_iter=False):
    """Train a model (defined in Chapter 8).

    Plots the perplexity every 10 epochs, then prints the final perplexity,
    the speed, and 50-token continuations of two fixed prefixes.

    Defined in :numref:`sec_rnn_scratch`"""
    loss = gluon.loss.SoftmaxCrossEntropyLoss()
    animator = d2l.Animator(xlabel='epoch', ylabel='perplexity',
                            legend=['train'], xlim=[10, num_epochs])
    # Initialize: Gluon blocks are trained with a `gluon.Trainer`, anything
    # else with `d2l.sgd` on `net.params`.
    if isinstance(net, gluon.Block):
        net.initialize(ctx=device, force_reinit=True,
                       init=init.Normal(0.01))
        trainer = gluon.Trainer(net.collect_params(),
                                'sgd', {'learning_rate': lr})

        def updater(batch_size):
            return trainer.step(batch_size)
    else:
        def updater(batch_size):
            return d2l.sgd(net.params, lr, batch_size)

    def predict(prefix):
        return predict_ch8(prefix, 50, net, vocab, device)

    # Train and predict.
    for epoch in range(num_epochs):
        ppl, speed = train_epoch_ch8(
            net, train_iter, loss, updater, device, use_random_iter)
        epoch_number = epoch + 1
        if epoch_number % 10 == 0:
            animator.add(epoch_number, [ppl])
    print(f'困惑度 {ppl:.1f}, {speed:.1f} 词元/秒 {str(device)}')
    print(predict('time traveller'))
    print(predict('traveller'))

class RNNModel(nn.Block):
    """The RNN model.

    Defined in :numref:`sec_rnn-concise`

    Combines the recurrent layer `rnn_layer` with a dense output layer of
    `vocab_size` units.
    """
    def __init__(self, rnn_layer, vocab_size, **kwargs):
        super().__init__(**kwargs)
        self.rnn = rnn_layer
        self.vocab_size = vocab_size
        self.dense = nn.Dense(vocab_size)

    def forward(self, inputs, state):
        """Return the dense-layer output and the updated recurrent state."""
        # One-hot encode the transposed token indices
        X = npx.one_hot(inputs.T, self.vocab_size)
        Y, state = self.rnn(X, state)
        # The fully connected layer first reshapes `Y` to
        # (`num_steps` * `batch_size`, `num_hiddens`).
        # Its output shape is (`num_steps` * `batch_size`, `vocab_size`).
        output = self.dense(Y.reshape(-1, Y.shape[-1]))
        return output, state

    def begin_state(self, *args, **kwargs):
        """Forward to the wrapped recurrent layer's `begin_state`."""
        return self.rnn.begin_state(*args, **kwargs)

A
Aston Zhang 已提交
740 741 742 743 744
def annotate(text, xy, xytext):
    """Draw `text` at `xytext` with a '->' arrow pointing at `xy`.

    The annotation is added to the current matplotlib axes.
    """
    current_axes = d2l.plt.gca()
    arrow_style = dict(arrowstyle='->')
    current_axes.annotate(text, xy=xy, xytext=xytext,
                          arrowprops=arrow_style)

def train_2d(trainer, steps=20, f_grad=None):
    """Optimize a 2D objective function with a customized trainer.

    Defined in :numref:`subsec_gd-learningrate`

    Args:
        trainer: Callable taking `(x1, x2, s1, s2)` (plus `f_grad` when one
            is given) and returning the updated `(x1, x2, s1, s2)`.
        steps: Number of update steps to run.
        f_grad: Optional extra argument forwarded to `trainer`.

    Returns:
        A list of `(x1, x2)` pairs: the start point followed by the point
        reached after every step.
    """
    # `s1` and `s2` are internal state variables that will be used later
    x1, x2, s1, s2 = -5, -2, 0, 0
    results = [(x1, x2)]
    for _ in range(steps):
        if f_grad:
            x1, x2, s1, s2 = trainer(x1, x2, s1, s2, f_grad)
        else:
            x1, x2, s1, s2 = trainer(x1, x2, s1, s2)
        results.append((x1, x2))
    # Report the number of completed steps; unlike the loop variable this
    # is also defined when `steps` is 0
    print(f'epoch {len(results) - 1}, x1: {float(x1):f}, x2: {float(x2):f}')
    return results

def show_trace_2d(f, results):
    """Show the trace of 2D variables during optimization.

    Defined in :numref:`subsec_gd-learningrate`

    Plots the `(x1, x2)` points in `results` as a connected line and draws
    the contours of `f` over the grid [-5.5, 1.0) x [-3.0, 1.0).
    """
    d2l.set_figsize()
    d2l.plt.plot(*zip(*results), '-o', color='#ff7f0e')
    x1, x2 = d2l.meshgrid(d2l.arange(-5.5, 1.0, 0.1),
                          d2l.arange(-3.0, 1.0, 0.1))
    d2l.plt.contour(x1, x2, f(x1, x2), colors='#1f77b4')
    d2l.plt.xlabel('x1')
    d2l.plt.ylabel('x2')

# Register the airfoil dataset as (URL, 40-hex-digit hash string);
# presumably the hash is the checksum verified by the download helper.
d2l.DATA_HUB['airfoil'] = (
    d2l.DATA_URL + 'airfoil_self_noise.dat',
    '76e5be1548fd8222e5074cf0faae75edff8cf93f')

def get_data_ch11(batch_size=10, n=1500):
    """Load the airfoil dataset as a training minibatch iterator.

    Defined in :numref:`sec_minibatches`

    Returns:
        A pair `(data_iter, feature_dim)`: an iterator over the first `n`
        rows (all columns but the last as features, the last column as the
        label) and the number of feature columns.
    """
    data = np.genfromtxt(d2l.download('airfoil'),
                         dtype=np.float32, delimiter='\t')
    # Standardize each column with its own mean and standard deviation
    data = (data - data.mean(axis=0)) / data.std(axis=0)
    data_iter = d2l.load_array(
        (data[:n, :-1], data[:n, -1]), batch_size, is_train=True)
    return data_iter, data.shape[1]-1

def train_ch11(trainer_fn, states, hyperparams, data_iter,
               feature_dim, num_epochs=2):
    """Train a linear regression model with a custom optimizer function.

    Defined in :numref:`sec_minibatches`

    `trainer_fn` is called after every minibatch as
    `trainer_fn([w, b], states, hyperparams)`.

    Returns:
        A pair `(timer.cumsum(), animator.Y[0])`, i.e. the cumulative
        times and the loss values that were plotted.
    """
    # Initialize the model
    w = np.random.normal(scale=0.01, size=(feature_dim, 1))
    b = np.zeros(1)
    w.attach_grad()
    b.attach_grad()
    net, loss = lambda X: d2l.linreg(X, w, b), d2l.squared_loss
    # Train the model
    animator = d2l.Animator(xlabel='epoch', ylabel='loss',
                            xlim=[0, num_epochs], ylim=[0.22, 0.35])
    n, timer = 0, d2l.Timer()
    for _ in range(num_epochs):
        for X, y in data_iter:
            with autograd.record():
                l = loss(net(X), y).mean()
            l.backward()
            trainer_fn([w, b], states, hyperparams)
            n += X.shape[0]
            if n % 200 == 0:
                # Pause the timer so that evaluation is not counted
                timer.stop()
                animator.add(n/X.shape[0]/len(data_iter),
                             (d2l.evaluate_loss(net, data_iter, loss),))
                timer.start()
    print(f'loss: {animator.Y[0][-1]:.3f}, {timer.avg():.3f} sec/epoch')
    return timer.cumsum(), animator.Y[0]

def train_concise_ch11(tr_name, hyperparams, data_iter, num_epochs=2):
    """Train a single-output dense model with a built-in Gluon optimizer.

    Defined in :numref:`sec_minibatches`

    `tr_name` and `hyperparams` are passed to `gluon.Trainer` as the
    optimizer name and its parameters.
    """
    # Initialize the model
    net = nn.Sequential()
    net.add(nn.Dense(1))
    net.initialize(init.Normal(sigma=0.01))
    trainer = gluon.Trainer(net.collect_params(), tr_name, hyperparams)
    loss = gluon.loss.L2Loss()
    animator = d2l.Animator(xlabel='epoch', ylabel='loss',
                            xlim=[0, num_epochs], ylim=[0.22, 0.35])
    n, timer = 0, d2l.Timer()
    for _ in range(num_epochs):
        for X, y in data_iter:
            with autograd.record():
                l = loss(net(X), y)
            l.backward()
            trainer.step(X.shape[0])
            n += X.shape[0]
            if n % 200 == 0:
                # Pause the timer so that evaluation is not counted
                timer.stop()
                animator.add(n/X.shape[0]/len(data_iter),
                             (d2l.evaluate_loss(net, data_iter, loss),))
                timer.start()
    print(f'loss: {animator.Y[0][-1]:.3f}, {timer.avg():.3f} sec/epoch')

M
Mu Li 已提交
837 838
class Benchmark:
    """Context manager that reports the timing of its `with` block.

    On exit it prints `description` followed by the value returned by the
    timer's `stop()`, formatted in seconds.
    """
    def __init__(self, description='Done'):
        """Defined in :numref:`sec_hybridize`"""
        self.description = description

    def __enter__(self):
        # A fresh timer is created each time the context is entered
        self.timer = d2l.Timer()
        return self

    def __exit__(self, *args):
        print(f'{self.description}: {self.timer.stop():.4f} sec')
A
dbl  
Aston Zhang 已提交
848

X
xiaotinghe 已提交
849
def split_batch(X, y, devices):
    """Split `X` and `y` into multiple devices.

    Defined in :numref:`sec_multi_gpu`

    `X` and `y` must have the same size along the first axis. Returns the
    pair of results of `gluon.utils.split_and_load` for `X` and for `y`.
    """
    assert X.shape[0] == y.shape[0]
    return (gluon.utils.split_and_load(X, devices),
            gluon.utils.split_and_load(y, devices))

def resnet18(num_classes):
    """A slightly modified ResNet-18 model.

    Defined in :numref:`sec_multi_gpu_concise`

    Returns an `nn.Sequential` network whose last layer is a dense layer
    with `num_classes` outputs.
    """
    def resnet_block(num_channels, num_residuals, first_block=False):
        """Stack `num_residuals` residual units of `num_channels` channels."""
        blk = nn.Sequential()
        for i in range(num_residuals):
            if i == 0 and not first_block:
                # The first unit of every block except the first block uses
                # a 1x1 convolution and a stride of 2
                blk.add(d2l.Residual(
                    num_channels, use_1x1conv=True, strides=2))
            else:
                blk.add(d2l.Residual(num_channels))
        return blk

    net = nn.Sequential()
    # This model uses a smaller convolution kernel, stride, and padding and
    # removes the maximum pooling layer
    net.add(nn.Conv2D(64, kernel_size=3, strides=1, padding=1),
            nn.BatchNorm(), nn.Activation('relu'))
    net.add(resnet_block(64, 2, first_block=True),
            resnet_block(128, 2),
            resnet_block(256, 2),
            resnet_block(512, 2))
    net.add(nn.GlobalAvgPool2D(), nn.Dense(num_classes))
    return net

def evaluate_accuracy_gpus(net, data_iter, split_f=d2l.split_batch):
    """Compute the accuracy for a model on a dataset using multiple GPUs.

    Defined in :numref:`sec_multi_gpu_concise`

    `split_f(features, labels, devices)` must return the per-device shards
    of the features and of the labels.
    """
    # Query the list of devices from the first parameter of the network
    devices = list(net.collect_params().values())[0].list_ctx()
    # No. of correct predictions, no. of predictions
    metric = d2l.Accumulator(2)
    for features, labels in data_iter:
        X_shards, y_shards = split_f(features, labels, devices)
        # Run in parallel
        pred_shards = [net(X_shard) for X_shard in X_shards]
        metric.add(sum(float(d2l.accuracy(pred_shard, y_shard)) for
                       pred_shard, y_shard in zip(
                           pred_shards, y_shards)), labels.size)
    return metric[0] / metric[1]

def train_batch_ch13(net, features, labels, loss, trainer, devices,
                     split_f=d2l.split_batch):
    """Train for a minibatch with multiple GPUs.

    Defined in :numref:`sec_image_augmentation`

    Returns:
        A pair `(train_loss_sum, train_acc_sum)`: the loss summed over all
        shards and the sum of `d2l.accuracy` over all shards.
    """
    X_shards, y_shards = split_f(features, labels, devices)
    with autograd.record():
        pred_shards = [net(X_shard) for X_shard in X_shards]
        ls = [loss(pred_shard, y_shard) for pred_shard, y_shard
              in zip(pred_shards, y_shards)]
    for l in ls:
        l.backward()
    # The `True` flag allows parameters with stale gradients, which is
    # useful later (e.g., in fine-tuning BERT)
    trainer.step(labels.shape[0], ignore_stale_grad=True)
    train_loss_sum = sum(float(l.sum()) for l in ls)
    train_acc_sum = sum(d2l.accuracy(pred_shard, y_shard)
                        for pred_shard, y_shard in zip(pred_shards, y_shards))
    return train_loss_sum, train_acc_sum

def train_ch13(net, train_iter, test_iter, loss, trainer, num_epochs,
               devices=d2l.try_all_gpus(), split_f=d2l.split_batch):
    """Train a model with multiple GPUs.

    Defined in :numref:`sec_image_augmentation`

    Plots the training loss and accuracy about five times per epoch and the
    test accuracy once per epoch, then prints the final metrics and the
    training throughput.
    """
    timer, num_batches = d2l.Timer(), len(train_iter)
    # Plot roughly five times per epoch. With fewer than five batches
    # `num_batches // 5` is 0, so clamp to 1 to avoid a modulo by zero.
    plot_every = max(1, num_batches // 5)
    animator = d2l.Animator(xlabel='epoch', xlim=[1, num_epochs], ylim=[0, 1],
                            legend=['train loss', 'train acc', 'test acc'])
    for epoch in range(num_epochs):
        # Sum of training loss, sum of training accuracy, no. of examples,
        # no. of predictions
        metric = d2l.Accumulator(4)
        for i, (features, labels) in enumerate(train_iter):
            timer.start()
            l, acc = train_batch_ch13(
                net, features, labels, loss, trainer, devices, split_f)
            metric.add(l, acc, labels.shape[0], labels.size)
            timer.stop()
            if (i + 1) % plot_every == 0 or i == num_batches - 1:
                animator.add(epoch + (i + 1) / num_batches,
                             (metric[0] / metric[2], metric[1] / metric[3],
                              None))
        test_acc = d2l.evaluate_accuracy_gpus(net, test_iter, split_f)
        animator.add(epoch + 1, (None, None, test_acc))
    print(f'loss {metric[0] / metric[2]:.3f}, train acc '
          f'{metric[1] / metric[3]:.3f}, test acc {test_acc:.3f}')
    print(f'{metric[2] * num_epochs / timer.sum():.1f} examples/sec on '
          f'{str(devices)}')

A
Aston Zhang 已提交
946
# Register the hotdog dataset as (URL, 40-hex-digit hash string);
# presumably the hash is the checksum verified by the download helper.
d2l.DATA_HUB['hotdog'] = (d2l.DATA_URL + 'hotdog.zip',
                          'fba480ffa8aa7e0febbb511d181409f899b9baa5')
X
xiaotinghe 已提交
948 949

def box_corner_to_center(boxes):
    """Convert from (upper-left, lower-right) to (center, width, height).

    Defined in :numref:`sec_bbox`

    `boxes` is indexed as a 2-D array whose columns are (x1, y1, x2, y2);
    the result has columns (cx, cy, w, h).
    """
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    cx = (x1 + x2) / 2
    cy = (y1 + y2) / 2
    w = x2 - x1
    h = y2 - y1
    boxes = d2l.stack((cx, cy, w, h), axis=-1)
    return boxes

def box_center_to_corner(boxes):
    """Convert from (center, width, height) to (upper-left, lower-right).

    Defined in :numref:`sec_bbox`

    `boxes` is indexed as a 2-D array whose columns are (cx, cy, w, h);
    the result has columns (x1, y1, x2, y2).
    """
    cx, cy, w, h = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    x1 = cx - 0.5 * w
    y1 = cy - 0.5 * h
    x2 = cx + 0.5 * w
    y2 = cy + 0.5 * h
    boxes = d2l.stack((x1, y1, x2, y2), axis=-1)
    return boxes

def bbox_to_rect(bbox, color):
    """Convert a bounding box to an unfilled matplotlib rectangle.

    Defined in :numref:`sec_bbox`"""
    # Convert the bounding box (upper-left x, upper-left y, lower-right x,
    # lower-right y) format to the matplotlib format: ((upper-left x,
    # upper-left y), width, height)
    return d2l.plt.Rectangle(
        xy=(bbox[0], bbox[1]), width=bbox[2]-bbox[0], height=bbox[3]-bbox[1],
        fill=False, edgecolor=color, linewidth=2)
X
xiaotinghe 已提交
980 981

def multibox_prior(data, sizes, ratios):
    """Generate anchor boxes with different shapes centered on each pixel.

    Defined in :numref:`sec_anchor`

    The last two axes of `data` give the input height and width. Every
    pixel gets `len(sizes) + len(ratios) - 1` anchors: all sizes combined
    with the first ratio, plus the first size combined with the remaining
    ratios. The result has a leading axis of length 1 and four coordinates
    per anchor.
    """
    in_height, in_width = data.shape[-2:]
    device, num_sizes, num_ratios = data.ctx, len(sizes), len(ratios)
    boxes_per_pixel = (num_sizes + num_ratios - 1)
    size_tensor = d2l.tensor(sizes, ctx=device)
    ratio_tensor = d2l.tensor(ratios, ctx=device)

    # Offsets are required to move the anchor to the center of a pixel.
    # Since a pixel has height=1 and width=1, we choose to offset our
    # centers by 0.5
    offset_h, offset_w = 0.5, 0.5
    steps_h = 1.0 / in_height  # Scaled steps in y-axis
    steps_w = 1.0 / in_width  # Scaled steps in x-axis

    # Generate all center points for the anchor boxes
    center_h = (d2l.arange(in_height, ctx=device) + offset_h) * steps_h
    center_w = (d2l.arange(in_width, ctx=device) + offset_w) * steps_w
    shift_x, shift_y = d2l.meshgrid(center_w, center_h)
    shift_x, shift_y = shift_x.reshape(-1), shift_y.reshape(-1)

    # Generate `boxes_per_pixel` number of heights and widths that are later
    # used to create anchor box corner coordinates (xmin, ymin, xmax, ymax)
    w = np.concatenate((size_tensor * np.sqrt(ratio_tensor[0]),
                        sizes[0] * np.sqrt(ratio_tensor[1:]))) \
                        * in_height / in_width  # Handle rectangular inputs
    h = np.concatenate((size_tensor / np.sqrt(ratio_tensor[0]),
                        sizes[0] / np.sqrt(ratio_tensor[1:])))
    # Divide by 2 to get half height and half width
    anchor_manipulations = np.tile(np.stack((-w, -h, w, h)).T,
                                   (in_height * in_width, 1)) / 2

    # Each center point will have `boxes_per_pixel` number of anchor boxes,
    # so generate a grid of all anchor box centers with `boxes_per_pixel`
    # repeats
    out_grid = d2l.stack([shift_x, shift_y, shift_x, shift_y],
                         axis=1).repeat(boxes_per_pixel, axis=0)
    output = out_grid + anchor_manipulations
    return np.expand_dims(output, axis=0)

def show_bboxes(axes, bboxes, labels=None, colors=None):
    """Show bounding boxes.

    Defined in :numref:`sec_anchor`

    Colors are cycled when there are more boxes than colors. A label is
    drawn only for boxes that have a corresponding entry in `labels`.
    """
    def _make_list(obj, default_values=None):
        """Wrap a single value in a list; replace `None` by the default."""
        if obj is None:
            obj = default_values
        elif not isinstance(obj, (list, tuple)):
            obj = [obj]
        return obj

    labels = _make_list(labels)
    colors = _make_list(colors, ['b', 'g', 'r', 'm', 'c'])
    for i, bbox in enumerate(bboxes):
        color = colors[i % len(colors)]
        rect = d2l.bbox_to_rect(d2l.numpy(bbox), color)
        axes.add_patch(rect)
        if labels and len(labels) > i:
            # Use black text on a white box and white text otherwise
            text_color = 'k' if color == 'w' else 'w'
            axes.text(rect.xy[0], rect.xy[1], labels[i],
                      va='center', ha='center', fontsize=9, color=text_color,
                      bbox=dict(facecolor=color, lw=0))

def box_iou(boxes1, boxes2):
    """Compute pairwise IoU across two lists of anchor or bounding boxes.

    Defined in :numref:`sec_anchor`

    Both inputs are indexed as 2-D arrays with columns (x1, y1, x2, y2).
    Entry (i, j) of the result is the intersection over union of box i of
    `boxes1` and box j of `boxes2`.
    """
    box_area = lambda boxes: ((boxes[:, 2] - boxes[:, 0]) *
                              (boxes[:, 3] - boxes[:, 1]))
    # Shape of `boxes1`, `boxes2`, `areas1`, `areas2`:
    # `boxes1`: (no. of boxes1, 4),
    # `boxes2`: (no. of boxes2, 4),
    # `areas1`: (no. of boxes1,),
    # `areas2`: (no. of boxes2,)
    areas1 = box_area(boxes1)
    areas2 = box_area(boxes2)

    # Shape of `inter_upperlefts`, `inter_lowerrights`, `inters`:
    # (no. of boxes1, no. of boxes2, 2)
    inter_upperlefts = np.maximum(boxes1[:, None, :2], boxes2[:, :2])
    inter_lowerrights = np.minimum(boxes1[:, None, 2:], boxes2[:, 2:])
    # Non-overlapping boxes give negative extents; clip them to 0
    inters = (inter_lowerrights - inter_upperlefts).clip(min=0)
    # Shape of `inter_areas` and `union_areas`:
    # (no. of boxes1, no. of boxes2)
    inter_areas = inters[:, :, 0] * inters[:, :, 1]
    union_areas = areas1[:, None] + areas2 - inter_areas
    return inter_areas / union_areas
X
xiaotinghe 已提交
1067

A
Aston Zhang 已提交
1068
def assign_anchor_to_bbox(ground_truth, anchors, device, iou_threshold=0.5):
    """Assign closest ground-truth bounding boxes to anchor boxes.

    Defined in :numref:`sec_anchor`

    Args:
        ground_truth: Ground-truth boxes, one per row.
        anchors: Anchor boxes, one per row.
        device: Context on which the assignment map is created.
        iou_threshold: Minimum IoU for an anchor to be assigned the
            ground-truth box it overlaps most.

    Returns:
        An int32 array with one entry per anchor: the index of the assigned
        ground-truth box, or -1 if none was assigned.
    """
    num_anchors, num_gt_boxes = anchors.shape[0], ground_truth.shape[0]
    # Element x_ij in the i-th row and j-th column is the IoU of the anchor
    # box i and the ground-truth bounding box j
    jaccard = box_iou(anchors, ground_truth)
    # Initialize the tensor to hold the assigned ground-truth bounding box
    # for each anchor
    anchors_bbox_map = np.full((num_anchors,), -1, dtype=np.int32, ctx=device)
    # Assign ground-truth bounding boxes according to the threshold
    max_ious, indices = np.max(jaccard, axis=1), np.argmax(jaccard, axis=1)
    anc_i = np.nonzero(max_ious >= iou_threshold)[0]
    box_j = indices[max_ious >= iou_threshold]
    anchors_bbox_map[anc_i] = box_j
    col_discard = np.full((num_anchors,), -1)
    row_discard = np.full((num_gt_boxes,), -1)
    # Then give every ground-truth box the anchor with the globally largest
    # remaining IoU, discarding that row and column after each pick
    for _ in range(num_gt_boxes):
        max_idx = np.argmax(jaccard)
        box_idx = (max_idx % num_gt_boxes).astype('int32')
        anc_idx = (max_idx / num_gt_boxes).astype('int32')
        anchors_bbox_map[anc_idx] = box_idx
        jaccard[:, box_idx] = col_discard
        jaccard[anc_idx, :] = row_discard
    return anchors_bbox_map

def offset_boxes(anchors, assigned_bb, eps=1e-6):
    """Transform for anchor box offsets.

    Defined in :numref:`subsec_labeling-anchor-boxes`

    Both inputs are converted to (center, width, height) form. The center
    offsets are scaled by 10 and the log width/height ratios by 5; `eps`
    is added inside the logarithm.
    """
    c_anc = d2l.box_corner_to_center(anchors)
    c_assigned_bb = d2l.box_corner_to_center(assigned_bb)
    offset_xy = 10 * (c_assigned_bb[:, :2] - c_anc[:, :2]) / c_anc[:, 2:]
    offset_wh = 5 * d2l.log(eps + c_assigned_bb[:, 2:] / c_anc[:, 2:])
    offset = d2l.concat([offset_xy, offset_wh], axis=1)
    return offset

def multibox_target(anchors, labels):
    """Label anchor boxes using ground-truth bounding boxes.

    Defined in :numref:`subsec_labeling-anchor-boxes`

    `labels[i]` holds the ground-truth boxes of example i, with the class
    in column 0 and the box coordinates in the remaining columns.

    Returns:
        A tuple `(bbox_offset, bbox_mask, class_labels)` stacked over the
        batch. Class labels are shifted by 1 so that 0 means background.
    """
    batch_size, anchors = labels.shape[0], anchors.squeeze(0)
    batch_offset, batch_mask, batch_class_labels = [], [], []
    device, num_anchors = anchors.ctx, anchors.shape[0]
    for i in range(batch_size):
        label = labels[i, :, :]
        anchors_bbox_map = assign_anchor_to_bbox(
            label[:, 1:], anchors, device)
        # 1 for the four coordinates of assigned anchors, 0 otherwise
        bbox_mask = np.tile((np.expand_dims((anchors_bbox_map >= 0),
                                            axis=-1)), (1, 4)).astype('int32')
        # Initialize class labels and assigned bounding box coordinates
        # with zeros
        class_labels = d2l.zeros(num_anchors, dtype=np.int32, ctx=device)
        assigned_bb = d2l.zeros((num_anchors, 4), dtype=np.float32,
                                ctx=device)
        # Label classes of anchor boxes using their assigned ground-truth
        # bounding boxes. If an anchor box is not assigned any, we label its
        # class as background (the value remains zero)
        indices_true = np.nonzero(anchors_bbox_map >= 0)[0]
        bb_idx = anchors_bbox_map[indices_true]
        class_labels[indices_true] = label[bb_idx, 0].astype('int32') + 1
        assigned_bb[indices_true] = label[bb_idx, 1:]
        # Offset transformation
        offset = offset_boxes(anchors, assigned_bb) * bbox_mask
        batch_offset.append(offset.reshape(-1))
        batch_mask.append(bbox_mask.reshape(-1))
        batch_class_labels.append(class_labels)
    bbox_offset = d2l.stack(batch_offset)
    bbox_mask = d2l.stack(batch_mask)
    class_labels = d2l.stack(batch_class_labels)
    return (bbox_offset, bbox_mask, class_labels)

def offset_inverse(anchors, offset_preds):
    """Predict bounding boxes based on anchor boxes with predicted offsets.

    Defined in :numref:`subsec_labeling-anchor-boxes`

    Uses the same scale factors as `offset_boxes` (10 for the centers, 5
    for the log sizes) and returns boxes in corner form.
    """
    anc = d2l.box_corner_to_center(anchors)
    pred_bbox_xy = (offset_preds[:, :2] * anc[:, 2:] / 10) + anc[:, :2]
    pred_bbox_wh = d2l.exp(offset_preds[:, 2:] / 5) * anc[:, 2:]
    pred_bbox = d2l.concat((pred_bbox_xy, pred_bbox_wh), axis=1)
    predicted_bbox = d2l.box_center_to_corner(pred_bbox)
    return predicted_bbox
X
xiaotinghe 已提交
1147 1148

def nms(boxes, scores, iou_threshold):
    """Sort confidence scores of predicted bounding boxes.

    Defined in :numref:`subsec_predicting-bounding-boxes-nms`

    Repeatedly keeps the highest-scoring remaining box and drops every
    other remaining box whose IoU with it exceeds `iou_threshold`.

    Returns:
        An int32 array with the indices of the kept boxes, in the order
        they were kept.
    """
    # Box indices sorted by descending score
    B = scores.argsort()[::-1]
    keep = []  # Indices of predicted bounding boxes that will be kept
    while B.size > 0:
        i = B[0]
        keep.append(i)
        if B.size == 1:
            break
        iou = box_iou(boxes[i, :].reshape(-1, 4),
                      boxes[B[1:], :].reshape(-1, 4)).reshape(-1)
        inds = np.nonzero(iou <= iou_threshold)[0]
        # `iou` was computed against `B[1:]`, hence the shift by one
        B = B[inds + 1]
    return np.array(keep, dtype=np.int32, ctx=boxes.ctx)

def multibox_detection(cls_probs, offset_preds, anchors, nms_threshold=0.5,
                       pos_threshold=0.009999999):
    """Predict bounding boxes using non-maximum suppression.

    Defined in :numref:`subsec_predicting-bounding-boxes-nms`

    Returns:
        One row per anchor for every example in the batch, laid out as
        (class id, confidence, four box coordinates). The class id is -1
        for boxes removed by non-maximum suppression or whose confidence
        is below `pos_threshold`.
    """
    device, batch_size = cls_probs.ctx, cls_probs.shape[0]
    anchors = np.squeeze(anchors, axis=0)
    num_anchors = cls_probs.shape[2]
    out = []
    for i in range(batch_size):
        cls_prob, offset_pred = cls_probs[i], offset_preds[i].reshape(-1, 4)
        # Row 0 of `cls_prob` is skipped when picking the best class
        conf, class_id = np.max(cls_prob[1:], 0), np.argmax(cls_prob[1:], 0)
        predicted_bb = offset_inverse(anchors, offset_pred)
        keep = nms(predicted_bb, conf, nms_threshold)

        # Find all non-`keep` indices and set the class to background.
        # An index occurring only once in `combined` is not in `keep`.
        all_idx = np.arange(num_anchors, dtype=np.int32, ctx=device)
        combined = d2l.concat((keep, all_idx))
        unique, counts = np.unique(combined, return_counts=True)
        non_keep = unique[counts == 1]
        all_id_sorted = d2l.concat((keep, non_keep))
        class_id[non_keep] = -1
        class_id = class_id[all_id_sorted].astype('float32')
        conf, predicted_bb = conf[all_id_sorted], predicted_bb[all_id_sorted]
        # Here `pos_threshold` is a threshold for positive (non-background)
        # predictions
        below_min_idx = (conf < pos_threshold)
        class_id[below_min_idx] = -1
        conf[below_min_idx] = 1 - conf[below_min_idx]
        pred_info = d2l.concat((np.expand_dims(class_id, axis=1),
                                np.expand_dims(conf, axis=1),
                                predicted_bb), axis=1)
        out.append(pred_info)
    return d2l.stack(out)

A
Aston Zhang 已提交
1198 1199 1200
# Register the banana detection dataset as (URL, 40-hex-digit hash string);
# presumably the hash is the checksum verified by the download helper.
d2l.DATA_HUB['banana-detection'] = (d2l.DATA_URL + 'banana-detection.zip',
                                    '5de26c8fce5ccdea9f91267273464dc968d20d72')
X
xiaotinghe 已提交
1201 1202

def read_data_bananas(is_train=True):
    """Read the banana detection dataset images and labels.

    Defined in :numref:`sec_object-detection-dataset`

    Returns:
        A pair `(images, targets)`: the list of images listed in
        `label.csv` and the label rows divided by 256, with an extra axis
        inserted at position 1.
    """
    data_dir = d2l.download_extract('banana-detection')
    # The split directory is the same for the CSV file and for every image
    split_dir = 'bananas_train' if is_train else 'bananas_val'
    csv_fname = os.path.join(data_dir, split_dir, 'label.csv')
    csv_data = pd.read_csv(csv_fname)
    csv_data = csv_data.set_index('img_name')
    images, targets = [], []
    for img_name, target in csv_data.iterrows():
        images.append(image.imread(
            os.path.join(data_dir, split_dir, 'images', f'{img_name}')))
        # Here `target` contains (class, upper-left x, upper-left y,
        # lower-right x, lower-right y), where all the images have the same
        # banana class (index 0)
        targets.append(list(target))
    return images, np.expand_dims(np.array(targets), 1) / 256

class BananasDataset(gluon.data.Dataset):
    """A customized dataset to load the banana detection dataset.

    Defined in :numref:`sec_object-detection-dataset`"""
    def __init__(self, is_train):
        self.features, self.labels = read_data_bananas(is_train)
        print('read ' + str(len(self.features)) + (' training examples' if
              is_train else ' validation examples'))

    def __getitem__(self, idx):
        """Return the image as float32 with axes (2, 0, 1) and its label."""
        return (self.features[idx].astype('float32').transpose(2, 0, 1),
                self.labels[idx])

    def __len__(self):
        return len(self.features)

def load_data_bananas(batch_size):
    """Load the banana detection dataset.

    Defined in :numref:`sec_object-detection-dataset`

    Returns:
        A pair `(train_iter, val_iter)` of data loaders; only the training
        loader shuffles.
    """
    train_iter = gluon.data.DataLoader(BananasDataset(is_train=True),
                                       batch_size, shuffle=True)
    val_iter = gluon.data.DataLoader(BananasDataset(is_train=False),
                                     batch_size)
    return train_iter, val_iter
X
xiaotinghe 已提交
1247

A
Aston Zhang 已提交
1248 1249 1250 1251
# Register the Pascal VOC2012 archive as (URL, 40-hex-digit hash string);
# presumably the hash is the checksum verified by the download helper.
d2l.DATA_HUB['voc2012'] = (
    d2l.DATA_URL + 'VOCtrainval_11-May-2012.tar',
    '4e443f8a2eca6b1dac8a6c57641b67dd40621a49')

def read_voc_images(voc_dir, is_train=True):
    """Read all VOC feature and label images.

    Defined in :numref:`sec_semantic_segmentation`

    The image names come from `ImageSets/Segmentation/train.txt` (or
    `val.txt`). Returns a pair `(features, labels)` of image lists read
    from `JPEGImages` and `SegmentationClass` respectively.
    """
    txt_fname = os.path.join(voc_dir, 'ImageSets', 'Segmentation',
                             'train.txt' if is_train else 'val.txt')
    with open(txt_fname, 'r') as f:
        images = f.read().split()
    features, labels = [], []
    for fname in images:
        features.append(image.imread(os.path.join(
            voc_dir, 'JPEGImages', f'{fname}.jpg')))
        labels.append(image.imread(os.path.join(
            voc_dir, 'SegmentationClass', f'{fname}.png')))
    return features, labels

# RGB colors used in the VOC label images, one per class index;
# presumably aligned index-by-index with the class names in VOC_CLASSES.
VOC_COLORMAP = [
    [0, 0, 0], [128, 0, 0], [0, 128, 0],
    [128, 128, 0], [0, 0, 128], [128, 0, 128],
    [0, 128, 128], [128, 128, 128], [64, 0, 0],
    [192, 0, 0], [64, 128, 0], [192, 128, 0],
    [64, 0, 128], [192, 0, 128], [64, 128, 128],
    [192, 128, 128], [0, 64, 0], [128, 64, 0],
    [0, 192, 0], [128, 192, 0], [0, 64, 128],
]

X
xiaotinghe 已提交
1274 1275 1276 1277
# Names of the VOC classes; 'background' comes first.
VOC_CLASSES = [
    'background',
    'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
    'bus', 'car', 'cat', 'chair', 'cow',
    'diningtable', 'dog', 'horse', 'motorbike', 'person',
    'potted plant', 'sheep', 'sofa', 'train', 'tv/monitor',
]
A
Aston Zhang 已提交
1278 1279

def voc_colormap2label():
    """Build the mapping from RGB to class indices for VOC labels.

    Defined in :numref:`sec_semantic_segmentation`

    Returns an array of length 256**3 in which the entry at position
    `(R * 256 + G) * 256 + B` is the index of that color in
    `VOC_COLORMAP`; all other entries are 0.
    """
    colormap2label = np.zeros(256 ** 3)
    for i, colormap in enumerate(VOC_COLORMAP):
        colormap2label[
            (colormap[0] * 256 + colormap[1]) * 256 + colormap[2]] = i
    return colormap2label

def voc_label_indices(colormap, colormap2label):
    """Map any RGB values in VOC labels to their class indices.

    Defined in :numref:`sec_semantic_segmentation`

    `colormap` is indexed with the color channel on its third axis;
    `colormap2label` is a lookup table indexed by
    `(R * 256 + G) * 256 + B`.
    """
    # Cast to int32 before combining the three channels into one index
    colormap = colormap.astype(np.int32)
    idx = ((colormap[:, :, 0] * 256 + colormap[:, :, 1]) * 256
           + colormap[:, :, 2])
    return colormap2label[idx]

def voc_rand_crop(feature, label, height, width):
    """Randomly crop both feature and label images.

    Defined in :numref:`sec_semantic_segmentation`

    The rectangle chosen by the random crop of `feature` is reused to crop
    `label`, so both crops cover the same region.
    """
    feature, rect = image.random_crop(feature, (width, height))
    label = image.fixed_crop(label, *rect)
    return feature, label

class VOCSegDataset(gluon.data.Dataset):
    """A customized dataset to load the VOC dataset.

    Defined in :numref:`sec_semantic_segmentation`

    Images smaller than `crop_size` in either of their first two
    dimensions are dropped; the remaining feature images are normalized
    once at construction time.
    """
    def __init__(self, is_train, crop_size, voc_dir):
        self.rgb_mean = np.array([0.485, 0.456, 0.406])
        self.rgb_std = np.array([0.229, 0.224, 0.225])
        self.crop_size = crop_size
        features, labels = read_voc_images(voc_dir, is_train=is_train)
        self.features = [self.normalize_image(feature)
                         for feature in self.filter(features)]
        self.labels = self.filter(labels)
        self.colormap2label = voc_colormap2label()
        print('read ' + str(len(self.features)) + ' examples')

    def normalize_image(self, img):
        """Scale `img` to [0, 1] and standardize it per channel."""
        return (img.astype('float32') / 255 - self.rgb_mean) / self.rgb_std

    def filter(self, imgs):
        """Keep only the images that are at least as large as the crop."""
        return [img for img in imgs if (
            img.shape[0] >= self.crop_size[0] and
            img.shape[1] >= self.crop_size[1])]

    def __getitem__(self, idx):
        """Return a random crop as (feature with axes (2, 0, 1), labels)."""
        feature, label = voc_rand_crop(self.features[idx], self.labels[idx],
                                       *self.crop_size)
        return (feature.transpose(2, 0, 1),
                voc_label_indices(label, self.colormap2label))

    def __len__(self):
        return len(self.features)

def load_data_voc(batch_size, crop_size):
    """Load the VOC semantic segmentation dataset.

    Defined in :numref:`sec_semantic_segmentation`

    Returns:
        A pair `(train_iter, test_iter)` of data loaders. Both discard the
        last incomplete batch; only the training loader shuffles.
    """
    voc_dir = d2l.download_extract('voc2012', os.path.join(
        'VOCdevkit', 'VOC2012'))
    num_workers = d2l.get_dataloader_workers()
    train_iter = gluon.data.DataLoader(
        VOCSegDataset(True, crop_size, voc_dir), batch_size,
        shuffle=True, last_batch='discard', num_workers=num_workers)
    test_iter = gluon.data.DataLoader(
        VOCSegDataset(False, crop_size, voc_dir), batch_size,
        last_batch='discard', num_workers=num_workers)
    return train_iter, test_iter

X
xiaotinghe 已提交
1353 1354 1355 1356
# Register the small CIFAR-10 sample as (URL, 40-hex-digit hash string);
# presumably the hash is the checksum verified by the download helper.
d2l.DATA_HUB['cifar10_tiny'] = (
    d2l.DATA_URL + 'kaggle_cifar10_tiny.zip',
    '2068874e4b9a9f0fb07ebe0ad2b29754449ccacd')

def read_csv_labels(fname):
    """Read `fname` to return a filename to label dictionary.

    Defined in :numref:`sec_kaggle_cifar10`

    The first line is treated as the header and skipped. Every remaining
    non-blank line must hold exactly two comma-separated fields,
    `name,label`; blank lines (such as a trailing empty line) are ignored.

    Raises:
        ValueError: If a non-blank data line does not have two fields.
    """
    with open(fname, 'r') as f:
        # Skip the file header line (column name)
        next(f, None)
        tokens = [line.rstrip().split(',') for line in f if line.strip()]
    return {name: label for name, label in tokens}

def copyfile(filename, target_dir):
    """Copy a file into a target directory.

    The target directory (including missing parents) is created first if it
    does not exist yet.

    Args:
        filename: Path of the file to copy.
        target_dir: Directory that receives the copy.

    Defined in :numref:`sec_kaggle_cifar10`"""
    os.makedirs(target_dir, exist_ok=True)
    shutil.copy(filename, target_dir)

def reorg_train_valid(data_dir, labels, valid_ratio):
    """Split a validation set out of the original training set.

    Every file in ``<data_dir>/train`` is copied to
    ``train_valid_test/train_valid/<label>``. In addition, the first
    ``n_valid_per_label`` files encountered for each label are copied to
    ``train_valid_test/valid/<label>`` and the remaining ones to
    ``train_valid_test/train/<label>``.

    Args:
        data_dir: Directory that contains the ``train`` folder.
        labels: Mapping from a file name without its extension to its label.
        valid_ratio: Fraction used to size the per-label validation set.

    Returns:
        The number of validation examples per label.

    Defined in :numref:`sec_kaggle_cifar10`"""
    # Number of examples of the class that has the fewest examples in the
    # training dataset
    n = collections.Counter(labels.values()).most_common()[-1][1]
    # Number of examples per class for the validation set (at least one)
    n_valid_per_label = max(1, math.floor(n * valid_ratio))
    label_count = {}
    for train_file in os.listdir(os.path.join(data_dir, 'train')):
        label = labels[train_file.split('.')[0]]
        fname = os.path.join(data_dir, 'train', train_file)
        copyfile(fname, os.path.join(data_dir, 'train_valid_test',
                                     'train_valid', label))
        if label not in label_count or label_count[label] < n_valid_per_label:
            copyfile(fname, os.path.join(data_dir, 'train_valid_test',
                                         'valid', label))
            label_count[label] = label_count.get(label, 0) + 1
        else:
            copyfile(fname, os.path.join(data_dir, 'train_valid_test',
                                         'train', label))
    return n_valid_per_label

def reorg_test(data_dir):
    """Organize the testing set for data loading during prediction.

    Every file in ``<data_dir>/test`` is copied into
    ``<data_dir>/train_valid_test/test/unknown``.

    Args:
        data_dir: Directory that contains the ``test`` folder.

    Defined in :numref:`sec_kaggle_cifar10`"""
    for test_file in os.listdir(os.path.join(data_dir, 'test')):
        copyfile(os.path.join(data_dir, 'test', test_file),
                 os.path.join(data_dir, 'train_valid_test', 'test',
                              'unknown'))
X
xiaotinghe 已提交
1404 1405 1406 1407

# Register the tiny Kaggle dog-breed archive as a (URL, 40-hex-digit string)
# pair; the string is presumably the archive's SHA-1 checksum -- confirm
# against the download helper.
d2l.DATA_HUB['dog_tiny'] = (d2l.DATA_URL + 'kaggle_dog_tiny.zip',
                            '0cb91d09b814ecdc07b50f31f8dcad3e81d6a86d')

A
Aston Zhang 已提交
1408 1409 1410 1411
# Register the PTB archive read by `read_ptb` as a (URL, 40-hex-digit string)
# pair; the string is presumably the archive's SHA-1 checksum -- confirm
# against the download helper.
d2l.DATA_HUB['ptb'] = (d2l.DATA_URL + 'ptb.zip',
                       '319d85e578af0cdc590547f26231e4e31cdf1e42')

def read_ptb():
    """Load the PTB dataset into a list of text lines.

    Downloads and extracts the ``'ptb'`` archive and reads
    ``ptb.train.txt`` from it.

    Returns:
        A list with one entry per line of the file, each entry being the
        list of whitespace-separated tokens on that line (an empty list for
        an empty line).

    Defined in :numref:`sec_word2vec_data`"""
    data_dir = d2l.download_extract('ptb')
    # Read the training set.
    with open(os.path.join(data_dir, 'ptb.train.txt')) as f:
        raw_text = f.read()
    return [line.split() for line in raw_text.split('\n')]

A
Aston Zhang 已提交
1421
def subsample(sentences, vocab):
    """Subsample high-frequency words.

    Tokens that map to the unknown index are removed first. Each remaining
    token is then kept with probability
    ``sqrt(1e-4 / count(token) * num_tokens)`` (capped at 1 by the
    comparison with a uniform random number), so frequent tokens are
    dropped more often.

    Args:
        sentences: List of token lists.
        vocab: Vocabulary supporting ``vocab[token]`` and ``vocab.unk``.

    Returns:
        A ``(subsampled_sentences, counter)`` pair, where ``counter`` holds
        the token counts computed after removing unknown tokens but before
        subsampling.

    Defined in :numref:`sec_word2vec_data`"""
    # Exclude unknown tokens '<unk>'
    sentences = [[token for token in line if vocab[token] != vocab.unk]
                 for line in sentences]
    counter = d2l.count_corpus(sentences)
    num_tokens = sum(counter.values())

    # Return True if `token` is kept during subsampling
    def keep(token):
        return (random.uniform(0, 1) <
                math.sqrt(1e-4 / counter[token] * num_tokens))

    return ([[token for token in line if keep(token)] for line in sentences],
            counter)
A
Aston Zhang 已提交
1438 1439

def get_centers_and_contexts(corpus, max_window_size):
    """Return center words and context words for the skip-gram model.

    For every position in every sentence with at least two tokens, a window
    size is drawn uniformly from ``1..max_window_size`` and the tokens
    inside that window, excluding the center itself, become the context.

    Args:
        corpus: List of sentences, each a list of tokens (or token indices).
        max_window_size: Largest context window radius that may be drawn.

    Returns:
        A ``(centers, contexts)`` pair of equal length: ``centers`` is a
        flat list of center tokens and ``contexts[i]`` is the list of
        context tokens of ``centers[i]``.

    Defined in :numref:`sec_word2vec_data`"""
    centers, contexts = [], []
    for line in corpus:
        # To form a "center word--context word" pair, each sentence needs
        # to have at least 2 words
        if len(line) < 2:
            continue
        centers += line
        for i in range(len(line)):  # Context window centered at `i`
            window_size = random.randint(1, max_window_size)
            indices = list(range(max(0, i - window_size),
                                 min(len(line), i + 1 + window_size)))
            # Exclude the center word from the context words
            indices.remove(i)
            contexts.append([line[idx] for idx in indices])
    return centers, contexts

class RandomGenerator:
    """Randomly draw among {1, ..., n} according to n sampling weights."""
    def __init__(self, sampling_weights):
        """Store the weights and start with an empty cache of draws.

        Args:
            sampling_weights: Sequence of ``n`` relative weights;
                ``sampling_weights[k]`` is the weight of the value ``k + 1``.

        Defined in :numref:`sec_word2vec_data`"""
        # The population starts at 1, so 0 is never drawn
        self.population = list(range(1, len(sampling_weights) + 1))
        self.sampling_weights = sampling_weights
        self.candidates = []
        self.i = 0

    def draw(self):
        """Return the next random value, refilling the cache when empty."""
        if self.i == len(self.candidates):
            # Cache `k` random sampling results
            self.candidates = random.choices(
                self.population, self.sampling_weights, k=10000)
            self.i = 0
        self.i += 1
        return self.candidates[self.i - 1]

A
Aston Zhang 已提交
1477
def get_negatives(all_contexts, vocab, counter, K):
    """Return noise words for negative sampling.

    Noise words are drawn with probability proportional to their count
    raised to the power 0.75. For each context list, ``K`` noise words are
    drawn per context word, rejecting any draw that is itself one of the
    context words.

    Args:
        all_contexts: List of context lists (token indices).
        vocab: Vocabulary supporting ``len(vocab)`` and ``to_tokens(i)``.
        counter: Mapping from token to its count.
        K: Number of noise words per context word.

    Returns:
        A list parallel to ``all_contexts``; entry ``i`` holds
        ``len(all_contexts[i]) * K`` noise word indices.

    Defined in :numref:`sec_word2vec_data`"""
    # Sampling weights for words with indices 1, 2, ... (index 0 is the
    # excluded unknown token) in the vocabulary
    sampling_weights = [counter[vocab.to_tokens(i)]**0.75
                        for i in range(1, len(vocab))]
    all_negatives, generator = [], RandomGenerator(sampling_weights)
    for contexts in all_contexts:
        negatives = []
        while len(negatives) < len(contexts) * K:
            neg = generator.draw()
            # Noise words cannot be context words
            if neg not in contexts:
                negatives.append(neg)
        all_negatives.append(negatives)
    return all_negatives

def batchify(data):
    """Return a minibatch of examples for skip-gram with negative sampling.

    Context and noise words of each example are concatenated and padded
    with zeros to the longest such concatenation in the batch.

    Args:
        data: Iterable of ``(center, context, negative)`` triples, where
            ``context`` and ``negative`` are lists.

    Returns:
        A tuple ``(centers, contexts_negatives, masks, labels)`` of
        tensors: ``centers`` is reshaped to one column, ``masks`` is 1 on
        real entries and 0 on padding, and ``labels`` is 1 on context words
        and 0 elsewhere.

    Defined in :numref:`sec_word2vec_data`"""
    max_len = max(len(c) + len(n) for _, c, n in data)
    centers, contexts_negatives, masks, labels = [], [], [], []
    for center, context, negative in data:
        cur_len = len(context) + len(negative)
        centers += [center]
        contexts_negatives += [context + negative + [0] * (max_len - cur_len)]
        masks += [[1] * cur_len + [0] * (max_len - cur_len)]
        labels += [[1] * len(context) + [0] * (max_len - len(context))]
    return (d2l.reshape(d2l.tensor(centers), (-1, 1)), d2l.tensor(
        contexts_negatives), d2l.tensor(masks), d2l.tensor(labels))
A
Aston Zhang 已提交
1509 1510

def load_data_ptb(batch_size, max_window_size, num_noise_words):
    """Download the PTB dataset and then load it into memory.

    Builds a vocabulary (tokens seen fewer than 10 times are dropped),
    subsamples frequent words, extracts center/context pairs, draws noise
    words and wraps everything in a shuffling Gluon data loader that uses
    ``batchify`` to assemble minibatches.

    Args:
        batch_size: Number of examples per minibatch.
        max_window_size: Largest context window radius.
        num_noise_words: Number of noise words per context word.

    Returns:
        A ``(data_iter, vocab)`` pair.

    Defined in :numref:`subsec_word2vec-minibatch-loading`"""
    sentences = read_ptb()
    vocab = d2l.Vocab(sentences, min_freq=10)
    subsampled, counter = subsample(sentences, vocab)
    corpus = [vocab[line] for line in subsampled]
    all_centers, all_contexts = get_centers_and_contexts(
        corpus, max_window_size)
    all_negatives = get_negatives(
        all_contexts, vocab, counter, num_noise_words)
    dataset = gluon.data.ArrayDataset(
        all_centers, all_contexts, all_negatives)
    data_iter = gluon.data.DataLoader(
        dataset, batch_size, shuffle=True, batchify_fn=batchify,
        num_workers=d2l.get_dataloader_workers())
    return data_iter, vocab

# Register pretrained embedding archives (three GloVe variants and the
# fastText English vectors), each as a (URL, 40-hex-digit string) pair; the
# string is presumably the archive's SHA-1 checksum -- confirm against the
# download helper. These keys are the names accepted by `TokenEmbedding`.
d2l.DATA_HUB['glove.6b.50d'] = (d2l.DATA_URL + 'glove.6B.50d.zip',
                                '0b8703943ccdb6eb788e6f091b8946e82231bc4d')

d2l.DATA_HUB['glove.6b.100d'] = (d2l.DATA_URL + 'glove.6B.100d.zip',
                                 'cd43bfb07e44e6f27cbcc7bc9ae3d80284fdaf5a')

d2l.DATA_HUB['glove.42b.300d'] = (d2l.DATA_URL + 'glove.42B.300d.zip',
                                  'b5116e234e9eb9076672cfeabf5469f3eec904fa')

d2l.DATA_HUB['wiki.en'] = (d2l.DATA_URL + 'wiki.en.zip',
                           'c1816da3821ae9f43899be655002f6c723e91b88')

class TokenEmbedding:
    """Token Embedding.

    Loads pretrained word vectors from a ``vec.txt`` file and exposes them
    through token lookup. Index 0 is reserved for the unknown token
    ``'<unk>'``, whose vector is all zeros."""
    def __init__(self, embedding_name):
        """Download and load the embedding registered as ``embedding_name``.

        Defined in :numref:`sec_synonyms`"""
        self.idx_to_token, self.idx_to_vec = self._load_embedding(
            embedding_name)
        self.unknown_idx = 0
        self.token_to_idx = {token: idx for idx, token in
                             enumerate(self.idx_to_token)}

    def _load_embedding(self, embedding_name):
        """Return ``(idx_to_token, idx_to_vec)`` read from ``vec.txt``."""
        idx_to_token, idx_to_vec = ['<unk>'], []
        data_dir = d2l.download_extract(embedding_name)
        # GloVe website: https://nlp.stanford.edu/projects/glove/
        # fastText website: https://fasttext.cc/
        # The vocabulary contains non-ASCII tokens, so decode explicitly as
        # UTF-8 instead of relying on the platform default encoding
        with open(os.path.join(data_dir, 'vec.txt'), 'r',
                  encoding='utf-8') as f:
            for line in f:
                elems = line.rstrip().split(' ')
                token, elems = elems[0], [float(elem) for elem in elems[1:]]
                # Skip header information, such as the top row in fastText
                if len(elems) > 1:
                    idx_to_token.append(token)
                    idx_to_vec.append(elems)
        # Prepend the all-zero vector of the unknown token
        idx_to_vec = [[0] * len(idx_to_vec[0])] + idx_to_vec
        return idx_to_token, d2l.tensor(idx_to_vec)

    def __getitem__(self, tokens):
        """Return the vectors of ``tokens``; unknown tokens map to index 0."""
        indices = [self.token_to_idx.get(token, self.unknown_idx)
                   for token in tokens]
        vecs = self.idx_to_vec[d2l.tensor(indices)]
        return vecs

    def __len__(self):
        """Return the number of tokens, including ``'<unk>'``."""
        return len(self.idx_to_token)

def get_tokens_and_segments(tokens_a, tokens_b=None):
    """Get tokens of the BERT input sequence and their segment IDs.

    Args:
        tokens_a: Token list of the first segment.
        tokens_b: Optional token list of the second segment.

    Returns:
        A ``(tokens, segments)`` pair. ``tokens`` is
        ``['<cls>'] + tokens_a + ['<sep>']``, followed by
        ``tokens_b + ['<sep>']`` when ``tokens_b`` is given. ``segments``
        has the same length and marks the first part with 0 and the second
        part with 1.

    Defined in :numref:`sec_bert`"""
    tokens = ['<cls>'] + tokens_a + ['<sep>']
    # 0 and 1 are marking segment A and B, respectively
    segments = [0] * (len(tokens_a) + 2)
    if tokens_b is not None:
        tokens += tokens_b + ['<sep>']
        segments += [1] * (len(tokens_b) + 1)
    return tokens, segments

class BERTEncoder(nn.Block):
    """BERT encoder.

    Sums token, segment and learnable positional embeddings and passes the
    result through ``num_layers`` instances of ``d2l.EncoderBlock``.

    Defined in :numref:`subsec_bert_input_rep`"""
    def __init__(self, vocab_size, num_hiddens, ffn_num_hiddens, num_heads,
                 num_layers, dropout, max_len=1000, **kwargs):
        super(BERTEncoder, self).__init__(**kwargs)
        self.token_embedding = nn.Embedding(vocab_size, num_hiddens)
        # Two segment ids only: 0 and 1
        self.segment_embedding = nn.Embedding(2, num_hiddens)
        self.blks = nn.Sequential()
        for _ in range(num_layers):
            self.blks.add(d2l.EncoderBlock(
                num_hiddens, ffn_num_hiddens, num_heads, dropout, True))
        # In BERT, positional embeddings are learnable, thus we create a
        # parameter of positional embeddings that is long enough
        self.pos_embedding = self.params.get('pos_embedding',
                                             shape=(1, max_len, num_hiddens))

    def forward(self, tokens, segments, valid_lens):
        # Shape of `X` remains unchanged in the following code snippet:
        # (batch size, max sequence length, `num_hiddens`)
        X = self.token_embedding(tokens) + self.segment_embedding(segments)
        # Use only as many positions as the input sequence has
        X = X + self.pos_embedding.data(ctx=X.ctx)[:, :X.shape[1], :]
        for blk in self.blks:
            X = blk(X, valid_lens)
        return X

class MaskLM(nn.Block):
    """The masked language model task of BERT.

    Gathers the encoded representations at the positions to predict and
    maps each of them to vocabulary-sized scores with a small MLP.

    Defined in :numref:`subsec_bert_input_rep`"""
    def __init__(self, vocab_size, num_hiddens, **kwargs):
        super(MaskLM, self).__init__(**kwargs)
        self.mlp = nn.Sequential()
        self.mlp.add(
            nn.Dense(num_hiddens, flatten=False, activation='relu'))
        self.mlp.add(nn.LayerNorm())
        self.mlp.add(nn.Dense(vocab_size, flatten=False))

    def forward(self, X, pred_positions):
        num_pred_positions = pred_positions.shape[1]
        pred_positions = pred_positions.reshape(-1)
        batch_size = X.shape[0]
        batch_idx = np.arange(0, batch_size)
        # Suppose that `batch_size` = 2 and `num_pred_positions` = 3, then
        # `batch_idx` is `np.array([0, 0, 0, 1, 1, 1])`
        batch_idx = np.repeat(batch_idx, num_pred_positions)
        masked_X = X[batch_idx, pred_positions]
        masked_X = masked_X.reshape((batch_size, num_pred_positions, -1))
        mlm_Y_hat = self.mlp(masked_X)
        return mlm_Y_hat

class NextSentencePred(nn.Block):
    """The next sentence prediction task of BERT.

    A single dense layer with two outputs.

    Defined in :numref:`subsec_mlm`"""
    def __init__(self, **kwargs):
        super(NextSentencePred, self).__init__(**kwargs)
        self.output = nn.Dense(2)

    def forward(self, X):
        # Shape of `X`: (batch size, `num_hiddens`)
        return self.output(X)

class BERTModel(nn.Block):
    """The BERT model.

    Combines ``BERTEncoder`` with the masked language model head
    (``MaskLM``) and the next sentence prediction head
    (``NextSentencePred``).

    Defined in :numref:`subsec_nsp`"""
    def __init__(self, vocab_size, num_hiddens, ffn_num_hiddens, num_heads,
                 num_layers, dropout, max_len=1000):
        super(BERTModel, self).__init__()
        self.encoder = BERTEncoder(vocab_size, num_hiddens, ffn_num_hiddens,
                                   num_heads, num_layers, dropout, max_len)
        self.hidden = nn.Dense(num_hiddens, activation='tanh')
        self.mlm = MaskLM(vocab_size, num_hiddens)
        self.nsp = NextSentencePred()

    def forward(self, tokens, segments, valid_lens=None, pred_positions=None):
        """Return ``(encoded_X, mlm_Y_hat, nsp_Y_hat)``.

        ``mlm_Y_hat`` is ``None`` when ``pred_positions`` is not given."""
        encoded_X = self.encoder(tokens, segments, valid_lens)
        if pred_positions is not None:
            mlm_Y_hat = self.mlm(encoded_X, pred_positions)
        else:
            mlm_Y_hat = None
        # The hidden layer of the MLP classifier for next sentence
        # prediction. 0 is the index of the '<cls>' token
        nsp_Y_hat = self.nsp(self.hidden(encoded_X[:, 0, :]))
        return encoded_X, mlm_Y_hat, nsp_Y_hat

# Register the WikiText-2 archive used by `load_data_wiki` as a
# (URL, 40-hex-digit string) pair; the string is presumably the archive's
# SHA-1 checksum -- confirm against the download helper.
d2l.DATA_HUB['wikitext-2'] = (
    'https://s3.amazonaws.com/research.metamind.io/wikitext/'
    'wikitext-2-v1.zip', '3c914d17d80b1459be871a5039ac23e752a53cbe')

def _read_wiki(data_dir):
X
xiaotinghe 已提交
1678
    """Defined in :numref:`sec_bert-dataset`"""
A
Aston Zhang 已提交
1679 1680 1681
    file_name = os.path.join(data_dir, 'wiki.train.tokens')
    with open(file_name, 'r') as f:
        lines = f.readlines()
X
xiaotinghe 已提交
1682 1683 1684
    # 大写字母转换为小写字母
    paragraphs = [line.strip().lower().split(' . ')
                  for line in lines if len(line.split(' . ')) >= 2]
A
Aston Zhang 已提交
1685 1686 1687 1688
    random.shuffle(paragraphs)
    return paragraphs

def _get_next_sentence(sentence, next_sentence, paragraphs):
X
xiaotinghe 已提交
1689
    """Defined in :numref:`sec_bert-dataset`"""
A
Aston Zhang 已提交
1690 1691 1692
    if random.random() < 0.5:
        is_next = True
    else:
X
xiaotinghe 已提交
1693
        # `paragraphs`是三重列表的嵌套
A
Aston Zhang 已提交
1694 1695 1696 1697 1698
        next_sentence = random.choice(random.choice(paragraphs))
        is_next = False
    return sentence, next_sentence, is_next

def _get_nsp_data_from_paragraph(paragraph, paragraphs, vocab, max_len):
    """Build next-sentence-prediction examples from one paragraph.

    Every pair of consecutive sentences yields one candidate example; pairs
    whose combined length (including special tokens) exceeds ``max_len``
    are skipped.

    Args:
        paragraph: List of tokenized sentences.
        paragraphs: All paragraphs, used to draw random "next" sentences.
        vocab: Unused here; kept for interface compatibility.
        max_len: Maximum length of a BERT input sequence.

    Returns:
        A list of ``(tokens, segments, is_next)`` triples.

    Defined in :numref:`sec_bert-dataset`"""
    nsp_data_from_paragraph = []
    for i in range(len(paragraph) - 1):
        tokens_a, tokens_b, is_next = _get_next_sentence(
            paragraph[i], paragraph[i + 1], paragraphs)
        # Consider 1 '<cls>' token and 2 '<sep>' tokens
        if len(tokens_a) + len(tokens_b) + 3 > max_len:
            continue
        tokens, segments = d2l.get_tokens_and_segments(tokens_a, tokens_b)
        nsp_data_from_paragraph.append((tokens, segments, is_next))
    return nsp_data_from_paragraph

def _replace_mlm_tokens(tokens, candidate_pred_positions, num_mlm_preds,
                        vocab):
X
xiaotinghe 已提交
1713 1714
    """Defined in :numref:`sec_bert-dataset`"""
    # 为遮蔽语言模型的输入创建新的词元副本,其中输入可能包含替换的“<mask>”或随机词元
A
Aston Zhang 已提交
1715 1716
    mlm_input_tokens = [token for token in tokens]
    pred_positions_and_labels = []
X
xiaotinghe 已提交
1717
    # 打乱后用于在遮蔽语言模型任务中获取15%的随机词元进行预测
A
Aston Zhang 已提交
1718 1719 1720 1721 1722
    random.shuffle(candidate_pred_positions)
    for mlm_pred_position in candidate_pred_positions:
        if len(pred_positions_and_labels) >= num_mlm_preds:
            break
        masked_token = None
X
xiaotinghe 已提交
1723
        # 80%的时间:将词替换为“<mask>”词元
A
Aston Zhang 已提交
1724 1725 1726
        if random.random() < 0.8:
            masked_token = '<mask>'
        else:
X
xiaotinghe 已提交
1727
            # 10%的时间:保持词不变
A
Aston Zhang 已提交
1728 1729
            if random.random() < 0.5:
                masked_token = tokens[mlm_pred_position]
X
xiaotinghe 已提交
1730
            # 10%的时间:用随机词替换该词
A
Aston Zhang 已提交
1731
            else:
A
Aston Zhang 已提交
1732
                masked_token = random.choice(vocab.idx_to_token)
A
Aston Zhang 已提交
1733 1734 1735 1736 1737 1738
        mlm_input_tokens[mlm_pred_position] = masked_token
        pred_positions_and_labels.append(
            (mlm_pred_position, tokens[mlm_pred_position]))
    return mlm_input_tokens, pred_positions_and_labels

def _get_mlm_data_from_tokens(tokens, vocab):
    """Build masked-language-model inputs and labels from a token list.

    Args:
        tokens: List of token strings of one BERT input sequence.
        vocab: Vocabulary used to convert tokens to indices.

    Returns:
        ``(input_token_ids, pred_positions, pred_label_ids)``, where the
        prediction positions are sorted in ascending order and the labels
        are the original tokens at those positions.

    Defined in :numref:`subsec_prepare_mlm_data`"""
    candidate_pred_positions = []
    # `tokens` is a list of strings
    for i, token in enumerate(tokens):
        # Special tokens are not predicted in the masked language modeling
        # task
        if token in ['<cls>', '<sep>']:
            continue
        candidate_pred_positions.append(i)
    # 15% of random tokens are predicted in the masked language modeling
    # task (at least one)
    num_mlm_preds = max(1, round(len(tokens) * 0.15))
    mlm_input_tokens, pred_positions_and_labels = _replace_mlm_tokens(
        tokens, candidate_pred_positions, num_mlm_preds, vocab)
    pred_positions_and_labels = sorted(pred_positions_and_labels,
                                       key=lambda x: x[0])
    pred_positions = [v[0] for v in pred_positions_and_labels]
    mlm_pred_labels = [v[1] for v in pred_positions_and_labels]
    return vocab[mlm_input_tokens], pred_positions, vocab[mlm_pred_labels]

def _pad_bert_inputs(examples, max_len, vocab):
X
xiaotinghe 已提交
1758
    """Defined in :numref:`subsec_prepare_mlm_data`"""
A
Aston Zhang 已提交
1759
    max_num_mlm_preds = round(max_len * 0.15)
X
xiaotinghe 已提交
1760
    all_token_ids, all_segments, valid_lens,  = [], [], []
A
Aston Zhang 已提交
1761 1762 1763 1764
    all_pred_positions, all_mlm_weights, all_mlm_labels = [], [], []
    nsp_labels = []
    for (token_ids, pred_positions, mlm_pred_label_ids, segments,
         is_next) in examples:
X
xiaotinghe 已提交
1765 1766 1767 1768 1769
        all_token_ids.append(np.array(token_ids + [vocab['<pad>']] * (
            max_len - len(token_ids)), dtype='int32'))
        all_segments.append(np.array(segments + [0] * (
            max_len - len(segments)), dtype='int32'))
        # `valid_lens` 不包括'<pad>'的计数
A
Aston Zhang 已提交
1770
        valid_lens.append(np.array(len(token_ids), dtype='float32'))
X
xiaotinghe 已提交
1771 1772 1773
        all_pred_positions.append(np.array(pred_positions + [0] * (
            max_num_mlm_preds - len(pred_positions)), dtype='int32'))
        # 填充词元的预测将通过乘以0权重在损失中过滤掉
A
Aston Zhang 已提交
1774
        all_mlm_weights.append(
X
xiaotinghe 已提交
1775 1776 1777 1778
            np.array([1.0] * len(mlm_pred_label_ids) + [0.0] * (
                max_num_mlm_preds - len(pred_positions)), dtype='float32'))
        all_mlm_labels.append(np.array(mlm_pred_label_ids + [0] * (
            max_num_mlm_preds - len(mlm_pred_label_ids)), dtype='int32'))
A
Aston Zhang 已提交
1779 1780 1781 1782 1783
        nsp_labels.append(np.array(is_next))
    return (all_token_ids, all_segments, valid_lens, all_pred_positions,
            all_mlm_weights, all_mlm_labels, nsp_labels)

class _WikiTextDataset(gluon.data.Dataset):
    """Dataset of padded BERT pretraining examples built from paragraphs.

    Defined in :numref:`subsec_prepare_mlm_data`"""
    def __init__(self, paragraphs, max_len):
        # Input `paragraphs[i]` is a list of sentence strings representing
        # a paragraph; while output `paragraphs[i]` is a list of sentences
        # representing a paragraph, where each sentence is a list of tokens
        paragraphs = [d2l.tokenize(
            paragraph, token='word') for paragraph in paragraphs]
        sentences = [sentence for paragraph in paragraphs
                     for sentence in paragraph]
        self.vocab = d2l.Vocab(sentences, min_freq=5, reserved_tokens=[
            '<pad>', '<mask>', '<cls>', '<sep>'])
        # Get data for the next sentence prediction task
        examples = []
        for paragraph in paragraphs:
            examples.extend(_get_nsp_data_from_paragraph(
                paragraph, paragraphs, self.vocab, max_len))
        # Get data for the masked language model task
        examples = [(_get_mlm_data_from_tokens(tokens, self.vocab)
                     + (segments, is_next))
                    for tokens, segments, is_next in examples]
        # Pad inputs
        (self.all_token_ids, self.all_segments, self.valid_lens,
         self.all_pred_positions, self.all_mlm_weights,
         self.all_mlm_labels, self.nsp_labels) = _pad_bert_inputs(
            examples, max_len, self.vocab)

    def __getitem__(self, idx):
        """Return the seven padded arrays of example ``idx``."""
        return (self.all_token_ids[idx], self.all_segments[idx],
                self.valid_lens[idx], self.all_pred_positions[idx],
                self.all_mlm_weights[idx], self.all_mlm_labels[idx],
                self.nsp_labels[idx])

    def __len__(self):
        """Return the number of examples."""
        return len(self.all_token_ids)

def load_data_wiki(batch_size, max_len):
    """Load the WikiText-2 dataset.

    Downloads and extracts the ``'wikitext-2'`` archive, builds a
    ``_WikiTextDataset`` from its paragraphs and wraps it in a shuffling
    Gluon data loader.

    Args:
        batch_size: Number of examples per minibatch.
        max_len: Maximum length of a BERT input sequence.

    Returns:
        A ``(train_iter, vocab)`` pair.

    Defined in :numref:`subsec_prepare_mlm_data`"""
    num_workers = d2l.get_dataloader_workers()
    data_dir = d2l.download_extract('wikitext-2', 'wikitext-2')
    paragraphs = _read_wiki(data_dir)
    train_set = _WikiTextDataset(paragraphs, max_len)
    train_iter = gluon.data.DataLoader(train_set, batch_size, shuffle=True,
                                       num_workers=num_workers)
    return train_iter, train_set.vocab

def _get_batch_loss_bert(net, loss, vocab_size, tokens_X_shards,
                         segments_X_shards, valid_lens_x_shards,
                         pred_positions_X_shards, mlm_weights_X_shards,
                         mlm_Y_shards, nsp_y_shards):
    """Compute the BERT pretraining losses for each shard of a minibatch.

    All ``*_shards`` arguments are parallel sequences with one entry per
    shard.

    Returns:
        ``(mlm_ls, nsp_ls, ls)``: per-shard masked language model losses,
        next sentence prediction losses, and their sums.

    Defined in :numref:`sec_bert-pretraining`"""
    mlm_ls, nsp_ls, ls = [], [], []
    for (tokens_X_shard, segments_X_shard, valid_lens_x_shard,
         pred_positions_X_shard, mlm_weights_X_shard, mlm_Y_shard,
         nsp_y_shard) in zip(
            tokens_X_shards, segments_X_shards, valid_lens_x_shards,
            pred_positions_X_shards, mlm_weights_X_shards, mlm_Y_shards,
            nsp_y_shards):
        # Forward pass
        _, mlm_Y_hat, nsp_Y_hat = net(
            tokens_X_shard, segments_X_shard, valid_lens_x_shard.reshape(-1),
            pred_positions_X_shard)
        # Compute masked language model loss
        mlm_l = loss(
            mlm_Y_hat.reshape((-1, vocab_size)), mlm_Y_shard.reshape(-1),
            mlm_weights_X_shard.reshape((-1, 1)))
        # Average over the non-padded predictions; the small constant
        # avoids division by zero when all weights are 0
        mlm_l = mlm_l.sum() / (mlm_weights_X_shard.sum() + 1e-8)
        # Compute next sentence prediction loss
        nsp_l = loss(nsp_Y_hat, nsp_y_shard)
        nsp_l = nsp_l.mean()
        mlm_ls.append(mlm_l)
        nsp_ls.append(nsp_l)
        ls.append(mlm_l + nsp_l)
        npx.waitall()
    return mlm_ls, nsp_ls, ls

# Register the IMDb review archive used by `load_data_imdb` as a
# (URL, 40-hex-digit string) pair; the string is presumably the archive's
# SHA-1 checksum -- confirm against the download helper.
d2l.DATA_HUB['aclImdb'] = (
    'http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz',
    '01ada507287d82875905620988597833ad4e0903')

def read_imdb(data_dir, is_train):
    """Read the IMDb review dataset text sequences and labels.

    Reads every file in ``<data_dir>/<split>/pos`` and then
    ``<data_dir>/<split>/neg``, where ``<split>`` is ``train`` or ``test``.
    Files are decoded as UTF-8 and newline characters are removed.

    Args:
        data_dir: Root directory of the extracted dataset.
        is_train: Read the ``train`` split if true, otherwise ``test``.

    Returns:
        A ``(data, labels)`` pair of parallel lists: review strings and
        labels (1 for ``pos``, 0 for ``neg``).

    Defined in :numref:`sec_sentiment`"""
    data, labels = [], []
    for label in ('pos', 'neg'):
        folder_name = os.path.join(data_dir, 'train' if is_train else 'test',
                                   label)
        for file in os.listdir(folder_name):
            with open(os.path.join(folder_name, file), 'rb') as f:
                review = f.read().decode('utf-8').replace('\n', '')
                data.append(review)
                labels.append(1 if label == 'pos' else 0)
    return data, labels

def load_data_imdb(batch_size, num_steps=500):
    """Return data iterators and the vocabulary of the IMDb review dataset.

    Reviews are tokenized into words, mapped to indices with a vocabulary
    built from the training set (tokens seen fewer than 5 times are
    dropped), and truncated or padded to ``num_steps`` tokens.

    Args:
        batch_size: Number of examples per minibatch.
        num_steps: Fixed sequence length of every review.

    Returns:
        ``(train_iter, test_iter, vocab)``.

    Defined in :numref:`sec_sentiment`"""
    data_dir = d2l.download_extract('aclImdb', 'aclImdb')
    train_data = read_imdb(data_dir, True)
    test_data = read_imdb(data_dir, False)
    train_tokens = d2l.tokenize(train_data[0], token='word')
    test_tokens = d2l.tokenize(test_data[0], token='word')
    vocab = d2l.Vocab(train_tokens, min_freq=5)
    train_features = np.array([d2l.truncate_pad(
        vocab[line], num_steps, vocab['<pad>']) for line in train_tokens])
    test_features = np.array([d2l.truncate_pad(
        vocab[line], num_steps, vocab['<pad>']) for line in test_tokens])
    train_iter = d2l.load_array((train_features, train_data[1]), batch_size)
    test_iter = d2l.load_array((test_features, test_data[1]), batch_size,
                               is_train=False)
    return train_iter, test_iter, vocab

A
Aston Zhang 已提交
1897
def predict_sentiment(net, vocab, sequence):
    """Predict the sentiment of a text sequence.

    Args:
        net: Model called on a single-row batch of token indices.
        vocab: Vocabulary used to convert tokens to indices.
        sequence: Text string; it is split on whitespace.

    Returns:
        ``'positive'`` if the highest-scoring class is 1, otherwise
        ``'negative'``.

    Defined in :numref:`sec_sentiment_rnn`"""
    sequence = np.array(vocab[sequence.split()], ctx=d2l.try_gpu())
    label = np.argmax(net(sequence.reshape(1, -1)), axis=1)
    return 'positive' if label == 1 else 'negative'

X
xiaotinghe 已提交
1905 1906 1907
# Register the SNLI archive used by `load_data_snli` as a
# (URL, 40-hex-digit string) pair; the string is presumably the archive's
# SHA-1 checksum -- confirm against the download helper.
d2l.DATA_HUB['SNLI'] = (
    'https://nlp.stanford.edu/projects/snli/snli_1.0.zip',
    '9fcde07509c7e87ec61c640c1b2753d9041758e4')
A
Aston Zhang 已提交
1908 1909

def read_snli(data_dir, is_train):
    """Read the SNLI dataset into premises, hypotheses, and labels.

    Reads ``snli_1.0_train.txt`` or ``snli_1.0_test.txt`` as a
    tab-separated file, skips its header line and keeps only the rows whose
    first column is one of ``entailment``, ``contradiction`` or
    ``neutral``. Parentheses are removed from the second and third columns
    and runs of whitespace are collapsed.

    Args:
        data_dir: Directory that contains the SNLI text files.
        is_train: Read the training file if true, otherwise the test file.

    Returns:
        ``(premises, hypotheses, labels)`` as three parallel lists; labels
        are 0 (entailment), 1 (contradiction) or 2 (neutral).

    Defined in :numref:`sec_natural-language-inference-and-dataset`"""
    def extract_text(s):
        # Remove information that will not be used by us
        s = re.sub(r'\(', '', s)
        s = re.sub(r'\)', '', s)
        # Substitute two or more consecutive whitespace with space
        s = re.sub(r'\s{2,}', ' ', s)
        return s.strip()
    label_set = {'entailment': 0, 'contradiction': 1, 'neutral': 2}
    file_name = os.path.join(data_dir, 'snli_1.0_train.txt'
                             if is_train else 'snli_1.0_test.txt')
    with open(file_name, 'r', encoding='utf-8') as f:
        rows = [row.split('\t') for row in f.readlines()[1:]]
    # Filter once: keep only the rows that carry one of the known labels
    rows = [row for row in rows if row[0] in label_set]
    premises = [extract_text(row[1]) for row in rows]
    hypotheses = [extract_text(row[2]) for row in rows]
    labels = [label_set[row[0]] for row in rows]
    return premises, hypotheses, labels

class SNLIDataset(gluon.data.Dataset):
    """A customized dataset to load the SNLI dataset.

    Premises and hypotheses are tokenized, mapped to indices and truncated
    or padded to ``num_steps`` tokens.

    Defined in :numref:`sec_natural-language-inference-and-dataset`"""
    def __init__(self, dataset, num_steps, vocab=None):
        """Build the dataset from a ``(premises, hypotheses, labels)`` triple.

        When ``vocab`` is ``None`` a new vocabulary is built from the
        premises and hypotheses (tokens seen fewer than 5 times are
        dropped); otherwise the given vocabulary is reused."""
        self.num_steps = num_steps
        all_premise_tokens = d2l.tokenize(dataset[0])
        all_hypothesis_tokens = d2l.tokenize(dataset[1])
        if vocab is None:
            self.vocab = d2l.Vocab(all_premise_tokens + all_hypothesis_tokens,
                                   min_freq=5, reserved_tokens=['<pad>'])
        else:
            self.vocab = vocab
        self.premises = self._pad(all_premise_tokens)
        self.hypotheses = self._pad(all_hypothesis_tokens)
        self.labels = np.array(dataset[2])
        print('read ' + str(len(self.premises)) + ' examples')

    def _pad(self, lines):
        """Convert token lists to an array of fixed-length index rows."""
        return np.array([d2l.truncate_pad(
            self.vocab[line], self.num_steps, self.vocab['<pad>'])
                         for line in lines])

    def __getitem__(self, idx):
        """Return ``((premise, hypothesis), label)`` for example ``idx``."""
        return (self.premises[idx], self.hypotheses[idx]), self.labels[idx]

    def __len__(self):
        """Return the number of examples."""
        return len(self.premises)

def load_data_snli(batch_size, num_steps=50):
    """Download the SNLI dataset and return data iterators and vocabulary.

    The test set reuses the vocabulary built from the training set.

    Args:
        batch_size: Number of examples per minibatch.
        num_steps: Fixed sequence length of premises and hypotheses.

    Returns:
        ``(train_iter, test_iter, vocab)``; only the training iterator
        shuffles.

    Defined in :numref:`sec_natural-language-inference-and-dataset`"""
    num_workers = d2l.get_dataloader_workers()
    data_dir = d2l.download_extract('SNLI')
    train_data = read_snli(data_dir, True)
    test_data = read_snli(data_dir, False)
    train_set = SNLIDataset(train_data, num_steps)
    test_set = SNLIDataset(test_data, num_steps, train_set.vocab)
    train_iter = gluon.data.DataLoader(train_set, batch_size, shuffle=True,
                                       num_workers=num_workers)
    test_iter = gluon.data.DataLoader(test_set, batch_size, shuffle=False,
                                      num_workers=num_workers)
    return train_iter, test_iter, train_set.vocab

def split_batch_multi_inputs(X, y, devices):
    """Split multi-input `X` and `y` into multiple devices.

    Each feature in ``X`` is split and loaded onto ``devices`` separately;
    the per-feature shards are then regrouped so that each entry of the
    result holds the shards of all features for one device.

    Args:
        X: Sequence of input features.
        y: Labels.
        devices: Devices passed to ``gluon.utils.split_and_load``.

    Returns:
        ``(X_shards, y_shards)``, where ``X_shards`` is a list of tuples
        (one tuple per device) and ``y_shards`` is the split of ``y``.

    Defined in :numref:`sec_natural-language-inference-attention`"""
    X = list(zip(*[gluon.utils.split_and_load(
        feature, devices, even_split=False) for feature in X]))
    return (X, gluon.utils.split_and_load(y, devices, even_split=False))
A
Aston Zhang 已提交
1982 1983

def predict_snli(net, vocab, premise, hypothesis):
    """Predict the logical relationship between the premise and hypothesis.

    Args:
        net: Model called on a list of two single-row index batches.
        vocab: Vocabulary used to convert tokens to indices.
        premise: List of premise tokens.
        hypothesis: List of hypothesis tokens.

    Returns:
        ``'entailment'``, ``'contradiction'`` or ``'neutral'`` for
        predicted class 0, 1 or anything else, respectively.

    Defined in :numref:`sec_natural-language-inference-attention`"""
    premise = np.array(vocab[premise], ctx=d2l.try_gpu())
    hypothesis = np.array(vocab[hypothesis], ctx=d2l.try_gpu())
    label = np.argmax(net([premise.reshape((1, -1)),
                           hypothesis.reshape((1, -1))]), axis=1)
    return 'entailment' if label == 0 else 'contradiction' if label == 1 \
            else 'neutral'


# Alias defined in config.ini
A
Aston Zhang 已提交
1993 1994
# Framework-neutral aliases: thin wrappers around MXNet NumPy functions and
# ndarray methods so that code can call e.g. `d2l.reshape(x, ...)`
size = lambda a: a.size
transpose = lambda a: a.T
nn_Module = nn.Block

ones = np.ones
zeros = np.zeros
arange = np.arange
meshgrid = np.meshgrid
sin = np.sin
sinh = np.sinh
cos = np.cos
cosh = np.cosh
tanh = np.tanh
linspace = np.linspace
exp = np.exp
log = np.log
tensor = np.array
normal = np.random.normal
randn = np.random.randn
rand = np.random.rand
matmul = np.dot
int32 = np.int32
float32 = np.float32
concat = np.concatenate
stack = np.stack
# NOTE: intentionally shadows the builtin `abs` inside this module
abs = np.abs
eye = np.eye
numpy = lambda x, *args, **kwargs: x.asnumpy(*args, **kwargs)
reshape = lambda x, *args, **kwargs: x.reshape(*args, **kwargs)
to = lambda x, *args, **kwargs: x.as_in_context(*args, **kwargs)
reduce_sum = lambda x, *args, **kwargs: x.sum(*args, **kwargs)
argmax = lambda x, *args, **kwargs: x.argmax(*args, **kwargs)
astype = lambda x, *args, **kwargs: x.astype(*args, **kwargs)
reduce_mean = lambda x, *args, **kwargs: x.mean(*args, **kwargs)
M
Mu Li 已提交
2027