Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
OpenDocCN
d2l-zh
提交
8ad920e0
D
d2l-zh
项目概览
OpenDocCN
/
d2l-zh
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
d2l-zh
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
8ad920e0
编写于
12月 31, 2018
作者:
A
Aston Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
gb 0.8.9, fix acc, loss computations
上级
7a7411e9
变更
15
隐藏空白更改
内联
并排
Showing
15 changed file
with
142 addition
and
141 deletion
+142
-141
build/env.yml
build/env.yml
+1
-1
chapter_appendix/gluonbook.md
chapter_appendix/gluonbook.md
+0
-1
chapter_computer-vision/image-augmentation.md
chapter_computer-vision/image-augmentation.md
+11
-14
chapter_computer-vision/kaggle-gluon-cifar10.md
chapter_computer-vision/kaggle-gluon-cifar10.md
+8
-8
chapter_computer-vision/kaggle-gluon-dog.md
chapter_computer-vision/kaggle-gluon-dog.md
+14
-12
chapter_computer-vision/ssd.md
chapter_computer-vision/ssd.md
+10
-7
chapter_convolutional-neural-networks/lenet.md
chapter_convolutional-neural-networks/lenet.md
+14
-11
chapter_deep-learning-basics/softmax-regression-scratch.md
chapter_deep-learning-basics/softmax-regression-scratch.md
+14
-13
chapter_natural-language-processing/machine-translation.md
chapter_natural-language-processing/machine-translation.md
+1
-1
chapter_natural-language-processing/word2vec-gluon.md
chapter_natural-language-processing/word2vec-gluon.md
+5
-5
chapter_recurrent-neural-networks/rnn-gluon.md
chapter_recurrent-neural-networks/rnn-gluon.md
+7
-6
chapter_recurrent-neural-networks/rnn-scratch.md
chapter_recurrent-neural-networks/rnn-scratch.md
+6
-5
environment.yml
environment.yml
+1
-1
gluonbook/__init__.py
gluonbook/__init__.py
+1
-1
gluonbook/utils.py
gluonbook/utils.py
+49
-55
未找到文件。
build/env.yml
浏览文件 @
8ad920e0
...
...
@@ -11,6 +11,6 @@ dependencies:
-
recommonmark==0.4.0
-
https://github.com/mli/notedown/tarball/master
-
https://s3-us-west-2.amazonaws.com/szha-experiments/mxnet_cu92-1.4.0b20181219-py2.py3-none-manylinux1_x86_64.whl
-
gluonbook==0.8.
8
-
gluonbook==0.8.
9
-
jieba==0.39
-
awscli
chapter_appendix/gluonbook.md
浏览文件 @
8ad920e0
...
...
@@ -3,7 +3,6 @@
函数、类等名称:定义所在章节
*
`accuracy`
:
[
Softmax回归的从零开始实现
](
../chapter_deep-learning-basics/softmax-regression-scratch.md
)
*
`bbox_to_rect`
:
[
物体检测和边界框
](
../chapter_computer-vision/bounding-box.md
)
*
`Benchmark`
:
[
异步计算
](
../chapter_computational-performance/async-computation.md
)
*
`corr2d`
:
[
二维卷积层
](
../chapter_convolutional-neural-networks/conv-layer.md
)
...
...
chapter_computer-vision/image-augmentation.md
浏览文件 @
8ad920e0
...
...
@@ -11,7 +11,7 @@ import mxnet as mx
from mxnet import autograd, gluon, image, init, nd
from mxnet.gluon import data as gdata, loss as gloss, utils as gutils
import sys
from time
import time
import time
```
## 常用的图像增广方法
...
...
@@ -162,8 +162,7 @@ def _get_batch(batch, ctx):
labels = labels.astype(features.dtype)
# 当 ctx 包含多个 GPU 时,划分小批量数据样本并复制到各个 GPU 上。
return (gutils.split_and_load(features, ctx),
gutils.split_and_load(labels, ctx),
features.shape[0])
gutils.split_and_load(labels, ctx), features.shape[0])
```
然后,我们定义
`evaluate_accuracy`
函数评价模型的分类准确率。与
[
“Softmax回归的从零开始实现”
](
../chapter_deep-learning-basics/softmax-regression-scratch.md
)
和
[
“卷积神经网络(LeNet)”
](
../chapter_convolutional-neural-networks/lenet.md
)
两节中描述的
`evaluate_accuracy`
函数不同,这里定义的函数更加通用:它通过辅助函数
`_get_batch`
使用
`ctx`
变量所包含的所有GPU来评价模型。
...
...
@@ -173,16 +172,15 @@ def _get_batch(batch, ctx):
def evaluate_accuracy(data_iter, net, ctx=[mx.cpu()]):
if isinstance(ctx, mx.Context):
ctx = [ctx]
acc = nd.array([0])
n = 0
acc_sum, n = nd.array([0]), 0
for batch in data_iter:
features, labels, _ = _get_batch(batch, ctx)
for X, y in zip(features, labels):
y = y.astype('float32')
acc += (net(X).argmax(axis=1) == y).sum().copyto(mx.cpu())
acc
_sum
+= (net(X).argmax(axis=1) == y).sum().copyto(mx.cpu())
n += y.size
acc.wait_to_read()
return acc.asscalar() / n
acc
_sum
.wait_to_read()
return acc
_sum
.asscalar() / n
```
接下来,我们定义
`train`
函数使用多GPU训练并评价模型。
...
...
@@ -194,8 +192,7 @@ def train(train_iter, test_iter, net, loss, trainer, ctx, num_epochs):
if isinstance(ctx, mx.Context):
ctx = [ctx]
for epoch in range(num_epochs):
train_l_sum, train_acc_sum, n, m = 0.0, 0.0, 0.0, 0.0
start = time()
train_l_sum, train_acc_sum, n, m, start = 0.0, 0.0, 0, 0, time.time()
for i, batch in enumerate(train_iter):
Xs, ys, batch_size = _get_batch(batch, ctx)
ls = []
...
...
@@ -204,17 +201,17 @@ def train(train_iter, test_iter, net, loss, trainer, ctx, num_epochs):
ls = [loss(y_hat, y) for y_hat, y in zip(y_hats, ys)]
for l in ls:
l.backward()
trainer.step(batch_size)
train_l_sum += sum([l.sum().asscalar() for l in ls])
n += sum([l.size for l in ls])
train_acc_sum += sum([(y_hat.argmax(axis=1) == y).sum().asscalar()
for y_hat, y in zip(y_hats, ys)])
train_l_sum += sum([l.sum().asscalar() for l in ls])
trainer.step(batch_size)
n += batch_size
m += sum([y.size for y in ys])
test_acc = evaluate_accuracy(test_iter, net, ctx)
print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
'time %.1f sec'
% (epoch + 1, train_l_sum / n, train_acc_sum / m, test_acc,
time() - start))
time
.time
() - start))
```
现在,我们可以定义
`train_with_data_aug`
函数使用图像增广来训练模型了。该函数获取了所有可用的GPU,并将Adam作为训练使用的优化算法,然后将图像增广应用于训练数据集之上,最后调用刚才定义的
`train`
函数训练并评价模型。
...
...
chapter_computer-vision/kaggle-gluon-cifar10.md
浏览文件 @
8ad920e0
...
...
@@ -268,28 +268,28 @@ def train(net, train_iter, valid_iter, num_epochs, lr, wd, ctx, lr_period,
trainer = gluon.Trainer(net.collect_params(), 'sgd',
{'learning_rate': lr, 'momentum': 0.9, 'wd': wd})
for epoch in range(num_epochs):
train_l
, train_acc, start = 0.0, 0.
0, time.time()
train_l
_sum, train_acc_sum, n, start = 0.0, 0.0,
0, time.time()
if epoch > 0 and epoch % lr_period == 0:
trainer.set_learning_rate(trainer.learning_rate * lr_decay)
for X, y in train_iter:
y = y.astype('float32').as_in_context(ctx)
with autograd.record():
y_hat = net(X.as_in_context(ctx))
l = loss(y_hat, y)
l = loss(y_hat, y)
.sum()
l.backward()
trainer.step(batch_size)
train_l += l.mean().asscalar()
train_acc += gb.accuracy(y_hat, y)
train_l_sum += l.asscalar()
train_acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar()
n += y.size
time_s = "time %.2f sec" % (time.time() - start)
if valid_iter is not None:
valid_acc = gb.evaluate_accuracy(valid_iter, net, ctx)
epoch_s = ("epoch %d, loss %f, train acc %f, valid acc %f, "
% (epoch + 1, train_l
/ len(train_iter)
,
train_acc / len(train_iter),
valid_acc))
% (epoch + 1, train_l
_sum / n, train_acc_sum / n
,
valid_acc))
else:
epoch_s = ("epoch %d, loss %f, train acc %f, " %
(epoch + 1, train_l / len(train_iter),
train_acc / len(train_iter)))
(epoch + 1, train_l_sum / n, train_acc_sum / n))
print(epoch_s + time_s + ', lr ' + str(trainer.learning_rate))
```
...
...
chapter_computer-vision/kaggle-gluon-dog.md
浏览文件 @
8ad920e0
...
...
@@ -203,14 +203,15 @@ def get_net(ctx):
```
{.python .input}
loss = gloss.SoftmaxCrossEntropyLoss()
def
get_loss(data
, net, ctx):
l
= 0.
0
for X, y in data:
def
evaluate_loss(data_iter
, net, ctx):
l
_sum, n = 0.0,
0
for X, y in data
_iter
:
y = y.as_in_context(ctx)
output_features = net.features(X.as_in_context(ctx))
outputs = net.output_new(output_features)
l += loss(outputs, y).mean().asscalar()
return l / len(data)
l_sum += loss(outputs, y).sum().asscalar()
n += y.size
return l_sum / n
```
## 定义训练函数
...
...
@@ -224,26 +225,27 @@ def train(net, train_iter, valid_iter, num_epochs, lr, wd, ctx, lr_period,
trainer = gluon.Trainer(net.output_new.collect_params(), 'sgd',
{'learning_rate': lr, 'momentum': 0.9, 'wd': wd})
for epoch in range(num_epochs):
train_l
, start = 0.
0, time.time()
train_l
_sum, n, start = 0.0,
0, time.time()
if epoch > 0 and epoch % lr_period == 0:
trainer.set_learning_rate(trainer.learning_rate * lr_decay)
for X, y in train_iter:
y = y.as
type('float32').as
_in_context(ctx)
y = y.as_in_context(ctx)
output_features = net.features(X.as_in_context(ctx))
with autograd.record():
outputs = net.output_new(output_features)
l = loss(outputs, y)
l = loss(outputs, y)
.sum()
l.backward()
trainer.step(batch_size)
train_l += l.mean().asscalar()
train_l_sum += l.asscalar()
n += y.size
time_s = "time %.2f sec" % (time.time() - start)
if valid_iter is not None:
valid_loss =
get
_loss(valid_iter, net, ctx)
valid_loss =
evaluate
_loss(valid_iter, net, ctx)
epoch_s = ("epoch %d, train loss %f, valid loss %f, "
% (epoch + 1, train_l
/ len(train_iter)
, valid_loss))
% (epoch + 1, train_l
_sum / n
, valid_loss))
else:
epoch_s = ("epoch %d, train loss %f, "
% (epoch + 1, train_l
/ len(train_iter)
))
% (epoch + 1, train_l
_sum / n
))
print(epoch_s + time_s + ', lr ' + str(trainer.learning_rate))
```
...
...
chapter_computer-vision/ssd.md
浏览文件 @
8ad920e0
...
...
@@ -224,10 +224,10 @@ def calc_loss(cls_preds, cls_labels, bbox_preds, bbox_labels, bbox_masks):
```
{.python .input n=18}
def cls_eval(cls_preds, cls_labels):
# 由于类别预测结果放在最后一维,argmax 需要指定最后一维。
return (cls_preds.argmax(axis=-1) == cls_labels).
mean
().asscalar()
return (cls_preds.argmax(axis=-1) == cls_labels).
sum
().asscalar()
def bbox_eval(bbox_preds, bbox_labels, bbox_masks):
return ((bbox_labels - bbox_preds) * bbox_masks).abs().
mean
().asscalar()
return ((bbox_labels - bbox_preds) * bbox_masks).abs().
sum
().asscalar()
```
### 训练模型
...
...
@@ -236,10 +236,10 @@ def bbox_eval(bbox_preds, bbox_labels, bbox_masks):
```
{.python .input n=19}
for epoch in range(20):
acc
, mae =
0, 0
acc
_sum, mae_sum, n, m = 0.0, 0.0,
0, 0
train_iter.reset() # 从头读取数据。
start = time.time()
for
i, batch in enumerate(train_iter)
:
for
batch in train_iter
:
X = batch.data[0].as_in_context(ctx)
Y = batch.label[0].as_in_context(ctx)
with autograd.record():
...
...
@@ -253,11 +253,14 @@ for epoch in range(20):
bbox_masks)
l.backward()
trainer.step(batch_size)
acc += cls_eval(cls_preds, cls_labels)
mae += bbox_eval(bbox_preds, bbox_labels, bbox_masks)
acc_sum += cls_eval(cls_preds, cls_labels)
n += cls_labels.size
mae_sum += bbox_eval(bbox_preds, bbox_labels, bbox_masks)
m += bbox_labels.size
if (epoch + 1) % 5 == 0:
print('epoch %2d, class err %.2e, bbox mae %.2e, time %.1f sec' % (
epoch + 1, 1 - acc
/ (i + 1), mae / (i + 1)
, time.time() - start))
epoch + 1, 1 - acc
_sum / n, mae_sum / m
, time.time() - start))
```
## 预测
...
...
chapter_convolutional-neural-networks/lenet.md
浏览文件 @
8ad920e0
...
...
@@ -81,12 +81,13 @@ ctx
# 本函数已保存在 gluonbook 包中方便以后使用。该函数将被逐步改进:它的完整实现将在“图像增
# 广”一节中描述。
def evaluate_accuracy(data_iter, net, ctx):
acc
= nd.array([0], ctx=ctx)
acc
_sum, n = nd.array([0], ctx=ctx), 0
for X, y in data_iter:
# 如果 ctx 代表 GPU 及相应的显存,将数据复制到显存上。
X, y = X.as_in_context(ctx), y.as_in_context(ctx)
acc += gb.accuracy(net(X), y)
return acc.asscalar() / len(data_iter)
X, y = X.as_in_context(ctx), y.as_in_context(ctx).astype('float32')
acc_sum += (net(X).argmax(axis=1) == y).sum()
n += y.size
return acc_sum.asscalar() / n
```
我们同样对
[
“Softmax回归的从零开始实现”
](
../chapter_deep-learning-basics/softmax-regression-scratch.md
)
一节中定义的
`train_ch3`
函数略作修改,确保计算使用的数据和模型同在内存或显存上。
...
...
@@ -98,21 +99,23 @@ def train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx,
print('training on', ctx)
loss = gloss.SoftmaxCrossEntropyLoss()
for epoch in range(num_epochs):
train_l_sum, train_acc_sum,
start =
0, 0, time.time()
train_l_sum, train_acc_sum,
n, start = 0.0, 0.
0, 0, time.time()
for X, y in train_iter:
X, y = X.as_in_context(ctx), y.as_in_context(ctx)
with autograd.record():
y_hat = net(X)
l = loss(y_hat, y)
l = loss(y_hat, y)
.sum()
l.backward()
trainer.step(batch_size)
train_l_sum += l.mean().asscalar()
train_acc_sum += gb.accuracy(y_hat, y)
y = y.astype('float32')
train_l_sum += l.asscalar()
train_acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar()
n += y.size
test_acc = evaluate_accuracy(test_iter, net, ctx)
print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
'time %.1f sec'
% (epoch + 1, train_l_sum / len(train_iter),
train_acc_sum / len(train_iter)
,
test_acc,
time.time() - start))
'time %.1f sec'
% (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc
,
time.time() - start))
```
我们重新将模型参数初始化到设备变量
`ctx`
之上,并使用Xavier随机初始化。损失函数和训练算法则依然使用交叉熵损失函数和小批量随机梯度下降。
...
...
chapter_deep-learning-basics/softmax-regression-scratch.md
浏览文件 @
8ad920e0
...
...
@@ -77,7 +77,7 @@ def net(X):
```
{.python .input n=9}
y_hat = nd.array([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
y = nd.array([0, 2])
y = nd.array([0, 2]
, dtype='int32'
)
nd.pick(y_hat, y)
```
...
...
@@ -92,10 +92,9 @@ def cross_entropy(y_hat, y):
给定一个类别的预测概率分布
`y_hat`
,我们把预测概率最大的类别作为输出类别。如果它与真实类别
`y`
一致,说明这次预测是正确的。分类准确率即正确预测数量与总预测数量之比。
下面定义准确率
`accuracy`
函数。其中
`y_hat.argmax(axis=1)`
返回矩阵
`y_hat`
每行中最大元素的索引,且返回结果与变量
`y`
形状相同。我们在
[
“数据操作”
](
../chapter_prerequisite/ndarray.md
)
一节介绍过,相等条件判断式
`(y_hat.argmax(axis=1) == y)`
是一个值为0(相等为假)或1(相等为真)的NDArray。由于标签类型为整数,我们先将变量
`y`
变换为浮点数再进行相等条件判断。
为了演示准确率的计算,
下面定义准确率
`accuracy`
函数。其中
`y_hat.argmax(axis=1)`
返回矩阵
`y_hat`
每行中最大元素的索引,且返回结果与变量
`y`
形状相同。我们在
[
“数据操作”
](
../chapter_prerequisite/ndarray.md
)
一节介绍过,相等条件判断式
`(y_hat.argmax(axis=1) == y)`
是一个值为0(相等为假)或1(相等为真)的NDArray。由于标签类型为整数,我们先将变量
`y`
变换为浮点数再进行相等条件判断。
```
{.python .input n=11}
# 本函数已保存在 gluonbook 包中方便以后使用。
def accuracy(y_hat, y):
return (y_hat.argmax(axis=1) == y.astype('float32')).mean().asscalar()
```
...
...
@@ -112,10 +111,12 @@ accuracy(y_hat, y)
# 本函数已保存在 gluonbook 包中方便以后使用。该函数将被逐步改进:它的完整实现将在“图像增
# 广”一节中描述。
def evaluate_accuracy(data_iter, net):
acc
=
0
acc
_sum, n = 0.0,
0
for X, y in data_iter:
acc += accuracy(net(X), y)
return acc / len(data_iter)
y = y.astype('float32')
acc_sum += (net(X).argmax(axis=1) == y).sum().asscalar()
n += y.size
return acc_sum / n
```
因为我们随机初始化了模型
`net`
,所以这个随机模型的准确率应该接近于类别个数10的倒数0.1。
...
...
@@ -135,23 +136,23 @@ num_epochs, lr = 5, 0.1
def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
params=None, lr=None, trainer=None):
for epoch in range(num_epochs):
train_l_sum = 0
train_acc_sum = 0
train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
for X, y in train_iter:
with autograd.record():
y_hat = net(X)
l = loss(y_hat, y)
l = loss(y_hat, y)
.sum()
l.backward()
if trainer is None:
gb.sgd(params, lr, batch_size)
else:
trainer.step(batch_size) # 下一节将用到。
train_l_sum += l.mean().asscalar()
train_acc_sum += accuracy(y_hat, y)
y = y.astype('float32')
train_l_sum += l.asscalar()
train_acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar()
n += y.size
test_acc = evaluate_accuracy(test_iter, net)
print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
% (epoch + 1, train_l_sum / len(train_iter),
train_acc_sum / len(train_iter), test_acc))
% (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))
train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs,
batch_size, [W, b], lr)
...
...
chapter_natural-language-processing/machine-translation.md
浏览文件 @
8ad920e0
...
...
@@ -213,7 +213,7 @@ def train(encoder, decoder, dataset, lr, batch_size, num_epochs):
loss = gloss.SoftmaxCrossEntropyLoss()
data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)
for epoch in range(num_epochs):
l_sum = 0
l_sum = 0
.0
for X, Y in data_iter:
with autograd.record():
l = batch_loss(encoder, decoder, X, Y, loss)
...
...
chapter_natural-language-processing/word2vec-gluon.md
浏览文件 @
8ad920e0
...
...
@@ -291,7 +291,7 @@ def train(net, lr, num_epochs):
trainer = gluon.Trainer(net.collect_params(), 'adam',
{'learning_rate': lr})
for epoch in range(num_epochs):
start
_time, train_l_sum = time.time()
, 0
start
, l_sum, n = time.time(), 0.0
, 0
for batch in data_iter:
center, context_negative, mask, label = [
data.as_in_context(ctx) for data in batch]
...
...
@@ -302,10 +302,10 @@ def train(net, lr, num_epochs):
mask.shape[1] / mask.sum(axis=1))
l.backward()
trainer.step(batch_size)
train_l_sum += l.mean
().asscalar()
print('epoch %d, train loss %.2f, time %.2fs'
% (epoch + 1, train_l_sum / len(data_iter),
time.time() - start_time
))
l_sum += l.sum
().asscalar()
n += l.size
print('epoch %d, loss %.2f, time %.2fs'
% (epoch + 1, l_sum / n, time.time() - start
))
```
现在我们可以训练使用负采样的跳字模型了。
...
...
chapter_recurrent-neural-networks/rnn-gluon.md
浏览文件 @
8ad920e0
...
...
@@ -108,11 +108,11 @@ def train_and_predict_rnn_gluon(model, num_hiddens, vocab_size, ctx,
{'learning_rate': lr, 'momentum': 0, 'wd': 0})
for epoch in range(num_epochs):
l_sum,
start = 0.
0, time.time()
l_sum,
n, start = 0.0,
0, time.time()
data_iter = gb.data_iter_consecutive(
corpus_indices, batch_size, num_steps, ctx)
state = model.begin_state(batch_size=batch_size, ctx=ctx)
for
t, (X, Y) in enumerate(data_iter)
:
for
X, Y in data_iter
:
for s in state:
s.detach()
with autograd.record():
...
...
@@ -124,15 +124,16 @@ def train_and_predict_rnn_gluon(model, num_hiddens, vocab_size, ctx,
params = [p.data() for p in model.collect_params().values()]
gb.grad_clipping(params, clipping_theta, ctx)
trainer.step(1) # 因为已经误差取过均值,梯度不用再做平均。
l_sum += l.asscalar()
l_sum += l.asscalar() * y.size
n += y.size
if (epoch + 1) % pred_period == 0:
print('epoch %d, perplexity %f, time %.2f sec' % (
epoch + 1, math.exp(l_sum /
(t + 1)
), time.time() - start))
epoch + 1, math.exp(l_sum /
n
), time.time() - start))
for prefix in prefixes:
print(' -', predict_rnn_gluon(
prefix, pred_len, model, vocab_size,
c
tx, idx_to_char, c
har_to_idx))
prefix, pred_len, model, vocab_size,
ctx, idx_to_char,
char_to_idx))
```
使用和上一节实验中一样的超参数来训练模型。
...
...
chapter_recurrent-neural-networks/rnn-scratch.md
浏览文件 @
8ad920e0
...
...
@@ -134,7 +134,7 @@ $$ \min\left(\frac{\theta}{\|\boldsymbol{g}\|}, 1\right)\boldsymbol{g}$$
```
{.python .input n=10}
# 本函数已保存在 gluonbook 包中方便以后使用。
def grad_clipping(params, theta, ctx):
norm = nd.array([0
.0
], ctx)
norm = nd.array([0], ctx)
for param in params:
norm += (param.grad ** 2).sum()
norm = norm.sqrt().asscalar()
...
...
@@ -180,9 +180,9 @@ def train_and_predict_rnn(rnn, get_params, init_rnn_state, num_hiddens,
for epoch in range(num_epochs):
if not is_random_iter: # 如使用相邻采样,在 epoch 开始时初始化隐藏状态。
state = init_rnn_state(batch_size, num_hiddens, ctx)
l_sum,
start = 0.
0, time.time()
l_sum,
n, start = 0.0,
0, time.time()
data_iter = data_iter_fn(corpus_indices, batch_size, num_steps, ctx)
for
t, (X, Y) in enumerate(data_iter)
:
for
X, Y in data_iter
:
if is_random_iter: # 如使用随机采样,在每个小批量更新前初始化隐藏状态。
state = init_rnn_state(batch_size, num_hiddens, ctx)
else: # 否则需要使用 detach 函数从计算图分离隐藏状态。
...
...
@@ -202,11 +202,12 @@ def train_and_predict_rnn(rnn, get_params, init_rnn_state, num_hiddens,
l.backward()
grad_clipping(params, clipping_theta, ctx) # 裁剪梯度。
gb.sgd(params, lr, 1) # 因为误差已经取过均值,梯度不用再做平均。
l_sum += l.asscalar()
l_sum += l.asscalar() * y.size
n += y.size
if (epoch + 1) % pred_period == 0:
print('epoch %d, perplexity %f, time %.2f sec' % (
epoch + 1, math.exp(l_sum /
(t + 1)
), time.time() - start))
epoch + 1, math.exp(l_sum /
n
), time.time() - start))
for prefix in prefixes:
print(' -', predict_rnn(
prefix, pred_len, rnn, params, init_rnn_state,
...
...
environment.yml
浏览文件 @
8ad920e0
...
...
@@ -6,4 +6,4 @@ dependencies:
-
pandas=0.23.2
-
pip
:
-
mxnet==1.5.0b20181215
-
gluonbook==0.8.
8
-
gluonbook==0.8.
9
gluonbook/__init__.py
浏览文件 @
8ad920e0
from
.utils
import
*
__version__
=
'0.8.
8
'
__version__
=
'0.8.
9
'
gluonbook/utils.py
浏览文件 @
8ad920e0
...
...
@@ -30,11 +30,6 @@ VOC_COLORMAP = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0],
[
0
,
64
,
128
]]
def
accuracy
(
y_hat
,
y
):
"""Get accuracy."""
return
(
y_hat
.
argmax
(
axis
=
1
)
==
y
.
astype
(
'float32'
)).
mean
().
asscalar
()
def
bbox_to_rect
(
bbox
,
color
):
"""Convert bounding box to matplotlib format."""
return
plt
.
Rectangle
(
xy
=
(
bbox
[
0
],
bbox
[
1
]),
width
=
bbox
[
2
]
-
bbox
[
0
],
...
...
@@ -156,16 +151,15 @@ def evaluate_accuracy(data_iter, net, ctx=[mx.cpu()]):
"""Evaluate accuracy of a model on the given data set."""
if
isinstance
(
ctx
,
mx
.
Context
):
ctx
=
[
ctx
]
acc
=
nd
.
array
([
0
])
n
=
0
acc_sum
,
n
=
nd
.
array
([
0
]),
0
for
batch
in
data_iter
:
features
,
labels
,
_
=
_get_batch
(
batch
,
ctx
)
for
X
,
y
in
zip
(
features
,
labels
):
y
=
y
.
astype
(
'float32'
)
acc
+=
(
net
(
X
).
argmax
(
axis
=
1
)
==
y
).
sum
().
copyto
(
mx
.
cpu
())
acc
_sum
+=
(
net
(
X
).
argmax
(
axis
=
1
)
==
y
).
sum
().
copyto
(
mx
.
cpu
())
n
+=
y
.
size
acc
.
wait_to_read
()
return
acc
.
asscalar
()
/
n
acc
_sum
.
wait_to_read
()
return
acc
_sum
.
asscalar
()
/
n
def
_get_batch
(
batch
,
ctx
):
...
...
@@ -174,8 +168,7 @@ def _get_batch(batch, ctx):
if
labels
.
dtype
!=
features
.
dtype
:
labels
=
labels
.
astype
(
features
.
dtype
)
return
(
gutils
.
split_and_load
(
features
,
ctx
),
gutils
.
split_and_load
(
labels
,
ctx
),
features
.
shape
[
0
])
gutils
.
split_and_load
(
labels
,
ctx
),
features
.
shape
[
0
])
def
get_data_ch7
():
...
...
@@ -209,7 +202,7 @@ def get_vocab_imdb(data):
def
grad_clipping
(
params
,
theta
,
ctx
):
"""Clip the gradient."""
if
theta
is
not
None
:
norm
=
nd
.
array
([
0
.0
],
ctx
)
norm
=
nd
.
array
([
0
],
ctx
)
for
param
in
params
:
norm
+=
(
param
.
grad
**
2
).
sum
()
norm
=
norm
.
sqrt
().
asscalar
()
...
...
@@ -538,9 +531,8 @@ def train(train_iter, test_iter, net, loss, trainer, ctx, num_epochs):
print
(
'training on'
,
ctx
)
if
isinstance
(
ctx
,
mx
.
Context
):
ctx
=
[
ctx
]
for
epoch
in
range
(
1
,
num_epochs
+
1
):
train_l_sum
,
train_acc_sum
,
n
,
m
=
0.0
,
0.0
,
0.0
,
0.0
start
=
time
.
time
()
for
epoch
in
range
(
num_epochs
):
train_l_sum
,
train_acc_sum
,
n
,
m
,
start
=
0.0
,
0.0
,
0
,
0
,
time
.
time
()
for
i
,
batch
in
enumerate
(
train_iter
):
Xs
,
ys
,
batch_size
=
_get_batch
(
batch
,
ctx
)
ls
=
[]
...
...
@@ -549,16 +541,16 @@ def train(train_iter, test_iter, net, loss, trainer, ctx, num_epochs):
ls
=
[
loss
(
y_hat
,
y
)
for
y_hat
,
y
in
zip
(
y_hats
,
ys
)]
for
l
in
ls
:
l
.
backward
()
trainer
.
step
(
batch_size
)
train_l_sum
+=
sum
([
l
.
sum
().
asscalar
()
for
l
in
ls
])
n
+=
sum
([
l
.
size
for
l
in
ls
])
train_acc_sum
+=
sum
([(
y_hat
.
argmax
(
axis
=
1
)
==
y
).
sum
().
asscalar
()
for
y_hat
,
y
in
zip
(
y_hats
,
ys
)])
train_l_sum
+=
sum
([
l
.
sum
().
asscalar
()
for
l
in
ls
])
trainer
.
step
(
batch_size
)
n
+=
batch_size
m
+=
sum
([
y
.
size
for
y
in
ys
])
test_acc
=
evaluate_accuracy
(
test_iter
,
net
,
ctx
)
print
(
'epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
'time %.1f sec'
%
(
epoch
,
train_l_sum
/
n
,
train_acc_sum
/
m
,
test_acc
,
%
(
epoch
+
1
,
train_l_sum
/
n
,
train_acc_sum
/
m
,
test_acc
,
time
.
time
()
-
start
))
...
...
@@ -581,18 +573,18 @@ def train_and_predict_rnn(rnn, get_params, init_rnn_state, num_hiddens,
pred_len
,
prefixes
):
"""Train an RNN model and predict the next item in the sequence."""
if
is_random_iter
:
data_iter_fn
=
data_iter_random
data_iter_fn
=
gb
.
data_iter_random
else
:
data_iter_fn
=
data_iter_consecutive
data_iter_fn
=
gb
.
data_iter_consecutive
params
=
get_params
()
loss
=
gloss
.
SoftmaxCrossEntropyLoss
()
for
epoch
in
range
(
num_epochs
):
if
not
is_random_iter
:
state
=
init_rnn_state
(
batch_size
,
num_hiddens
,
ctx
)
l_sum
,
start
=
0.
0
,
time
.
time
()
l_sum
,
n
,
start
=
0.0
,
0
,
time
.
time
()
data_iter
=
data_iter_fn
(
corpus_indices
,
batch_size
,
num_steps
,
ctx
)
for
t
,
(
X
,
Y
)
in
enumerate
(
data_iter
)
:
for
X
,
Y
in
data_iter
:
if
is_random_iter
:
state
=
init_rnn_state
(
batch_size
,
num_hiddens
,
ctx
)
else
:
...
...
@@ -606,12 +598,13 @@ def train_and_predict_rnn(rnn, get_params, init_rnn_state, num_hiddens,
l
=
loss
(
outputs
,
y
).
mean
()
l
.
backward
()
grad_clipping
(
params
,
clipping_theta
,
ctx
)
sgd
(
params
,
lr
,
1
)
l_sum
+=
l
.
asscalar
()
gb
.
sgd
(
params
,
lr
,
1
)
l_sum
+=
l
.
asscalar
()
*
y
.
size
n
+=
y
.
size
if
(
epoch
+
1
)
%
pred_period
==
0
:
print
(
'epoch %d, perplexity %f, time %.2f sec'
%
(
epoch
+
1
,
math
.
exp
(
l_sum
/
(
t
+
1
)
),
time
.
time
()
-
start
))
epoch
+
1
,
math
.
exp
(
l_sum
/
n
),
time
.
time
()
-
start
))
for
prefix
in
prefixes
:
print
(
' -'
,
predict_rnn
(
prefix
,
pred_len
,
rnn
,
params
,
init_rnn_state
,
...
...
@@ -629,11 +622,11 @@ def train_and_predict_rnn_gluon(model, num_hiddens, vocab_size, ctx,
{
'learning_rate'
:
lr
,
'momentum'
:
0
,
'wd'
:
0
})
for
epoch
in
range
(
num_epochs
):
l_sum
,
start
=
0.
0
,
time
.
time
()
data_iter
=
data_iter_consecutive
(
l_sum
,
n
,
start
=
0.0
,
0
,
time
.
time
()
data_iter
=
gb
.
data_iter_consecutive
(
corpus_indices
,
batch_size
,
num_steps
,
ctx
)
state
=
model
.
begin_state
(
batch_size
=
batch_size
,
ctx
=
ctx
)
for
t
,
(
X
,
Y
)
in
enumerate
(
data_iter
)
:
for
X
,
Y
in
data_iter
:
for
s
in
state
:
s
.
detach
()
with
autograd
.
record
():
...
...
@@ -642,65 +635,66 @@ def train_and_predict_rnn_gluon(model, num_hiddens, vocab_size, ctx,
l
=
loss
(
output
,
y
).
mean
()
l
.
backward
()
params
=
[
p
.
data
()
for
p
in
model
.
collect_params
().
values
()]
grad_clipping
(
params
,
clipping_theta
,
ctx
)
g
b
.
g
rad_clipping
(
params
,
clipping_theta
,
ctx
)
trainer
.
step
(
1
)
l_sum
+=
l
.
asscalar
()
l_sum
+=
l
.
asscalar
()
*
y
.
size
n
+=
y
.
size
if
(
epoch
+
1
)
%
pred_period
==
0
:
print
(
'epoch %d, perplexity %f, time %.2f sec'
%
(
epoch
+
1
,
math
.
exp
(
l_sum
/
(
t
+
1
)
),
time
.
time
()
-
start
))
epoch
+
1
,
math
.
exp
(
l_sum
/
n
),
time
.
time
()
-
start
))
for
prefix
in
prefixes
:
print
(
' -'
,
predict_rnn_gluon
(
prefix
,
pred_len
,
model
,
vocab_size
,
c
tx
,
idx_to_char
,
c
har_to_idx
))
prefix
,
pred_len
,
model
,
vocab_size
,
ctx
,
idx_to_char
,
char_to_idx
))
def
train_ch3
(
net
,
train_iter
,
test_iter
,
loss
,
num_epochs
,
batch_size
,
params
=
None
,
lr
=
None
,
trainer
=
None
):
"""Train and evaluate a model on CPU."""
for
epoch
in
range
(
1
,
num_epochs
+
1
):
train_l_sum
=
0
train_acc_sum
=
0
"""Train and evaluate a model with CPU."""
for
epoch
in
range
(
num_epochs
):
train_l_sum
,
train_acc_sum
,
n
=
0.0
,
0.0
,
0
for
X
,
y
in
train_iter
:
with
autograd
.
record
():
y_hat
=
net
(
X
)
l
=
loss
(
y_hat
,
y
)
l
=
loss
(
y_hat
,
y
)
.
sum
()
l
.
backward
()
if
trainer
is
None
:
sgd
(
params
,
lr
,
batch_size
)
gb
.
sgd
(
params
,
lr
,
batch_size
)
else
:
trainer
.
step
(
batch_size
)
train_l_sum
+=
l
.
mean
().
asscalar
()
train_acc_sum
+=
accuracy
(
y_hat
,
y
)
y
=
y
.
astype
(
'float32'
)
train_l_sum
+=
l
.
asscalar
()
train_acc_sum
+=
(
y_hat
.
argmax
(
axis
=
1
)
==
y
).
sum
().
asscalar
()
n
+=
y
.
size
test_acc
=
evaluate_accuracy
(
test_iter
,
net
)
print
(
'epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
%
(
epoch
,
train_l_sum
/
len
(
train_iter
),
train_acc_sum
/
len
(
train_iter
),
test_acc
))
%
(
epoch
+
1
,
train_l_sum
/
n
,
train_acc_sum
/
n
,
test_acc
))
def
train_ch5
(
net
,
train_iter
,
test_iter
,
batch_size
,
trainer
,
ctx
,
num_epochs
):
"""Train and evaluate a model
on
CPU or GPU."""
"""Train and evaluate a model
with
CPU or GPU."""
print
(
'training on'
,
ctx
)
loss
=
gloss
.
SoftmaxCrossEntropyLoss
()
for
epoch
in
range
(
1
,
num_epochs
+
1
):
train_l_sum
=
0
train_acc_sum
=
0
start
=
time
.
time
()
for
epoch
in
range
(
num_epochs
):
train_l_sum
,
train_acc_sum
,
n
,
start
=
0.0
,
0.0
,
0
,
time
.
time
()
for
X
,
y
in
train_iter
:
X
,
y
=
X
.
as_in_context
(
ctx
),
y
.
as_in_context
(
ctx
)
with
autograd
.
record
():
y_hat
=
net
(
X
)
l
=
loss
(
y_hat
,
y
)
l
=
loss
(
y_hat
,
y
)
.
sum
()
l
.
backward
()
trainer
.
step
(
batch_size
)
train_l_sum
+=
l
.
mean
().
asscalar
()
train_acc_sum
+=
accuracy
(
y_hat
,
y
)
y
=
y
.
astype
(
'float32'
)
train_l_sum
+=
l
.
asscalar
()
train_acc_sum
+=
(
y_hat
.
argmax
(
axis
=
1
)
==
y
).
sum
().
asscalar
()
n
+=
y
.
size
test_acc
=
evaluate_accuracy
(
test_iter
,
net
,
ctx
)
print
(
'epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
'time %.1f sec'
%
(
epoch
,
train_l_sum
/
len
(
train_iter
)
,
t
rain_acc_sum
/
len
(
train_iter
),
test_acc
,
t
ime
.
time
()
-
start
))
%
(
epoch
+
1
,
train_l_sum
/
n
,
train_acc_sum
/
n
,
test_acc
,
time
.
time
()
-
start
))
def
train_ch7
(
trainer_fn
,
states
,
hyperparams
,
features
,
labels
,
batch_size
=
10
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录