提交 cab512c5 编写于 作者: A Aston Zhang

revise opt and utils

上级 7617b133
......@@ -8,9 +8,8 @@
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
import mxnet as mx
from mxnet import autograd, gluon, nd
from mxnet import gluon, nd
from mxnet.gluon import nn
import numpy as np
import sys
sys.path.append('..')
import utils
......@@ -24,9 +23,9 @@ num_inputs = 2
num_examples = 1000
true_w = [2, -3.4]
true_b = 4.2
X = nd.random.normal(scale=1, shape=(num_examples, num_inputs))
y = true_w[0] * X[:, 0] + true_w[1] * X[:, 1] + true_b
y += nd.random.normal(scale=0.01, shape=y.shape)
features = nd.random.normal(scale=1, shape=(num_examples, num_inputs))
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
labels += nd.random.normal(scale=0.01, shape=labels.shape)
# 线性回归模型。
net = nn.Sequential()
......@@ -39,7 +38,7 @@ net.add(nn.Dense(1))
net.initialize(mx.init.Normal(sigma=1), force_reinit=True)
trainer = gluon.Trainer(net.collect_params(), 'adadelta', {'rho': 0.9999})
utils.optimize(batch_size=10, trainer=trainer, num_epochs=3, decay_epoch=None,
log_interval=10, X=X, y=y, net=net)
log_interval=10, features=features, labels=labels, net=net)
```
## 小结
......
......@@ -66,9 +66,9 @@ num_inputs = 2
num_examples = 1000
true_w = [2, -3.4]
true_b = 4.2
X = nd.random.normal(scale=1, shape=(num_examples, num_inputs))
y = true_w[0] * X[:, 0] + true_w[1] * X[:, 1] + true_b
y += nd.random.normal(scale=0.01, shape=y.shape)
features = nd.random.normal(scale=1, shape=(num_examples, num_inputs))
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
labels += nd.random.normal(scale=0.01, shape=labels.shape)
# 初始化模型参数。
def init_params():
......@@ -89,24 +89,23 @@ def init_params():
```{.python .input n=2}
net = utils.linreg
squared_loss = utils.squared_loss
loss = utils.squared_loss
def optimize(batch_size, rho, num_epochs, log_interval):
[w, b], sqrs, deltas = init_params()
y_vals = [squared_loss(net(X, w, b), y).mean().asnumpy()]
ls = [loss(net(features, w, b), labels).mean().asnumpy()]
for epoch in range(1, num_epochs + 1):
for batch_i, (features, label) in enumerate(
utils.data_iter(batch_size, num_examples, X, y)):
for batch_i, (X, y) in enumerate(
utils.data_iter(batch_size, num_examples, features, labels)):
with autograd.record():
output = net(features, w, b)
loss = squared_loss(output, label)
loss.backward()
l = loss(net(X, w, b), y)
l.backward()
adadelta([w, b], sqrs, deltas, rho, batch_size)
if batch_i * batch_size % log_interval == 0:
y_vals.append(squared_loss(net(X, w, b), y).mean().asnumpy())
ls.append(loss(net(features, w, b), labels).mean().asnumpy())
print('w:', w, '\nb:', b, '\n')
x_vals = np.linspace(0, num_epochs, len(y_vals), endpoint=True)
utils.semilogy(x_vals, y_vals, 'epoch', 'loss')
es = np.linspace(0, num_epochs, len(ls), endpoint=True)
utils.semilogy(es, ls, 'epoch', 'loss')
```
最终,优化所得的模型参数值与它们的真实值较接近。
......
......@@ -9,9 +9,8 @@
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
import mxnet as mx
from mxnet import autograd, gluon, nd
from mxnet import gluon, nd
from mxnet.gluon import nn
import numpy as np
import sys
sys.path.append('..')
import utils
......@@ -25,9 +24,9 @@ num_inputs = 2
num_examples = 1000
true_w = [2, -3.4]
true_b = 4.2
X = nd.random.normal(scale=1, shape=(num_examples, num_inputs))
y = true_w[0] * X[:, 0] + true_w[1] * X[:, 1] + true_b
y += nd.random.normal(scale=0.01, shape=y.shape)
features = nd.random.normal(scale=1, shape=(num_examples, num_inputs))
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
labels += nd.random.normal(scale=0.01, shape=labels.shape)
# 线性回归模型。
net = nn.Sequential()
......@@ -41,7 +40,7 @@ net.initialize(mx.init.Normal(sigma=1), force_reinit=True)
trainer = gluon.Trainer(net.collect_params(), 'adagrad',
{'learning_rate': 0.9})
utils.optimize(batch_size=10, trainer=trainer, num_epochs=3, decay_epoch=None,
log_interval=10, X=X, y=y, net=net)
log_interval=10, features=features, labels=labels, net=net)
```
## 小结
......
......@@ -75,9 +75,9 @@ num_inputs = 2
num_examples = 1000
true_w = [2, -3.4]
true_b = 4.2
X = nd.random.normal(scale=1, shape=(num_examples, num_inputs))
y = true_w[0] * X[:, 0] + true_w[1] * X[:, 1] + true_b
y += nd.random.normal(scale=0.01, shape=y.shape)
features = nd.random.normal(scale=1, shape=(num_examples, num_inputs))
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
labels += nd.random.normal(scale=0.01, shape=labels.shape)
# 初始化模型参数。
def init_params():
......@@ -96,24 +96,23 @@ def init_params():
```{.python .input n=3}
net = utils.linreg
squared_loss = utils.squared_loss
loss = utils.squared_loss
def optimize(batch_size, lr, num_epochs, log_interval):
[w, b], sqrs = init_params()
y_vals = [squared_loss(net(X, w, b), y).mean().asnumpy()]
ls = [loss(net(features, w, b), labels).mean().asnumpy()]
for epoch in range(1, num_epochs + 1):
for batch_i, (features, label) in enumerate(
utils.data_iter(batch_size, num_examples, X, y)):
for batch_i, (X, y) in enumerate(
utils.data_iter(batch_size, num_examples, features, labels)):
with autograd.record():
output = net(features, w, b)
loss = squared_loss(output, label)
loss.backward()
l = loss(net(X, w, b), y)
l.backward()
adagrad([w, b], sqrs, lr, batch_size)
if batch_i * batch_size % log_interval == 0:
y_vals.append(squared_loss(net(X, w, b), y).mean().asnumpy())
ls.append(loss(net(features, w, b), labels).mean().asnumpy())
print('w:', w, '\nb:', b, '\n')
x_vals = np.linspace(0, num_epochs, len(y_vals), endpoint=True)
utils.semilogy(x_vals, y_vals, 'epoch', 'loss')
es = np.linspace(0, num_epochs, len(ls), endpoint=True)
utils.semilogy(es, ls, 'epoch', 'loss')
```
最终,优化所得的模型参数值与它们的真实值较接近。
......
......@@ -8,9 +8,8 @@
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
import mxnet as mx
from mxnet import autograd, gluon, nd
from mxnet import gluon, nd
from mxnet.gluon import nn
import numpy as np
import sys
sys.path.append('..')
import utils
......@@ -24,9 +23,9 @@ num_inputs = 2
num_examples = 1000
true_w = [2, -3.4]
true_b = 4.2
X = nd.random.normal(scale=1, shape=(num_examples, num_inputs))
y = true_w[0] * X[:, 0] + true_w[1] * X[:, 1] + true_b
y += nd.random.normal(scale=0.01, shape=y.shape)
features = nd.random.normal(scale=1, shape=(num_examples, num_inputs))
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
labels += nd.random.normal(scale=0.01, shape=labels.shape)
# 线性回归模型。
net = nn.Sequential()
......@@ -39,7 +38,7 @@ net.add(nn.Dense(1))
net.initialize(mx.init.Normal(sigma=1), force_reinit=True)
trainer = gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': 0.1})
utils.optimize(batch_size=10, trainer=trainer, num_epochs=3, decay_epoch=None,
log_interval=10, X=X, y=y, net=net)
log_interval=10, features=features, labels=labels, net=net)
```
## 小结
......
......@@ -84,9 +84,9 @@ num_inputs = 2
num_examples = 1000
true_w = [2, -3.4]
true_b = 4.2
X = nd.random.normal(scale=1, shape=(num_examples, num_inputs))
y = true_w[0] * X[:, 0] + true_w[1] * X[:, 1] + true_b
y += nd.random.normal(scale=0.01, shape=y.shape)
features = nd.random.normal(scale=1, shape=(num_examples, num_inputs))
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
labels += nd.random.normal(scale=0.01, shape=labels.shape)
# 初始化模型参数。
def init_params():
......@@ -107,27 +107,26 @@ def init_params():
```{.python .input n=2}
net = utils.linreg
squared_loss = utils.squared_loss
loss = utils.squared_loss
def optimize(batch_size, lr, num_epochs, log_interval):
[w, b], vs, sqrs = init_params()
y_vals = [squared_loss(net(X, w, b), y).mean().asnumpy()]
ls = [loss(net(features, w, b), labels).mean().asnumpy()]
t = 0
for epoch in range(1, num_epochs + 1):
for batch_i, (features, label) in enumerate(
utils.data_iter(batch_size, num_examples, X, y)):
for batch_i, (X, y) in enumerate(
utils.data_iter(batch_size, num_examples, features, labels)):
with autograd.record():
output = net(features, w, b)
loss = squared_loss(output, label)
loss.backward()
l = loss(net(X, w, b), y)
l.backward()
# 必须在调用Adam前。
t += 1
adam([w, b], vs, sqrs, lr, batch_size, t)
if batch_i * batch_size % log_interval == 0:
y_vals.append(squared_loss(net(X, w, b), y).mean().asnumpy())
ls.append(loss(net(features, w, b), labels).mean().asnumpy())
print('w:', w, '\nb:', b, '\n')
x_vals = np.linspace(0, num_epochs, len(y_vals), endpoint=True)
utils.semilogy(x_vals, y_vals, 'epoch', 'loss')
es = np.linspace(0, num_epochs, len(ls), endpoint=True)
utils.semilogy(es, ls, 'epoch', 'loss')
```
最终,优化所得的模型参数值与它们的真实值较接近。
......
......@@ -24,9 +24,9 @@ num_inputs = 2
num_examples = 1000
true_w = [2, -3.4]
true_b = 4.2
X = nd.random.normal(scale=1, shape=(num_examples, num_inputs))
y = true_w[0] * X[:, 0] + true_w[1] * X[:, 1] + true_b
y += nd.random.normal(scale=0.01, shape=y.shape)
features = nd.random.normal(scale=1, shape=(num_examples, num_inputs))
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
labels += nd.random.normal(scale=0.01, shape=labels.shape)
# 线性回归模型。
net = nn.Sequential()
......@@ -37,28 +37,27 @@ net.add(nn.Dense(1))
```{.python .input n=2}
# 优化目标函数。
def optimize(batch_size, trainer, num_epochs, decay_epoch, log_interval, X, y,
net):
dataset = gdata.ArrayDataset(X, y)
def optimize(batch_size, trainer, num_epochs, decay_epoch, log_interval,
features, labels, net):
dataset = gdata.ArrayDataset(features, labels)
data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)
square_loss = gloss.L2Loss()
y_vals = [square_loss(net(X), y).mean().asnumpy()]
loss = gloss.L2Loss()
ls = [loss(net(features), labels).mean().asnumpy()]
for epoch in range(1, num_epochs + 1):
# 学习率自我衰减。
if decay_epoch and epoch > decay_epoch:
trainer.set_learning_rate(trainer.learning_rate * 0.1)
for batch_i, (features, label) in enumerate(data_iter):
for batch_i, (X, y) in enumerate(data_iter):
with autograd.record():
output = net(features)
loss = square_loss(output, label)
loss.backward()
l = loss(net(X), y)
l.backward()
trainer.step(batch_size)
if batch_i * batch_size % log_interval == 0:
y_vals.append(square_loss(net(X), y).mean().asnumpy())
ls.append(loss(net(features), labels).mean().asnumpy())
# 为了便于打印,改变输出形状并转化成numpy数组。
print('w:', net[0].weight.data(), '\nb:', net[0].bias.data(), '\n')
x_vals = np.linspace(0, num_epochs, len(y_vals), endpoint=True)
utils.semilogy(x_vals, y_vals, 'epoch', 'loss')
es = np.linspace(0, num_epochs, len(ls), endpoint=True)
utils.semilogy(es, ls, 'epoch', 'loss')
```
以下几组实验分别重现了["梯度下降和随机梯度下降——从零开始"](gd-sgd-scratch.md)一节中的实验结果。
......@@ -67,35 +66,35 @@ def optimize(batch_size, trainer, num_epochs, decay_epoch, log_interval, X, y,
net.initialize(mx.init.Normal(sigma=1), force_reinit=True)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.2})
optimize(batch_size=1, trainer=trainer, num_epochs=3, decay_epoch=2,
log_interval=10, X=X, y=y, net=net)
log_interval=10, features=features, labels=labels, net=net)
```
```{.python .input n=4}
net.initialize(mx.init.Normal(sigma=1), force_reinit=True)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.999})
optimize(batch_size=1000, trainer=trainer, num_epochs=3, decay_epoch=None,
log_interval=1000, X=X, y=y, net=net)
log_interval=1000, features=features, labels=labels, net=net)
```
```{.python .input n=5}
net.initialize(mx.init.Normal(sigma=1), force_reinit=True)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.2})
optimize(batch_size=10, trainer=trainer, num_epochs=3, decay_epoch=2,
log_interval=10, X=X, y=y, net=net)
log_interval=10, features=features, labels=labels, net=net)
```
```{.python .input n=6}
net.initialize(mx.init.Normal(sigma=1), force_reinit=True)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 5})
optimize(batch_size=10, trainer=trainer, num_epochs=3, decay_epoch=2,
log_interval=10, X=X, y=y, net=net)
log_interval=10, features=features, labels=labels, net=net)
```
```{.python .input n=7}
net.initialize(mx.init.Normal(sigma=1), force_reinit=True)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.002})
optimize(batch_size=10, trainer=trainer, num_epochs=3, decay_epoch=2,
log_interval=10, X=X, y=y, net=net)
log_interval=10, features=features, labels=labels, net=net)
```
## 小结
......
......@@ -134,9 +134,9 @@ num_inputs = 2
num_examples = 1000
true_w = [2, -3.4]
true_b = 4.2
X = nd.random.normal(scale=1, shape=(num_examples, num_inputs))
y = true_w[0] * X[:, 0] + true_w[1] * X[:, 1] + true_b
y += nd.random.normal(scale=0.01, shape=y.shape)
features = nd.random.normal(scale=1, shape=(num_examples, num_inputs))
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
labels += nd.random.normal(scale=0.01, shape=labels.shape)
# 初始化模型参数。
def init_params():
......@@ -152,16 +152,16 @@ def linreg(X, w, b):
return nd.dot(X, w) + b
# 平方损失函数。
def squared_loss(yhat, y):
return (yhat - y.reshape(yhat.shape)) ** 2 / 2
def squared_loss(y_hat, y):
return (y_hat - y.reshape(y_hat.shape)) ** 2 / 2
# 遍历数据集。
def data_iter(batch_size, num_examples, X, y):
idx = list(range(num_examples))
random.shuffle(idx)
def data_iter(batch_size, num_examples, features, labels):
indices = list(range(num_examples))
random.shuffle(indices)
for i in range(0, num_examples, batch_size):
j = nd.array(idx[i: min(i + batch_size, num_examples)])
yield X.take(j), y.take(j)
j = nd.array(indices[i: min(i + batch_size, num_examples)])
yield features.take(j), labels.take(j)
```
下面我们描述一下优化函数`optimize`。
......@@ -172,26 +172,26 @@ def data_iter(batch_size, num_examples, X, y):
```{.python .input n=3}
net = linreg
loss = squared_loss
def optimize(batch_size, lr, num_epochs, log_interval, decay_epoch):
w, b = init_params()
y_vals = [squared_loss(net(X, w, b), y).mean().asnumpy()]
ls = [squared_loss(net(features, w, b), labels).mean().asnumpy()]
for epoch in range(1, num_epochs + 1):
# 学习率自我衰减。
if decay_epoch and epoch > decay_epoch:
lr *= 0.1
for batch_i, (features, label) in enumerate(
data_iter(batch_size, num_examples, X, y)):
for batch_i, (X, y) in enumerate(
data_iter(batch_size, num_examples, features, labels)):
with autograd.record():
output = net(features, w, b)
loss = squared_loss(output, label)
loss.backward()
l = loss(net(X, w, b), y)
l.backward()
sgd([w, b], lr, batch_size)
if batch_i * batch_size % log_interval == 0:
y_vals.append(squared_loss(net(X, w, b), y).mean().asnumpy())
ls.append(loss(net(features, w, b), labels).mean().asnumpy())
print('w:', w, '\nb:', b, '\n')
x_vals = np.linspace(0, num_epochs, len(y_vals), endpoint=True)
utils.semilogy(x_vals, y_vals, 'epoch', 'loss')
es = np.linspace(0, num_epochs, len(ls), endpoint=True)
utils.semilogy(es, ls, 'epoch', 'loss')
```
当批量大小为1时,优化使用的是随机梯度下降。在当前学习率下,损失函数值在早期快速下降后略有波动。这是由于随机梯度的方差在迭代过程中无法减小。当迭代周期大于2,学习率自我衰减后,损失函数值下降后较平稳。最终,优化所得的模型参数值`w`和`b`与它们的真实值[2, -3.4]和4.2较接近。
......
......@@ -8,9 +8,8 @@
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
import mxnet as mx
from mxnet import autograd, gluon, nd
from mxnet import gluon, nd
from mxnet.gluon import nn
import numpy as np
import sys
sys.path.append('..')
import utils
......@@ -24,9 +23,9 @@ num_inputs = 2
num_examples = 1000
true_w = [2, -3.4]
true_b = 4.2
X = nd.random.normal(scale=1, shape=(num_examples, num_inputs))
y = true_w[0] * X[:, 0] + true_w[1] * X[:, 1] + true_b
y += nd.random.normal(scale=0.01, shape=y.shape)
features = nd.random.normal(scale=1, shape=(num_examples, num_inputs))
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
labels += nd.random.normal(scale=0.01, shape=labels.shape)
# 线性回归模型。
net = nn.Sequential()
......@@ -40,7 +39,7 @@ net.initialize(mx.init.Normal(sigma=1), force_reinit=True)
trainer = gluon.Trainer(net.collect_params(), 'sgd',
{'learning_rate': 0.2, 'momentum': 0.99})
utils.optimize(batch_size=10, trainer=trainer, num_epochs=3, decay_epoch=2,
log_interval=10, X=X, y=y, net=net)
log_interval=10, features=features, labels=labels, net=net)
```
```{.python .input}
......@@ -48,7 +47,7 @@ net.initialize(mx.init.Normal(sigma=1), force_reinit=True)
trainer = gluon.Trainer(net.collect_params(), 'sgd',
{'learning_rate': 0.2, 'momentum': 0.9})
utils.optimize(batch_size=10, trainer=trainer, num_epochs=3, decay_epoch=2,
log_interval=10, X=X, y=y, net=net)
log_interval=10, features=features, labels=labels, net=net)
```
```{.python .input}
......@@ -56,7 +55,7 @@ net.initialize(mx.init.Normal(sigma=1), force_reinit=True)
trainer = gluon.Trainer(net.collect_params(), 'sgd',
{'learning_rate': 0.2, 'momentum': 0.5})
utils.optimize(batch_size=10, trainer=trainer, num_epochs=3, decay_epoch=2,
log_interval=10, X=X, y=y, net=net)
log_interval=10, features=features, labels=labels, net=net)
```
## 小结
......
......@@ -101,9 +101,9 @@ num_inputs = 2
num_examples = 1000
true_w = [2, -3.4]
true_b = 4.2
X = nd.random.normal(scale=1, shape=(num_examples, num_inputs))
y = true_w[0] * X[:, 0] + true_w[1] * X[:, 1] + true_b
y += nd.random.normal(scale=0.01, shape=y.shape)
features = nd.random.normal(scale=1, shape=(num_examples, num_inputs))
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
labels += nd.random.normal(scale=0.01, shape=labels.shape)
# 初始化模型参数。
def init_params():
......@@ -122,27 +122,26 @@ def init_params():
```{.python .input n=3}
net = utils.linreg
squared_loss = utils.squared_loss
loss = utils.squared_loss
def optimize(batch_size, lr, mom, num_epochs, log_interval):
[w, b], vs = init_params()
y_vals = [squared_loss(net(X, w, b), y).mean().asnumpy()]
ls = [loss(net(features, w, b), labels).mean().asnumpy()]
for epoch in range(1, num_epochs + 1):
# 学习率自我衰减。
if epoch > 2:
lr *= 0.1
for batch_i, (features, label) in enumerate(
utils.data_iter(batch_size, num_examples, X, y)):
for batch_i, (X, y) in enumerate(
utils.data_iter(batch_size, num_examples, features, labels)):
with autograd.record():
output = net(features, w, b)
loss = squared_loss(output, label)
loss.backward()
l = loss(net(X, w, b), y)
l.backward()
sgd_momentum([w, b], vs, lr, mom, batch_size)
if batch_i * batch_size % log_interval == 0:
y_vals.append(squared_loss(net(X, w, b), y).mean().asnumpy())
ls.append(loss(net(features, w, b), labels).mean().asnumpy())
print('w:', w, '\nb:', b, '\n')
x_vals = np.linspace(0, num_epochs, len(y_vals), endpoint=True)
utils.semilogy(x_vals, y_vals, 'epoch', 'loss')
es = np.linspace(0, num_epochs, len(ls), endpoint=True)
utils.semilogy(es, ls, 'epoch', 'loss')
```
我们先将动量超参数$\gamma$(`mom`)设为0.99。此时,小批量随机梯度下降可被看作使用了特殊梯度:这个特殊梯度是最近100个时刻的$100\nabla f_\mathcal{B}(\boldsymbol{x})$的加权平均。我们观察到,损失函数值在3个迭代周期后上升。这很可能是由于特殊梯度中较大的系数100造成的。
......
......@@ -9,9 +9,8 @@
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
import mxnet as mx
from mxnet import autograd, gluon, nd
from mxnet import gluon, nd
from mxnet.gluon import nn
import numpy as np
import sys
sys.path.append('..')
import utils
......@@ -25,9 +24,9 @@ num_inputs = 2
num_examples = 1000
true_w = [2, -3.4]
true_b = 4.2
X = nd.random.normal(scale=1, shape=(num_examples, num_inputs))
y = true_w[0] * X[:, 0] + true_w[1] * X[:, 1] + true_b
y += nd.random.normal(scale=0.01, shape=y.shape)
features = nd.random.normal(scale=1, shape=(num_examples, num_inputs))
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
labels += nd.random.normal(scale=0.01, shape=labels.shape)
# 线性回归模型。
net = nn.Sequential()
......@@ -41,7 +40,7 @@ net.initialize(mx.init.Normal(sigma=1), force_reinit=True)
trainer = gluon.Trainer(net.collect_params(), 'rmsprop',
{'learning_rate': 0.03, 'gamma1': 0.9})
utils.optimize(batch_size=10, trainer=trainer, num_epochs=3, decay_epoch=None,
log_interval=10, X=X, y=y, net=net)
log_interval=10, features=features, labels=labels, net=net)
```
```{.python .input}
......@@ -49,7 +48,7 @@ net.collect_params().initialize(mx.init.Normal(sigma=1), force_reinit=True)
trainer = gluon.Trainer(net.collect_params(), 'rmsprop',
{'learning_rate': 0.03, 'gamma1': 0.999})
utils.optimize(batch_size=10, trainer=trainer, num_epochs=3, decay_epoch=None,
log_interval=10, X=X, y=y, net=net)
log_interval=10, features=features, labels=labels, net=net)
```
## 小结
......
......@@ -64,9 +64,9 @@ num_inputs = 2
num_examples = 1000
true_w = [2, -3.4]
true_b = 4.2
X = nd.random.normal(scale=1, shape=(num_examples, num_inputs))
y = true_w[0] * X[:, 0] + true_w[1] * X[:, 1] + true_b
y += nd.random.normal(scale=0.01, shape=y.shape)
features = nd.random.normal(scale=1, shape=(num_examples, num_inputs))
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
labels += nd.random.normal(scale=0.01, shape=labels.shape)
# 初始化模型参数。
def init_params():
......@@ -85,24 +85,23 @@ def init_params():
```{.python .input n=2}
net = utils.linreg
squared_loss = utils.squared_loss
loss = utils.squared_loss
def optimize(batch_size, lr, gamma, num_epochs, log_interval):
[w, b], sqrs = init_params()
y_vals = [squared_loss(net(X, w, b), y).mean().asnumpy()]
ls = [loss(net(features, w, b), labels).mean().asnumpy()]
for epoch in range(1, num_epochs + 1):
for batch_i, (features, label) in enumerate(
utils.data_iter(batch_size, num_examples, X, y)):
for batch_i, (X, y) in enumerate(
utils.data_iter(batch_size, num_examples, features, labels)):
with autograd.record():
output = net(features, w, b)
loss = squared_loss(output, label)
loss.backward()
l = loss(net(X, w, b), y)
l.backward()
rmsprop([w, b], sqrs, lr, gamma, batch_size)
if batch_i * batch_size % log_interval == 0:
y_vals.append(squared_loss(net(X, w, b), y).mean().asnumpy())
ls.append(loss(net(features, w, b), labels).mean().asnumpy())
print('w:', w, '\nb:', b, '\n')
x_vals = np.linspace(0, num_epochs, len(y_vals), endpoint=True)
utils.semilogy(x_vals, y_vals, 'epoch', 'loss')
es = np.linspace(0, num_epochs, len(ls), endpoint=True)
utils.semilogy(es, ls, 'epoch', 'loss')
```
我们将初始学习率设为0.03,并将$\gamma$(`gamma`)设为0.9。此时,变量$\boldsymbol{s}$可看作是最近$1/(1-0.9) = 10$个时刻的平方项$\boldsymbol{g} \odot \boldsymbol{g}$的加权平均。我们观察到,损失函数在迭代后期较震荡。
......
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xl="http://www.w3.org/1999/xlink" version="1.1" xmlns="http://www.w3.org/2000/svg" viewBox="5 35 380 165" width="380" height="165">
<svg xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xl="http://www.w3.org/1999/xlink" version="1.1" xmlns="http://www.w3.org/2000/svg" viewBox="7 35 378 161" width="378" height="161">
<defs>
<marker orient="auto" overflow="visible" markerUnits="strokeWidth" id="StickArrow_Marker" stroke-linejoin="miter" stroke-miterlimit="10" viewBox="-1 -4 10 8" markerWidth="10" markerHeight="8" color="black">
<g>
<path d="M 8 0 L 0 0 M 0 -3 L 8 0 L 0 3" fill="none" stroke="currentColor" stroke-width="1"/>
</g>
</marker>
<font-face font-family="Arial" font-size="12" panose-1="2 11 6 4 2 2 2 9 2 4" units-per-em="1000" underline-position="-105.95703" underline-thickness="73.24219" slope="-1e3" x-height="518.5547" cap-height="715.8203" ascent="905.2734" descent="-211.91406" font-style="italic" font-weight="400">
<font-face font-family="Arial" font-size="9" panose-1="2 11 6 4 2 2 2 9 2 4" units-per-em="1000" underline-position="-105.95703" underline-thickness="73.24219" slope="-1333.3333" x-height="518.5547" cap-height="715.8203" ascent="905.2734" descent="-211.91406" font-style="italic" font-weight="400">
<font-face-src>
<font-face-name name="Arial-ItalicMT"/>
</font-face-src>
</font-face>
<marker orient="auto" overflow="visible" markerUnits="strokeWidth" id="FilledArrow_Marker" stroke-linejoin="miter" stroke-miterlimit="10" viewBox="-1 -4 8 8" markerWidth="8" markerHeight="8" color="#5b7daa">
<marker orient="auto" overflow="visible" markerUnits="strokeWidth" id="FilledArrow_Marker" stroke-linejoin="miter" stroke-miterlimit="10" viewBox="-1 -4 10 8" markerWidth="10" markerHeight="8" color="#5b7daa">
<g>
<path d="M 5.866667 0 L 0 -2.2 L 0 2.2 Z" fill="currentColor" stroke="currentColor" stroke-width="1"/>
<path d="M 8 0 L 0 -3 L 0 3 Z" fill="currentColor" stroke="currentColor" stroke-width="1"/>
</g>
</marker>
<marker orient="auto" overflow="visible" markerUnits="strokeWidth" id="Ball_Marker" stroke-linejoin="miter" stroke-miterlimit="10" viewBox="-6 -4 7 8" markerWidth="7" markerHeight="8" color="#5b7daa">
<marker orient="auto" overflow="visible" markerUnits="strokeWidth" id="Ball_Marker" stroke-linejoin="miter" stroke-miterlimit="10" viewBox="-7 -4 8 8" markerWidth="8" markerHeight="8" color="#5b7daa">
<g>
<circle cx="-2.199999" cy="0" r="2.1999980557011" fill="none" stroke="currentColor" stroke-width="1"/>
<circle cx="-2.9999986" cy="0" r="2.99999734868332" fill="none" stroke="currentColor" stroke-width="1"/>
</g>
</marker>
</defs>
<metadata> Produced by OmniGraffle 7.7.1
<dc:date>2018-04-01 20:05:43 +0000</dc:date>
<dc:date>2018-05-13 01:22:49 +0000</dc:date>
</metadata>
<g id="Canvas_1" fill-opacity="1" stroke-dasharray="none" stroke="none" stroke-opacity="1" fill="none">
<title>Canvas 1</title>
......@@ -37,26 +37,26 @@
<line x1="35" y1="173" x2="173.12344" y2="173" marker-end="url(#StickArrow_Marker)" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-dasharray="4.0,4.0" stroke-width="1"/>
</g>
<g id="Graphic_17">
<text transform="translate(10 41.692383)" fill="black">
<tspan font-family="Arial" font-size="12" font-style="italic" font-weight="400" fill="black" x=".33691406" y="11">f(x)</tspan>
<text transform="translate(12.5 41.741455)" fill="black">
<tspan font-family="Arial" font-size="9" font-style="italic" font-weight="400" fill="black" x=".002685547" y="8">f(x)</tspan>
</text>
</g>
<g id="Graphic_16">
<text transform="translate(170.5 179.80371)" fill="black">
<tspan font-family="Arial" font-size="12" font-style="italic" font-weight="400" fill="black" x="0" y="11">x</tspan>
<text transform="translate(171 179.85278)" fill="black">
<tspan font-family="Arial" font-size="9" font-style="italic" font-weight="400" fill="black" x=".25" y="8">x</tspan>
</text>
</g>
<g id="Line_15">
<path d="M 58.5 60 C 58.5 60 79.024875 156.25132 106.09766 156.07812 C 133.17044 155.90493 154.06055 59.38867 154.06055 59.38867" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5"/>
<path d="M 58.5 60 C 58.5 60 79.024875 156.25132 106.09766 156.07812 C 133.17044 155.90493 154.06055 59.38867 154.06055 59.38867" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_14">
<line x1="61.74284" y1="72.97964" x2="64.23549" y2="82.310445" marker-end="url(#FilledArrow_Marker)" marker-start="url(#Ball_Marker)" stroke="#5b7daa" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5"/>
<line x1="61.523466" y1="72.15844" x2="64.62263" y2="83.75963" marker-end="url(#FilledArrow_Marker)" marker-start="url(#Ball_Marker)" stroke="#5b7daa" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_13">
<line x1="69.28829" y1="99.12173" x2="70.72686" y2="103.38009" marker-end="url(#FilledArrow_Marker)" marker-start="url(#Ball_Marker)" stroke="#5b7daa" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5"/>
<line x1="69.01625" y1="98.31644" x2="71.20694" y2="104.80119" marker-end="url(#FilledArrow_Marker)" marker-start="url(#Ball_Marker)" stroke="#5b7daa" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_12">
<line x1="76.51754" y1="118.90492" x2="77.91111" y2="122.14559" marker-end="url(#FilledArrow_Marker)" marker-start="url(#Ball_Marker)" stroke="#5b7daa" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5"/>
<line x1="76.18175" y1="118.12405" x2="78.50369" y2="123.52359" marker-end="url(#FilledArrow_Marker)" marker-start="url(#Ball_Marker)" stroke="#5b7daa" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_11">
<line x1="235.5" y1="172.5" x2="235.5" y2="46.28867" marker-end="url(#StickArrow_Marker)" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-dasharray="4.0,4.0" stroke-width="1"/>
......@@ -65,29 +65,29 @@
<line x1="235.5" y1="173" x2="373.62344" y2="173" marker-end="url(#StickArrow_Marker)" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-dasharray="4.0,4.0" stroke-width="1"/>
</g>
<g id="Graphic_9">
<text transform="translate(210.5 41.692383)" fill="black">
<tspan font-family="Arial" font-size="12" font-style="italic" font-weight="400" fill="black" x=".33691406" y="11">f(x)</tspan>
<text transform="translate(213 41.741455)" fill="black">
<tspan font-family="Arial" font-size="9" font-style="italic" font-weight="400" fill="black" x=".002685547" y="8">f(x)</tspan>
</text>
</g>
<g id="Graphic_8">
<text transform="translate(371 179.80371)" fill="black">
<tspan font-family="Arial" font-size="12" font-style="italic" font-weight="400" fill="black" x="0" y="11">x</tspan>
<text transform="translate(371.5 179.85278)" fill="black">
<tspan font-family="Arial" font-size="9" font-style="italic" font-weight="400" fill="black" x=".25" y="8">x</tspan>
</text>
</g>
<g id="Line_7">
<path d="M 259 60 C 259 60 279.52488 156.25132 306.59766 156.07812 C 333.67044 155.90493 354.56055 59.38867 354.56055 59.38867" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5"/>
<path d="M 259 60 C 259 60 279.52488 156.25132 306.59766 156.07812 C 333.67044 155.90493 354.56055 59.38867 354.56055 59.38867" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_6">
<line x1="298.55188" y1="144.89358" x2="317.14566" y2="141.40002" marker-end="url(#FilledArrow_Marker)" marker-start="url(#Ball_Marker)" stroke="#5b7daa" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5"/>
<line x1="297.7165" y1="145.05053" x2="318.61986" y2="141.12303" marker-end="url(#FilledArrow_Marker)" marker-start="url(#Ball_Marker)" stroke="#5b7daa" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_5">
<line x1="320.11955" y1="136.01634" x2="287.00124" y2="125.30652" marker-end="url(#FilledArrow_Marker)" marker-start="url(#Ball_Marker)" stroke="#5b7daa" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5"/>
<line x1="320.9283" y1="136.27788" x2="285.574" y2="124.84498" marker-end="url(#FilledArrow_Marker)" marker-start="url(#Ball_Marker)" stroke="#5b7daa" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_4">
<line x1="285.25747" y1="120.53153" x2="331.5463" y2="109.44607" marker-end="url(#FilledArrow_Marker)" marker-start="url(#Ball_Marker)" stroke="#5b7daa" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5"/>
<line x1="284.43085" y1="120.7295" x2="333.00505" y2="109.09672" marker-end="url(#FilledArrow_Marker)" marker-start="url(#Ball_Marker)" stroke="#5b7daa" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_3">
<line x1="334.1477" y1="104.24621" x2="274.7326" y2="84.50641" marker-end="url(#FilledArrow_Marker)" marker-start="url(#Ball_Marker)" stroke="#5b7daa" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5"/>
<line x1="334.95433" y1="104.5142" x2="273.30912" y2="84.03348" marker-end="url(#FilledArrow_Marker)" marker-start="url(#Ball_Marker)" stroke="#5b7daa" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
</g>
</g>
......
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xl="http://www.w3.org/1999/xlink" version="1.1" xmlns="http://www.w3.org/2000/svg" viewBox="6.5 5 306.14414 158.5" width="306.14414" height="158.5">
<svg xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xl="http://www.w3.org/1999/xlink" version="1.1" xmlns="http://www.w3.org/2000/svg" viewBox="7.5 5 305.14414 153.5" width="305.14414" height="153.5">
<defs>
<marker orient="auto" overflow="visible" markerUnits="strokeWidth" id="StickArrow_Marker" stroke-linejoin="miter" stroke-miterlimit="10" viewBox="-1 -4 8 8" markerWidth="8" markerHeight="8" color="#5b7daa">
<marker orient="auto" overflow="visible" markerUnits="strokeWidth" id="StickArrow_Marker" stroke-linejoin="miter" stroke-miterlimit="10" viewBox="-1 -4 10 8" markerWidth="10" markerHeight="8" color="#5b7daa">
<g>
<path d="M 5.866667 0 L 0 0 M 0 -2.2 L 5.866667 0 L 0 2.2" fill="none" stroke="currentColor" stroke-width="1"/>
<path d="M 8 0 L 0 0 M 0 -3 L 8 0 L 0 3" fill="none" stroke="currentColor" stroke-width="1"/>
</g>
</marker>
<marker orient="auto" overflow="visible" markerUnits="strokeWidth" id="StickArrow_Marker_2" stroke-linejoin="miter" stroke-miterlimit="10" viewBox="-1 -4 10 8" markerWidth="10" markerHeight="8" color="black">
......@@ -12,95 +12,95 @@
<path d="M 8 0 L 0 0 M 0 -3 L 8 0 L 0 3" fill="none" stroke="currentColor" stroke-width="1"/>
</g>
</marker>
<font-face font-family="Arial" font-size="12" panose-1="2 11 6 4 2 2 2 9 2 4" units-per-em="1000" underline-position="-105.95703" underline-thickness="73.24219" slope="-1e3" x-height="518.5547" cap-height="715.8203" ascent="905.2734" descent="-211.91406" font-style="italic" font-weight="400">
<font-face font-family="Arial" font-size="9" panose-1="2 11 6 4 2 2 2 9 2 4" units-per-em="1000" underline-position="-105.95703" underline-thickness="73.24219" slope="-1333.3333" x-height="518.5547" cap-height="715.8203" ascent="905.2734" descent="-211.91406" font-style="italic" font-weight="400">
<font-face-src>
<font-face-name name="Arial-ItalicMT"/>
</font-face-src>
</font-face>
<font-face font-family="Arial" font-size="8" panose-1="2 11 6 4 2 2 2 2 2 4" units-per-em="1000" underline-position="-105.95703" underline-thickness="73.24219" slope="0" x-height="518.5547" cap-height="716.3086" ascent="905.2734" descent="-211.91406" font-weight="400">
<font-face font-family="Arial" font-size="7" panose-1="2 11 6 4 2 2 2 2 2 4" units-per-em="1000" underline-position="-105.95703" underline-thickness="73.24219" slope="0" x-height="518.5547" cap-height="716.3086" ascent="905.2734" descent="-211.91406" font-weight="400">
<font-face-src>
<font-face-name name="ArialMT"/>
</font-face-src>
</font-face>
</defs>
<metadata> Produced by OmniGraffle 7.7.1
<dc:date>2018-04-05 18:37:56 +0000</dc:date>
<dc:date>2018-05-13 01:23:28 +0000</dc:date>
</metadata>
<g id="Canvas_1" fill-opacity="1" stroke-dasharray="none" stroke="none" stroke-opacity="1" fill="none">
<title>Canvas 1</title>
<g id="Canvas_1: Layer 1">
<title>Layer 1</title>
<g id="Graphic_14">
<g id="Graphic_21">
<ellipse cx="163.69034" cy="72.75" rx="123.690538554121" ry="51.7500826914231" fill="white"/>
<ellipse cx="163.69034" cy="72.75" rx="123.690538554121" ry="51.7500826914231" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_13">
<g id="Graphic_20">
<ellipse cx="163.69034" cy="72.75" rx="67.8239720122439" ry="21.5625344547596" fill="white"/>
<ellipse cx="163.69034" cy="72.75" rx="67.8239720122439" ry="21.5625344547596" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_12">
<g id="Graphic_19">
<ellipse cx="164.86648" cy="72.84801" rx="15.877866280323" ry="5.19461057319209" fill="white"/>
<ellipse cx="164.86648" cy="72.84801" rx="15.877866280323" ry="5.19461057319209" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_11">
<line x1="64.50284" y1="76.627575" x2="69.95741" y2="61.879636" marker-end="url(#StickArrow_Marker)" stroke="#5b7daa" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5"/>
<g id="Line_18">
<line x1="64.50284" y1="76.627575" x2="70.47774" y2="60.472776" marker-end="url(#StickArrow_Marker)" stroke="#5b7daa" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_9">
<path d="M 74.107955 52.167614 L 75.546875 67.35156 L 77.24887 79.1592" marker-end="url(#StickArrow_Marker)" stroke="#5b7daa" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5"/>
<g id="Line_17">
<path d="M 74.107955 52.167614 L 75.546875 67.35156 L 77.46287 80.64385" marker-end="url(#StickArrow_Marker)" stroke="#5b7daa" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_8">
<line x1="78.60427" y1="88.56241" x2="89.16032" y2="64.82744" marker-end="url(#StickArrow_Marker)" stroke="#5b7daa" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5"/>
<g id="Line_16">
<line x1="78.60427" y1="88.56241" x2="89.76987" y2="63.456875" marker-end="url(#StickArrow_Marker)" stroke="#5b7daa" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_7">
<line x1="93.79292" y1="54.411155" x2="99.21977" y2="70.88387" marker-end="url(#StickArrow_Marker)" stroke="#5b7daa" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5"/>
<g id="Line_15">
<line x1="93.79292" y1="54.411155" x2="99.68927" y2="72.309" marker-end="url(#StickArrow_Marker)" stroke="#5b7daa" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_6">
<line x1="102.53125" y1="81.79231" x2="106.39071" y2="74.86708" marker-end="url(#StickArrow_Marker)" stroke="#5b7daa" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5"/>
<g id="Line_14">
<line x1="102.53125" y1="81.79231" x2="107.12092" y2="73.55682" marker-end="url(#StickArrow_Marker)" stroke="#5b7daa" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_5">
<line x1="111.94034" y1="65.46653" x2="115.32754" y2="70.911195" marker-end="url(#StickArrow_Marker)" stroke="#5b7daa" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5"/>
<g id="Line_13">
<line x1="111.94034" y1="65.46653" x2="116.1199" y2="72.18484" marker-end="url(#StickArrow_Marker)" stroke="#5b7daa" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_4">
<line x1="121.34943" y1="79.22334" x2="122.94727" y2="77.52486" marker-end="url(#StickArrow_Marker)" stroke="#5b7daa" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5"/>
<g id="Line_12">
<line x1="121.34943" y1="79.22334" x2="123.97507" y2="76.43233" marker-end="url(#StickArrow_Marker)" stroke="#5b7daa" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_3">
<line x1="130.75852" y1="69.36708" x2="132.21151" y2="70.85814" marker-end="url(#StickArrow_Marker)" stroke="#5b7daa" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5"/>
<g id="Line_11">
<line x1="130.75852" y1="69.36708" x2="133.25837" y2="71.93243" marker-end="url(#StickArrow_Marker)" stroke="#5b7daa" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_19">
<g id="Graphic_10">
<circle cx="63.38367" cy="78.88367" r="2.38367080105348" fill="white"/>
<circle cx="63.38367" cy="78.88367" r="2.38367080105348" stroke="#5b7daa" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5"/>
<circle cx="63.38367" cy="78.88367" r="2.38367080105348" stroke="#5b7daa" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_32">
<g id="Line_9">
<line x1="28.5" y1="139.5" x2="292.34414" y2="139.5" marker-end="url(#StickArrow_Marker_2)" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-dasharray="4.0,4.0" stroke-width="1"/>
</g>
<g id="Line_33">
<g id="Line_8">
<line x1="28.5" y1="139.5" x2="28.5" y2="16.730078" marker-end="url(#StickArrow_Marker_2)" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-dasharray="4.0,4.0" stroke-width="1"/>
</g>
<g id="Graphic_35">
<text transform="translate(288.05273 141.86914)" fill="black">
<tspan font-family="Arial" font-size="12" font-style="italic" font-weight="400" fill="black" x=".27539062" y="11">x</tspan>
<tspan font-family="Arial" font-size="8" font-weight="400" fill="black" y="14">1</tspan>
<g id="Graphic_7">
<text transform="translate(289.05273 141.8855)" fill="black">
<tspan font-family="Arial" font-size="9" font-style="italic" font-weight="400" fill="black" x=".3034668" y="8">x</tspan>
<tspan font-family="Arial" font-size="7" font-weight="400" fill="black" y="10">1</tspan>
</text>
</g>
<g id="Graphic_36">
<text transform="translate(11.5 10.369141)" fill="black">
<tspan font-family="Arial" font-size="12" font-style="italic" font-weight="400" fill="black" x=".27539062" y="11">x</tspan>
<tspan font-family="Arial" font-size="8" font-weight="400" fill="black" y="14">2</tspan>
<g id="Graphic_6">
<text transform="translate(12.5 10.385498)" fill="black">
<tspan font-family="Arial" font-size="9" font-style="italic" font-weight="400" fill="black" x=".3034668" y="8">x</tspan>
<tspan font-family="Arial" font-size="7" font-weight="400" fill="black" y="10">2</tspan>
</text>
</g>
<g id="Graphic_37">
<text transform="translate(268.55273 30.80371)" fill="black">
<tspan font-family="Arial" font-size="12" font-style="italic" font-weight="400" fill="black" x=".32128906" y="11">f = 10</tspan>
<g id="Graphic_5">
<text transform="translate(268.8 32.752784)" fill="black">
<tspan font-family="Arial" font-size="9" font-style="italic" font-weight="400" fill="black" x=".1159668" y="8">f = 10</tspan>
</text>
</g>
<g id="Graphic_38">
<text transform="translate(183.19602 60.89462)" fill="black">
<tspan font-family="Arial" font-size="12" font-style="italic" font-weight="400" fill="black" x=".15820312" y="11">f = 1</tspan>
<g id="Graphic_4">
<text transform="translate(186.19602 60.94369)" fill="black">
<tspan font-family="Arial" font-size="9" font-style="italic" font-weight="400" fill="black" x=".11865234" y="8">f = 1</tspan>
</text>
</g>
<g id="Graphic_39">
<text transform="translate(222.79261 48.30371)" fill="black">
<tspan font-family="Arial" font-size="12" font-style="italic" font-weight="400" fill="black" x=".15820312" y="11">f = 5</tspan>
<g id="Graphic_3">
<text transform="translate(222.6 50.152784)" fill="black">
<tspan font-family="Arial" font-size="9" font-style="italic" font-weight="400" fill="black" x=".11865234" y="8">f = 5</tspan>
</text>
</g>
</g>
......
......@@ -3,7 +3,7 @@ from mxnet import gluon
from mxnet import autograd
from mxnet import nd
from mxnet import image
from mxnet.gluon import nn
from mxnet.gluon import nn, data as gdata, loss as gloss
import mxnet as mx
import numpy as np
from time import time
......@@ -357,13 +357,13 @@ def set_fig_size(mpl, figsize=(3.5, 2.5)):
mpl.rcParams['figure.figsize'] = figsize
def data_iter(batch_size, num_examples, X, y):
def data_iter(batch_size, num_examples, features, labels):
"""遍历数据集。"""
idx = list(range(num_examples))
random.shuffle(idx)
indices = list(range(num_examples))
random.shuffle(indices)
for i in range(0, num_examples, batch_size):
j = nd.array(idx[i: min(i + batch_size, num_examples)])
yield X.take(j), y.take(j)
j = nd.array(indices[i: min(i + batch_size, num_examples)])
yield features.take(j), labels.take(j)
def linreg(X, w, b):
......@@ -371,33 +371,33 @@ def linreg(X, w, b):
return nd.dot(X, w) + b
def squared_loss(yhat, y):
def squared_loss(y_hat, y):
"""平方损失函数。"""
return (yhat - y.reshape(yhat.shape)) ** 2 / 2
return (y_hat - y.reshape(y_hat.shape)) ** 2 / 2
def optimize(batch_size, trainer, num_epochs, decay_epoch, log_interval, X, y,
net):
def optimize(batch_size, trainer, num_epochs, decay_epoch, log_interval,
features, labels, net):
"""优化目标函数。"""
dataset = gluon.data.ArrayDataset(X, y)
data_iter = gluon.data.DataLoader(dataset, batch_size, shuffle=True)
square_loss = gluon.loss.L2Loss()
y_vals = [square_loss(net(X), y).mean().asnumpy()]
dataset = gdata.ArrayDataset(features, labels)
data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)
loss = gloss.L2Loss()
ls = [loss(net(features), labels).mean().asnumpy()]
for epoch in range(1, num_epochs + 1):
# 学习率自我衰减。
if decay_epoch and epoch > decay_epoch:
trainer.set_learning_rate(trainer.learning_rate * 0.1)
for batch_i, (features, label) in enumerate(data_iter):
for batch_i, (X, y) in enumerate(data_iter):
with autograd.record():
output = net(features)
loss = square_loss(output, label)
loss.backward()
l = loss(net(X), y)
l.backward()
trainer.step(batch_size)
if batch_i * batch_size % log_interval == 0:
y_vals.append(square_loss(net(X), y).mean().asnumpy())
ls.append(loss(net(features), labels).mean().asnumpy())
# 为了便于打印,改变输出形状并转化成numpy数组。
print('w:', net[0].weight.data(), '\nb:', net[0].bias.data(), '\n')
x_vals = np.linspace(0, num_epochs, len(y_vals), endpoint=True)
semilogy(x_vals, y_vals, 'epoch', 'loss')
es = np.linspace(0, num_epochs, len(ls), endpoint=True)
semilogy(es, ls, 'epoch', 'loss')
def semilogy(x_vals, y_vals, x_label, y_label, figsize=(3.5, 2.5)):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册