Commit 2dd0ea42 authored by Aston Zhang

mlp and util

Parent 6c33d6f1
......@@ -6,10 +6,14 @@
We continue to use the Fashion-MNIST dataset.
-```{.python .input n=1}
+```{.python .input}
import sys
sys.path.append('..')
import gluonbook as gb
from mxnet import autograd, gluon, nd
+```
+```{.python .input n=1}
batch_size = 256
train_iter, test_iter = gb.load_data_fashion_mnist(batch_size)
```
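As a quick sanity check (an aside, not part of this commit), one batch from these iterators can be inspected; the exact array shape depends on the transform applied inside `load_data_fashion_mnist`:

```{.python .input}
# Aside: inspect one batch (shapes depend on the dataset transform).
for X, y in train_iter:
    print(X.shape, y.shape)  # e.g. (256, 28, 28, 1) and (256,)
    break
```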
......@@ -23,13 +27,11 @@ train_iter, test_iter = gb.load_data_fashion_mnist(batch_size)
Here we define a model with only one hidden layer, and this hidden layer has 256 output units.
```{.python .input n=2}
-from mxnet import ndarray as nd
-num_inputs = 28*28
+num_inputs = 784
num_outputs = 10
num_hidden = 256
-weight_scale = .01
+weight_scale = 0.01
W1 = nd.random_normal(shape=(num_inputs, num_hidden), scale=weight_scale)
b1 = nd.zeros(num_hidden)
......@@ -75,7 +77,6 @@ def net(X):
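The diff elides the remaining definitions of this model, including the `net(X)` named in the hunk header above. For context, in this chapter they typically look like the sketch below (an illustration, not part of the commit; it assumes the ReLU activation and the second weight/bias pair used by the elided `net`):

```{.python .input}
# Sketch of the elided definitions (illustrative, not part of this diff).
W2 = nd.random_normal(shape=(num_hidden, num_outputs), scale=weight_scale)
b2 = nd.zeros(num_outputs)
params = [W1, b1, W2, b2]
for param in params:
    param.attach_grad()  # allocate gradient buffers for autograd

def relu(X):
    return nd.maximum(X, 0)

def net(X):
    X = X.reshape((-1, num_inputs))
    h = relu(nd.dot(X, W1) + b1)
    return nd.dot(h, W2) + b2
```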
In the section on multiclass logistic regression, we mentioned that implementing the softmax and the cross-entropy loss separately may cause numerical instability. Here we directly use the function provided by Gluon:
```{.python .input n=6}
-from mxnet import gluon
loss = gluon.loss.SoftmaxCrossEntropyLoss()
```
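To see why the fused loss is more stable (an illustrative aside, not part of the commit): a naive softmax exponentiates the raw outputs, which overflows in single precision for moderately large values, while working on the log-sum-exp scale, $\log\sum_i \exp(z_i) = \max(z) + \log\sum_i \exp(z_i - \max(z))$, stays finite:

```{.python .input}
# Illustrative aside: exp overflows in float32, log-sum-exp does not.
z = nd.array([50, 100])
naive = nd.exp(z) / nd.exp(z).sum()  # exp(100) overflows to inf -> nan
m = z.max()
log_softmax = z - m - nd.log(nd.exp(z - m).sum())  # stable
print(naive, log_softmax)
```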
......@@ -84,8 +85,6 @@ loss = gluon.loss.SoftmaxCrossEntropyLoss()
Training works the same as before.
```{.python .input n=8}
-from mxnet import autograd as autograd
num_epochs = 5
lr = 0.5
......
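For reference, the elided training code above typically ends with a call to the chapter's helper; the trailing arguments of `train_cpu` are assumed here, since the diff truncates its signature:

```{.python .input}
# Sketch of the elided training call (assumed arguments, not part of this diff).
gb.train_cpu(net, train_iter, test_iter, loss, num_epochs, batch_size,
             params, lr)
```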
......@@ -9,6 +9,7 @@ from mxnet import autograd, gluon, image, nd
from mxnet.gluon import nn, data as gdata, loss as gloss, utils as gutils
import numpy as np
class DataLoader(object):
"""similiar to gluon.data.DataLoader, but might be faster.
......@@ -44,6 +45,7 @@ class DataLoader(object):
    def __len__(self):
        return len(self.dataset) // self.batch_size
def load_data_fashion_mnist(batch_size, resize=None, root="~/.mxnet/datasets/fashion-mnist"):
"""download the fashion mnist dataest and then load into memory"""
def transform_mnist(data, label):
......@@ -64,6 +66,7 @@ def load_data_fashion_mnist(batch_size, resize=None, root="~/.mxnet/datasets/fas
    test_data = DataLoader(mnist_test, batch_size, shuffle=False, transform=transform_mnist)
    return (train_data, test_data)
def try_gpu():
"""If GPU is available, return mx.gpu(0); else return mx.cpu()"""
try:
......@@ -73,44 +76,49 @@ def try_gpu():
        ctx = mx.cpu()
    return ctx
def try_all_gpus():
"""Return all available GPUs, or [mx.gpu()] if there is no GPU"""
ctx_list = []
ctxes = []
try:
for i in range(16):
ctx = mx.gpu(i)
_ = nd.array([0], ctx=ctx)
ctx_list.append(ctx)
ctxes.append(ctx)
except:
pass
if not ctx_list:
ctx_list = [mx.cpu()]
return ctx_list
if not ctxes:
ctxes = [mx.cpu()]
return ctxes
def SGD(params, lr):
"""DEPRECATED!"""
for param in params:
param[:] = param - lr * param.grad
def sgd(params, lr, batch_size):
"""Mini-batch stochastic gradient descent."""
for param in params:
param[:] = param - lr * param.grad / batch_size
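An aside on the `batch_size` division (not part of the commit): the book's loss functions return per-example losses that are summed before `backward`, so `param.grad` holds the gradient of a sum; dividing by the batch size inside `sgd` yields the average gradient. A minimal sketch, assuming the `sgd` defined above:

```python
# Minimal sketch: sgd averages the gradient of a summed batch loss.
from mxnet import autograd, nd

w = nd.array([1.0, 2.0])
w.attach_grad()
X = nd.array([[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])  # batch of 3 examples
with autograd.record():
    l = nd.dot(X, w).sum()  # loss summed over the batch
l.backward()                # w.grad == X.sum(axis=0) == [2. 2.]
sgd([w], lr=0.1, batch_size=X.shape[0])  # step with the averaged gradient
```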
def accuracy(y_hat, y):
"""Get accuracy."""
return (y_hat.argmax(axis=1) == y).mean().asscalar()
def _get_batch(batch, ctx):
"""return data and label on ctx"""
"""return features and labels on ctx"""
if isinstance(batch, mx.io.DataBatch):
data = batch.data[0]
label = batch.label[0]
features = batch.data[0]
labels = batch.label[0]
else:
data, label = batch
return (gluon.utils.split_and_load(data, ctx),
gluon.utils.split_and_load(label, ctx),
data.shape[0])
features, labels = batch
return (gutils.split_and_load(features, ctx),
gutils.split_and_load(labels, ctx),
features.shape[0])
def evaluate_accuracy(data_iter, net, ctx=[mx.cpu()]):
......@@ -154,38 +162,37 @@ def train_cpu(net, train_iter, test_iter, loss, num_epochs, batch_size,
            train_acc_sum / len(train_iter), test_acc))
-def train(train_data, test_data, net, loss, trainer, ctx, num_epochs, print_batches=None):
+def train(train_iter, test_iter, net, loss, trainer, ctx, num_epochs, print_batches=None):
    """Train and evaluate a model."""
-    print("Start training on ", ctx)
+    print("training on ", ctx)
    if isinstance(ctx, mx.Context):
        ctx = [ctx]
-    for epoch in range(num_epochs):
-        train_loss, train_acc, n, m = 0.0, 0.0, 0.0, 0.0
-        if isinstance(train_data, mx.io.MXDataIter):
-            train_data.reset()
+    for epoch in range(1, num_epochs + 1):
+        train_l_sum, train_acc_sum, n, m = 0.0, 0.0, 0.0, 0.0
+        if isinstance(train_iter, mx.io.MXDataIter):
+            train_iter.reset()
        start = time()
-        for i, batch in enumerate(train_data):
-            data, label, batch_size = _get_batch(batch, ctx)
-            losses = []
+        for i, batch in enumerate(train_iter):
+            Xs, ys, batch_size = _get_batch(batch, ctx)
+            ls = []
            with autograd.record():
-                outputs = [net(X) for X in data]
-                losses = [loss(yhat, y) for yhat, y in zip(outputs, label)]
-            for l in losses:
+                y_hats = [net(X) for X in Xs]
+                ls = [loss(y_hat, y) for y_hat, y in zip(y_hats, ys)]
+            for l in ls:
                l.backward()
-            train_acc += sum([(yhat.argmax(axis=1)==y).sum().asscalar()
-                              for yhat, y in zip(outputs, label)])
-            train_loss += sum([l.sum().asscalar() for l in losses])
+            train_acc_sum += sum([(y_hat.argmax(axis=1) == y).sum().asscalar()
+                                  for y_hat, y in zip(y_hats, ys)])
+            train_l_sum += sum([l.sum().asscalar() for l in ls])
            trainer.step(batch_size)
            n += batch_size
-            m += sum([y.size for y in label])
+            m += sum([y.size for y in ys])
            if print_batches and (i+1) % print_batches == 0:
-                print("batch %d, loss: %f, train acc %f" % (
-                    n, train_loss/n, train_acc/m
+                print("batch %d, loss %f, train acc %f" % (
+                    n, train_l_sum / n, train_acc_sum / m
                ))
-        test_acc = evaluate_accuracy(test_data, net, ctx)
-        print("epoch %d, loss: %.4f, train acc %.3f, test acc %.3f, time %.1f sec" % (
-            epoch, train_loss/n, train_acc/m, test_acc, time() - start
+        test_acc = evaluate_accuracy(test_iter, net, ctx)
+        print("epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec" % (
+            epoch, train_l_sum / n, train_acc_sum / m, test_acc, time() - start
        ))
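For context (an illustration, not part of the commit), the refactored `train` is typically driven as below; `net`, the data iterators, and `loss` are assumed to be defined as elsewhere in the book:

```python
# Hypothetical usage of the refactored train() helper.
ctx = try_all_gpus()
net.initialize(force_reinit=True, ctx=ctx)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.5})
train(train_iter, test_iter, net, loss, trainer, ctx, num_epochs=5)
```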
......