OpenDocCN / d2l-zh
Commit 2dd0ea42
Authored May 16, 2018 by Aston Zhang
Parent: 6c33d6f1

    mlp and util
Showing 2 changed files with 48 additions and 42 deletions:

    chapter_supervised-learning/mlp-scratch.md  +7  -8
    gluonbook/utils.py                          +41 -34
chapter_supervised-learning/mlp-scratch.md (+7, -8)

````diff
@@ -6,10 +6,14 @@
 We continue to use the FashionMNIST dataset.
 
-```{.python .input n=1}
+```{.python .input}
 import sys
 sys.path.append('..')
 import gluonbook as gb
+from mxnet import autograd, gluon, nd
+```
+
+```{.python .input n=1}
 batch_size = 256
 train_iter, test_iter = gb.load_data_fashion_mnist(batch_size)
 ```
@@ -23,13 +27,11 @@ train_iter, test_iter = gb.load_data_fashion_mnist(batch_size)
 Here we define a model with a single hidden layer, and that hidden layer has 256 units.
 
 ```{.python .input n=2}
-from mxnet import ndarray as nd
-num_inputs = 28*28
+num_inputs = 784
 num_outputs = 10
 num_hidden = 256
-weight_scale = .01
+weight_scale = 0.01
 
 W1 = nd.random_normal(shape=(num_inputs, num_hidden), scale=weight_scale)
 b1 = nd.zeros(num_hidden)
@@ -75,7 +77,6 @@ def net(X):
 In multiclass logistic regression we noted that implementing softmax and the cross-entropy loss separately can be numerically unstable, so here we directly use the function provided by Gluon.
 
 ```{.python .input n=6}
-from mxnet import gluon
 loss = gluon.loss.SoftmaxCrossEntropyLoss()
 ```
@@ -84,8 +85,6 @@ loss = gluon.loss.SoftmaxCrossEntropyLoss()
 Training is the same as before.
 
 ```{.python .input n=8}
-from mxnet import autograd as autograd
 num_epochs = 5
 lr = 0.5
````
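For orientation: the hunks above consolidate the imports into one unnumbered setup block and tidy two constants (`28*28` becomes `784`, `.01` becomes `0.01`). The notebook then builds a one-hidden-layer MLP from these parameters. A minimal sketch of how such a scratch model is typically assembled is shown below; the `W2`/`b2`, `relu`, and `net` definitions are illustrative assumptions, since the diff shows only the first parameter block:

```python
# Illustrative sketch; only the first parameter block appears in the diff.
# W2, b2, relu and net are assumed from the surrounding chapter.
from mxnet import nd

num_inputs, num_outputs, num_hidden = 784, 10, 256
weight_scale = 0.01

W1 = nd.random_normal(shape=(num_inputs, num_hidden), scale=weight_scale)
b1 = nd.zeros(num_hidden)
W2 = nd.random_normal(shape=(num_hidden, num_outputs), scale=weight_scale)
b2 = nd.zeros(num_outputs)

def relu(X):
    # Elementwise max(X, 0).
    return nd.maximum(X, 0)

def net(X):
    # Flatten 28x28 images into 784-dim rows, then apply one hidden layer.
    X = X.reshape((-1, num_inputs))
    H = relu(nd.dot(X, W1) + b1)
    return nd.dot(H, W2) + b2
```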
gluonbook/utils.py (+41, -34)

```diff
@@ -9,6 +9,7 @@ from mxnet import autograd, gluon, image, nd
+from mxnet.gluon import nn, data as gdata, loss as gloss, utils as gutils
 import numpy as np
 
 
 class DataLoader(object):
     """similiar to gluon.data.DataLoader, but might be faster.
@@ -44,6 +45,7 @@ class DataLoader(object):
     def __len__(self):
         return len(self.dataset) // self.batch_size
 
+
 def load_data_fashion_mnist(batch_size, resize=None,
                             root="~/.mxnet/datasets/fashion-mnist"):
     """download the fashion mnist dataest and then load into memory"""
     def transform_mnist(data, label):
@@ -64,6 +66,7 @@ def load_data_fashion_mnist(batch_size, resize=None, root="~/.mxnet/datasets/fashion-mnist"):
     test_data = DataLoader(mnist_test, batch_size, shuffle=False,
                            transform=transform_mnist)
     return (train_data, test_data)
 
+
 def try_gpu():
     """If GPU is available, return mx.gpu(0); else return mx.cpu()"""
     try:
@@ -73,44 +76,49 @@ def try_gpu():
         ctx = mx.cpu()
     return ctx
 
 
 def try_all_gpus():
     """Return all available GPUs, or [mx.gpu()] if there is no GPU"""
-    ctx_list = []
+    ctxes = []
     try:
         for i in range(16):
             ctx = mx.gpu(i)
             _ = nd.array([0], ctx=ctx)
-            ctx_list.append(ctx)
+            ctxes.append(ctx)
     except:
         pass
-    if not ctx_list:
-        ctx_list = [mx.cpu()]
-    return ctx_list
+    if not ctxes:
+        ctxes = [mx.cpu()]
+    return ctxes
 
 
 def SGD(params, lr):
     """DEPRECATED!"""
     for param in params:
         param[:] = param - lr * param.grad
 
 
+def sgd(params, lr, batch_size):
+    """Mini-batch stochastic gradient descent."""
+    for param in params:
+        param[:] = param - lr * param.grad / batch_size
+
+
 def accuracy(y_hat, y):
     """Get accuracy."""
     return (y_hat.argmax(axis=1) == y).mean().asscalar()
 
 
 def _get_batch(batch, ctx):
-    """return data and label on ctx"""
+    """return features and labels on ctx"""
     if isinstance(batch, mx.io.DataBatch):
-        data = batch.data[0]
-        label = batch.label[0]
+        features = batch.data[0]
+        labels = batch.label[0]
     else:
-        data, label = batch
-    return (gluon.utils.split_and_load(data, ctx),
-            gluon.utils.split_and_load(label, ctx),
-            data.shape[0])
+        features, labels = batch
+    return (gutils.split_and_load(features, ctx),
+            gutils.split_and_load(labels, ctx),
+            features.shape[0])
 
 
 def evaluate_accuracy(data_iter, net, ctx=[mx.cpu()]):
@@ -154,38 +162,37 @@ def train_cpu(net, train_iter, test_iter, loss, num_epochs, batch_size,
               train_acc_sum / len(train_iter), test_acc))
 
 
-def train(train_data, test_data, net, loss, trainer, ctx, num_epochs,
+def train(train_iter, test_iter, net, loss, trainer, ctx, num_epochs,
           print_batches=None):
     """Train and evaluate a model."""
-    print("Start training on ", ctx)
+    print("training on ", ctx)
     if isinstance(ctx, mx.Context):
         ctx = [ctx]
-    for epoch in range(num_epochs):
-        train_loss, train_acc, n, m = 0.0, 0.0, 0.0, 0.0
-        if isinstance(train_data, mx.io.MXDataIter):
-            train_data.reset()
+    for epoch in range(1, num_epochs + 1):
+        train_l_sum, train_acc_sum, n, m = 0.0, 0.0, 0.0, 0.0
+        if isinstance(train_iter, mx.io.MXDataIter):
+            train_iter.reset()
         start = time()
-        for i, batch in enumerate(train_data):
-            data, label, batch_size = _get_batch(batch, ctx)
-            losses = []
+        for i, batch in enumerate(train_iter):
+            Xs, ys, batch_size = _get_batch(batch, ctx)
+            ls = []
             with autograd.record():
-                outputs = [net(X) for X in data]
-                losses = [loss(yhat, y) for yhat, y in zip(outputs, label)]
-            for l in losses:
+                y_hats = [net(X) for X in Xs]
+                ls = [loss(y_hat, y) for y_hat, y in zip(y_hats, ys)]
+            for l in ls:
                 l.backward()
-            train_acc += sum([(yhat.argmax(axis=1) == y).sum().asscalar()
-                              for yhat, y in zip(outputs, label)])
-            train_loss += sum([l.sum().asscalar() for l in losses])
+            train_acc_sum += sum([(y_hat.argmax(axis=1) == y).sum().asscalar()
+                                  for y_hat, y in zip(y_hats, ys)])
+            train_l_sum += sum([l.sum().asscalar() for l in ls])
             trainer.step(batch_size)
             n += batch_size
-            m += sum([y.size for y in label])
+            m += sum([y.size for y in ys])
             if print_batches and (i + 1) % print_batches == 0:
-                print("batch %d, loss: %f, train acc %f" % (
-                    n, train_loss / n, train_acc / m))
+                print("batch %d, loss %f, train acc %f" % (
+                    n, train_l_sum / n, train_acc_sum / m))
-        test_acc = evaluate_accuracy(test_data, net, ctx)
-        print("epoch %d, loss: %.4f, train acc %.3f, test acc %.3f, time %.1f sec"
-              % (epoch, train_loss / n, train_acc / m, test_acc, time() - start))
+        test_acc = evaluate_accuracy(test_iter, net, ctx)
+        print("epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec"
+              % (epoch, train_l_sum / n, train_acc_sum / m, test_acc, time() - start))
```
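Two renames above carry semantics worth spelling out. First, the new lowercase `sgd` divides by `batch_size` because the `train` loop sums (rather than averages) the loss over each mini-batch (`l.sum()` above), so `param.grad` holds a sum of per-example gradients; the division restores the average-gradient step. A runnable toy check of that equivalence (the array values are made up for illustration):

```python
# Toy check: with a summed loss, param.grad is the sum of per-example
# gradients, so sgd()'s division by batch_size yields the average step.
from mxnet import autograd, nd

w = nd.array([2.0])
w.attach_grad()
X = nd.array([[1.0], [3.0]])           # a mini-batch of two examples

with autograd.record():
    l = (X * w).sum()                  # summed, not averaged, loss
l.backward()                           # w.grad == 1.0 + 3.0 == 4.0

lr, batch_size = 0.1, X.shape[0]
w[:] = w - lr * w.grad / batch_size    # the update sgd(params, lr, batch_size) applies
print(w.asscalar())                    # 1.8 == 2.0 - 0.1 * (4.0 / 2)
```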
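Second, `_get_batch` now delegates to `gutils.split_and_load`, which slices a batch along axis 0 and copies one shard to each device in `ctx`; this is what lets the rewritten `train` loop (`for X in Xs`) run data-parallel over the devices returned by `try_all_gpus()`. A small sketch, using two CPU contexts as stand-ins for GPUs so it runs anywhere:

```python
# Sketch of the split_and_load behavior _get_batch relies on.
import mxnet as mx
from mxnet import nd
from mxnet.gluon import utils as gutils

ctx = [mx.cpu(), mx.cpu()]             # stand-ins for e.g. [mx.gpu(0), mx.gpu(1)]
features = nd.arange(8).reshape((4, 2))
shards = gutils.split_and_load(features, ctx)

# One shard per context, split evenly along the batch (first) axis.
print([s.shape for s in shards])       # [(2, 2), (2, 2)]
print([s.context for s in shards])     # the device each shard landed on
```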