Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
OpenDocCN
d2l-zh
提交
8450429f
D
d2l-zh
项目概览
OpenDocCN
/
d2l-zh
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
d2l-zh
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
8450429f
编写于
5月 27, 2018
作者:
A
Aston Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
rnn code
上级
c5a424b9
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
72 addition
and
81 deletion
+72
-81
chapter_recurrent-neural-networks/gru-scratch.md
chapter_recurrent-neural-networks/gru-scratch.md
+3
-3
chapter_recurrent-neural-networks/lstm-scratch.md
chapter_recurrent-neural-networks/lstm-scratch.md
+9
-4
chapter_recurrent-neural-networks/rnn-scratch.md
chapter_recurrent-neural-networks/rnn-scratch.md
+55
-69
gluonbook/utils.py
gluonbook/utils.py
+5
-5
未找到文件。
chapter_recurrent-neural-networks/gru-scratch.md
浏览文件 @
8450429f
...
...
@@ -151,13 +151,13 @@ def gru_rnn(inputs, H, *params):
```
{.python .input n=5}
get_inputs = gb.to_onehot
num_epochs =
3
50
num_epochs =
1
50
num_steps = 35
batch_size = 32
lr = 0.
7
lr = 0.
25
clipping_theta = 5
prefixes = ['分开', '不分开']
pred_period =
7
0
pred_period =
3
0
pred_len = 100
gb.train_and_predict_rnn(gru_rnn, False, num_epochs, num_steps, num_hiddens,
...
...
chapter_recurrent-neural-networks/lstm-scratch.md
浏览文件 @
8450429f
...
...
@@ -96,7 +96,12 @@ vocab_size = len(char_to_idx)
以下部分对模型参数进行初始化。参数
`num_hiddens`
定义了隐含状态的长度。
```
{.python .input n=3}
ctx = gb.try_gpu()
#ctx = gb.try_gpu()
import mxnet as mx
ctx = mx.gpu(3)
input_dim = vocab_size
num_hiddens = 256
output_dim = vocab_size
...
...
@@ -170,13 +175,13 @@ def lstm_rnn(inputs, state_h, state_c, *params):
```
{.python .input n=5}
get_inputs = gb.to_onehot
num_epochs =
4
50
num_epochs =
1
50
num_steps = 35
batch_size = 32
lr = 0.
6
lr = 0.
25
clipping_theta = 5
prefixes = ['分开', '不分开']
pred_period =
9
0
pred_period =
3
0
pred_len = 100
gb.train_and_predict_rnn(lstm_rnn, False, num_epochs, num_steps, num_hiddens,
...
...
chapter_recurrent-neural-networks/rnn-scratch.md
浏览文件 @
8450429f
...
...
@@ -345,74 +345,66 @@ $$\text{loss} = -\frac{1}{N} \sum_{i=1}^N \log p_{\text{target}_i}$$
任何一个有效模型的困惑度值必须小于预测集中元素的数量。在本例中,困惑度必须小于字典中的字符数 $|W|$。如果一个模型可以取得较低的困惑度值(更接近 1),通常情况下,该模型的预测更加准确。
```
{.python .input n=17}
from math import exp
def train_and_predict_rnn(rnn, is_random_iter, epochs, num_steps, hidden_dim,
learning_rate, clipping_theta, batch_size,
pred_period, pred_len, seqs, get_params, get_inputs,
ctx, corpus_indices, idx_to_char, char_to_idx,
is_lstm=False):
def train_and_predict_rnn(rnn, is_random_iter, num_epochs, num_steps,
num_hiddens, lr, clipping_theta, batch_size,
vocab_size, pred_period, pred_len, prefixes,
get_params, get_inputs, ctx, corpus_indices,
idx_to_char, char_to_idx, is_lstm=False):
if is_random_iter:
data_iter = data_iter_random
else:
data_iter = data_iter_consecutive
params = get_params()
softmax_cross_entropy = gloss.SoftmaxCrossEntropyLoss()
loss = gloss.SoftmaxCrossEntropyLoss()
for e
in range(1,
epochs + 1):
# 如使用相邻批量采样,在同一个
epoch中,隐含变量只需要在该epoch开始的时候
初始化。
for e
poch in range(1, num_
epochs + 1):
# 如使用相邻批量采样,在同一个
epoch 中,隐藏变量只需要在该 epoch 开始时
初始化。
if not is_random_iter:
state_h = nd.zeros(shape=(batch_size,
hidden_dim
), ctx=ctx)
state_h = nd.zeros(shape=(batch_size,
num_hiddens
), ctx=ctx)
if is_lstm:
# 当RNN使用LSTM时才会用到,这里可以忽略。
state_c = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
train_loss, num_examples = 0, 0
for data, label in data_iter(corpus_indices, batch_size, num_steps,
ctx):
# 如使用随机批量采样,处理每个随机小批量前都需要初始化隐含变量。
state_c = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
train_l_sum = nd.array([0], ctx=ctx)
train_l_cnt = 0
for X, Y in data_iter(corpus_indices, batch_size, num_steps, ctx):
# 如使用随机批量采样,处理每个随机小批量前都需要初始化隐藏变量。
if is_random_iter:
state_h = nd.zeros(shape=(batch_size,
hidden_dim
), ctx=ctx)
state_h = nd.zeros(shape=(batch_size,
num_hiddens
), ctx=ctx)
if is_lstm:
# 当RNN使用LSTM时才会用到,这里可以忽略。
state_c = nd.zeros(shape=(batch_size, hidden_dim),
ctx=ctx)
state_c = nd.zeros(shape=(batch_size, num_hiddens),
ctx=ctx)
# 如使用相邻批量采样,需要从计算图分离隐藏状态变量。
else:
state_h = state_h.detach()
if is_lstm:
state_c = state_c.detach()
state_c = state_c.detach()
with autograd.record():
# outputs
尺寸
:(batch_size, vocab_size)
# outputs
形状
:(batch_size, vocab_size)
if is_lstm:
# 当RNN使用LSTM时才会用到,这里可以忽略。
outputs, state_h, state_c = rnn(get_inputs(data, vocab_size), state_h,
state_c, *params)
outputs, state_h, state_c = rnn(
get_inputs(X, vocab_size), state_h, state_c, *params)
else:
outputs, state_h = rnn(get_inputs(data, vocab_size), state_h, *params)
# 设t_ib_j为i时间批量中的j元素:
# label 尺寸:(batch_size * num_steps)
# label = [t_0b_0, t_0b_1, ..., t_1b_0, t_1b_1, ..., ]
label = label.T.reshape((-1,))
# 拼接outputs,尺寸:(batch_size * num_steps, vocab_size)。
outputs, state_h = rnn(
get_inputs(X, vocab_size), state_h, *params)
# 设 t_ib_j 为时间步 i 批量中的元素 j:
# Y 形状:(batch_size * num_steps)
# Y = [t_0b_0, t_0b_1, ..., t_1b_0, t_1b_1, ..., ]
y = Y.T.reshape((-1,))
# 拼接 outputs,形状:(batch_size * num_steps, vocab_size)。
outputs = nd.concat(*outputs, dim=0)
# 经上述操作,outputs和label已对齐。
loss = softmax_cross_entropy(outputs, label)
loss.backward()
# loss(outputs, y) 形状:(batch_size * num_steps,)
l = loss(outputs, y)
l.backward()
grad_clipping(params, clipping_theta, ctx)
gb.sgd(params, learning_rate, 1)
train_loss += nd.sum(loss).asscalar()
num_examples += loss.size
if e % pred_period == 0:
print("Epoch %d. Perplexity %f" % (e,
exp(train_loss/num_examples)))
for seq in seqs:
print(' - ', predict_rnn(rnn, seq, pred_len, params,
hidden_dim, vocab_size, ctx, idx_to_char, char_to_idx, get_inputs,
is_lstm))
print()
gb.sgd(params, lr, 1)
train_l_sum = train_l_sum + l.sum()
train_l_cnt += l.size
if epoch % pred_period == 0:
print("\nepoch %d, perplexity %f"
% (epoch, (train_l_sum / train_l_cnt).exp().asscalar()))
for prefix in prefixes:
print(' - ', predict_rnn(
rnn, prefix, pred_len, params, num_hiddens, vocab_size,
ctx, idx_to_char, char_to_idx, get_inputs, is_lstm))
```
我们将
`to_onehot`
、
`data_iter_random`
、
`data_iter_consecutive`
、
`grad_clipping`
、
`predict_rnn`
和
`train_and_predict_rnn`
函数定义在gluonbook包中供后面章节调用。
...
...
@@ -420,38 +412,32 @@ def train_and_predict_rnn(rnn, is_random_iter, epochs, num_steps, hidden_dim,
以下定义模型参数和预测序列前缀。
```
{.python .input}
epochs = 200
num_
epochs = 200
num_steps = 35
batch_size = 32
seq1 = '分开'
seq2 = '不分开'
seq3 = '战争中部队'
seqs = [seq1, seq2, seq3]
lr = 0.25
clipping_theta = 5
prefixes = ['分开', '不分开']
pred_period = 40
pred_len = 100
```
我们先采用随机批量采样实验循环神经网络谱写歌词。我们假定谱写歌词的前缀分别为“分开”和“不分开”。
```
{.python .input n=18}
train_and_predict_rnn(rnn=rnn, is_random_iter=False, epochs=200, num_steps=35,
hidden_dim=num_hiddens, learning_rate=0.2,
clipping_theta=5, batch_size=32, pred_period=40,
pred_len=100, seqs=seqs, get_params=get_params,
get_inputs=get_inputs, ctx=ctx,
corpus_indices=corpus_indices, idx_to_char=idx_to_char,
char_to_idx=char_to_idx)
train_and_predict_rnn(rnn, True, num_epochs, num_steps, num_hiddens, lr,
clipping_theta, batch_size, vocab_size, pred_period,
pred_len, prefixes, get_params, get_inputs, ctx,
corpus_indices, idx_to_char, char_to_idx)
```
我们再采用相邻批量采样实验循环神经网络谱写歌词。
```
{.python .input n=19}
train_and_predict_rnn(rnn=rnn, is_random_iter=False, epochs=200, num_steps=35,
hidden_dim=num_hiddens, learning_rate=0.2,
clipping_theta=5, batch_size=32, pred_period=40,
pred_len=100, seqs=seqs, get_params=get_params,
get_inputs=get_inputs, ctx=ctx,
corpus_indices=corpus_indices, idx_to_char=idx_to_char,
char_to_idx=char_to_idx)
train_and_predict_rnn(rnn, False, num_epochs, num_steps, num_hiddens, lr,
clipping_theta, batch_size, vocab_size, pred_period,
pred_len, prefixes, get_params, get_inputs, ctx,
corpus_indices, idx_to_char, char_to_idx)
```
可以看到一开始学到简单的字符,然后简单的词,接着是复杂点的词,然后看上去似乎像个句子了。
...
...
gluonbook/utils.py
浏览文件 @
8450429f
...
...
@@ -344,7 +344,7 @@ def train_and_predict_rnn(rnn, is_random_iter, num_epochs, num_steps,
if
is_lstm
:
state_c
=
nd
.
zeros
(
shape
=
(
batch_size
,
num_hiddens
),
ctx
=
ctx
)
train_l_sum
=
nd
.
array
([
0
],
ctx
=
ctx
)
num_iters
=
0
train_l_cnt
=
0
for
X
,
Y
in
data_iter
(
corpus_indices
,
batch_size
,
num_steps
,
ctx
):
if
is_random_iter
:
state_h
=
nd
.
zeros
(
shape
=
(
batch_size
,
num_hiddens
),
ctx
=
ctx
)
...
...
@@ -364,15 +364,15 @@ def train_and_predict_rnn(rnn, is_random_iter, num_epochs, num_steps,
get_inputs
(
X
,
vocab_size
),
state_h
,
*
params
)
y
=
Y
.
T
.
reshape
((
-
1
,))
outputs
=
nd
.
concat
(
*
outputs
,
dim
=
0
)
l
=
loss
(
outputs
,
y
)
.
sum
()
/
(
batch_size
*
num_steps
)
l
=
loss
(
outputs
,
y
)
l
.
backward
()
grad_clipping
(
params
,
clipping_theta
,
ctx
)
sgd
(
params
,
lr
,
1
)
train_l_sum
=
train_l_sum
+
l
num_iters
+=
1
train_l_sum
=
train_l_sum
+
l
.
sum
()
train_l_cnt
+=
l
.
size
if
epoch
%
pred_period
==
0
:
print
(
"
\n
epoch %d, perplexity %f"
%
(
epoch
,
(
train_l_sum
/
num_iters
).
exp
().
asscalar
()))
%
(
epoch
,
(
train_l_sum
/
train_l_cnt
).
exp
().
asscalar
()))
for
prefix
in
prefixes
:
print
(
' - '
,
predict_rnn
(
rnn
,
prefix
,
pred_len
,
params
,
num_hiddens
,
vocab_size
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录