OpenDocCN / d2l-zh
Commit ca29bb50 (unverified)
Authored on Jan 10, 2018 by Aston Zhang; committed via GitHub on Jan 10, 2018

Merge pull request #144 from astonzhang/rnn

add is_lstm flag

Parents: 1174e3bf, 89a30816

Showing 2 changed files with 55 additions and 18 deletions (+55 -18):

chapter_recurrent-neural-networks/rnn-scratch.md  +28 -8
utils.py  +27 -10
chapter_recurrent-neural-networks/rnn-scratch.md
@@ -285,15 +285,22 @@ print('state shape: ', state_new.shape)
 ```{.python .input  n=15}
 def predict_rnn(rnn, prefix, num_chars, params, hidden_dim, ctx, idx_to_char,
-                char_to_idx, get_inputs):
+                char_to_idx, get_inputs, is_lstm=False):
     # Predict the next num_chars characters following prefix.
     prefix = prefix.lower()
-    state = nd.zeros(shape=(1, hidden_dim), ctx=ctx)
+    state_h = nd.zeros(shape=(1, hidden_dim), ctx=ctx)
+    if is_lstm:
+        # Used only when the RNN is an LSTM; can be ignored otherwise.
+        state_c = nd.zeros(shape=(1, hidden_dim), ctx=ctx)
     output = [char_to_idx[prefix[0]]]
     for i in range(num_chars + len(prefix)):
         X = nd.array([output[-1]], ctx=ctx)
         # Iterate the hidden state over the sequence.
-        Y, state = rnn(get_inputs(X), state, *params)
+        if is_lstm:
+            # Used only when the RNN is an LSTM; can be ignored otherwise.
+            Y, state_h, state_c = rnn(get_inputs(X), state_h, state_c, *params)
+        else:
+            Y, state_h = rnn(get_inputs(X), state_h, *params)
         if i < len(prefix)-1:
             next_input = char_to_idx[prefix[i+1]]
         else:
...
@@ -358,7 +365,8 @@ from math import exp
 def train_and_predict_rnn(rnn, is_random_iter, epochs, num_steps, hidden_dim,
                           learning_rate, clipping_theta, batch_size,
                           pred_period, pred_len, seqs, get_params, get_inputs,
-                          ctx, corpus_indices, idx_to_char, char_to_idx):
+                          ctx, corpus_indices, idx_to_char, char_to_idx,
+                          is_lstm=False):
     if is_random_iter:
         data_iter = data_iter_random
     else:
...
@@ -370,16 +378,27 @@ def train_and_predict_rnn(rnn, is_random_iter, epochs, num_steps, hidden_dim,
     for e in range(1, epochs + 1):
         # With consecutive sampling, the hidden state only needs to be initialized at the beginning of each epoch.
         if not is_random_iter:
-            state = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
+            state_h = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
+            if is_lstm:
+                # Used only when the RNN is an LSTM; can be ignored otherwise.
+                state_c = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
         train_loss, num_examples = 0, 0
         for data, label in data_iter(corpus_indices, batch_size, num_steps,
                                      ctx):
             # With random sampling, the hidden state must be initialized before each mini-batch.
             if is_random_iter:
-                state = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
+                state_h = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
+                if is_lstm:
+                    # Used only when the RNN is an LSTM; can be ignored otherwise.
+                    state_c = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
             with autograd.record():
                 # outputs shape: (batch_size, vocab_size)
-                outputs, state = rnn(get_inputs(data), state, *params)
+                if is_lstm:
+                    # Used only when the RNN is an LSTM; can be ignored otherwise.
+                    outputs, state_h, state_c = rnn(get_inputs(data), state_h,
+                                                    state_c, *params)
+                else:
+                    outputs, state_h = rnn(get_inputs(data), state_h, *params)
                 # Let t_ib_j be the j-th element of the mini-batch at time step i:
                 # label shape: (batch_size * num_steps)
                 # label = [t_0b_0, t_0b_1, ..., t_1b_0, t_1b_1, ..., ]
...
@@ -401,7 +420,8 @@ def train_and_predict_rnn(rnn, is_random_iter, epochs, num_steps, hidden_dim,
                                 exp(train_loss/num_examples)))
             for seq in seqs:
                 print(' - ', predict_rnn(rnn, seq, pred_len, params,
-                      hidden_dim, ctx, idx_to_char, char_to_idx, get_inputs))
+                      hidden_dim, ctx, idx_to_char, char_to_idx, get_inputs,
+                      is_lstm))
             print()
 ```
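What the flag accommodates: an LSTM threads two state tensors through time, a hidden state and a cell state, whereas the plain RNN here threads only one. Below is a minimal sketch of an LSTM step function with the (outputs, state_h, state_c) contract the new branches call into; the weight names are hypothetical stand-ins for whatever get_params returns, and the book's own lstm_rnn (defined in its LSTM chapter) is the authoritative version.

```python
# A minimal sketch, not this commit's code: parameter names are assumed;
# only the call/return shape matches what the is_lstm branches expect.
from mxnet import nd

def lstm_rnn(inputs, state_h, state_c, *params):
    # inputs: a list of (batch_size, vocab_size) matrices, one per time step.
    [W_xi, W_hi, b_i, W_xf, W_hf, b_f, W_xo, W_ho, b_o,
     W_xc, W_hc, b_c, W_hy, b_y] = params
    H, C = state_h, state_c
    outputs = []
    for X in inputs:
        I = nd.sigmoid(nd.dot(X, W_xi) + nd.dot(H, W_hi) + b_i)  # input gate
        F = nd.sigmoid(nd.dot(X, W_xf) + nd.dot(H, W_hf) + b_f)  # forget gate
        O = nd.sigmoid(nd.dot(X, W_xo) + nd.dot(H, W_ho) + b_o)  # output gate
        C_tilde = nd.tanh(nd.dot(X, W_xc) + nd.dot(H, W_hc) + b_c)
        C = F * C + I * C_tilde  # cell state: the extra tensor state_c tracks
        H = O * nd.tanh(C)       # hidden state: what a plain RNN also carries
        outputs.append(nd.dot(H, W_hy) + b_y)
    # Return both states so the caller can feed them into the next call.
    return outputs, H, C
```

With is_lstm=False the call sites keep the old single-state form, Y, state_h = rnn(get_inputs(X), state_h, *params), so existing single-state cells are unaffected.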
utils.py
@@ -238,6 +238,7 @@ def data_iter_consecutive(corpus_indices, batch_size, num_steps, ctx=None):
         label = indices[:, i + 1: i + num_steps + 1]
         yield data, label


 def grad_clipping(params, theta, ctx):
+    """Gradient clipping."""
     if theta is not None:
...
@@ -249,15 +250,21 @@ def grad_clipping(params, theta, ctx):
         for p in params:
             p.grad[:] *= theta / norm


 def predict_rnn(rnn, prefix, num_chars, params, hidden_dim, ctx, idx_to_char,
-                char_to_idx, get_inputs):
+                char_to_idx, get_inputs, is_lstm=False):
+    """Predict the next chars given the prefix."""
     prefix = prefix.lower()
-    state = nd.zeros(shape=(1, hidden_dim), ctx=ctx)
+    state_h = nd.zeros(shape=(1, hidden_dim), ctx=ctx)
+    if is_lstm:
+        state_c = nd.zeros(shape=(1, hidden_dim), ctx=ctx)
     output = [char_to_idx[prefix[0]]]
     for i in range(num_chars + len(prefix)):
         X = nd.array([output[-1]], ctx=ctx)
-        Y, state = rnn(get_inputs(X), state, *params)
+        if is_lstm:
+            Y, state_h, state_c = rnn(get_inputs(X), state_h, state_c, *params)
+        else:
+            Y, state_h = rnn(get_inputs(X), state_h, *params)
         if i < len(prefix)-1:
             next_input = char_to_idx[prefix[i+1]]
         else:
...
@@ -265,10 +272,12 @@ def predict_rnn(rnn, prefix, num_chars, params, hidden_dim, ctx, idx_to_char,
         output.append(next_input)
     return ''.join([idx_to_char[i] for i in output])


 def train_and_predict_rnn(rnn, is_random_iter, epochs, num_steps, hidden_dim,
                           learning_rate, clipping_theta, batch_size,
                           pred_period, pred_len, seqs, get_params, get_inputs,
-                          ctx, corpus_indices, idx_to_char, char_to_idx):
+                          ctx, corpus_indices, idx_to_char, char_to_idx,
+                          is_lstm=False):
+    """Train an RNN model and predict the next item in the sequence."""
     if is_random_iter:
         data_iter = data_iter_random
...
@@ -278,21 +287,29 @@ def train_and_predict_rnn(rnn, is_random_iter, epochs, num_steps, hidden_dim,
     softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

-    for e in range(1, epochs + 1):
+    for e in range(1, epochs + 1):
         # If consecutive sampling is used, in the same epoch, the hidden state
         # is initialized only at the beginning of the epoch.
         if not is_random_iter:
-            state = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
+            state_h = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
+            if is_lstm:
+                state_c = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
         train_loss, num_examples = 0, 0
         for data, label in data_iter(corpus_indices, batch_size, num_steps,
                                      ctx):
             # If random sampling is used, the hidden state has to be
             # initialized for each mini-batch.
             if is_random_iter:
-                state = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
+                state_h = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
+                if is_lstm:
+                    state_c = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
             with autograd.record():
                 # outputs shape: (batch_size, vocab_size)
-                outputs, state = rnn(get_inputs(data), state, *params)
+                if is_lstm:
+                    outputs, state_h, state_c = rnn(get_inputs(data), state_h,
+                                                    state_c, *params)
+                else:
+                    outputs, state_h = rnn(get_inputs(data), state_h, *params)
                 # Let t_ib_j be the j-th element of the mini-batch at time i.
                 # label shape: (batch_size * num_steps)
                 # label = [t_0b_0, t_0b_1, ..., t_1b_0, t_1b_1, ..., ].
...
@@ -315,6 +332,6 @@ def train_and_predict_rnn(rnn, is_random_iter, epochs, num_steps, hidden_dim,
                                 exp(train_loss/num_examples)))
             for seq in seqs:
                 print(' - ', predict_rnn(rnn, seq, pred_len, params,
-                      hidden_dim, ctx, idx_to_char, char_to_idx, get_inputs))
+                      hidden_dim, ctx, idx_to_char, char_to_idx, get_inputs, is_lstm))
             print()
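A hedged usage sketch of the updated trainer: the hyperparameter values below are illustrative only, and lstm_rnn, get_params, get_inputs, corpus_indices, idx_to_char, and char_to_idx are assumed to be defined as in the chapter; the only thing this commit itself adds is the is_lstm keyword.

```python
# Illustrative call only; every value here is an assumption, not part of
# the commit. Passing is_lstm=True switches on the two-state code path.
import mxnet as mx

ctx = mx.cpu()  # or mx.gpu(0) if a GPU is available

train_and_predict_rnn(rnn=lstm_rnn, is_random_iter=False, epochs=200,
                      num_steps=35, hidden_dim=256, learning_rate=0.1,
                      clipping_theta=5, batch_size=32, pred_period=20,
                      pred_len=100, seqs=['分开'],  # prefix from the chapter's lyrics data
                      get_params=get_params, get_inputs=get_inputs,
                      ctx=ctx, corpus_indices=corpus_indices,
                      idx_to_char=idx_to_char, char_to_idx=char_to_idx,
                      is_lstm=True)
```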