Commit a86f344a (a26546593/dive-into-dl-pytorch)
Authored: Nov 26, 2019, by ShusenTang
Parent: c5d0f74a

    fix bug(#64), update mask, fix typo

Showing 2 changed files with 40 additions and 62 deletions (+40, -62):

  code/chapter10_natural-language-processing/10.12_machine-translation.ipynb  (+31, -54)
  docs/chapter10_natural-language-processing/10.12_machine-translation.md     (+9, -8)
File: code/chapter10_natural-language-processing/10.12_machine-translation.ipynb
@@ -16,7 +16,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "1.0.0 cpu\n"
+      "1.2.0 cpu\n"
      ]
     }
    ],
@@ -52,9 +52,7 @@
   {
    "cell_type": "code",
    "execution_count": 2,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Record all the tokens of a sequence in all_tokens so a vocabulary can be built later, then append PAD to the sequence until the sequence\n",
@@ -75,9 +73,7 @@
   {
    "cell_type": "code",
    "execution_count": 3,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "def read_data(max_seq_len):\n",
@@ -130,9 +126,7 @@
   {
    "cell_type": "code",
    "execution_count": 5,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "class Encoder(nn.Module):\n",
@@ -183,9 +177,7 @@
   {
    "cell_type": "code",
    "execution_count": 7,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "def attention_model(input_size, attention_size):\n",
@@ -198,9 +190,7 @@
   {
    "cell_type": "code",
    "execution_count": 8,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "def attention_forward(model, enc_states, dec_state):\n",
@@ -250,9 +240,7 @@
   {
    "cell_type": "code",
    "execution_count": 10,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "class Decoder(nn.Module):\n",
@@ -261,8 +249,9 @@
    "        super(Decoder, self).__init__()\n",
    "        self.embedding = nn.Embedding(vocab_size, embed_size)\n",
    "        self.attention = attention_model(2*num_hiddens, attention_size)\n",
-   "        # The GRU input contains the attention output c and the actual input, so its size is 2*embed_size\n",
-   "        self.rnn = nn.GRU(2*embed_size, num_hiddens, num_layers, dropout=drop_prob)\n",
+   "        # The GRU input contains the attention output c and the actual input, so its size is num_hiddens+embed_size\n",
+   "        self.rnn = nn.GRU(num_hiddens + embed_size, num_hiddens, \n",
+   "                          num_layers, dropout=drop_prob)\n",
    "        self.out = nn.Linear(num_hiddens, vocab_size)\n",
    "\n",
    "    def forward(self, cur_input, state, enc_states):\n",
@@ -272,8 +261,8 @@
    "        \"\"\"\n",
    "        # Compute the context vector using the attention mechanism\n",
    "        c = attention_forward(self.attention, enc_states, state[-1])\n",
-   "        # Concatenate the embedded input and the context vector along the feature dimension\n",
-   "        input_and_c = torch.cat((self.embedding(cur_input), c), dim=1) # (batch_size, 2*embed_size)\n",
+   "        # Concatenate the embedded input and the context vector along the feature dimension, (batch_size, num_hiddens+embed_size)\n",
+   "        input_and_c = torch.cat((self.embedding(cur_input), c), dim=1) \n",
    "        # Add a time-step dimension to the concatenation, with 1 time step\n",
    "        output, state = self.rnn(input_and_c.unsqueeze(0), state)\n",
    "        # Remove the time-step dimension; the output shape is (batch_size, vocab_size)\n",
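The corrected size only matters when embed_size differs from num_hiddens; when the two are equal, 2*embed_size happens to coincide with num_hiddens + embed_size, which is presumably how the original slipped through. A minimal shape check, using illustrative sizes that are not the repo's defaults, confirms the fix:

```python
import torch
import torch.nn as nn

# Illustrative sizes (not from the repo); the bug is visible only when they differ
embed_size, num_hiddens, batch_size, vocab_size = 64, 128, 4, 100

embedding = nn.Embedding(vocab_size, embed_size)
cur_input = torch.zeros(batch_size, dtype=torch.long)  # current target-side tokens
c = torch.zeros(batch_size, num_hiddens)               # attention context vector

# Concatenate on the feature dimension, as in the decoder's forward
input_and_c = torch.cat((embedding(cur_input), c), dim=1)
print(input_and_c.shape)  # torch.Size([4, 192]) = (batch_size, embed_size + num_hiddens)

# So the GRU must be built with input size num_hiddens + embed_size, not 2*embed_size
rnn = nn.GRU(num_hiddens + embed_size, num_hiddens)
output, state = rnn(input_and_c.unsqueeze(0))          # add a time-step dim of length 1
print(output.shape)  # torch.Size([1, 4, 128])
```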
@@ -295,9 +284,7 @@
   {
    "cell_type": "code",
    "execution_count": 11,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "def batch_loss(encoder, decoder, X, Y, loss):\n",
@@ -308,7 +295,7 @@
    "    dec_state = decoder.begin_state(enc_state)\n",
    "    # The decoder's input at the initial time step is BOS\n",
    "    dec_input = torch.tensor([out_vocab.stoi[BOS]] * batch_size)\n",
-   "    # We use the mask variable to ignore losses where the label is the padding token PAD\n",
+   "    # We use the mask variable to ignore losses where the label is the padding token PAD; initialized to all ones\n",
    "    mask, num_not_pad_tokens = torch.ones(batch_size,), 0\n",
    "    l = torch.tensor([0.0])\n",
    "    for y in Y.permute(1,0): # Y shape: (batch, seq_len)\n",
@@ -316,17 +303,15 @@
    "        l = l + (mask * loss(dec_output, y)).sum()\n",
    "        dec_input = y  # Use teacher forcing\n",
    "        num_not_pad_tokens += mask.sum().item()\n",
-   "        # Set the mask to 0 at PAD positions; the original text uses y != out_vocab.stoi[EOS] here, which seems wrong\n",
-   "        mask = mask * (y != out_vocab.stoi[PAD]).float()\n",
+   "        # Everything after EOS is PAD. The line below ensures that once EOS is seen, mask stays 0 for all remaining steps\n",
+   "        mask = mask * (y != out_vocab.stoi[EOS]).float()\n",
    "    return l / num_not_pad_tokens"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
-  "metadata": {
-   "collapsed": true
-  },
+  "metadata": {},
   "outputs": [],
   "source": [
    "def train(encoder, decoder, dataset, lr, batch_size, num_epochs):\n",
@@ -358,11 +343,11 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "epoch 10, loss 0.441\n",
-      "epoch 20, loss 0.183\n",
-      "epoch 30, loss 0.100\n",
-      "epoch 40, loss 0.046\n",
-      "epoch 50, loss 0.025\n"
+      "epoch 10, loss 0.475\n",
+      "epoch 20, loss 0.245\n",
+      "epoch 30, loss 0.157\n",
+      "epoch 40, loss 0.052\n",
+      "epoch 50, loss 0.039\n"
      ]
     }
    ],
@@ -386,9 +371,7 @@
   {
    "cell_type": "code",
    "execution_count": 14,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "def translate(encoder, decoder, input_seq, max_seq_len):\n",
@@ -443,9 +426,7 @@
   {
    "cell_type": "code",
    "execution_count": 16,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "def bleu(pred_tokens, label_tokens, k):\n",
@@ -466,9 +447,7 @@
   {
    "cell_type": "code",
    "execution_count": 17,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "def score(input_seq, label_seq, k):\n",
@@ -504,29 +483,27 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "bleu 0.658, predict: they are russian .\n"
+      "bleu 0.658, predict: they are exhausted .\n"
      ]
     }
    ],
    "source": [
-    "score('ils sont canadiens .', 'they are canadian .', k=2)"
+    "score('ils sont canadienne .', 'they are canadian .', k=2)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": []
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python [conda env:anaconda3]",
+   "display_name": "Python [conda env:py36]",
    "language": "python",
-   "name": "conda-env-anaconda3-py"
+   "name": "conda-env-py36-py"
   },
   "language_info": {
    "codemirror_mode": {
@@ -538,7 +515,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.8"
+   "version": "3.6.2"
   }
  },
  "nbformat": 4,
File: docs/chapter10_natural-language-processing/10.12_machine-translation.md
@@ -165,8 +165,9 @@ class Decoder(nn.Module):
         super(Decoder, self).__init__()
         self.embedding = nn.Embedding(vocab_size, embed_size)
         self.attention = attention_model(2*num_hiddens, attention_size)
-        # The GRU input contains the attention output c and the actual input, so its size is 2*embed_size
-        self.rnn = nn.GRU(2*embed_size, num_hiddens, num_layers, dropout=drop_prob)
+        # The GRU input contains the attention output c and the actual input, so its size is num_hiddens+embed_size
+        self.rnn = nn.GRU(num_hiddens + embed_size, num_hiddens, 
+                          num_layers, dropout=drop_prob)
         self.out = nn.Linear(num_hiddens, vocab_size)

     def forward(self, cur_input, state, enc_states):
@@ -176,8 +177,8 @@ class Decoder(nn.Module):
         """
         # Compute the context vector using the attention mechanism
         c = attention_forward(self.attention, enc_states, state[-1])
-        # Concatenate the embedded input and the context vector along the feature dimension
-        input_and_c = torch.cat((self.embedding(cur_input), c), dim=1) # (batch_size, 2*embed_size)
+        # Concatenate the embedded input and the context vector along the feature dimension, (batch_size, num_hiddens+embed_size)
+        input_and_c = torch.cat((self.embedding(cur_input), c), dim=1) 
         # Add a time-step dimension to the concatenation, with 1 time step
         output, state = self.rnn(input_and_c.unsqueeze(0), state)
         # Remove the time-step dimension; the output shape is (batch_size, vocab_size)
@@ -210,8 +211,8 @@ def batch_loss(encoder, decoder, X, Y, loss):
             l = l + (mask * loss(dec_output, y)).sum()
             dec_input = y  # Use teacher forcing
             num_not_pad_tokens += mask.sum().item()
-            # Set the mask to 0 at PAD positions; the original text uses y != out_vocab.stoi[EOS] here, which seems wrong
-            mask = mask * (y != out_vocab.stoi[PAD]).float()
+            # Everything after EOS is PAD. The line below ensures that once EOS is seen, mask stays 0 for all remaining steps
+            mask = mask * (y != out_vocab.stoi[EOS]).float()
     return l / num_not_pad_tokens
 ```
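To see what the corrected update does, here is a toy trace of the mask across time steps. The token ids are made-up stand-ins for out_vocab.stoi[PAD] and out_vocab.stoi[EOS], not the repo's actual vocabulary:

```python
import torch

# Hypothetical token ids for illustration only
PAD, EOS, WORD = 0, 1, 2
Y = torch.tensor([[WORD, EOS, PAD, PAD]])  # one label sequence: a word, EOS, then padding

mask = torch.ones(1)
counted = []
for y in Y.permute(1, 0):             # iterate over time steps, as in batch_loss
    counted.append(mask.item())       # 1.0 means this step's loss would be counted
    mask = mask * (y != EOS).float()  # the fixed update: mask drops to 0 after EOS
print(counted)  # [1.0, 1.0, 0.0, 0.0] -> the EOS step counts, every PAD step is ignored

# With the pre-fix update `mask = mask * (y != PAD).float()`, the first PAD step
# would still have mask == 1 and be counted: [1.0, 1.0, 1.0, 0.0]
```

Because the mask is updated after the loss is accumulated, the EOS position itself still contributes to the loss, and masking on EOS (rather than PAD) zeroes out every position after it.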
@@ -299,7 +300,7 @@ translate(encoder, decoder, input_seq, max_seq_len)
 Machine translation results are usually evaluated with BLEU (Bilingual Evaluation Understudy) [1]. For any subsequence of the model's predicted sequence, BLEU checks whether the subsequence appears in the label sequence.
-Specifically, let the precision of subsequences with $n$ words be $p_n$. It is the ratio of the number of $n$-word subsequences of the predicted sequence that match the label sequence to the number of $n$-word subsequences in the predicted sequence. For example, suppose the label sequence is $A$, $B$, $C$, $D$, $E$, $F$ and the predicted sequence is $A$, $B$, $B$, $C$, $D$; then $p_1 = 4/5,\ p_2 = 3/4,\ p_3 = 1/3,\ p_4 = 0$. Let $len_{\text{label}}$ and $len_{\text{pred}}$ be the numbers of words in the label sequence and the predicted sequence. Then BLEU is defined as
+Specifically, let the precision of subsequences with $n$ words be $p_n$. It is the ratio of the number of $n$-word subsequences of the predicted sequence that match the label sequence to the number of $n$-word subsequences in the predicted sequence. For example, suppose the label sequence is $A$, $B$, $C$, $D$, $E$, $F$ and the predicted sequence is $A$, $B$, $B$, $C$, $D$; then $p_1 = 4/5, p_2 = 3/4, p_3 = 1/3, p_4 = 0$. Let $len_{\text{label}}$ and $len_{\text{pred}}$ be the numbers of words in the label sequence and the predicted sequence. Then BLEU is defined as

 $$\exp\left(\min\left(0, 1 - \frac{len_{\text{label}}}{len_{\text{pred}}}\right)\right) \prod_{n=1}^k p_n^{1/2^n},$$
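As a sanity check on this definition, the following standalone sketch (an illustrative re-implementation, not necessarily identical to the repo's bleu function) reproduces the worked example above:

```python
import collections
import math

def bleu_score(pred_tokens, label_tokens, k):
    """BLEU per the formula above: brevity penalty times the product of p_n ** (1 / 2**n)."""
    len_pred, len_label = len(pred_tokens), len(label_tokens)
    score = math.exp(min(0, 1 - len_label / len_pred))  # brevity penalty
    for n in range(1, k + 1):
        num_matches = 0
        label_subs = collections.Counter(
            tuple(label_tokens[i:i + n]) for i in range(len_label - n + 1))
        for i in range(len_pred - n + 1):
            ngram = tuple(pred_tokens[i:i + n])
            if label_subs[ngram] > 0:   # each label n-gram can be matched at most once
                num_matches += 1
                label_subs[ngram] -= 1
        score *= (num_matches / (len_pred - n + 1)) ** (1 / 2 ** n)
    return score

# The worked example: label A B C D E F, prediction A B B C D, so p1 = 4/5, p2 = 3/4
print(bleu_score(list('ABBCD'), list('ABCDEF'), k=2))  # ~0.681
```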
@@ -348,7 +349,7 @@ bleu 1.000, predict: they are watching .
 Test a sample that is not in the training set.
 ```python
-score('ils sont canadiens .', 'they are canadian .', k=2)
+score('ils sont canadienne .', 'they are canadian .', k=2)
 ```
 Output:
 ```