Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
592f84a4
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
592f84a4
编写于
6月 12, 2018
作者:
G
guosheng
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Complete the docs of beam_search_op, beam_search_decode_op and the python wrapper
上级
5e20a8ef
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
182 addition
and
33 deletion
+182
-33
paddle/fluid/operators/beam_search_decode_op.cc
paddle/fluid/operators/beam_search_decode_op.cc
+20
-9
paddle/fluid/operators/beam_search_decode_op.h
paddle/fluid/operators/beam_search_decode_op.h
+1
-1
paddle/fluid/operators/beam_search_op.cc
paddle/fluid/operators/beam_search_op.cc
+37
-16
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+111
-4
python/paddle/fluid/tests/book/test_machine_translation.py
python/paddle/fluid/tests/book/test_machine_translation.py
+13
-3
未找到文件。
paddle/fluid/operators/beam_search_decode_op.cc
浏览文件 @
592f84a4
...
...
@@ -148,21 +148,32 @@ class BeamSearchDecodeOpProtoMaker : public framework::OpProtoAndCheckerMaker {
void
Make
()
override
{
AddInput
(
"Ids"
,
"(LodTensorArray)"
"
score of the candidate words in each step
"
);
"
The LodTensorArray containing the selected ids of all steps
"
);
AddInput
(
"Scores"
,
"(LodTensorArray)"
"score of the candidate words in each step"
);
AddOutput
(
"SentenceIds"
,
"(LodTensor)"
"All possible result sentences of word ids"
);
AddOutput
(
"SentenceScores"
,
"(LodTensor)"
"All possible result sentences of word scores"
);
"The LodTensorArray containing the selected scores of all steps"
);
AddOutput
(
"SentenceIds"
,
"(LodTensor)"
"An LodTensor containing all generated id sequences for all source "
"sentences"
);
AddOutput
(
"SentenceScores"
,
"(LodTensor)"
"An LodTensor containing scores corresponding to Output(SentenceIds)"
);
AddAttr
<
int
>
(
"beam_size"
,
"beam size for beam search"
);
AddAttr
<
int
>
(
"end_id"
,
"the token id which indicates the end of a sequence"
);
AddComment
(
R"DOC(
Pack the result of Beam search op into SentenceIds and SentenceScores.
Beam Search Decode Operator. This Operator constructs the full hypotheses for
each source sentence by walking back along the LoDTensorArray Input(ids)
whose lods can be used to restore the path in the beam search tree.
The Output(SentenceIds) and Output(SentenceScores) separately contain the
generated id sequences and the corresponding scores. The shapes and lods of the
two LodTensors are the same. The lod level is 2 and the two levels separately
indicate how many hypotheses each source sentence has and how many ids each
hypothesis has.
)DOC"
);
}
};
...
...
paddle/fluid/operators/beam_search_decode_op.h
浏览文件 @
592f84a4
...
...
@@ -27,7 +27,7 @@ using LoDTensor = framework::LoDTensor;
using
LoDTensorArray
=
framework
::
LoDTensorArray
;
// all the lod have 2 levels.
// The
F
irst is source level, the second is sentence level.
// The
f
irst is source level, the second is sentence level.
// source level describes how many prefixes (branches) for each source sentence
// (beam). sentence level describes how these candidates belong to the prefixes.
const
size_t
kSourceLevel
=
0
;
...
...
paddle/fluid/operators/beam_search_op.cc
浏览文件 @
592f84a4
...
...
@@ -129,12 +129,9 @@ std::vector<std::vector<BeamSearch::Item>> BeamSearch::SelectTopBeamSizeItems(
// for each source sentence, select the top beam_size items across all
// candidate sets.
while
(
NextItemSet
(
pre_ids
,
pre_scores
,
&
items
))
{
std
::
nth_element
(
std
::
begin
(
items
),
std
::
begin
(
items
)
+
beam_size_
,
std
::
end
(
items
),
[](
const
Item
&
a
,
const
Item
&
b
)
{
// TODO(superjom) make score's comparison customizable.
// partial sort in descending order
return
a
.
score
>
b
.
score
;
});
std
::
nth_element
(
std
::
begin
(
items
),
std
::
begin
(
items
)
+
beam_size_
,
std
::
end
(
items
),
[](
const
Item
&
a
,
const
Item
&
b
)
{
return
a
.
score
>
b
.
score
;
});
// prune the top beam_size items.
if
(
items
.
size
()
>
beam_size_
)
{
items
.
resize
(
beam_size_
);
...
...
@@ -218,16 +215,27 @@ class BeamSearchOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void
Make
()
override
{
// inputs and outputs stored in proto
AddInput
(
"pre_ids"
,
"ids in the previous step"
);
AddInput
(
"pre_scores"
,
"accumulated scores in the previous step"
);
AddInput
(
"ids"
,
"a LoDTensor of shape of [None,k]"
);
AddInput
(
"pre_ids"
,
"(LoDTensor) The LoDTensor containing the selected ids at the "
"previous step. It should be a tensor with shape (batch_size, 1) "
"and lod `[[0, 1, ... , batch_size], [0, 1, ..., batch_size]]` at "
"the first step."
);
AddInput
(
"pre_scores"
,
"(LoDTensor) The LoDTensor containing the accumulated "
"scores corresponding to the selected ids at the previous step."
);
AddInput
(
"ids"
,
"(LoDTensor) The LoDTensor containing the candidates ids. Its "
"shape should be (batch_size * beam_size, K), where K is supposed to "
"be beam_size."
);
AddInput
(
"scores"
,
"a LoDTensor that has the same shape and LoD with `ids`"
);
"(LoDTensor) The LodTensor containing the accumulated scores "
"corresponding to Input(ids) and its shape is the same as the "
"shape of Input(ids)."
);
AddOutput
(
"selected_ids"
,
"
a LoDTensor that stores the IDs selected by beam search
"
);
AddOutput
(
"selected_scores"
,
"a LoDTensor that has the same shape and LoD with `selected_ids`
"
);
"
A LodTensor that stores the IDs selected by beam search.
"
);
AddOutput
(
"selected_scores"
,
"A LoDTensor containing the accumulated scores corresponding to "
"Output(selected_ids).
"
);
// Attributes stored in AttributeMap
AddAttr
<
int
>
(
"level"
,
"the level of LoDTensor"
);
...
...
@@ -235,8 +243,21 @@ class BeamSearchOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr
<
int
>
(
"end_id"
,
"the token id which indicates the end of a sequence"
);
AddComment
(
"This is a beam search operator that help to generate sequences."
);
AddComment
(
R"DOC(
This operator does the search in beams for one time step.
Specifically, it selects the top-K candidate word ids of current step from
Input(ids) according to their Input(scores) for all source sentences,
where K is Attr(beam_size) and Input(ids), Input(scores) are predicted results
from the computation cell. Additionally, Input(pre_ids) and Input(pre_scores)
are the output of beam_search at previous step, they are needed for special use
to handle ended candidate translations. The paths linking prefixes and selected
candidates are organized and reserved in lod.
Note that the Input(scores) passed in should be accumulated scores, and
length penalty should be done with extra operators before calculating the
accumulated scores if needed, also suggest finding top-K before it and
using the top-K candidates following.
)DOC"
);
}
};
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
592f84a4
...
...
@@ -1687,6 +1687,40 @@ def layer_norm(input,
def
beam_search_decode
(
ids
,
scores
,
beam_size
,
end_id
,
name
=
None
):
"""
Beam Search Decode Layer. This layer constructs the full hypotheses for
each source sentence by walking back along the LoDTensorArray :attr:`ids`
whose lods can be used to restore the path in the beam search tree.
Please see the following demo for a full beam search usage example:
fluid/tests/book/test_machine_translation.py
Args:
ids(Variable): The LodTensorArray variable containing the selected ids
of all steps.
scores(Variable): The LodTensorArray variable containing the selected
scores of all steps.
beam_size(int): The beam width used in beam search.
end_id(int): The id of end token.
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
Variable: The LodTensor pair containing the generated id sequences
\
and the corresponding scores. The shapes and lods of the two
\
LodTensors are the same. The lod level is 2 and the two levels
\
separately indicate how many hypotheses each source sentence has
\
and how many ids each hypothesis has.
Examples:
.. code-block:: python
# Suppose `ids` and `scores` are LodTensorArray variables reserving
# the selected ids and scores of all steps
finished_ids, finished_scores = layers.beam_search_decode(
ids, scores, beam_size=5, end_id=0)
"""
helper
=
LayerHelper
(
'beam_search_decode'
,
**
locals
())
sentence_ids
=
helper
.
create_tmp_variable
(
dtype
=
ids
.
dtype
)
sentence_scores
=
helper
.
create_tmp_variable
(
dtype
=
ids
.
dtype
)
...
...
@@ -1928,10 +1962,83 @@ def sequence_expand(x, y, ref_level=-1, name=None):
return
tmp
def
beam_search
(
pre_ids
,
pre_scores
,
ids
,
scores
,
beam_size
,
end_id
,
level
=
0
):
'''
This function implements the beam search algorithm.
'''
def
beam_search
(
pre_ids
,
pre_scores
,
ids
,
scores
,
beam_size
,
end_id
,
level
=
0
,
name
=
None
):
"""
Beam Search Layer. This layer does the search in beams for one time step.
Specifically, it selects the top-K candidate word ids of current step from
:attr:`ids` according to their :attr:`scores` for all source sentences,
where K is :attr:`beam_size` and :attr:`ids, scores` are predicted results
from the computation cell. Additionally, :attr:`pre_ids` and
:attr:`pre_scores` are the output of beam_search at previous step, they are
needed for special use to handle ended candidate translations.
Note that the :attr:`scores` passed in should be accumulated scores, and
length penalty should be done with extra operators before calculating the
accumulated scores if needed, also suggest finding top-K before it and
using the top-K candidates following.
Please see the following demo for a full beam search usage example:
fluid/tests/book/test_machine_translation.py
Args:
pre_ids(Variable): The LodTensor variable which is the output of
beam_search at previous step. It should be a LodTensor with shape
:math:`(batch_size, 1)` and lod
:math:`[[0, 1, ... , batch_size], [0, 1, ..., batch_size]]` at the
first step.
pre_scores(Variable): The LodTensor variable which is the output of
beam_search at previous step.
ids(Variable): The LodTensor variable containing the candidates ids.
Its shape should be :math:`(batch_size
\\
times beam_size, K)`,
where :math:`K` is supposed to be :attr:`beam_size`.
scores(Variable): The LodTensor variable containing the accumulated
scores corresponding to :attr:`ids` and its shape is the same as
the shape of :attr:`ids`.
beam_size(int): The beam width used in beam search.
end_id(int): The id of end token.
level(int, default 0): It can be ignored and mustn't change currently.
It means the source level of lod, which is explained as following.
The lod level of :attr:`ids` should be 2. The first level is source
level which describes how many prefixes (branches) for each source
sentence (beam), and the second level is sentence level which
describes how these candidates belong to the prefix. The paths
linking prefixes and selected candidates are organized and reserved
in lod.
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
Variable: The LodTensor pair containing the selected ids and the
\
corresponding scores.
Examples:
.. code-block:: python
# Suppose `probs` contains predicted results from the computation
# cell and `pre_ids` and `pre_scores` are the outputs of beam_search
# at previous step.
topk_scores, topk_indices = layers.topk(probs, k=beam_size)
accu_scores = layers.elementwise_add(
x=layers.log(x=topk_scores),
y=layers.reshape(
pre_scores, shape=[-1]),
axis=0)
selected_ids, selected_scores = layers.beam_search(
pre_ids=pre_ids,
pre_scores=pre_scores,
ids=topk_indices,
scores=accu_scores,
beam_size=beam_size,
end_id=end_id)
"""
helper
=
LayerHelper
(
'beam_search'
,
**
locals
())
score_type
=
scores
.
dtype
id_type
=
ids
.
dtype
...
...
python/paddle/fluid/tests/book/test_machine_translation.py
浏览文件 @
592f84a4
...
...
@@ -126,9 +126,19 @@ def decoder_decode(context, is_sparse):
current_score
=
pd
.
fc
(
input
=
current_state_with_lod
,
size
=
target_dict_dim
,
act
=
'softmax'
)
topk_scores
,
topk_indices
=
pd
.
topk
(
current_score
,
k
=
50
)
topk_scores
,
topk_indices
=
pd
.
topk
(
current_score
,
k
=
beam_size
)
# calculate accumulated scores after topk to reduce computation cost
accu_scores
=
pd
.
elementwise_add
(
x
=
pd
.
log
(
topk_scores
),
y
=
pd
.
reshape
(
pre_score
,
shape
=
[
-
1
]),
axis
=
0
)
selected_ids
,
selected_scores
=
pd
.
beam_search
(
pre_ids
,
topk_indices
,
topk_scores
,
beam_size
,
end_id
=
10
,
level
=
0
)
pre_ids
,
pre_score
,
topk_indices
,
accu_scores
,
beam_size
,
end_id
=
10
,
level
=
0
)
pd
.
increment
(
x
=
counter
,
value
=
1
,
in_place
=
True
)
...
...
@@ -140,7 +150,7 @@ def decoder_decode(context, is_sparse):
pd
.
less_than
(
x
=
counter
,
y
=
array_len
,
cond
=
cond
)
translation_ids
,
translation_scores
=
pd
.
beam_search_decode
(
ids
=
ids_array
,
scores
=
scores_array
)
ids
=
ids_array
,
scores
=
scores_array
,
beam_size
=
beam_size
,
end_id
=
10
)
# return init_ids, init_scores
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录