Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
b22cd96a
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
b22cd96a
编写于
4月 11, 2017
作者:
J
jacquesqiao
提交者:
GitHub
4月 11, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1761 from jacquesqiao/beam_search
support Beam search in v2 api
上级
4b5a4322
b669b5fc
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
247 addition
and
109 deletion
+247
-109
demo/seqToseq/api_train_v2.py
demo/seqToseq/api_train_v2.py
+56
-25
python/paddle/v2/config_base.py
python/paddle/v2/config_base.py
+23
-8
python/paddle/v2/layer.py
python/paddle/v2/layer.py
+166
-72
python/paddle/v2/topology.py
python/paddle/v2/topology.py
+2
-4
未找到文件。
demo/seqToseq/api_train_v2.py
浏览文件 @
b22cd96a
import
sys
import
paddle.v2
as
paddle
def
seqToseq_net
(
source_dict_dim
,
target_dict_dim
):
def
seqToseq_net
(
source_dict_dim
,
target_dict_dim
,
is_generating
=
False
):
### Network Architecture
word_vector_dim
=
512
# dimension of word vector
decoder_size
=
512
# dimension of hidden unit in GRU Decoder network
encoder_size
=
512
# dimension of hidden unit in GRU Encoder network
beam_size
=
3
max_length
=
250
#### Encoder
src_word_id
=
paddle
.
layer
.
data
(
name
=
'source_language_word'
,
...
...
@@ -67,30 +71,57 @@ def seqToseq_net(source_dict_dim, target_dict_dim):
group_input2
=
paddle
.
layer
.
StaticInputV2
(
input
=
encoded_proj
,
is_seq
=
True
)
group_inputs
=
[
group_input1
,
group_input2
]
trg_embedding
=
paddle
.
layer
.
embedding
(
input
=
paddle
.
layer
.
data
(
name
=
'target_language_word'
,
type
=
paddle
.
data_type
.
integer_value_sequence
(
target_dict_dim
)),
size
=
word_vector_dim
,
param_attr
=
paddle
.
attr
.
ParamAttr
(
name
=
'_target_language_embedding'
))
group_inputs
.
append
(
trg_embedding
)
# For decoder equipped with attention mechanism, in training,
# target embeding (the groudtruth) is the data input,
# while encoded source sequence is accessed to as an unbounded memory.
# Here, the StaticInput defines a read-only memory
# for the recurrent_group.
decoder
=
paddle
.
layer
.
recurrent_group
(
name
=
decoder_group_name
,
step
=
gru_decoder_with_attention
,
input
=
group_inputs
)
lbl
=
paddle
.
layer
.
data
(
name
=
'target_language_next_word'
,
type
=
paddle
.
data_type
.
integer_value_sequence
(
target_dict_dim
))
cost
=
paddle
.
layer
.
classification_cost
(
input
=
decoder
,
label
=
lbl
)
return
cost
if
not
is_generating
:
trg_embedding
=
paddle
.
layer
.
embedding
(
input
=
paddle
.
layer
.
data
(
name
=
'target_language_word'
,
type
=
paddle
.
data_type
.
integer_value_sequence
(
target_dict_dim
)),
size
=
word_vector_dim
,
param_attr
=
paddle
.
attr
.
ParamAttr
(
name
=
'_target_language_embedding'
))
group_inputs
.
append
(
trg_embedding
)
# For decoder equipped with attention mechanism, in training,
# target embeding (the groudtruth) is the data input,
# while encoded source sequence is accessed to as an unbounded memory.
# Here, the StaticInput defines a read-only memory
# for the recurrent_group.
decoder
=
paddle
.
layer
.
recurrent_group
(
name
=
decoder_group_name
,
step
=
gru_decoder_with_attention
,
input
=
group_inputs
)
lbl
=
paddle
.
layer
.
data
(
name
=
'target_language_next_word'
,
type
=
paddle
.
data_type
.
integer_value_sequence
(
target_dict_dim
))
cost
=
paddle
.
layer
.
classification_cost
(
input
=
decoder
,
label
=
lbl
)
return
cost
else
:
# In generation, the decoder predicts a next target word based on
# the encoded source sequence and the last generated target word.
# The encoded source sequence (encoder's output) must be specified by
# StaticInput, which is a read-only memory.
# Embedding of the last generated word is automatically gotten by
# GeneratedInputs, which is initialized by a start mark, such as <s>,
# and must be included in generation.
trg_embedding
=
paddle
.
layer
.
GeneratedInputV2
(
size
=
target_dict_dim
,
embedding_name
=
'_target_language_embedding'
,
embedding_size
=
word_vector_dim
)
group_inputs
.
append
(
trg_embedding
)
beam_gen
=
paddle
.
layer
.
beam_search
(
name
=
decoder_group_name
,
step
=
gru_decoder_with_attention
,
input
=
group_inputs
,
bos_id
=
0
,
eos_id
=
1
,
beam_size
=
beam_size
,
max_length
=
max_length
)
return
beam_gen
def
main
():
...
...
python/paddle/v2/config_base.py
浏览文件 @
b22cd96a
...
...
@@ -67,7 +67,16 @@ class Layer(object):
self
.
name
=
name
self
.
__context__
=
{}
self
.
__parent_layers__
=
parent_layers
self
.
__children_layers__
=
[]
# used for evaluator.
# some layer may have some extra parent layer
self
.
__extra_parent__
=
[]
# used for evaluator.
self
.
__children_layers__
=
[]
def
extra_parent
(
self
):
return
self
.
__extra_parent__
def
append_extra_parent
(
self
,
parent
):
self
.
__extra_parent__
.
append
(
parent
)
def
append_child
(
self
,
layer
,
parent_names
):
self
.
__children_layers__
.
append
((
layer
,
parent_names
))
...
...
@@ -78,14 +87,20 @@ class Layer(object):
"""
self
.
__context__
=
context
#
short cut if myself
is parsed before.
#
STEP: short cut if this layer
is parsed before.
if
self
.
context_name
()
in
context
:
if
self
.
use_context_name
():
return
context
[
self
.
context_name
()]
else
:
return
context
[
self
.
name
]
# parse parent before myself
# STEP: parse extra_parent that is not used by this layer but must
# be parsed before this layer.
for
p
in
self
.
__extra_parent__
:
p
.
to_proto
(
context
=
context
)
# STEP: parse parent that is used by this layer, get the result and
# insert into kwargs of the next layer's to_proto_impl method.
kwargs
=
dict
()
for
layer_name
in
self
.
__parent_layers__
:
if
not
isinstance
(
self
.
__parent_layers__
[
layer_name
],
...
...
@@ -97,14 +112,13 @@ class Layer(object):
self
.
__parent_layers__
[
layer_name
])
kwargs
[
layer_name
]
=
v1_layer
#
parse myself
.
#
STEP: parse myself and add myself into context
.
ret_val
=
self
.
to_proto_impl
(
**
kwargs
)
if
self
.
context_name
()
is
not
None
and
\
self
.
context_name
()
not
in
context
:
if
self
.
context_name
()
is
not
None
\
and
self
.
context_name
()
not
in
context
:
context
[
self
.
context_name
()]
=
ret_val
#
parse children
.
#
STEP: parse children that should be pased after this layer
.
for
layer
,
pnames
in
self
.
__children_layers__
:
drop
=
False
...
...
@@ -117,6 +131,7 @@ class Layer(object):
continue
layer
.
to_proto
(
context
=
context
)
# STEP: return v1 layer result
if
self
.
context_name
()
is
None
:
return
ret_val
elif
self
.
use_context_name
():
...
...
python/paddle/v2/layer.py
浏览文件 @
b22cd96a
...
...
@@ -33,22 +33,25 @@ The primary usage shows below.
import
collections
import
inspect
from
config_base
import
Layer
,
__convert_to_v2__
import
re
import
paddle.trainer_config_helpers
as
conf_helps
from
paddle.trainer.config_parser
import
\
RecurrentLayerGroupWithoutOutLinksBegin
,
RecurrentLayerGroupSetOutLink
,
\
RecurrentLayerGroupEnd
,
model_type
from
paddle.trainer_config_helpers.config_parser_utils
import
\
parse_network_config
as
__parse__
from
paddle.trainer_config_helpers.default_decorators
import
wrap_act_default
from
paddle.trainer_config_helpers.default_decorators
import
\
wrap_bias_attr_default
from
paddle.trainer_config_helpers.default_decorators
import
wrap_name_default
from
paddle.trainer_config_helpers.layers
import
RecurrentLayerGroupSetGenerator
,
Generator
from
paddle.trainer_config_helpers.layers
import
layer_support
from
paddle.trainer.config_parser
import
\
RecurrentLayerGroupWithoutOutLinksBegin
,
RecurrentLayerGroupSetOutLink
,
\
RecurrentLayerGroupEnd
,
model_type
import
activation
import
re
import
attr
import
data_type
from
config_base
import
Layer
,
__convert_to_v2__
__all__
=
[
'parse_network'
,
'data'
]
...
...
@@ -132,54 +135,23 @@ class DataLayerV2(Layer):
return
doc
class
WithExtraParent
(
Layer
):
def
extra_parent
(
self
):
return
self
.
__extra_parent__
def
__init__
(
self
,
name
=
None
,
parent_layers
=
None
):
self
.
__extra_parent__
=
[]
super
(
WithExtraParent
,
self
).
__init__
(
name
=
name
,
parent_layers
=
parent_layers
)
def
append_extra_parent
(
self
,
parent
):
self
.
__extra_parent__
.
append
(
parent
)
def
to_proto
(
self
,
context
):
class
MemoryV2
(
Layer
):
def
__init__
(
self
,
name
,
extra_input
=
None
,
**
kwargs
):
"""
function to set proto attribute
Init memory object, if memory is inited inside recurrent_group step
function, it may depend on a boot_layer that should be initialized
outside recurrent_group, so we:
1. add RecurrentLayerInput to extra_parent of self.
2. add boot_layer to the extra_parent of RecurrentLayerInput.
:param extra_input: list of RecurrentLayerInput
:type extra_input: [RecurrentLayerInput]
"""
kwargs
=
dict
()
for
p
in
self
.
__extra_parent__
:
p
.
to_proto
(
context
=
context
)
for
layer_name
in
self
.
__parent_layers__
:
if
not
isinstance
(
self
.
__parent_layers__
[
layer_name
],
collections
.
Sequence
):
v1_layer
=
self
.
__parent_layers__
[
layer_name
].
to_proto
(
context
=
context
)
else
:
v1_layer
=
map
(
lambda
x
:
x
.
to_proto
(
context
=
context
),
self
.
__parent_layers__
[
layer_name
])
kwargs
[
layer_name
]
=
v1_layer
if
self
.
context_name
()
is
None
:
return
self
.
to_proto_impl
(
context
=
context
,
**
kwargs
)
elif
self
.
context_name
()
not
in
context
:
context
[
self
.
context_name
()]
=
self
.
to_proto_impl
(
context
=
context
,
**
kwargs
)
if
self
.
use_context_name
():
return
context
[
self
.
context_name
()]
else
:
return
context
[
self
.
name
]
class
MemoryV2
(
WithExtraParent
):
def
__init__
(
self
,
name
,
**
kwargs
):
self
.
name
=
name
super
(
MemoryV2
,
self
).
__init__
(
name
=
name
,
parent_layers
=
dict
())
self
.
__kwargs__
=
kwargs
self
.
__boot_layer_name__
=
None
if
'boot_layer'
in
kwargs
:
begin_of_current_rnn
=
[]
# TODO(yuyang18): Fix inspect, it could be wrong when user invoke a
...
...
@@ -202,11 +174,10 @@ class MemoryV2(WithExtraParent):
assert
begin_of_current_rnn
is
not
None
for
extra
in
begin_of_current_rnn
:
self
.
append_extra_parent
(
extra
)
assert
isinstance
(
extra
,
WithExtraParent
)
extra
.
append_extra_parent
(
kwargs
[
'boot_layer'
])
self
.
__boot_layer_name__
=
kwargs
[
'boot_layer'
].
name
def
to_proto_impl
(
self
,
context
,
**
kwargs
):
def
to_proto_impl
(
self
,
**
kwargs
):
args
=
dict
()
for
each
in
kwargs
:
args
[
each
]
=
kwargs
[
each
]
...
...
@@ -214,7 +185,7 @@ class MemoryV2(WithExtraParent):
args
[
each
]
=
self
.
__kwargs__
[
each
]
if
self
.
__boot_layer_name__
is
not
None
:
args
[
'boot_layer'
]
=
context
[
self
.
__boot_layer_name__
]
args
[
'boot_layer'
]
=
self
.
__context__
[
self
.
__boot_layer_name__
]
size
=
args
.
get
(
'size'
,
None
)
if
size
is
not
None
:
...
...
@@ -236,22 +207,6 @@ class MemoryV2(WithExtraParent):
return
True
class
LayerOutputV2
(
Layer
):
"""
LayerOutputV2 is used to store the result of LayerOutput in v1 api.
It will not store it's parents because layer_output has been parsed already.
"""
def
__init__
(
self
,
layer_output
):
assert
isinstance
(
layer_output
,
conf_helps
.
LayerOutput
)
self
.
layer_output
=
layer_output
super
(
LayerOutputV2
,
self
).
__init__
(
name
=
layer_output
.
name
,
parent_layers
=
dict
())
def
to_proto_impl
(
self
):
return
self
.
layer_output
class
StaticInputV2
(
object
):
def
__init__
(
self
,
input
,
is_seq
=
False
,
size
=
None
):
assert
isinstance
(
input
,
LayerV2
)
...
...
@@ -263,6 +218,66 @@ class StaticInputV2(object):
# assert input.size is not None or size is not None
class
BaseGeneratedInputV2
(
object
):
def
__init__
(
self
):
self
.
bos_id
=
None
self
.
eos_id
=
None
def
before_real_step
(
self
):
raise
NotImplementedError
()
def
after_real_step
(
self
,
*
args
):
raise
NotImplementedError
()
class
GeneratedInputV2
(
BaseGeneratedInputV2
):
def
__init__
(
self
,
size
,
embedding_name
,
embedding_size
):
super
(
GeneratedInputV2
,
self
).
__init__
()
self
.
size
=
size
self
.
embedding_name
=
embedding_name
self
.
embedding_size
=
embedding_size
def
after_real_step
(
self
,
input
):
return
max_id
(
input
=
input
,
name
=
'__beam_search_predict__'
)
def
before_real_step
(
self
):
predict_id
=
memory
(
name
=
'__beam_search_predict__'
,
size
=
self
.
size
,
boot_with_const_id
=
self
.
bos_id
)
trg_emb
=
embedding
(
input
=
predict_id
,
size
=
self
.
embedding_size
,
param_attr
=
attr
.
ParamAttr
(
name
=
self
.
embedding_name
))
return
trg_emb
class
RecurrentLayerGroupSetGeneratorV2
(
Layer
):
def
__init__
(
self
,
eos_name
,
max_length
,
beam_size
,
num_results_per_sample
):
self
.
eos_name
=
eos_name
self
.
max_length
=
max_length
self
.
beam_size
=
beam_size
self
.
num_results_per_sample
=
num_results_per_sample
super
(
RecurrentLayerGroupSetGeneratorV2
,
self
).
__init__
(
name
=
eos_name
,
parent_layers
=
{})
def
to_proto_impl
(
self
,
**
kwargs
):
RecurrentLayerGroupSetGenerator
(
Generator
(
eos_layer_name
=
self
.
eos_name
,
max_num_frames
=
self
.
max_length
,
beam_size
=
self
.
beam_size
,
num_results_per_sample
=
self
.
num_results_per_sample
))
return
self
def
context_name
(
self
):
return
self
.
eos_name
+
".fake"
def
use_context_name
(
self
):
return
True
class
MixedLayerV2
(
Layer
):
"""
This class is use to support `with` grammar. If not, the following code
...
...
@@ -341,18 +356,24 @@ def mixed(size=0,
return
MixedLayerV2
(
size
,
input
,
name
,
act
,
bias_attr
,
layer_attr
)
class
RecurrentLayerInput
(
WithExtraParent
):
class
RecurrentLayerInput
(
Layer
):
def
__init__
(
self
,
recurrent_name
,
index
,
parent_layers
):
assert
len
(
parent_layers
)
==
1
self
.
__parents__
=
parent_layers
.
values
()[
0
]
super
(
RecurrentLayerInput
,
self
).
__init__
(
name
=
self
.
__parents__
[
index
].
name
,
parent_layers
=
parent_layers
)
parents_len
=
len
(
parent_layers
)
assert
parents_len
<=
1
if
parents_len
==
0
:
self
.
__parents__
=
[]
else
:
self
.
__parents__
=
parent_layers
.
values
()[
0
]
self
.
__recurrent_name__
=
recurrent_name
name
=
self
.
__parents__
[
index
].
name
if
index
>=
0
else
self
.
context_name
()
super
(
RecurrentLayerInput
,
self
).
__init__
(
name
=
name
,
parent_layers
=
parent_layers
)
def
context_name
(
self
):
return
self
.
__recurrent_name__
+
".begin"
def
to_proto_impl
(
self
,
context
,
**
kwargs
):
def
to_proto_impl
(
self
,
**
kwargs
):
model_type
(
'recurrent_nn'
)
RecurrentLayerGroupWithoutOutLinksBegin
(
name
=
self
.
__recurrent_name__
,
...
...
@@ -449,6 +470,11 @@ def recurrent_group(step, input, name=None):
for
i
in
xrange
(
len
(
non_static_inputs
))
]
extra_input
=
None
if
len
(
non_static_inputs
)
==
0
:
extra_input
=
RecurrentLayerInput
(
recurrent_name
=
name
,
index
=-
1
,
parent_layers
=
{})
def
__real_step__
(
*
args
):
rnn_input
=
list
(
args
)
static_inputs
=
filter
(
lambda
x
:
isinstance
(
x
,
StaticInputV2
),
input
)
...
...
@@ -456,6 +482,7 @@ def recurrent_group(step, input, name=None):
mem_name
=
"__%s_memory__"
%
static_input
.
input
.
name
mem
=
memory
(
name
=
mem_name
,
extra_input
=
extra_input
,
is_seq
=
static_input
.
is_seq
,
size
=
static_input
.
input
.
calculate_size
,
boot_layer
=
static_input
.
input
)
...
...
@@ -485,6 +512,73 @@ def recurrent_group(step, input, name=None):
return
retv
@
wrap_name_default
()
def
beam_search
(
step
,
input
,
bos_id
,
eos_id
,
beam_size
,
max_length
=
500
,
name
=
None
,
num_results_per_sample
=
None
):
if
num_results_per_sample
is
None
:
num_results_per_sample
=
beam_size
assert
num_results_per_sample
<=
beam_size
# logger.warning("num_results_per_sample should be less than beam_size")
if
isinstance
(
input
,
StaticInputV2
)
or
isinstance
(
input
,
BaseGeneratedInputV2
):
input
=
[
input
]
generated_input_index
=
-
1
real_input
=
[]
for
i
,
each_input
in
enumerate
(
input
):
assert
isinstance
(
each_input
,
StaticInputV2
)
or
isinstance
(
each_input
,
BaseGeneratedInputV2
)
if
isinstance
(
each_input
,
BaseGeneratedInputV2
):
assert
generated_input_index
==
-
1
generated_input_index
=
i
else
:
real_input
.
append
(
each_input
)
assert
generated_input_index
!=
-
1
gipt
=
input
[
generated_input_index
]
assert
isinstance
(
gipt
,
BaseGeneratedInputV2
)
gipt
.
bos_id
=
bos_id
gipt
.
eos_id
=
eos_id
def
__real_step__
(
*
args
):
eos_name
=
"__%s_eos_layer__"
%
name
generator
=
RecurrentLayerGroupSetGeneratorV2
(
eos_name
,
max_length
,
beam_size
,
num_results_per_sample
)
args
=
list
(
args
)
before_step_layer
=
gipt
.
before_real_step
()
before_step_layer
.
append_child
(
layer
=
generator
,
parent_names
=
[
before_step_layer
.
name
])
args
.
insert
(
generated_input_index
,
before_step_layer
)
predict
=
gipt
.
after_real_step
(
step
(
*
args
))
eos_layer
=
eos
(
input
=
predict
,
eos_id
=
eos_id
,
name
=
eos_name
)
predict
.
append_child
(
layer
=
eos_layer
,
parent_names
=
[
predict
.
name
])
return
predict
# tmp = paddle.layer.recurrent_group(
# step=__real_step__,
# input=real_input,
# reverse=False,
# name=name,
# is_generating=True)
tmp
=
recurrent_group
(
step
=
__real_step__
,
input
=
real_input
,
name
=
name
)
return
tmp
__projection_names__
=
filter
(
lambda
x
:
x
.
endswith
(
'_projection'
),
dir
(
conf_helps
))
...
...
python/paddle/v2/topology.py
浏览文件 @
b22cd96a
...
...
@@ -17,7 +17,6 @@ import collections
from
paddle.proto.ModelConfig_pb2
import
ModelConfig
import
layer
as
v2_layer
from
layer
import
WithExtraParent
__all__
=
[
'Topology'
]
...
...
@@ -41,9 +40,8 @@ def __bfs_travel__(callback, *layers):
__break__
=
callback
(
each_layer
)
if
__break__
:
return
__layers__
=
each_layer
.
__parent_layers__
.
values
()
if
isinstance
(
each_layer
,
WithExtraParent
):
__layers__
=
__layers__
+
each_layer
.
extra_parent
()
__layers__
=
each_layer
.
__parent_layers__
.
values
()
+
\
each_layer
.
extra_parent
()
__bfs_travel__
(
callback
,
*
__layers__
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录