Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
6a6e1c74
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
6a6e1c74
编写于
7月 20, 2017
作者:
C
Cao Ying
提交者:
GitHub
7月 20, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #2944 from lcy-seso/fix_recurrent_parse_bug
fix recurrent_group parsing bug.
上级
75ad5a6a
1b6faffe
变更
5
显示空白变更内容
内联
并排
Showing
5 changed file
with
86 addition
and
97 deletion
+86
-97
python/paddle/trainer_config_helpers/layers.py
python/paddle/trainer_config_helpers/layers.py
+41
-47
python/paddle/trainer_config_helpers/networks.py
python/paddle/trainer_config_helpers/networks.py
+39
-45
python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr
...onfig_helpers/tests/configs/protostr/shared_lstm.protostr
+2
-2
python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
...ig_helpers/tests/configs/protostr/test_rnn_group.protostr
+1
-1
python/paddle/trainer_config_helpers/tests/configs/shared_lstm.py
...addle/trainer_config_helpers/tests/configs/shared_lstm.py
+3
-2
未找到文件。
python/paddle/trainer_config_helpers/layers.py
浏览文件 @
6a6e1c74
...
...
@@ -3173,11 +3173,11 @@ def memory(name,
@
wrap_bias_attr_default
()
@
wrap_act_default
(
param_names
=
[
'gate_act'
,
'state_act'
],
act
=
Sigmoid
Activation
())
@
wrap_act_default
(
param_names
=
[
'gate_act'
],
act
=
SigmoidActivation
())
@
wrap_act_default
(
param_names
=
[
'state_act'
],
act
=
Tanh
Activation
())
@
wrap_act_default
(
act
=
TanhActivation
())
@
wrap_name_default
(
'lstm_step'
)
@
layer_support
()
@
layer_support
(
ERROR_CLIPPING
,
DROPOUT
)
def
lstm_step_layer
(
input
,
state
,
size
=
None
,
...
...
@@ -3531,12 +3531,7 @@ def SubsequenceInput(input):
@
wrap_name_default
(
"recurrent_group"
)
def
recurrent_group
(
step
,
input
,
reverse
=
False
,
name
=
None
,
targetInlink
=
None
,
is_generating
=
False
):
def
recurrent_group
(
step
,
input
,
reverse
=
False
,
name
=
None
,
targetInlink
=
None
):
"""
Recurrent layer group is an extremely flexible recurrent unit in
PaddlePaddle. As long as the user defines the calculation done within a
...
...
@@ -3602,21 +3597,12 @@ def recurrent_group(step,
:type targetInlink: LayerOutput|SubsequenceInput
:param is_generating: If is generating, none of input type should be LayerOutput;
else, for training or testing, one of the input type must
be LayerOutput.
:type is_generating: bool
:return: LayerOutput object.
:rtype: LayerOutput
"""
model_type
(
'recurrent_nn'
)
def
is_single_input
(
x
):
return
isinstance
(
x
,
LayerOutput
)
or
isinstance
(
x
,
StaticInput
)
if
is_single_input
(
input
):
if
isinstance
(
input
,
LayerOutput
)
or
isinstance
(
input
,
StaticInput
):
input
=
[
input
]
assert
isinstance
(
input
,
collections
.
Sequence
)
...
...
@@ -3630,13 +3616,8 @@ def recurrent_group(step,
in_links
=
map
(
lambda
x
:
x
.
name
,
in_links
),
seq_reversed
=
reverse
)
in_args
=
[]
has_LayerOutput
=
False
for
each_input
in
input
:
assert
is_single_input
(
each_input
)
if
isinstance
(
each_input
,
LayerOutput
):
in_args
.
append
(
each_input
)
has_LayerOutput
=
True
else
:
# StaticInput
if
isinstance
(
each_input
,
StaticInput
):
# StaticInput
mem_name
=
"__%s_memory__"
%
each_input
.
input
.
name
mem
=
memory
(
name
=
None
,
...
...
@@ -3644,24 +3625,26 @@ def recurrent_group(step,
boot_layer
=
each_input
.
input
)
mem
.
set_input
(
mem
)
in_args
.
append
(
mem
)
assert
(
is_generating
!=
has_LayerOut
put
)
else
:
in_args
.
append
(
each_in
put
)
layer_outs
=
step
(
*
in_args
)
if
isinstance
(
layer_outs
,
LayerOutput
):
layer_outs
=
[
layer_outs
]
for
ot
in
layer_outs
:
assert
isinstance
(
ot
,
LayerOutput
)
ot
.
reverse
=
reverse
RecurrentLayerGroupSetOutLink
(
ot
.
name
)
for
layer_out
in
layer_outs
:
assert
isinstance
(
layer_out
,
LayerOutput
),
"Type of step function's return value must be LayerOutput."
layer_out
.
reverse
=
reverse
RecurrentLayerGroupSetOutLink
(
layer_out
.
name
)
RecurrentLayerGroupEnd
(
name
=
name
)
for
layer_out
in
layer_outs
:
# The
e previous full_name is the name is the rnn group
# We need a full_name outside the r
nn group
# The
previous full_name is the name inside the recurrent group.
# We need a full_name outside the r
ecurrent group.
layer_out
.
full_name
=
MakeLayerNameInSubmodel
(
layer_out
.
name
)
if
len
(
layer_outs
)
==
1
:
...
...
@@ -3684,7 +3667,20 @@ class BaseGeneratedInput(object):
class
GeneratedInput
(
BaseGeneratedInput
):
def
after_real_step
(
self
,
input
):
return
maxid_layer
(
input
=
input
,
name
=
'__beam_search_predict__'
)
if
isinstance
(
input
,
LayerOutput
):
input
=
[
input
]
elif
isinstance
(
input
,
collections
.
Sequence
):
input
=
list
(
input
)
if
len
(
input
)
>
1
:
logger
.
info
(
(
"More than one layers inside the recurrent_group "
"are returned as outputs of the entire recurrent_group "
"PLEASE garantee the first output is probability of "
"the predicted next word."
))
return
[
maxid_layer
(
input
=
input
[
0
],
name
=
'__beam_search_predict__'
)]
+
(
input
[
1
:]
if
len
(
input
)
>
1
else
[])
def
before_real_step
(
self
):
predict_id
=
memory
(
...
...
@@ -3871,6 +3867,7 @@ def beam_search(step,
:type step: callable
:param input: Input data for the recurrent unit, which should include the
previously generated words as a GeneratedInput object.
In beam_search, none of the input's type should be LayerOutput.
:type input: list
:param bos_id: Index of the start symbol in the dictionary. The start symbol
is a special token for NLP task, which indicates the
...
...
@@ -3912,15 +3909,18 @@ def beam_search(step,
real_input
=
[]
for
i
,
each_input
in
enumerate
(
input
):
assert
isinstance
(
each_input
,
StaticInput
)
or
isinstance
(
each_input
,
BaseGeneratedInput
)
assert
not
isinstance
(
each_input
,
LayerOutput
),
(
"in beam_search, "
"none of the input should has a type of LayerOutput."
)
if
isinstance
(
each_input
,
BaseGeneratedInput
):
assert
generated_input_index
==
-
1
assert
generated_input_index
==
-
1
,
(
"recurrent_group accepts "
"only one GeneratedInput."
)
generated_input_index
=
i
else
:
real_input
.
append
(
each_input
)
assert
generated_input_index
!=
-
1
assert
generated_input_index
!=
-
1
,
"No GeneratedInput is given."
gipt
=
input
[
generated_input_index
]
...
...
@@ -3941,17 +3941,11 @@ def beam_search(step,
predict
=
gipt
.
after_real_step
(
step
(
*
args
))
eos_layer
(
input
=
predict
,
eos_id
=
eos_id
,
name
=
eos_name
)
eos_layer
(
input
=
predict
[
0
]
,
eos_id
=
eos_id
,
name
=
eos_name
)
return
predict
tmp
=
recurrent_group
(
step
=
__real_step__
,
input
=
real_input
,
reverse
=
False
,
name
=
name
,
is_generating
=
True
)
return
tmp
return
recurrent_group
(
step
=
__real_step__
,
input
=
real_input
,
reverse
=
False
,
name
=
name
)
def
__cost_input__
(
input
,
label
,
weight
=
None
):
...
...
python/paddle/trainer_config_helpers/networks.py
浏览文件 @
6a6e1c74
...
...
@@ -614,18 +614,17 @@ def simple_lstm(input,
@
wrap_name_default
(
'lstm_unit'
)
def
lstmemory_unit
(
input
,
memory_boot
=
None
,
out_memory
=
None
,
name
=
None
,
size
=
None
,
param_attr
=
None
,
act
=
None
,
gate_act
=
None
,
state_act
=
None
,
mixed_bias_attr
=
None
,
input_proj_bias_attr
=
None
,
input_proj_layer_attr
=
None
,
lstm_bias_attr
=
None
,
mixed_layer_attr
=
None
,
lstm_layer_attr
=
None
,
get_output_layer_attr
=
None
):
lstm_layer_attr
=
None
):
"""
Define calculations that a LSTM unit performs during a single time step.
This function itself is not a recurrent layer, so it can not be
...
...
@@ -662,8 +661,8 @@ def lstmemory_unit(input,
:param input: input layer name.
:type input: LayerOutput
:param
memory_boot: the initialization state of the LSTM cell.
:type
memory_boot
: LayerOutput | None
:param
out_memory: output of previous time step
:type
out_memory
: LayerOutput | None
:param name: lstmemory unit name.
:type name: basestring
:param size: lstmemory unit size.
...
...
@@ -676,33 +675,35 @@ def lstmemory_unit(input,
:type gate_act: BaseActivation
:param state_act: lstm state activiation type.
:type state_act: BaseActivation
:param
mixed_bias_attr: bias parameter attribute of mixed layer
.
:param
input_proj_bias_attr: bias attribute for input-to-hidden projection
.
False means no bias, None means default bias.
:type mixed_bias_attr: ParameterAttribute|False
:type input_proj_bias_attr: ParameterAttribute|False|None
:param input_proj_layer_attr: extra layer attribute for input to hidden
projection of the LSTM unit, such as dropout, error clipping.
:type input_proj_layer_attr: ExtraLayerAttribute
:param lstm_bias_attr: bias parameter attribute of lstm layer.
False means no bias, None means default bias.
:type lstm_bias_attr: ParameterAttribute|False
:param mixed_layer_attr: mixed layer's extra attribute.
:type mixed_layer_attr: ExtraLayerAttribute
:param lstm_layer_attr: lstm layer's extra attribute.
:type lstm_layer_attr: ExtraLayerAttribute
:param get_output_layer_attr: get output layer's extra attribute.
:type get_output_layer_attr: ExtraLayerAttribute
:return: lstmemory unit name.
:rtype: LayerOutput
"""
if
size
is
None
:
assert
input
.
size
%
4
==
0
size
=
input
.
size
/
4
if
out_memory
is
None
:
out_mem
=
memory
(
name
=
name
,
size
=
size
)
state_mem
=
memory
(
name
=
"%s_state"
%
name
,
size
=
size
,
boot_layer
=
memory_boot
)
else
:
out_mem
=
out_memory
state_mem
=
memory
(
name
=
"%s_state"
%
name
,
size
=
size
)
with
mixed_layer
(
name
=
"%s_input_recurrent"
%
name
,
size
=
size
*
4
,
bias_attr
=
mixed
_bias_attr
,
layer_attr
=
mixed
_layer_attr
,
bias_attr
=
input_proj
_bias_attr
,
layer_attr
=
input_proj
_layer_attr
,
act
=
IdentityActivation
())
as
m
:
m
+=
identity_projection
(
input
=
input
)
m
+=
full_matrix_projection
(
input
=
out_mem
,
param_attr
=
param_attr
)
...
...
@@ -717,11 +718,7 @@ def lstmemory_unit(input,
gate_act
=
gate_act
,
state_act
=
state_act
,
layer_attr
=
lstm_layer_attr
)
get_output_layer
(
name
=
'%s_state'
%
name
,
input
=
lstm_out
,
arg_name
=
'state'
,
layer_attr
=
get_output_layer_attr
)
get_output_layer
(
name
=
'%s_state'
%
name
,
input
=
lstm_out
,
arg_name
=
'state'
)
return
lstm_out
...
...
@@ -730,17 +727,16 @@ def lstmemory_unit(input,
def
lstmemory_group
(
input
,
size
=
None
,
name
=
None
,
memory_boot
=
None
,
out_memory
=
None
,
reverse
=
False
,
param_attr
=
None
,
act
=
None
,
gate_act
=
None
,
state_act
=
None
,
mixed_bias_attr
=
None
,
input_proj_bias_attr
=
None
,
input_proj_layer_attr
=
None
,
lstm_bias_attr
=
None
,
mixed_layer_attr
=
None
,
lstm_layer_attr
=
None
,
get_output_layer_attr
=
None
):
lstm_layer_attr
=
None
):
"""
lstm_group is a recurrent_group version of Long Short Term Memory. It
does exactly the same calculation as the lstmemory layer (see lstmemory in
...
...
@@ -774,8 +770,8 @@ def lstmemory_group(input,
:type size: int
:param name: name of the lstmemory group.
:type name: basestring
:param
memory_boot: the initialization state of LSTM cell.
:type
memory_boot
: LayerOutput | None
:param
out_memory: output of previous time step
:type
out_memory
: LayerOutput | None
:param reverse: is lstm reversed
:type reverse: bool
:param param_attr: Parameter config, None if use default.
...
...
@@ -786,18 +782,17 @@ def lstmemory_group(input,
:type gate_act: BaseActivation
:param state_act: lstm state activiation type.
:type state_act: BaseActivation
:param mixed_bias_attr: bias parameter attribute of mixed layer.
False means no bias, None means default bias.
:type mixed_bias_attr: ParameterAttribute|False
:param lstm_bias_attr: bias parameter attribute of lstm layer.
False means no bias, None means default bias.
:type lstm_bias_attr: ParameterAttribute|False
:param mixed_layer_attr: mixed layer's extra attribute.
:type mixed_layer_attr: ExtraLayerAttribute
:param input_proj_bias_attr: bias attribute for input-to-hidden projection.
False means no bias, None means default bias.
:type input_proj_bias_attr: ParameterAttribute|False|None
:param input_proj_layer_attr: extra layer attribute for input to hidden
projection of the LSTM unit, such as dropout, error clipping.
:type input_proj_layer_attr: ExtraLayerAttribute
:param lstm_layer_attr: lstm layer's extra attribute.
:type lstm_layer_attr: ExtraLayerAttribute
:param get_output_layer_attr: get output layer's extra attribute.
:type get_output_layer_attr: ExtraLayerAttribute
:return: the lstmemory group.
:rtype: LayerOutput
"""
...
...
@@ -805,18 +800,17 @@ def lstmemory_group(input,
def
__lstm_step__
(
ipt
):
return
lstmemory_unit
(
input
=
ipt
,
memory_boot
=
memory_boot
,
name
=
name
,
size
=
size
,
mixed_bias_attr
=
mixed_bias_attr
,
mixed_layer_attr
=
mixed_layer_attr
,
param_attr
=
param_attr
,
lstm_bias_attr
=
lstm_bias_attr
,
act
=
act
,
gate_act
=
gate_act
,
state_act
=
state_act
,
out_memory
=
out_memory
,
input_proj_bias_attr
=
input_proj_bias_attr
,
input_proj_layer_attr
=
input_proj_layer_attr
,
param_attr
=
param_attr
,
lstm_layer_attr
=
lstm_layer_attr
,
get_output_layer_attr
=
get_output_layer
_attr
)
lstm_bias_attr
=
lstm_bias
_attr
)
return
recurrent_group
(
name
=
'%s_recurrent_group'
%
name
,
...
...
python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr
浏览文件 @
6a6e1c74
...
...
@@ -104,7 +104,7 @@ layers {
}
bias_parameter_name: "lstm_bias"
active_gate_type: "sigmoid"
active_state_type: "
sigmoid
"
active_state_type: "
tanh
"
}
layers {
name: "__lstm_group_0___state@__lstm_group_0___recurrent_group"
...
...
@@ -183,7 +183,7 @@ layers {
}
bias_parameter_name: "lstm_bias"
active_gate_type: "sigmoid"
active_state_type: "
sigmoid
"
active_state_type: "
tanh
"
}
layers {
name: "__lstm_group_1___state@__lstm_group_1___recurrent_group"
...
...
python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
浏览文件 @
6a6e1c74
...
...
@@ -258,7 +258,7 @@ layers {
}
bias_parameter_name: "___lstm_group_0__@__lstm_group_0___recurrent_group.wbias"
active_gate_type: "sigmoid"
active_state_type: "
sigmoid
"
active_state_type: "
tanh
"
}
layers {
name: "__lstm_group_0___state@__lstm_group_0___recurrent_group"
...
...
python/paddle/trainer_config_helpers/tests/configs/shared_lstm.py
浏览文件 @
6a6e1c74
...
...
@@ -20,12 +20,13 @@ lstm1 = lstmemory_group(
input
=
m1
,
param_attr
=
lstm_param
,
lstm_bias_attr
=
lstm_bias
,
mixed_bias_attr
=
False
)
input_proj_bias_attr
=
False
)
lstm2
=
lstmemory_group
(
input
=
m2
,
param_attr
=
lstm_param
,
lstm_bias_attr
=
lstm_bias
,
mixed
_bias_attr
=
False
)
input_proj
_bias_attr
=
False
)
softmax_param
=
ParamAttr
(
name
=
'softmax_param'
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录