Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
6a6e1c74
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
6a6e1c74
编写于
7月 20, 2017
作者:
C
Cao Ying
提交者:
GitHub
7月 20, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #2944 from lcy-seso/fix_recurrent_parse_bug
fix recurrent_group parsing bug.
上级
75ad5a6a
1b6faffe
变更
5
显示空白变更内容
内联
并排
Showing
5 changed file
with
86 addition
and
97 deletion
+86
-97
python/paddle/trainer_config_helpers/layers.py
python/paddle/trainer_config_helpers/layers.py
+41
-47
python/paddle/trainer_config_helpers/networks.py
python/paddle/trainer_config_helpers/networks.py
+39
-45
python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr
...onfig_helpers/tests/configs/protostr/shared_lstm.protostr
+2
-2
python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
...ig_helpers/tests/configs/protostr/test_rnn_group.protostr
+1
-1
python/paddle/trainer_config_helpers/tests/configs/shared_lstm.py
...addle/trainer_config_helpers/tests/configs/shared_lstm.py
+3
-2
未找到文件。
python/paddle/trainer_config_helpers/layers.py
浏览文件 @
6a6e1c74
...
...
@@ -3173,11 +3173,11 @@ def memory(name,
@
wrap_bias_attr_default
()
@
wrap_act_default
(
param_names
=
[
'gate_act'
,
'state_act'
],
act
=
Sigmoid
Activation
())
@
wrap_act_default
(
param_names
=
[
'gate_act'
],
act
=
SigmoidActivation
())
@
wrap_act_default
(
param_names
=
[
'state_act'
],
act
=
Tanh
Activation
())
@
wrap_act_default
(
act
=
TanhActivation
())
@
wrap_name_default
(
'lstm_step'
)
@
layer_support
()
@
layer_support
(
ERROR_CLIPPING
,
DROPOUT
)
def
lstm_step_layer
(
input
,
state
,
size
=
None
,
...
...
@@ -3531,12 +3531,7 @@ def SubsequenceInput(input):
@
wrap_name_default
(
"recurrent_group"
)
def
recurrent_group
(
step
,
input
,
reverse
=
False
,
name
=
None
,
targetInlink
=
None
,
is_generating
=
False
):
def
recurrent_group
(
step
,
input
,
reverse
=
False
,
name
=
None
,
targetInlink
=
None
):
"""
Recurrent layer group is an extremely flexible recurrent unit in
PaddlePaddle. As long as the user defines the calculation done within a
...
...
@@ -3602,21 +3597,12 @@ def recurrent_group(step,
:type targetInlink: LayerOutput|SubsequenceInput
:param is_generating: If is generating, none of input type should be LayerOutput;
else, for training or testing, one of the input type must
be LayerOutput.
:type is_generating: bool
:return: LayerOutput object.
:rtype: LayerOutput
"""
model_type
(
'recurrent_nn'
)
def
is_single_input
(
x
):
return
isinstance
(
x
,
LayerOutput
)
or
isinstance
(
x
,
StaticInput
)
if
is_single_input
(
input
):
if
isinstance
(
input
,
LayerOutput
)
or
isinstance
(
input
,
StaticInput
):
input
=
[
input
]
assert
isinstance
(
input
,
collections
.
Sequence
)
...
...
@@ -3630,13 +3616,8 @@ def recurrent_group(step,
in_links
=
map
(
lambda
x
:
x
.
name
,
in_links
),
seq_reversed
=
reverse
)
in_args
=
[]
has_LayerOutput
=
False
for
each_input
in
input
:
assert
is_single_input
(
each_input
)
if
isinstance
(
each_input
,
LayerOutput
):
in_args
.
append
(
each_input
)
has_LayerOutput
=
True
else
:
# StaticInput
if
isinstance
(
each_input
,
StaticInput
):
# StaticInput
mem_name
=
"__%s_memory__"
%
each_input
.
input
.
name
mem
=
memory
(
name
=
None
,
...
...
@@ -3644,24 +3625,26 @@ def recurrent_group(step,
boot_layer
=
each_input
.
input
)
mem
.
set_input
(
mem
)
in_args
.
append
(
mem
)
assert
(
is_generating
!=
has_LayerOut
put
)
else
:
in_args
.
append
(
each_in
put
)
layer_outs
=
step
(
*
in_args
)
if
isinstance
(
layer_outs
,
LayerOutput
):
layer_outs
=
[
layer_outs
]
for
ot
in
layer_outs
:
assert
isinstance
(
ot
,
LayerOutput
)
ot
.
reverse
=
reverse
RecurrentLayerGroupSetOutLink
(
ot
.
name
)
for
layer_out
in
layer_outs
:
assert
isinstance
(
layer_out
,
LayerOutput
),
"Type of step function's return value must be LayerOutput."
layer_out
.
reverse
=
reverse
RecurrentLayerGroupSetOutLink
(
layer_out
.
name
)
RecurrentLayerGroupEnd
(
name
=
name
)
for
layer_out
in
layer_outs
:
# The
e previous full_name is the name is the rnn group
# We need a full_name outside the r
nn group
# The
previous full_name is the name inside the recurrent group.
# We need a full_name outside the r
ecurrent group.
layer_out
.
full_name
=
MakeLayerNameInSubmodel
(
layer_out
.
name
)
if
len
(
layer_outs
)
==
1
:
...
...
@@ -3684,7 +3667,20 @@ class BaseGeneratedInput(object):
class
GeneratedInput
(
BaseGeneratedInput
):
def
after_real_step
(
self
,
input
):
return
maxid_layer
(
input
=
input
,
name
=
'__beam_search_predict__'
)
if
isinstance
(
input
,
LayerOutput
):
input
=
[
input
]
elif
isinstance
(
input
,
collections
.
Sequence
):
input
=
list
(
input
)
if
len
(
input
)
>
1
:
logger
.
info
(
(
"More than one layers inside the recurrent_group "
"are returned as outputs of the entire recurrent_group "
"PLEASE garantee the first output is probability of "
"the predicted next word."
))
return
[
maxid_layer
(
input
=
input
[
0
],
name
=
'__beam_search_predict__'
)]
+
(
input
[
1
:]
if
len
(
input
)
>
1
else
[])
def
before_real_step
(
self
):
predict_id
=
memory
(
...
...
@@ -3871,6 +3867,7 @@ def beam_search(step,
:type step: callable
:param input: Input data for the recurrent unit, which should include the
previously generated words as a GeneratedInput object.
In beam_search, none of the input's type should be LayerOutput.
:type input: list
:param bos_id: Index of the start symbol in the dictionary. The start symbol
is a special token for NLP task, which indicates the
...
...
@@ -3912,15 +3909,18 @@ def beam_search(step,
real_input
=
[]
for
i
,
each_input
in
enumerate
(
input
):
assert
isinstance
(
each_input
,
StaticInput
)
or
isinstance
(
each_input
,
BaseGeneratedInput
)
assert
not
isinstance
(
each_input
,
LayerOutput
),
(
"in beam_search, "
"none of the input should has a type of LayerOutput."
)
if
isinstance
(
each_input
,
BaseGeneratedInput
):
assert
generated_input_index
==
-
1
assert
generated_input_index
==
-
1
,
(
"recurrent_group accepts "
"only one GeneratedInput."
)
generated_input_index
=
i
else
:
real_input
.
append
(
each_input
)
assert
generated_input_index
!=
-
1
assert
generated_input_index
!=
-
1
,
"No GeneratedInput is given."
gipt
=
input
[
generated_input_index
]
...
...
@@ -3941,17 +3941,11 @@ def beam_search(step,
predict
=
gipt
.
after_real_step
(
step
(
*
args
))
eos_layer
(
input
=
predict
,
eos_id
=
eos_id
,
name
=
eos_name
)
eos_layer
(
input
=
predict
[
0
]
,
eos_id
=
eos_id
,
name
=
eos_name
)
return
predict
tmp
=
recurrent_group
(
step
=
__real_step__
,
input
=
real_input
,
reverse
=
False
,
name
=
name
,
is_generating
=
True
)
return
tmp
return
recurrent_group
(
step
=
__real_step__
,
input
=
real_input
,
reverse
=
False
,
name
=
name
)
def
__cost_input__
(
input
,
label
,
weight
=
None
):
...
...
python/paddle/trainer_config_helpers/networks.py
浏览文件 @
6a6e1c74
...
...
@@ -614,18 +614,17 @@ def simple_lstm(input,
@
wrap_name_default
(
'lstm_unit'
)
def
lstmemory_unit
(
input
,
memory_boot
=
None
,
out_memory
=
None
,
name
=
None
,
size
=
None
,
param_attr
=
None
,
act
=
None
,
gate_act
=
None
,
state_act
=
None
,
mixed_bias_attr
=
None
,
input_proj_bias_attr
=
None
,
input_proj_layer_attr
=
None
,
lstm_bias_attr
=
None
,
mixed_layer_attr
=
None
,
lstm_layer_attr
=
None
,
get_output_layer_attr
=
None
):
lstm_layer_attr
=
None
):
"""
Define calculations that a LSTM unit performs during a single time step.
This function itself is not a recurrent layer, so it can not be
...
...
@@ -662,8 +661,8 @@ def lstmemory_unit(input,
:param input: input layer name.
:type input: LayerOutput
:param
memory_boot: the initialization state of the LSTM cell.
:type
memory_boot
: LayerOutput | None
:param
out_memory: output of previous time step
:type
out_memory
: LayerOutput | None
:param name: lstmemory unit name.
:type name: basestring
:param size: lstmemory unit size.
...
...
@@ -676,33 +675,35 @@ def lstmemory_unit(input,
:type gate_act: BaseActivation
:param state_act: lstm state activiation type.
:type state_act: BaseActivation
:param
mixed_bias_attr: bias parameter attribute of mixed layer
.
:param
input_proj_bias_attr: bias attribute for input-to-hidden projection
.
False means no bias, None means default bias.
:type mixed_bias_attr: ParameterAttribute|False
:type input_proj_bias_attr: ParameterAttribute|False|None
:param input_proj_layer_attr: extra layer attribute for input to hidden
projection of the LSTM unit, such as dropout, error clipping.
:type input_proj_layer_attr: ExtraLayerAttribute
:param lstm_bias_attr: bias parameter attribute of lstm layer.
False means no bias, None means default bias.
:type lstm_bias_attr: ParameterAttribute|False
:param mixed_layer_attr: mixed layer's extra attribute.
:type mixed_layer_attr: ExtraLayerAttribute
:param lstm_layer_attr: lstm layer's extra attribute.
:type lstm_layer_attr: ExtraLayerAttribute
:param get_output_layer_attr: get output layer's extra attribute.
:type get_output_layer_attr: ExtraLayerAttribute
:return: lstmemory unit name.
:rtype: LayerOutput
"""
if
size
is
None
:
assert
input
.
size
%
4
==
0
size
=
input
.
size
/
4
if
out_memory
is
None
:
out_mem
=
memory
(
name
=
name
,
size
=
size
)
state_mem
=
memory
(
name
=
"%s_state"
%
name
,
size
=
size
,
boot_layer
=
memory_boot
)
else
:
out_mem
=
out_memory
state_mem
=
memory
(
name
=
"%s_state"
%
name
,
size
=
size
)
with
mixed_layer
(
name
=
"%s_input_recurrent"
%
name
,
size
=
size
*
4
,
bias_attr
=
mixed
_bias_attr
,
layer_attr
=
mixed
_layer_attr
,
bias_attr
=
input_proj
_bias_attr
,
layer_attr
=
input_proj
_layer_attr
,
act
=
IdentityActivation
())
as
m
:
m
+=
identity_projection
(
input
=
input
)
m
+=
full_matrix_projection
(
input
=
out_mem
,
param_attr
=
param_attr
)
...
...
@@ -717,11 +718,7 @@ def lstmemory_unit(input,
gate_act
=
gate_act
,
state_act
=
state_act
,
layer_attr
=
lstm_layer_attr
)
get_output_layer
(
name
=
'%s_state'
%
name
,
input
=
lstm_out
,
arg_name
=
'state'
,
layer_attr
=
get_output_layer_attr
)
get_output_layer
(
name
=
'%s_state'
%
name
,
input
=
lstm_out
,
arg_name
=
'state'
)
return
lstm_out
...
...
@@ -730,17 +727,16 @@ def lstmemory_unit(input,
def
lstmemory_group
(
input
,
size
=
None
,
name
=
None
,
memory_boot
=
None
,
out_memory
=
None
,
reverse
=
False
,
param_attr
=
None
,
act
=
None
,
gate_act
=
None
,
state_act
=
None
,
mixed_bias_attr
=
None
,
input_proj_bias_attr
=
None
,
input_proj_layer_attr
=
None
,
lstm_bias_attr
=
None
,
mixed_layer_attr
=
None
,
lstm_layer_attr
=
None
,
get_output_layer_attr
=
None
):
lstm_layer_attr
=
None
):
"""
lstm_group is a recurrent_group version of Long Short Term Memory. It
does exactly the same calculation as the lstmemory layer (see lstmemory in
...
...
@@ -774,8 +770,8 @@ def lstmemory_group(input,
:type size: int
:param name: name of the lstmemory group.
:type name: basestring
:param
memory_boot: the initialization state of LSTM cell.
:type
memory_boot
: LayerOutput | None
:param
out_memory: output of previous time step
:type
out_memory
: LayerOutput | None
:param reverse: is lstm reversed
:type reverse: bool
:param param_attr: Parameter config, None if use default.
...
...
@@ -786,18 +782,17 @@ def lstmemory_group(input,
:type gate_act: BaseActivation
:param state_act: lstm state activiation type.
:type state_act: BaseActivation
:param mixed_bias_attr: bias parameter attribute of mixed layer.
False means no bias, None means default bias.
:type mixed_bias_attr: ParameterAttribute|False
:param lstm_bias_attr: bias parameter attribute of lstm layer.
False means no bias, None means default bias.
:type lstm_bias_attr: ParameterAttribute|False
:param mixed_layer_attr: mixed layer's extra attribute.
:type mixed_layer_attr: ExtraLayerAttribute
:param input_proj_bias_attr: bias attribute for input-to-hidden projection.
False means no bias, None means default bias.
:type input_proj_bias_attr: ParameterAttribute|False|None
:param input_proj_layer_attr: extra layer attribute for input to hidden
projection of the LSTM unit, such as dropout, error clipping.
:type input_proj_layer_attr: ExtraLayerAttribute
:param lstm_layer_attr: lstm layer's extra attribute.
:type lstm_layer_attr: ExtraLayerAttribute
:param get_output_layer_attr: get output layer's extra attribute.
:type get_output_layer_attr: ExtraLayerAttribute
:return: the lstmemory group.
:rtype: LayerOutput
"""
...
...
@@ -805,18 +800,17 @@ def lstmemory_group(input,
def
__lstm_step__
(
ipt
):
return
lstmemory_unit
(
input
=
ipt
,
memory_boot
=
memory_boot
,
name
=
name
,
size
=
size
,
mixed_bias_attr
=
mixed_bias_attr
,
mixed_layer_attr
=
mixed_layer_attr
,
param_attr
=
param_attr
,
lstm_bias_attr
=
lstm_bias_attr
,
act
=
act
,
gate_act
=
gate_act
,
state_act
=
state_act
,
out_memory
=
out_memory
,
input_proj_bias_attr
=
input_proj_bias_attr
,
input_proj_layer_attr
=
input_proj_layer_attr
,
param_attr
=
param_attr
,
lstm_layer_attr
=
lstm_layer_attr
,
get_output_layer_attr
=
get_output_layer
_attr
)
lstm_bias_attr
=
lstm_bias
_attr
)
return
recurrent_group
(
name
=
'%s_recurrent_group'
%
name
,
...
...
python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr
浏览文件 @
6a6e1c74
...
...
@@ -104,7 +104,7 @@ layers {
}
bias_parameter_name: "lstm_bias"
active_gate_type: "sigmoid"
active_state_type: "
sigmoid
"
active_state_type: "
tanh
"
}
layers {
name: "__lstm_group_0___state@__lstm_group_0___recurrent_group"
...
...
@@ -183,7 +183,7 @@ layers {
}
bias_parameter_name: "lstm_bias"
active_gate_type: "sigmoid"
active_state_type: "
sigmoid
"
active_state_type: "
tanh
"
}
layers {
name: "__lstm_group_1___state@__lstm_group_1___recurrent_group"
...
...
python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
浏览文件 @
6a6e1c74
...
...
@@ -258,7 +258,7 @@ layers {
}
bias_parameter_name: "___lstm_group_0__@__lstm_group_0___recurrent_group.wbias"
active_gate_type: "sigmoid"
active_state_type: "
sigmoid
"
active_state_type: "
tanh
"
}
layers {
name: "__lstm_group_0___state@__lstm_group_0___recurrent_group"
...
...
python/paddle/trainer_config_helpers/tests/configs/shared_lstm.py
浏览文件 @
6a6e1c74
...
...
@@ -20,12 +20,13 @@ lstm1 = lstmemory_group(
input
=
m1
,
param_attr
=
lstm_param
,
lstm_bias_attr
=
lstm_bias
,
mixed_bias_attr
=
False
)
input_proj_bias_attr
=
False
)
lstm2
=
lstmemory_group
(
input
=
m2
,
param_attr
=
lstm_param
,
lstm_bias_attr
=
lstm_bias
,
mixed
_bias_attr
=
False
)
input_proj
_bias_attr
=
False
)
softmax_param
=
ParamAttr
(
name
=
'softmax_param'
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录