PaddlePaddle / Paddle

Commit 24b00ac6

Authored on March 20, 2017 by Yu Yang; committed via GitHub on March 20, 2017.

Merge pull request #1313 from emailweixu/memory.set_input

Make it possible to postpone setting the layer name for a memory.

Parents: 53090e34, 94e38bb5
Showing 5 changed files with 210 additions and 27 deletions (+210 −27).
python/paddle/trainer/config_parser.py (+29 −11)
python/paddle/trainer_config_helpers/default_decorators.py (+3 −3)
python/paddle/trainer_config_helpers/layers.py (+58 −12)
python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr (+107 −0)
python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.py (+13 −1)
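What the change enables: a memory layer can now be declared before the layer it remembers exists, and bound to it afterwards with set_input(). A minimal sketch of the two equivalent styles, based on the memory() docstring and the new test case in this commit (the sizes and the step-function wrappers are illustrative, not taken from the diff):

    # Old, name-based style: the memory is tied up front to the layer named 'state'.
    def step_named(s):
        mem = memory(name='state', size=256)
        state = fc_layer(input=[s, mem], size=256, name='state')
        return state

    # New, deferred style: create an anonymous memory, then bind it with set_input().
    def step_deferred(s):
        mem = memory(name=None, size=256)         # no target layer name yet
        out = fc_layer(input=[s, mem], size=256)  # layer gets an auto-generated name
        mem.set_input(out)                        # the memory now remembers 'out'
        return out

Either step function would then be passed to recurrent_group(step=..., input=seq).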
python/paddle/trainer/config_parser.py

@@ -2222,7 +2222,10 @@ def Link(
 # memory for recurrent layer group.
 # *name* and *size* are actual layer's name and size.
-# will return name of the memory,
+# If *name* is None, need to provide *memory_name* and need to use
+# SetMemoryInput() later to specify the layer which this memory remembers.
+#
+# return the name of the memory,
 # use this name if you assign the memory as other layer's input
 #
 # boot frame of memory is zeroed by default,

@@ -2234,15 +2237,18 @@ def Link(
 # can only be initailized by a *boot_layer* which is a sequence.
 #
 @config_func
 def Memory(name,
            size,
            is_sequence=False,
            boot_layer=None,
            boot_bias=False,
            boot_bias_active_type="",
-           boot_with_const_id=None,
-           ):
-    agent_name = name + "+delay1"
+           boot_with_const_id=None,
+           memory_name=None):
+    if not memory_name:
+        config_assert(name is not None, "name needs cannot be None")
+        memory_name = name + "+delay1"
+    agent_name = memory_name
     if is_sequence:
         agent_layer = SequenceAgentLayer(agent_name, size)
     else:

@@ -2250,6 +2256,7 @@ def Memory(
     config_assert(g_current_submodel.is_recurrent_layer_group,
                   'Memory should be used in recurrent layer group only')
     memory = g_current_submodel.memories.add()
-    memory.layer_name = MakeLayerNameInSubmodel(name)
+    if name is not None:
+        memory.layer_name = MakeLayerNameInSubmodel(name)
     memory.link_name = MakeLayerNameInSubmodel(agent_name)
     memory.is_sequence = is_sequence

@@ -2274,6 +2281,17 @@ def Memory(
     return agent_name


+@config_func
+def SetMemoryInput(memory_name, layer_name):
+    memory_name = MakeLayerNameInSubmodel(memory_name)
+    layer_name = MakeLayerNameInSubmodel(layer_name)
+    for mem in g_current_submodel.memories:
+        if mem.link_name == memory_name:
+            mem.layer_name = layer_name
+            return
+    logger.fatal("Nonexistent memory name: " + memory_name)
+
+
 # Generator for recurrent layer group, to use it:
 # 1. define a id layer as output of layer group
 # 2. define a memory of this id layer, and assign a boot id(begin of sequence)
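At the config_parser level, the deferred binding is done by SetMemoryInput(), which looks up the memory by its link_name and fills in layer_name after the fact. A rough sketch of the intended call order (the names '__memory_0__' and 'out' are hypothetical placeholders; both calls must run inside a recurrent layer group):

    # Create a memory without naming the layer it remembers; an explicit memory_name
    # is then required, and the returned agent name is what downstream layers consume.
    mem_link = Memory(name=None, size=200, memory_name='__memory_0__')

    # ... define the layer to be remembered, e.g. an fc layer named 'out',
    #     taking mem_link as one of its inputs ...

    # Bind the memory to that layer after the fact.
    SetMemoryInput('__memory_0__', 'out')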
python/paddle/trainer_config_helpers/default_decorators.py

@@ -97,13 +97,13 @@ def reset_hook():
 register_parse_config_hook(reset_hook)


-def wrap_name_default(name_prefix=None):
+def wrap_name_default(name_prefix=None, name_param="name"):
     """
     Decorator to set "name" arguments default to "{name_prefix}_{invoke_count}".

     ..  code:: python

-        @default_name("some_name")
+        @wrap_name_default("some_name")
         def func(name=None):
             print name  # name will never be None. If name is not set,
                         # name will be "some_name_%d"

@@ -115,7 +115,7 @@ def wrap_name_default(name_prefix=None):
     """
     factory = DefaultNameFactory(name_prefix)
     _name_factories.append(factory)
-    return wrap_param_default(["name"], factory)
+    return wrap_param_default([name_param], factory)


 def wrap_param_attr_default(param_names=None, default_factory=None):
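The new name_param argument generalizes the decorator so it can auto-fill a parameter other than name; memory() uses it below as @wrap_name_default("memory", "memory_name"). A self-contained sketch of the same idea (the counter-based factory is a simplification, not the library's DefaultNameFactory, and it only defaults keyword arguments):

    import functools

    def wrap_name_default(name_prefix=None, name_param="name"):
        """Fill `name_param` with '{prefix}_{count}' when the caller leaves it unset."""
        state = {"count": 0}

        def decorator(func):
            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                if kwargs.get(name_param) is None:
                    kwargs[name_param] = "%s_%d" % (name_prefix or func.__name__,
                                                    state["count"])
                    state["count"] += 1
                return func(*args, **kwargs)
            return wrapper
        return decorator

    @wrap_name_default("memory", "memory_name")
    def memory(name=None, size=None, memory_name=None):
        return name, memory_name

    print(memory(size=200))  # (None, 'memory_0'): memory_name was defaulted, name left alone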
python/paddle/trainer_config_helpers/layers.py

@@ -288,6 +288,14 @@ class LayerOutput(object):
         """
         assert False, "this method should not be invoked"

+    def set_input(self, input):
+        """
+        Set the input for a memory layer. Can only be used for memory layer
+        """
+        assert isinstance(input, LayerOutput)
+        assert self.layer_type == LayerType.MEMORY
+        SetMemoryInput(self.name, input.name)
+

 ERROR_CLIPPING = 'error_clipping_threshold'
 DROPOUT = 'drop_rate'

@@ -2759,8 +2767,10 @@ def seq_concat_layer(a, b, act=None, name=None, layer_attr=None,
         size=a.size)


+@wrap_name_default("memory", "memory_name")
 def memory(name,
            size,
+           memory_name=None,
            is_seq=False,
            boot_layer=None,
            boot_bias=None,

@@ -2782,14 +2792,32 @@ def memory(name,
     If boot_layer is not null, the memory is just the boot_layer's output.
     Set :code:`is_seq` is true boot layer is sequence.

     The same name layer in recurrent group will set memory on each time
     step.

-    :param name: memory's name.
+    .. code-block:: python
+
+       mem = memory(size=256, name='state')
+       state = fc_layer(input=mem, size=256, name='state')
+
+    If you do not want to specify the name, you can equivalently use set_input()
+    to specify the layer needs to be remembered as the following:
+
+    .. code-block:: python
+
+       mem = memory(size=256)
+       state = fc_layer(input=mem, size=256)
+       mem.set_input(mem)
+
+    :param name: the name of the layer which this memory remembers.
+                 If name is None, user should call set_input() to specify the
+                 name of the layer which this memory remembers.
     :type name: basestring
     :param size: size of memory.
     :type size: int
+    :param memory_name: the name of the memory.
+                        It is ignored when name is provided.
+    :type memory_name: basestring
     :param is_seq: is sequence for boot_layer
     :type is_seq: bool
     :param boot_layer: boot layer of memory.

@@ -2811,13 +2839,21 @@ def memory(name,
         boot_bias = ParamAttr.to_bias(boot_bias)

     assert boot_layer is None or isinstance(boot_layer, LayerOutput)
+    if name is not None:
+        memory_name = None

-    agent_name = Memory(name, size, is_seq, boot_layer.name
-                        if boot_layer is not None else None, boot_bias,
-                        boot_bias_active_type.name, boot_with_const_id)
+    memory_name = Memory(
+        name,
+        size,
+        is_sequence=is_seq,
+        boot_layer=boot_layer.name if boot_layer is not None else None,
+        boot_bias=boot_bias,
+        boot_bias_active_type=boot_bias_active_type.name,
+        boot_with_const_id=boot_with_const_id,
+        memory_name=memory_name)

     lout = LayerOutput(
-        name=agent_name,
+        name=memory_name,
         size=size,
         layer_type=LayerType.MEMORY,
         parents=[boot_layer] if boot_layer is not None else None)

@@ -3565,7 +3601,7 @@ def __cost_input__(input, label, weight=None):
     ipts = [Input(input.name), Input(label.name)]
     parents = [input, label]
     if weight is not None:
-        assert weight.layer_type == LayerType.DATA
+        assert weight.size == 1
         ipts.append(Input(weight.name))
         parents.append(weight)
     return ipts, parents

@@ -4946,7 +4982,12 @@ def lambda_cost(input,
 @wrap_name_default()
 @layer_support()
-def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None):
+def cross_entropy(input,
+                  label,
+                  name=None,
+                  coeff=1.0,
+                  weight=None,
+                  layer_attr=None):
     """
     A loss layer for multi class entropy.

@@ -4961,22 +5002,27 @@ def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None):
     :type input: LayerOutput.
     :param name: The name of this layers. It is not necessary.
     :type name: None|basestring.
-    :param coeff: The coefficient affects the gradient in the backward.
+    :param coeff: The cost is multiplied with coeff.
+                  The coefficient affects the gradient in the backward.
     :type coeff: float.
+    :param weight: The cost of each sample is multiplied with each weight.
+                   The weight should be a layer with size=1. Note that gradient
+                   will not be calculated for weight.
+    :type weight: LayerOutout
     :param layer_attr: Extra Layer Attribute.
     :type layer_attr: ExtraLayerAttribute
     :return: LayerOutput object.
     :rtype: LayerOutput.
     """
+    ipts, parents = __cost_input__(input, label, weight)
+
     Layer(
         name=name,
         type=LayerType.CROSS_ENTROPY,
-        inputs=[input.name, label.name],
+        inputs=ipts,
         coeff=coeff,
         **ExtraLayerAttribute.to_kwargs(layer_attr))
-    return LayerOutput(name, LayerType.CROSS_ENTROPY, parents=[input, label], size=1)
+    return LayerOutput(name, LayerType.CROSS_ENTROPY, parents=parents, size=1)
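Besides the memory changes, layers.py gains a weight argument on cross_entropy, routed through __cost_input__ so each sample's cost is scaled by a size-1 layer. A hedged usage sketch (the layer names and the 128/10 sizes are illustrative, not taken from this commit):

    from paddle.trainer_config_helpers import *

    feats = data_layer(name='features', size=128)
    label = data_layer(name='label', size=10)
    # Per-sample weight: a layer of size 1 (here a data layer); no gradient flows through it.
    sample_weight = data_layer(name='sample_weight', size=1)

    prob = fc_layer(input=feats, size=10, act=SoftmaxActivation())
    cost = cross_entropy(input=prob, label=label, weight=sample_weight, coeff=1.0)
    outputs(cost)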
python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr

@@ -331,6 +331,54 @@ layers {
   }
   trans_type: "non-seq"
 }
+layers {
+  name: "__recurrent_group_3__"
+  type: "recurrent_layer_group"
+  active_type: ""
+}
+layers {
+  name: "seq_input@__recurrent_group_3__"
+  type: "scatter_agent"
+  size: 100
+  active_type: ""
+}
+layers {
+  name: "__memory_6__@__recurrent_group_3__"
+  type: "agent"
+  size: 200
+  active_type: ""
+}
+layers {
+  name: "__fc_layer_0__@__recurrent_group_3__"
+  type: "fc"
+  size: 200
+  active_type: "tanh"
+  inputs {
+    input_layer_name: "seq_input@__recurrent_group_3__"
+    input_parameter_name: "___fc_layer_0__@__recurrent_group_3__.w0"
+  }
+  inputs {
+    input_layer_name: "__memory_6__@__recurrent_group_3__"
+    input_parameter_name: "___fc_layer_0__@__recurrent_group_3__.w1"
+  }
+  bias_parameter_name: "___fc_layer_0__@__recurrent_group_3__.wbias"
+}
+layers {
+  name: "__fc_layer_0__"
+  type: "gather_agent"
+  size: 200
+  active_type: ""
+}
+layers {
+  name: "__last_seq_4__"
+  type: "seqlastins"
+  size: 200
+  active_type: "linear"
+  inputs {
+    input_layer_name: "__fc_layer_0__"
+  }
+  trans_type: "non-seq"
+}
 parameters {
   name: "___mixed_0__.w0"
   size: 40000

@@ -481,6 +529,36 @@ parameters {
   initial_strategy: 0
   initial_smart: false
 }
+parameters {
+  name: "___fc_layer_0__@__recurrent_group_3__.w0"
+  size: 20000
+  initial_mean: 0.0
+  initial_std: 0.1
+  dims: 100
+  dims: 200
+  initial_strategy: 0
+  initial_smart: true
+}
+parameters {
+  name: "___fc_layer_0__@__recurrent_group_3__.w1"
+  size: 40000
+  initial_mean: 0.0
+  initial_std: 0.0707106781187
+  dims: 200
+  dims: 200
+  initial_strategy: 0
+  initial_smart: true
+}
+parameters {
+  name: "___fc_layer_0__@__recurrent_group_3__.wbias"
+  size: 200
+  initial_mean: 0.0
+  initial_std: 0.0
+  dims: 1
+  dims: 200
+  initial_strategy: 0
+  initial_smart: false
+}
 input_layer_names: "seq_input"
 input_layer_names: "sub_seq_input"
 output_layer_names: "__last_seq_0__"

@@ -488,6 +566,7 @@ output_layer_names: "__first_seq_0__"
 output_layer_names: "__last_seq_1__"
 output_layer_names: "__last_seq_2__"
 output_layer_names: "__last_seq_3__"
+output_layer_names: "__last_seq_4__"
 sub_models {
   name: "root"
   layer_names: "seq_input"

@@ -510,6 +589,9 @@ sub_models {
   layer_names: "__gru_group_0___recurrent_group"
   layer_names: "__gru_group_0__"
   layer_names: "__last_seq_3__"
+  layer_names: "__recurrent_group_3__"
+  layer_names: "__fc_layer_0__"
+  layer_names: "__last_seq_4__"
   input_layer_names: "seq_input"
   input_layer_names: "sub_seq_input"
   output_layer_names: "__last_seq_0__"

@@ -517,6 +599,7 @@ sub_models {
   output_layer_names: "__last_seq_1__"
   output_layer_names: "__last_seq_2__"
   output_layer_names: "__last_seq_3__"
+  output_layer_names: "__last_seq_4__"
   is_recurrent_layer_group: false
 }
 sub_models {

@@ -647,4 +730,28 @@ sub_models {
   }
   target_inlinkid: -1
 }
+sub_models {
+  name: "__recurrent_group_3__"
+  layer_names: "seq_input@__recurrent_group_3__"
+  layer_names: "__memory_6__@__recurrent_group_3__"
+  layer_names: "__fc_layer_0__@__recurrent_group_3__"
+  is_recurrent_layer_group: true
+  reversed: false
+  memories {
+    layer_name: "__fc_layer_0__@__recurrent_group_3__"
+    link_name: "__memory_6__@__recurrent_group_3__"
+    is_sequence: false
+  }
+  in_links {
+    layer_name: "seq_input"
+    link_name: "seq_input@__recurrent_group_3__"
+    has_subseq: false
+  }
+  out_links {
+    layer_name: "__fc_layer_0__@__recurrent_group_3__"
+    link_name: "__fc_layer_0__"
+    has_subseq: false
+  }
+  target_inlinkid: -1
+}
python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.py

@@ -16,6 +16,16 @@ def generate_rnn_simple(name):
     return rnn_simple


+def generate_rnn_simple_no_name():
+    def rnn_simple(s):
+        m = memory(name=None, size=200)
+        fc = fc_layer(input=[s, m], size=200)
+        m.set_input(fc)
+        return fc
+
+    return rnn_simple
+
+
 with mixed_layer() as lstm_param:  # test lstm unit, rnn group
     lstm_param += full_matrix_projection(input=seq, size=100 * 4)

@@ -33,4 +43,6 @@ outputs(
     last_seq(input=lstmemory_group(
         input=lstm_param, size=100)),
     last_seq(input=gru_group(
-        input=gru_param, size=100)))
+        input=gru_param, size=100)),
+    last_seq(input=recurrent_group(
+        step=generate_rnn_simple_no_name(), input=seq)), )