Commit 24b00ac6

Authored by Yu Yang on Mar 20, 2017; committed via GitHub on Mar 20, 2017.

Merge pull request #1313 from emailweixu/memory.set_input
Make it possible to postpone setting the layer name for a memory.
Parents: 53090e34, 94e38bb5

Showing 5 changed files with 210 additions and 27 deletions:
python/paddle/trainer/config_parser.py  +29 -11
python/paddle/trainer_config_helpers/default_decorators.py  +3 -3
python/paddle/trainer_config_helpers/layers.py  +58 -12
python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr  +107 -0
python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.py  +13 -1
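Taken together, these changes let a recurrent group's step function create a memory before the layer it remembers exists, and bind the two afterwards with set_input(). A minimal sketch of a step function written in the new style, modelled on the generate_rnn_simple_no_name() helper added to the test below (the layer size of 200 and the surrounding recurrent_group call are illustrative):

from paddle.trainer_config_helpers import *

def step(s):
    # Create a memory without naming the layer it remembers.
    m = memory(name=None, size=200)
    # Build the layer that consumes the memory ...
    fc = fc_layer(input=[s, m], size=200)
    # ... and only now tell the memory which layer it remembers.
    m.set_input(fc)
    return fc

# Inside a trainer config this step would be driven by, e.g.:
#   out = recurrent_group(step=step, input=seq)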
python/paddle/trainer/config_parser.py
@@ -2222,7 +2222,10 @@ def Link(
 # memory for recurrent layer group.
 # *name* and *size* are actual layer's name and size.
-# will return name of the memory,
+# If *name* is None, need to provide *memory_name* and need to use
+# SetMemoryInput() later to specify the layer which this memory remembers.
+#
+# return the name of the memory,
 # use this name if you assign the memory as other layer's input
 #
 # boot frame of memory is zeroed by default,
@@ -2234,15 +2237,18 @@ def Link(
 # can only be initailized by a *boot_layer* which is a sequence.
 #
 @config_func
-def Memory(name,
-           size,
-           is_sequence=False,
-           boot_layer=None,
-           boot_bias=False,
-           boot_bias_active_type="",
-           boot_with_const_id=None, ):
-    agent_name = name + "+delay1"
+def Memory(name,
+           size,
+           is_sequence=False,
+           boot_layer=None,
+           boot_bias=False,
+           boot_bias_active_type="",
+           boot_with_const_id=None,
+           memory_name=None):
+    if not memory_name:
+        config_assert(name is not None, "name needs cannot be None")
+        memory_name = name + "+delay1"
+    agent_name = memory_name
     if is_sequence:
         agent_layer = SequenceAgentLayer(agent_name, size)
     else:
@@ -2250,7 +2256,8 @@ def Memory(
     config_assert(g_current_submodel.is_recurrent_layer_group,
                   'Memory should be used in recurrent layer group only')
     memory = g_current_submodel.memories.add()
-    memory.layer_name = MakeLayerNameInSubmodel(name)
+    if name is not None:
+        memory.layer_name = MakeLayerNameInSubmodel(name)
     memory.link_name = MakeLayerNameInSubmodel(agent_name)
     memory.is_sequence = is_sequence
     options = sum((boot_layer is not None, bool(boot_bias),
@@ -2274,6 +2281,17 @@ def Memory(
     return agent_name
 
 
+@config_func
+def SetMemoryInput(memory_name, layer_name):
+    memory_name = MakeLayerNameInSubmodel(memory_name)
+    layer_name = MakeLayerNameInSubmodel(layer_name)
+    for mem in g_current_submodel.memories:
+        if mem.link_name == memory_name:
+            mem.layer_name = layer_name
+            return
+    logger.fatal("Nonexistent memory name: " + memory_name)
+
+
 # Generator for recurrent layer group, to use it:
 # 1. define a id layer as output of layer group
 # 2. define a memory of this id layer, and assign a boot id(begin of sequence)
python/paddle/trainer_config_helpers/default_decorators.py
@@ -97,13 +97,13 @@ def reset_hook():
 register_parse_config_hook(reset_hook)
 
 
-def wrap_name_default(name_prefix=None):
+def wrap_name_default(name_prefix=None, name_param="name"):
     """
     Decorator to set "name" arguments default to "{name_prefix}_{invoke_count}".
 
     ..  code:: python
 
-        @default_name("some_name")
+        @wrap_name_default("some_name")
         def func(name=None):
             print name      # name will never be None. If name is not set,
                             # name will be "some_name_%d"
@@ -115,7 +115,7 @@ def wrap_name_default(name_prefix=None):
     """
     factory = DefaultNameFactory(name_prefix)
     _name_factories.append(factory)
-    return wrap_param_default(["name"], factory)
+    return wrap_param_default([name_param], factory)
 
 
 def wrap_param_attr_default(param_names=None, default_factory=None):
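The new name_param argument generalizes the decorator: it can now fill in a default for a keyword other than name, which layers.py below uses as @wrap_name_default("memory", "memory_name") so that an unnamed memory still receives a unique memory_name. A simplified, self-contained sketch of that behaviour (this is not Paddle's actual DefaultNameFactory / wrap_param_default machinery, just an illustration of the idea):

import functools

def wrap_name_default(name_prefix=None, name_param="name"):
    """Fill `name_param` with "{prefix}_{count}" when the caller leaves it unset."""
    counter = {"n": 0}

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            if kwargs.get(name_param) is None:
                kwargs[name_param] = "%s_%d" % (name_prefix or func.__name__,
                                                counter["n"])
                counter["n"] += 1
            return func(*args, **kwargs)

        return wrapper

    return decorator

@wrap_name_default("memory", "memory_name")
def memory(name=None, size=0, memory_name=None):
    return memory_name

print(memory(size=200))  # -> "memory_0"
print(memory(size=200))  # -> "memory_1"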
python/paddle/trainer_config_helpers/layers.py
@@ -288,6 +288,14 @@ class LayerOutput(object):
         """
         assert False, "this method should not be invoked"
 
+    def set_input(self, input):
+        """
+        Set the input for a memory layer. Can only be used for memory layer
+        """
+        assert isinstance(input, LayerOutput)
+        assert self.layer_type == LayerType.MEMORY
+        SetMemoryInput(self.name, input.name)
+
 
 ERROR_CLIPPING = 'error_clipping_threshold'
 DROPOUT = 'drop_rate'
@@ -2759,8 +2767,10 @@ def seq_concat_layer(a, b, act=None, name=None, layer_attr=None,
         size=a.size)
 
 
+@wrap_name_default("memory", "memory_name")
 def memory(name,
            size,
+           memory_name=None,
            is_seq=False,
            boot_layer=None,
            boot_bias=None,
@@ -2782,14 +2792,32 @@ def memory(name,
     If boot_layer is not null, the memory is just the boot_layer's output.
     Set :code:`is_seq` is true boot layer is sequence.
 
     The same name layer in recurrent group will set memory on each time
     step.
 
-    :param name: memory's name.
+    .. code-block:: python
+
+       mem = memory(size=256, name='state')
+       state = fc_layer(input=mem, size=256, name='state')
+
+    If you do not want to specify the name, you can equivalently use set_input()
+    to specify the layer needs to be remembered as the following:
+
+    .. code-block:: python
+
+       mem = memory(size=256)
+       state = fc_layer(input=mem, size=256)
+       mem.set_input(mem)
+
+    :param name: the name of the layer which this memory remembers.
+                 If name is None, user should call set_input() to specify the
+                 name of the layer which this memory remembers.
     :type name: basestring
     :param size: size of memory.
     :type size: int
+    :param memory_name: the name of the memory.
+                        It is ignored when name is provided.
+    :type memory_name: basestring
     :param is_seq: is sequence for boot_layer
     :type is_seq: bool
     :param boot_layer: boot layer of memory.
@@ -2811,13 +2839,21 @@ def memory(name,
         boot_bias = ParamAttr.to_bias(boot_bias)
 
     assert boot_layer is None or isinstance(boot_layer, LayerOutput)
+    if name is not None:
+        memory_name = None
 
-    agent_name = Memory(name, size, is_seq, boot_layer.name
-                        if boot_layer is not None else None, boot_bias,
-                        boot_bias_active_type.name, boot_with_const_id)
+    memory_name = Memory(
+        name,
+        size,
+        is_sequence=is_seq,
+        boot_layer=boot_layer.name if boot_layer is not None else None,
+        boot_bias=boot_bias,
+        boot_bias_active_type=boot_bias_active_type.name,
+        boot_with_const_id=boot_with_const_id,
+        memory_name=memory_name)
 
     lout = LayerOutput(
-        name=agent_name,
+        name=memory_name,
         size=size,
         layer_type=LayerType.MEMORY,
         parents=[boot_layer] if boot_layer is not None else None)
@@ -3565,7 +3601,7 @@ def __cost_input__(input, label, weight=None):
     ipts = [Input(input.name), Input(label.name)]
     parents = [input, label]
     if weight is not None:
-        assert weight.layer_type == LayerType.DATA
+        assert weight.size == 1
         ipts.append(Input(weight.name))
         parents.append(weight)
     return ipts, parents
@@ -4946,7 +4982,12 @@ def lambda_cost(input,
 
 @wrap_name_default()
 @layer_support()
-def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None):
+def cross_entropy(input,
+                  label,
+                  name=None,
+                  coeff=1.0,
+                  weight=None,
+                  layer_attr=None):
     """
     A loss layer for multi class entropy.
@@ -4961,22 +5002,27 @@ def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None):
     :type input: LayerOutput.
     :param name: The name of this layers. It is not necessary.
     :type name: None|basestring.
-    :param coeff: The coefficient affects the gradient in the backward.
+    :param coeff: The cost is multiplied with coeff.
+                  The coefficient affects the gradient in the backward.
     :type coeff: float.
+    :param weight: The cost of each sample is multiplied with each weight.
+                   The weight should be a layer with size=1. Note that gradient
+                   will not be calculated for weight.
+    :type weight: LayerOutout
     :param layer_attr: Extra Layer Attribute.
     :type layer_attr: ExtraLayerAttribute
     :return: LayerOutput object.
     :rtype: LayerOutput.
     """
+    ipts, parents = __cost_input__(input, label, weight)
+
     Layer(
         name=name,
         type=LayerType.CROSS_ENTROPY,
-        inputs=[input.name, label.name],
+        inputs=ipts,
         coeff=coeff,
         **ExtraLayerAttribute.to_kwargs(layer_attr))
-    return LayerOutput(
-        name, LayerType.CROSS_ENTROPY, parents=[input, label], size=1)
+    return LayerOutput(name, LayerType.CROSS_ENTROPY, parents=parents, size=1)
 
 
 @wrap_name_default()
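The cross_entropy change adds an optional per-sample weight, routed through __cost_input__ as a third input of the cost layer. A hedged usage sketch based on the new docstring (the layer names and sizes are illustrative, and the weight layer must have size=1):

from paddle.trainer_config_helpers import *

feats = data_layer(name='features', size=128)
lbl = data_layer(name='label', size=10)
sample_weight = data_layer(name='weight', size=1)  # one weight per sample

pred = fc_layer(input=feats, size=10, act=SoftmaxActivation())

# Each sample's cross-entropy cost is multiplied by its weight;
# no gradient flows back into the weight layer.
cost = cross_entropy(input=pred, label=lbl, weight=sample_weight)
outputs(cost)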
python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
@@ -331,6 +331,54 @@ layers {
}
trans_type: "non-seq"
}
layers {
name: "__recurrent_group_3__"
type: "recurrent_layer_group"
active_type: ""
}
layers {
name: "seq_input@__recurrent_group_3__"
type: "scatter_agent"
size: 100
active_type: ""
}
layers {
name: "__memory_6__@__recurrent_group_3__"
type: "agent"
size: 200
active_type: ""
}
layers {
name: "__fc_layer_0__@__recurrent_group_3__"
type: "fc"
size: 200
active_type: "tanh"
inputs {
input_layer_name: "seq_input@__recurrent_group_3__"
input_parameter_name: "___fc_layer_0__@__recurrent_group_3__.w0"
}
inputs {
input_layer_name: "__memory_6__@__recurrent_group_3__"
input_parameter_name: "___fc_layer_0__@__recurrent_group_3__.w1"
}
bias_parameter_name: "___fc_layer_0__@__recurrent_group_3__.wbias"
}
layers {
name: "__fc_layer_0__"
type: "gather_agent"
size: 200
active_type: ""
}
layers {
name: "__last_seq_4__"
type: "seqlastins"
size: 200
active_type: "linear"
inputs {
input_layer_name: "__fc_layer_0__"
}
trans_type: "non-seq"
}
parameters {
name: "___mixed_0__.w0"
size: 40000
@@ -481,6 +529,36 @@ parameters {
initial_strategy: 0
initial_smart: false
}
parameters {
name: "___fc_layer_0__@__recurrent_group_3__.w0"
size: 20000
initial_mean: 0.0
initial_std: 0.1
dims: 100
dims: 200
initial_strategy: 0
initial_smart: true
}
parameters {
name: "___fc_layer_0__@__recurrent_group_3__.w1"
size: 40000
initial_mean: 0.0
initial_std: 0.0707106781187
dims: 200
dims: 200
initial_strategy: 0
initial_smart: true
}
parameters {
name: "___fc_layer_0__@__recurrent_group_3__.wbias"
size: 200
initial_mean: 0.0
initial_std: 0.0
dims: 1
dims: 200
initial_strategy: 0
initial_smart: false
}
input_layer_names: "seq_input"
input_layer_names: "sub_seq_input"
output_layer_names: "__last_seq_0__"
@@ -488,6 +566,7 @@ output_layer_names: "__first_seq_0__"
output_layer_names: "__last_seq_1__"
output_layer_names: "__last_seq_2__"
output_layer_names: "__last_seq_3__"
output_layer_names: "__last_seq_4__"
sub_models {
name: "root"
layer_names: "seq_input"
@@ -510,6 +589,9 @@ sub_models {
layer_names: "__gru_group_0___recurrent_group"
layer_names: "__gru_group_0__"
layer_names: "__last_seq_3__"
layer_names: "__recurrent_group_3__"
layer_names: "__fc_layer_0__"
layer_names: "__last_seq_4__"
input_layer_names: "seq_input"
input_layer_names: "sub_seq_input"
output_layer_names: "__last_seq_0__"
@@ -517,6 +599,7 @@ sub_models {
output_layer_names: "__last_seq_1__"
output_layer_names: "__last_seq_2__"
output_layer_names: "__last_seq_3__"
output_layer_names: "__last_seq_4__"
is_recurrent_layer_group: false
}
sub_models {
@@ -647,4 +730,28 @@ sub_models {
}
target_inlinkid: -1
}
sub_models {
name: "__recurrent_group_3__"
layer_names: "seq_input@__recurrent_group_3__"
layer_names: "__memory_6__@__recurrent_group_3__"
layer_names: "__fc_layer_0__@__recurrent_group_3__"
is_recurrent_layer_group: true
reversed: false
memories {
layer_name: "__fc_layer_0__@__recurrent_group_3__"
link_name: "__memory_6__@__recurrent_group_3__"
is_sequence: false
}
in_links {
layer_name: "seq_input"
link_name: "seq_input@__recurrent_group_3__"
has_subseq: false
}
out_links {
layer_name: "__fc_layer_0__@__recurrent_group_3__"
link_name: "__fc_layer_0__"
has_subseq: false
}
target_inlinkid: -1
}
python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.py
@@ -16,6 +16,16 @@ def generate_rnn_simple(name):
     return rnn_simple
 
 
+def generate_rnn_simple_no_name():
+    def rnn_simple(s):
+        m = memory(name=None, size=200)
+        fc = fc_layer(input=[s, m], size=200)
+        m.set_input(fc)
+        return fc
+
+    return rnn_simple
+
+
 with mixed_layer() as lstm_param:  # test lstm unit, rnn group
     lstm_param += full_matrix_projection(input=seq, size=100 * 4)
@@ -33,4 +43,6 @@ outputs(
     last_seq(input=lstmemory_group(
         input=lstm_param, size=100)),
     last_seq(input=gru_group(
-        input=gru_param, size=100)))
+        input=gru_param, size=100)),
+    last_seq(input=recurrent_group(
+        step=generate_rnn_simple_no_name(), input=seq)), )