Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
c1f9cd9d
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
c1f9cd9d
编写于
2月 01, 2017
作者:
E
emailweixu
提交者:
GitHub
2月 01, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1241 from wangyang59/rnnParaShare
make gru_group parameters sharable
上级
650f7791
0bd67524
变更
10
隐藏空白更改
内联
并排
Showing
10 changed file
with
357 addition
and
11 deletion
+357
-11
paddle/gserver/layers/GruStepLayer.cpp
paddle/gserver/layers/GruStepLayer.cpp
+2
-2
paddle/gserver/tests/test_LayerGrad.cpp
paddle/gserver/tests/test_LayerGrad.cpp
+2
-2
python/paddle/trainer/config_parser.py
python/paddle/trainer/config_parser.py
+1
-1
python/paddle/trainer/recurrent_units.py
python/paddle/trainer/recurrent_units.py
+3
-0
python/paddle/trainer_config_helpers/layers.py
python/paddle/trainer_config_helpers/layers.py
+3
-1
python/paddle/trainer_config_helpers/networks.py
python/paddle/trainer_config_helpers/networks.py
+6
-0
python/paddle/trainer_config_helpers/tests/configs/file_list.sh
.../paddle/trainer_config_helpers/tests/configs/file_list.sh
+1
-1
python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr
...config_helpers/tests/configs/protostr/shared_gru.protostr
+295
-0
python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
...ig_helpers/tests/configs/protostr/test_rnn_group.protostr
+4
-4
python/paddle/trainer_config_helpers/tests/configs/shared_gru.py
...paddle/trainer_config_helpers/tests/configs/shared_gru.py
+40
-0
未找到文件。
paddle/gserver/layers/GruStepLayer.cpp
浏览文件 @
c1f9cd9d
...
...
@@ -68,8 +68,8 @@ bool GruStepLayer::init(const LayerMap& layerMap,
if
(
!
Layer
::
init
(
layerMap
,
parameterMap
))
return
false
;
CHECK_EQ
(
2U
,
inputLayers_
.
size
());
CHECK_EQ
(
getSize
()
*
getSize
()
*
3
,
parameters_
[
0
]
->
getSize
());
weight_
.
reset
(
new
Weight
(
getSize
(),
getSize
()
*
3
,
parameters_
[
0
]));
CHECK_EQ
(
getSize
()
*
getSize
()
*
3
,
parameters_
[
1
]
->
getSize
());
weight_
.
reset
(
new
Weight
(
getSize
(),
getSize
()
*
3
,
parameters_
[
1
]));
if
(
biasParameter_
.
get
()
!=
NULL
)
{
CHECK_EQ
(
getSize
()
*
3
,
biasParameter_
->
getSize
());
...
...
paddle/gserver/tests/test_LayerGrad.cpp
浏览文件 @
c1f9cd9d
...
...
@@ -1404,9 +1404,9 @@ TEST(Layer, GruStepLayer) {
config
.
biasSize
=
12
;
config
.
inputDefs
.
push_back
(
{
INPUT_DATA
,
"layer_0"
,
/* dim= */
12
,
/* paraSize= */
48
});
{
INPUT_DATA
,
"layer_0"
,
/* dim= */
12
,
/* paraSize= */
0
});
config
.
inputDefs
.
push_back
(
{
INPUT_DATA
,
"layer_1"
,
/* dim= */
4
,
/* paraSize= */
0
});
{
INPUT_DATA
,
"layer_1"
,
/* dim= */
4
,
/* paraSize= */
48
});
config
.
layerConfig
.
add_inputs
();
config
.
layerConfig
.
add_inputs
();
...
...
python/paddle/trainer/config_parser.py
浏览文件 @
c1f9cd9d
...
...
@@ -2996,7 +2996,7 @@ class GruStepLayer(LayerBase):
config_assert
(
input_layer1
.
size
==
size
,
'input_layer1.size != layer.size'
)
self
.
config
.
active_gate_type
=
active_gate_type
self
.
create_input_parameter
(
0
,
size
*
size
*
3
,
[
size
,
size
*
3
])
self
.
create_input_parameter
(
1
,
size
*
size
*
3
,
[
size
,
size
*
3
])
self
.
create_bias_parameter
(
bias
,
size
*
3
)
...
...
python/paddle/trainer/recurrent_units.py
浏览文件 @
c1f9cd9d
...
...
@@ -19,6 +19,9 @@
# to use these units, import this module in your config_file:
# import trainer.recurrent_units
#
# The modules in this file are DEPRECATED.
# If you would like to use lstm/gru
# please use the functions defined in paddle.trainer_config_helpers.
from
paddle.trainer.config_parser
import
*
...
...
python/paddle/trainer_config_helpers/layers.py
浏览文件 @
c1f9cd9d
...
...
@@ -2682,6 +2682,7 @@ def lstm_step_layer(input,
@
wrap_bias_attr_default
()
@
wrap_param_attr_default
()
@
wrap_act_default
(
param_names
=
[
'gate_act'
],
act
=
SigmoidActivation
())
@
wrap_act_default
(
act
=
TanhActivation
())
@
wrap_name_default
(
'gru_step'
)
...
...
@@ -2693,6 +2694,7 @@ def gru_step_layer(input,
name
=
None
,
gate_act
=
None
,
bias_attr
=
None
,
param_attr
=
None
,
layer_attr
=
None
):
"""
...
...
@@ -2714,7 +2716,7 @@ def gru_step_layer(input,
Layer
(
name
=
name
,
type
=
LayerType
.
GRU_STEP_LAYER
,
inputs
=
[
input
.
name
,
output_mem
.
name
],
inputs
=
[
input
.
name
,
Input
(
output_mem
.
name
,
**
param_attr
.
attr
)
],
bias
=
ParamAttr
.
to_bias
(
bias_attr
),
size
=
size
,
active_type
=
act
.
name
,
...
...
python/paddle/trainer_config_helpers/networks.py
浏览文件 @
c1f9cd9d
...
...
@@ -822,6 +822,7 @@ def gru_unit(input,
size
=
None
,
name
=
None
,
gru_bias_attr
=
None
,
gru_param_attr
=
None
,
act
=
None
,
gate_act
=
None
,
gru_layer_attr
=
None
):
...
...
@@ -862,6 +863,7 @@ def gru_unit(input,
output_mem
=
out_mem
,
size
=
size
,
bias_attr
=
gru_bias_attr
,
param_attr
=
gru_param_attr
,
act
=
act
,
gate_act
=
gate_act
,
layer_attr
=
gru_layer_attr
)
...
...
@@ -874,6 +876,7 @@ def gru_group(input,
name
=
None
,
reverse
=
False
,
gru_bias_attr
=
None
,
gru_param_attr
=
None
,
act
=
None
,
gate_act
=
None
,
gru_layer_attr
=
None
):
...
...
@@ -922,6 +925,7 @@ def gru_group(input,
name
=
name
,
size
=
size
,
gru_bias_attr
=
gru_bias_attr
,
gru_param_attr
=
gru_param_attr
,
act
=
act
,
gate_act
=
gate_act
,
gru_layer_attr
=
gru_layer_attr
)
...
...
@@ -942,6 +946,7 @@ def simple_gru(input,
mixed_bias_param_attr
=
None
,
mixed_layer_attr
=
None
,
gru_bias_attr
=
None
,
gru_param_attr
=
None
,
act
=
None
,
gate_act
=
None
,
gru_layer_attr
=
None
):
...
...
@@ -1010,6 +1015,7 @@ def simple_gru(input,
input
=
m
,
reverse
=
reverse
,
gru_bias_attr
=
gru_bias_attr
,
gru_param_attr
=
gru_param_attr
,
act
=
act
,
gate_act
=
gate_act
,
gru_layer_attr
=
gru_layer_attr
)
...
...
python/paddle/trainer_config_helpers/tests/configs/file_list.sh
浏览文件 @
c1f9cd9d
...
...
@@ -3,7 +3,7 @@ export configs=(test_fc layer_activations projections test_print_layer
test_sequence_pooling test_lstmemory_layer test_grumemory_layer
last_first_seq test_expand_layer test_ntm_layers test_hsigmoid
img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cost_layers
test_rnn_group shared_fc shared_lstm test_cost_layers_with_weight
test_rnn_group shared_fc shared_lstm
shared_gru
test_cost_layers_with_weight
test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
)
export
whole_configs
=(
test_split_datasource
)
python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr
0 → 100644
浏览文件 @
c1f9cd9d
type: "recurrent_nn"
layers {
name: "data_a"
type: "data"
size: 100
active_type: ""
}
layers {
name: "data_b"
type: "data"
size: 100
active_type: ""
}
layers {
name: "__simple_gru_0___transform"
type: "mixed"
size: 600
active_type: ""
inputs {
input_layer_name: "data_a"
input_parameter_name: "mixed_param"
proj_conf {
type: "fc"
name: "___simple_gru_0___transform.w0"
input_size: 100
output_size: 600
}
}
}
layers {
name: "__simple_gru_0___recurrent_group"
type: "recurrent_layer_group"
active_type: ""
}
layers {
name: "__simple_gru_0___transform@__simple_gru_0___recurrent_group"
type: "scatter_agent"
size: 600
active_type: ""
}
layers {
name: "__simple_gru_0__+delay1@__simple_gru_0___recurrent_group"
type: "agent"
size: 200
active_type: ""
}
layers {
name: "__simple_gru_0__@__simple_gru_0___recurrent_group"
type: "gru_step"
size: 200
active_type: "tanh"
inputs {
input_layer_name: "__simple_gru_0___transform@__simple_gru_0___recurrent_group"
}
inputs {
input_layer_name: "__simple_gru_0__+delay1@__simple_gru_0___recurrent_group"
input_parameter_name: "gru_param"
}
bias_parameter_name: "gru_bias"
active_gate_type: "sigmoid"
}
layers {
name: "__simple_gru_0__"
type: "gather_agent"
size: 200
active_type: ""
}
layers {
name: "__simple_gru_1___transform"
type: "mixed"
size: 600
active_type: ""
inputs {
input_layer_name: "data_b"
input_parameter_name: "mixed_param"
proj_conf {
type: "fc"
name: "___simple_gru_1___transform.w0"
input_size: 100
output_size: 600
}
}
}
layers {
name: "__simple_gru_1___recurrent_group"
type: "recurrent_layer_group"
active_type: ""
}
layers {
name: "__simple_gru_1___transform@__simple_gru_1___recurrent_group"
type: "scatter_agent"
size: 600
active_type: ""
}
layers {
name: "__simple_gru_1__+delay1@__simple_gru_1___recurrent_group"
type: "agent"
size: 200
active_type: ""
}
layers {
name: "__simple_gru_1__@__simple_gru_1___recurrent_group"
type: "gru_step"
size: 200
active_type: "tanh"
inputs {
input_layer_name: "__simple_gru_1___transform@__simple_gru_1___recurrent_group"
}
inputs {
input_layer_name: "__simple_gru_1__+delay1@__simple_gru_1___recurrent_group"
input_parameter_name: "gru_param"
}
bias_parameter_name: "gru_bias"
active_gate_type: "sigmoid"
}
layers {
name: "__simple_gru_1__"
type: "gather_agent"
size: 200
active_type: ""
}
layers {
name: "__last_seq_0__"
type: "seqlastins"
size: 200
active_type: "linear"
inputs {
input_layer_name: "__simple_gru_0__"
}
trans_type: "non-seq"
}
layers {
name: "__last_seq_1__"
type: "seqlastins"
size: 200
active_type: "linear"
inputs {
input_layer_name: "__simple_gru_1__"
}
trans_type: "non-seq"
}
layers {
name: "__fc_layer_0__"
type: "fc"
size: 10
active_type: "softmax"
inputs {
input_layer_name: "__last_seq_0__"
input_parameter_name: "softmax_param"
}
inputs {
input_layer_name: "__last_seq_1__"
input_parameter_name: "softmax_param"
}
}
layers {
name: "label"
type: "data"
size: 10
active_type: ""
}
layers {
name: "__cost_0__"
type: "multi-class-cross-entropy"
size: 1
active_type: ""
inputs {
input_layer_name: "__fc_layer_0__"
}
inputs {
input_layer_name: "label"
}
coeff: 1.0
}
parameters {
name: "mixed_param"
size: 60000
initial_mean: 0.0
initial_std: 0.1
dims: 100
dims: 600
initial_strategy: 0
initial_smart: true
}
parameters {
name: "gru_param"
size: 120000
initial_mean: 0.0
initial_std: 0.0707106781187
dims: 200
dims: 600
initial_strategy: 0
initial_smart: true
}
parameters {
name: "gru_bias"
size: 600
initial_mean: 0.0
initial_std: 0.0
dims: 1
dims: 600
initial_strategy: 0
initial_smart: false
}
parameters {
name: "softmax_param"
size: 2000
initial_mean: 0.0
initial_std: 0.0707106781187
dims: 200
dims: 10
initial_strategy: 0
initial_smart: true
}
input_layer_names: "data_a"
input_layer_names: "data_b"
input_layer_names: "label"
output_layer_names: "__cost_0__"
evaluators {
name: "classification_error_evaluator"
type: "classification_error"
input_layers: "__fc_layer_0__"
input_layers: "label"
}
sub_models {
name: "root"
layer_names: "data_a"
layer_names: "data_b"
layer_names: "__simple_gru_0___transform"
layer_names: "__simple_gru_0___recurrent_group"
layer_names: "__simple_gru_0__"
layer_names: "__simple_gru_1___transform"
layer_names: "__simple_gru_1___recurrent_group"
layer_names: "__simple_gru_1__"
layer_names: "__last_seq_0__"
layer_names: "__last_seq_1__"
layer_names: "__fc_layer_0__"
layer_names: "label"
layer_names: "__cost_0__"
input_layer_names: "data_a"
input_layer_names: "data_b"
input_layer_names: "label"
output_layer_names: "__cost_0__"
evaluator_names: "classification_error_evaluator"
is_recurrent_layer_group: false
}
sub_models {
name: "__simple_gru_0___recurrent_group"
layer_names: "__simple_gru_0___transform@__simple_gru_0___recurrent_group"
layer_names: "__simple_gru_0__+delay1@__simple_gru_0___recurrent_group"
layer_names: "__simple_gru_0__@__simple_gru_0___recurrent_group"
is_recurrent_layer_group: true
reversed: false
memories {
layer_name: "__simple_gru_0__@__simple_gru_0___recurrent_group"
link_name: "__simple_gru_0__+delay1@__simple_gru_0___recurrent_group"
is_sequence: false
}
in_links {
layer_name: "__simple_gru_0___transform"
link_name: "__simple_gru_0___transform@__simple_gru_0___recurrent_group"
has_subseq: false
}
out_links {
layer_name: "__simple_gru_0__@__simple_gru_0___recurrent_group"
link_name: "__simple_gru_0__"
has_subseq: false
}
target_inlinkid: -1
}
sub_models {
name: "__simple_gru_1___recurrent_group"
layer_names: "__simple_gru_1___transform@__simple_gru_1___recurrent_group"
layer_names: "__simple_gru_1__+delay1@__simple_gru_1___recurrent_group"
layer_names: "__simple_gru_1__@__simple_gru_1___recurrent_group"
is_recurrent_layer_group: true
reversed: false
memories {
layer_name: "__simple_gru_1__@__simple_gru_1___recurrent_group"
link_name: "__simple_gru_1__+delay1@__simple_gru_1___recurrent_group"
is_sequence: false
}
in_links {
layer_name: "__simple_gru_1___transform"
link_name: "__simple_gru_1___transform@__simple_gru_1___recurrent_group"
has_subseq: false
}
out_links {
layer_name: "__simple_gru_1__@__simple_gru_1___recurrent_group"
link_name: "__simple_gru_1__"
has_subseq: false
}
target_inlinkid: -1
}
python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
浏览文件 @
c1f9cd9d
...
...
@@ -307,10 +307,10 @@ layers {
active_type: "tanh"
inputs {
input_layer_name: "__mixed_1__@__gru_group_0___recurrent_group"
input_parameter_name: "___gru_group_0__@__gru_group_0___recurrent_group.w0"
}
inputs {
input_layer_name: "__gru_group_0__+delay1@__gru_group_0___recurrent_group"
input_parameter_name: "___gru_group_0__@__gru_group_0___recurrent_group.w1"
}
bias_parameter_name: "___gru_group_0__@__gru_group_0___recurrent_group.wbias"
active_gate_type: "sigmoid"
...
...
@@ -462,14 +462,14 @@ parameters {
initial_smart: false
}
parameters {
name: "___gru_group_0__@__gru_group_0___recurrent_group.w
0
"
name: "___gru_group_0__@__gru_group_0___recurrent_group.w
1
"
size: 30000
initial_mean: 0.0
initial_std: 0.
0
1
initial_std: 0.1
dims: 100
dims: 300
initial_strategy: 0
initial_smart:
fals
e
initial_smart:
tru
e
}
parameters {
name: "___gru_group_0__@__gru_group_0___recurrent_group.wbias"
...
...
python/paddle/trainer_config_helpers/tests/configs/shared_gru.py
0 → 100644
浏览文件 @
c1f9cd9d
from
paddle.trainer_config_helpers
import
*
settings
(
learning_rate
=
1e-4
,
batch_size
=
1000
)
data_1
=
data_layer
(
name
=
'data_a'
,
size
=
100
)
data_2
=
data_layer
(
name
=
'data_b'
,
size
=
100
)
mixed_param
=
ParamAttr
(
name
=
'mixed_param'
)
gru_param
=
ParamAttr
(
name
=
'gru_param'
)
gru_bias
=
ParamAttr
(
name
=
'gru_bias'
,
initial_mean
=
0.
,
initial_std
=
0.
)
gru1
=
simple_gru
(
input
=
data_1
,
size
=
200
,
mixed_param_attr
=
mixed_param
,
mixed_bias_param_attr
=
False
,
gru_bias_attr
=
gru_bias
,
gru_param_attr
=
gru_param
)
gru2
=
simple_gru
(
input
=
data_2
,
size
=
200
,
mixed_param_attr
=
mixed_param
,
mixed_bias_param_attr
=
False
,
gru_bias_attr
=
gru_bias
,
gru_param_attr
=
gru_param
)
softmax_param
=
ParamAttr
(
name
=
'softmax_param'
)
predict
=
fc_layer
(
input
=
[
last_seq
(
input
=
gru1
),
last_seq
(
input
=
gru2
)],
size
=
10
,
param_attr
=
[
softmax_param
,
softmax_param
],
bias_attr
=
False
,
act
=
SoftmaxActivation
())
outputs
(
classification_cost
(
input
=
predict
,
label
=
data_layer
(
name
=
'label'
,
size
=
10
)))
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录