Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
ddc2c6ef
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
ddc2c6ef
编写于
4月 26, 2017
作者:
Y
Yu Yang
提交者:
GitHub
4月 26, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1893 from reyoung/Add_error_clipping_to_mt_demo
Add error clipping to MT demo.
上级
194e4920
da2adea9
变更
7
隐藏空白更改
内联
并排
Showing
7 changed files
with
120 additions
and
22 deletions
+120
-22
demo/seqToseq/seqToseq_net.py
demo/seqToseq/seqToseq_net.py
+18
-5
paddle/gserver/tests/sequence_layer_group.conf
paddle/gserver/tests/sequence_layer_group.conf
+1
-2
paddle/gserver/tests/sequence_nest_layer_group.conf
paddle/gserver/tests/sequence_nest_layer_group.conf
+1
-2
python/paddle/trainer_config_helpers/attrs.py
python/paddle/trainer_config_helpers/attrs.py
+9
-6
python/paddle/trainer_config_helpers/layers.py
python/paddle/trainer_config_helpers/layers.py
+74
-1
python/paddle/trainer_config_helpers/networks.py
python/paddle/trainer_config_helpers/networks.py
+16
-6
python/paddle/trainer_config_helpers/tests/configs/protostr/projections.protostr
...onfig_helpers/tests/configs/protostr/projections.protostr
+1
-0
未找到文件。
demo/seqToseq/seqToseq_net.py
浏览文件 @
ddc2c6ef
...
...
@@ -69,7 +69,8 @@ def gru_encoder_decoder(data_conf,
encoder_size
=
512
,
decoder_size
=
512
,
beam_size
=
3
,
max_length
=
250
):
max_length
=
250
,
error_clipping
=
50
):
"""
A wrapper for an attention version of GRU Encoder-Decoder network
is_generating: whether this config is used for generating
...
...
@@ -90,9 +91,19 @@ def gru_encoder_decoder(data_conf,
input
=
src_word_id
,
size
=
word_vector_dim
,
param_attr
=
ParamAttr
(
name
=
'_source_language_embedding'
))
src_forward
=
simple_gru
(
input
=
src_embedding
,
size
=
encoder_size
)
src_forward
=
simple_gru
(
input
=
src_embedding
,
size
=
encoder_size
,
naive
=
True
,
gru_layer_attr
=
ExtraLayerAttribute
(
error_clipping_threshold
=
error_clipping
))
src_backward
=
simple_gru
(
input
=
src_embedding
,
size
=
encoder_size
,
reverse
=
True
)
input
=
src_embedding
,
size
=
encoder_size
,
reverse
=
True
,
naive
=
True
,
gru_layer_attr
=
ExtraLayerAttribute
(
error_clipping_threshold
=
error_clipping
))
encoded_vector
=
concat_layer
(
input
=
[
src_forward
,
src_backward
])
with
mixed_layer
(
size
=
decoder_size
)
as
encoded_proj
:
...
...
@@ -117,11 +128,13 @@ def gru_encoder_decoder(data_conf,
decoder_inputs
+=
full_matrix_projection
(
input
=
context
)
decoder_inputs
+=
full_matrix_projection
(
input
=
current_word
)
gru_step
=
gru_step_layer
(
gru_step
=
gru_step_
naive_
layer
(
name
=
'gru_decoder'
,
input
=
decoder_inputs
,
output_mem
=
decoder_mem
,
size
=
decoder_size
)
size
=
decoder_size
,
layer_attr
=
ExtraLayerAttribute
(
error_clipping_threshold
=
error_clipping
))
with
mixed_layer
(
size
=
target_dict_dim
,
bias_attr
=
True
,
...
...
paddle/gserver/tests/sequence_layer_group.conf
浏览文件 @
ddc2c6ef
...
...
@@ -48,8 +48,7 @@ lstm = lstmemory_group(
size
=
hidden_dim
,
act
=
TanhActivation
(),
gate_act
=
SigmoidActivation
(),
state_act
=
TanhActivation
(),
lstm_layer_attr
=
ExtraLayerAttribute
(
error_clipping_threshold
=
50
))
state_act
=
TanhActivation
())
lstm_last
=
last_seq
(
input
=
lstm
)
...
...
paddle/gserver/tests/sequence_nest_layer_group.conf
浏览文件 @
ddc2c6ef
...
...
@@ -51,8 +51,7 @@ def lstm_group(lstm_group_input):
size
=
hidden_dim
,
act
=
TanhActivation
(),
gate_act
=
SigmoidActivation
(),
state_act
=
TanhActivation
(),
lstm_layer_attr
=
ExtraLayerAttribute
(
error_clipping_threshold
=
50
))
state_act
=
TanhActivation
())
return
lstm_output
...
...
python/paddle/trainer_config_helpers/attrs.py
浏览文件 @
ddc2c6ef
...
...
@@ -208,12 +208,15 @@ class ExtraLayerAttribute(object):
drop_rate
=
None
,
device
=
None
):
self
.
attr
=
dict
()
if
isinstance
(
error_clipping_threshold
,
float
):
assert
error_clipping_threshold
>
0
self
.
attr
[
"error_clipping_threshold"
]
=
error_clipping_threshold
if
isinstance
(
drop_rate
,
float
):
assert
drop_rate
>
0
if
error_clipping_threshold
is
not
None
:
error_clipping_threshold
=
float
(
error_clipping_threshold
)
if
error_clipping_threshold
<
0
:
raise
ValueError
(
"Error clipping must > 0"
)
self
.
attr
[
'error_clipping_threshold'
]
=
error_clipping_threshold
if
drop_rate
is
not
None
:
drop_rate
=
float
(
drop_rate
)
if
drop_rate
<
0
:
raise
ValueError
(
"Dropout rate must > 0"
)
self
.
attr
[
"drop_rate"
]
=
drop_rate
if
isinstance
(
device
,
int
):
...
...
python/paddle/trainer_config_helpers/layers.py
浏览文件 @
ddc2c6ef
...
...
@@ -84,6 +84,7 @@ __all__ = [
'GeneratedInput'
,
'SubsequenceInput'
,
'gru_step_layer'
,
'gru_step_naive_layer'
,
'recurrent_layer'
,
'BaseGeneratedInput'
,
'conv_operator'
,
...
...
@@ -2284,7 +2285,7 @@ def img_pool_layer(input,
type_name
=
pool_type
.
name
+
'-projection'
\
if
(
isinstance
(
pool_type
,
AvgPooling
)
or
isinstance
(
pool_type
,
MaxPooling
))
\
isinstance
(
pool_type
,
AvgPooling
)
or
isinstance
(
pool_type
,
MaxPooling
))
\
else
pool_type
.
name
pool_size_y
=
pool_size
if
pool_size_y
is
None
else
pool_size_y
...
...
@@ -3084,6 +3085,78 @@ def gru_step_layer(input,
activation
=
act
)
@wrap_bias_attr_default()
@wrap_param_attr_default()
@wrap_act_default(param_names=['gate_act'], act=SigmoidActivation())
@wrap_act_default(act=TanhActivation())
@wrap_name_default('gru_step')
@layer_support(ERROR_CLIPPING, DROPOUT)
def gru_step_naive_layer(input,
                         output_mem,
                         size=None,
                         name=None,
                         act=None,
                         gate_act=None,
                         bias_attr=None,
                         param_attr=None,
                         layer_attr=None):
    """
    GRU step layer assembled from mixed layers. Unlike a single fused step
    layer, it is declared to support ERROR_CLIPPING and DROPOUT.

    The input is read as three consecutive slices of width ``size``:
    offset 0 feeds the update gate, offset ``size`` feeds the reset gate and
    offset ``2 * size`` feeds the output candidate.

    :param input: Input layer; ``input.size`` must be a multiple of 3.
    :param output_mem: Layer holding the previous step output. It is projected
                       into both gates, multiplied by the reset gate, and
                       blended into the final output.
    :param size: Width of each gate and of the output. Defaults to
                 ``input.size // 3``.
    :param name: Name of the final output layer, also used as the prefix for
                 the helper layers ``<name>_update``, ``<name>_reset``,
                 ``<name>_reset_output`` and ``<name>_output_candidate``.
    :param act: Activation of the output candidate layer.
    :param gate_act: Activation of the update and reset gate layers.
    :param bias_attr: Bias attribute passed to the gate and candidate layers.
    :param param_attr: Parameter attribute passed to every full matrix
                       projection in this step.
    :param layer_attr: Extra layer attribute passed to the gate and candidate
                       layers (not to the reset-output or final output layer).
    :return: The mixed layer that sums ``output_mem``,
             ``-1.0 * output_mem * update_gate`` and
             ``output_candidate * update_gate``.
    :raises ValueError: If ``input.size`` is not divisible by 3.
    """
    if input.size % 3 != 0:
        raise ValueError("GruStep input size must be divided by 3")
    if size is None:
        # Floor division keeps the size an integer under true division too;
        # divisibility by 3 was verified above, so no information is lost.
        size = input.size // 3

    def __gate__(gate_name, offset):
        # One gate = slice of the input at `offset` + projection of the
        # previous output, passed through the gate activation.
        with mixed_layer(
                name=name + "_" + gate_name,
                size=size,
                layer_attr=layer_attr,
                bias_attr=bias_attr,
                act=gate_act) as gate:
            gate += identity_projection(input=input, offset=offset)
            gate += full_matrix_projection(
                input=output_mem, param_attr=param_attr)
        return gate

    update_gate = __gate__("update", 0)
    reset_gate = __gate__("reset", size)

    # Element-wise product of the previous output and the reset gate.
    with mixed_layer(
            name=name + "_reset_output", bias_attr=False) as reset_output:
        reset_output += dotmul_operator(a=output_mem, b=reset_gate)

    # Candidate output: third input slice + projection of the reset output.
    with mixed_layer(
            name=name + "_output_candidate",
            size=size,
            layer_attr=layer_attr,
            bias_attr=bias_attr,
            act=act) as output_candidate:
        output_candidate += identity_projection(input=input, offset=2 * size)
        output_candidate += full_matrix_projection(
            input=reset_output, param_attr=param_attr)

    # output = output_mem - output_mem * update_gate
    #          + output_candidate * update_gate
    with mixed_layer(name=name) as output:
        output += identity_projection(output_mem)
        output += dotmul_operator(a=output_mem, b=update_gate, scale=-1.0)
        output += dotmul_operator(a=output_candidate, b=update_gate)

    return output
@
wrap_name_default
()
@
layer_support
()
def
get_output_layer
(
input
,
arg_name
,
name
=
None
,
layer_attr
=
None
):
...
...
python/paddle/trainer_config_helpers/networks.py
浏览文件 @
ddc2c6ef
...
...
@@ -825,7 +825,8 @@ def gru_unit(input,
gru_param_attr
=
None
,
act
=
None
,
gate_act
=
None
,
gru_layer_attr
=
None
):
gru_layer_attr
=
None
,
naive
=
False
):
"""
Define calculations that a gated recurrent unit performs in a single time
step. This function itself is not a recurrent layer, so that it can not be
...
...
@@ -857,7 +858,12 @@ def gru_unit(input,
out_mem
=
memory
(
name
=
name
,
size
=
size
)
gru_out
=
gru_step_layer
(
if
naive
:
__step__
=
gru_step_naive_layer
else
:
__step__
=
gru_step_layer
gru_out
=
__step__
(
name
=
name
,
input
=
input
,
output_mem
=
out_mem
,
...
...
@@ -879,7 +885,8 @@ def gru_group(input,
gru_param_attr
=
None
,
act
=
None
,
gate_act
=
None
,
gru_layer_attr
=
None
):
gru_layer_attr
=
None
,
naive
=
False
):
"""
gru_group is a recurrent layer group version of Gated Recurrent Unit. It
does exactly the same calculation as the grumemory layer does. A promising
...
...
@@ -928,7 +935,8 @@ def gru_group(input,
gru_param_attr
=
gru_param_attr
,
act
=
act
,
gate_act
=
gate_act
,
gru_layer_attr
=
gru_layer_attr
)
gru_layer_attr
=
gru_layer_attr
,
naive
=
naive
)
return
recurrent_group
(
name
=
'%s_recurrent_group'
%
name
,
...
...
@@ -949,7 +957,8 @@ def simple_gru(input,
gru_param_attr
=
None
,
act
=
None
,
gate_act
=
None
,
gru_layer_attr
=
None
):
gru_layer_attr
=
None
,
naive
=
False
):
"""
You maybe see gru_step_layer, grumemory in layers.py, gru_unit, gru_group,
simple_gru in network.py. The reason why there are so many interfaces is
...
...
@@ -1018,7 +1027,8 @@ def simple_gru(input,
gru_param_attr
=
gru_param_attr
,
act
=
act
,
gate_act
=
gate_act
,
gru_layer_attr
=
gru_layer_attr
)
gru_layer_attr
=
gru_layer_attr
,
naive
=
naive
)
@
wrap_name_default
(
'simple_gru2'
)
...
...
python/paddle/trainer_config_helpers/tests/configs/protostr/projections.protostr
浏览文件 @
ddc2c6ef
...
...
@@ -320,6 +320,7 @@ layers {
}
}
drop_rate: 0.5
error_clipping_threshold: 40.0
}
parameters {
name: "___embedding_0__.w0"
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录