Unverified commit 86d7cc97, authored by whs, committed by GitHub

Add bias for gru_unit_op and fix activation function (#10087)

Parent 0d94ab13
@@ -699,8 +699,8 @@ def dynamic_gru(input,
 def gru_unit(input,
              hidden,
              size,
-             weight=None,
-             bias=None,
+             param_attr=None,
+             bias_attr=None,
              activation='tanh',
              gate_activation='sigmoid'):
     """
@@ -731,8 +731,8 @@ def gru_unit(input,
         input (Variable): The fc transformed input value of current step.
         hidden (Variable): The hidden value of lstm unit from previous step.
         size (integer): The input dimension value.
-        weight (ParamAttr): The weight parameters for gru unit. Default: None
-        bias (ParamAttr): The bias parameters for gru unit. Default: None
+        param_attr (ParamAttr): The weight parameters for gru unit. Default: None
+        bias_attr (ParamAttr): The bias parameters for gru unit. Default: None
         activation (string): The activation type for cell (actNode).
             Default: 'tanh'
         gate_activation (string): The activation type for gates (actGate).
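Because param_attr and bias_attr are ParamAttr objects rather than raw variables, initializers (and, if desired, regularizers) can be attached to them. A hedged sketch, assuming the fluid.initializer module of this release; all names are illustrative:

    import paddle.fluid as fluid

    w_attr = fluid.ParamAttr(
        name='gru_unit_w',
        initializer=fluid.initializer.Xavier())
    b_attr = fluid.ParamAttr(
        name='gru_unit_b',
        initializer=fluid.initializer.Constant(value=0.0))

Note that, per the body diff below, the bias parameter is created and wired into the op's Bias input only when helper.bias_attr is truthy.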
@@ -764,34 +764,31 @@ def gru_unit(input,
     size = size / 3
 
     # create weight
-    if weight is None:
-        weight = helper.create_parameter(
-            attr=helper.param_attr, shape=[size, 3 * size], dtype=dtype)
+    weight = helper.create_parameter(
+        attr=helper.param_attr, shape=[size, 3 * size], dtype=dtype)
+
+    gate = helper.create_tmp_variable(dtype)
+    reset_hidden_pre = helper.create_tmp_variable(dtype)
+    updated_hidden = helper.create_tmp_variable(dtype)
+    inputs = {'Input': input, 'HiddenPrev': hidden, 'Weight': weight}
 
     # create bias
-    if bias is None:
+    if helper.bias_attr:
         bias_size = [1, 3 * size]
         bias = helper.create_parameter(
             attr=helper.bias_attr, shape=bias_size, dtype=dtype, is_bias=True)
+        inputs['Bias'] = bias
 
-    gate = helper.create_tmp_variable(dtype)
-    reset_hidden_pre = helper.create_tmp_variable(dtype)
-    updated_hidden = helper.create_tmp_variable(dtype)
-
     helper.append_op(
         type='gru_unit',
-        inputs={'Input': input,
-                'HiddenPrev': hidden,
-                'Weight': weight},
+        inputs=inputs,
         outputs={
             'Gate': gate,
             'ResetHiddenPrev': reset_hidden_pre,
             'Hidden': updated_hidden,
         },
         attrs={
-            'activation': 0,
-            'gate_activation': 1,
+            'activation': 2,  # tanh
+            'gate_activation': 1,  # sigmoid
         })
 
     return updated_hidden, reset_hidden_pre, gate
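The attrs change is the actual bug fix: the op encodes activations as integers, and 0 previously selected the identity function even though the Python-level default promised 'tanh'. Assuming the enum order on the op side is identity = 0, sigmoid = 1, tanh = 2, relu = 3, the patch hard-codes 2 for the cell and 1 for the gates. A NumPy sketch of the computation the op performs, using one common GRU convention; the column layout of the weight, the direction of the update-gate interpolation, and the bias being added to the full 3 * D pre-activation (matching its [1, 3 * size] shape) are all assumptions here:

    import numpy as np

    def sigmoid(x):
        return 1.0 / (1.0 + np.exp(-x))

    def gru_unit_np(x_fc, h_prev, w, b):
        # x_fc: (batch, 3D) fc-transformed input; h_prev: (batch, D)
        # w: (D, 3D) recurrent weight; b: (1, 3D) bias
        D = h_prev.shape[1]
        pre = x_fc + b                            # bias joins the pre-activations
        pre[:, :2 * D] += h_prev @ w[:, :2 * D]   # recurrent part of both gates
        u = sigmoid(pre[:, :D])                   # update gate: gate_activation = 1
        r = sigmoid(pre[:, D:2 * D])              # reset gate:  gate_activation = 1
        reset_hidden_pre = r * h_prev             # the op's ResetHiddenPrev output
        c = np.tanh(pre[:, 2 * D:] +              # candidate:   activation = 2
                    reset_hidden_pre @ w[:, 2 * D:])
        updated_hidden = u * c + (1.0 - u) * h_prev
        gate = np.concatenate([u, r, c], axis=1)  # the op's Gate output
        return updated_hidden, reset_hidden_pre, gate

With the old attrs value of 0, the candidate c was left as a raw linear combination (identity), silently diverging from the documented default.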