diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index f0ee46262d3d78b830bf03a0639f6613b0303e11..78aa0778f8d1dca9fae82f0411be5a00e636cbc9 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -5879,19 +5879,19 @@ def prelu_layer(input,
         size=l.config.size)
 
 
-@layer_support(ERROR_CLIPPING, DROPOUT)
 @wrap_name_default()
+@layer_support(ERROR_CLIPPING, DROPOUT)
 @wrap_act_default(act=LinearActivation())
 def gated_unit_layer(input,
                      size,
                      act=None,
                      name=None,
                      gate_attr=None,
-                     gate_bias_attr=True,
                      gate_param_attr=None,
+                     gate_bias_attr=True,
+                     inproj_attr=None,
                      inproj_param_attr=None,
                      inproj_bias_attr=True,
-                     inproj_layer_attr=None,
                      layer_attr=None):
     """
     The gated unit layer implements a simple gating mechanism over the input.
@@ -5923,18 +5923,18 @@ def gated_unit_layer(input,
         clipping threshold, dropout and so on. See
         ExtraLayerAttribute for more details.
     :type gate_attr: ExtraLayerAttribute|None
-    :param gate_bias_attr: Attributes to tune the learnable bias of the gate.
-    :type gate_bias_attr: ParameterAttribute|None
     :param gate_param_attr: Attributes to tune the learnable projected matrix
         parameter of the gate.
     :type gate_param_attr: ParameterAttribute|None
+    :param gate_bias_attr: Attributes to tune the learnable bias of the gate.
+    :type gate_bias_attr: ParameterAttribute|None
+    :param inproj_attr: Attributes to tune the projected input, for
+        example, error clipping threshold, dropout and so on. See
+        ExtraLayerAttribute for more details.
+    :type inproj_attr: ExtraLayerAttribute|None
     :param inproj_param_attr: Attributes to tune the learnable parameter of
         the projection of input.
     :type inproj_param_attr: ParameterAttribute|None
-    :param inproj_layer_attr: Attributes to the tune the projected input, for
-        example, error clipping threshold, dropout and so on. See
-        ExtraLayerAttribute for more details.
-    :type inproj_layer_attr: ExtraLayerAttribute|None
     :param inproj_bias_attr: Attributes to tune the learnable bias of
         projection of the input.
     :type inproj_bias_attr: ParameterAttribute|None
@@ -5954,8 +5954,8 @@ def gated_unit_layer(input,
         name="%s_input_proj" % name,
         size=size,
         act=act,
+        layer_attr=inproj_attr,
         param_attr=inproj_param_attr,
-        layer_attr=inproj_layer_attr,
         bias_attr=inproj_bias_attr)
 
     gate = fc_layer(
@@ -5963,8 +5963,8 @@ def gated_unit_layer(input,
         name="%s_gate" % name,
         act=SigmoidActivation(),
         input=input,
-        param_attr=gate_param_attr,
         layer_attr=gate_attr,
+        param_attr=gate_param_attr,
         bias_attr=gate_bias_attr)
     return mixed_layer(
         name="%s_gated_act" % name,
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_gated_unit_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_gated_unit_layer.py
index 83aa51bf2864ff87c0cc730799fc701d91bfc10f..9dab45519c65b0ca686558ec7fe2064bb9ad8824 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_gated_unit_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_gated_unit_layer.py
@@ -5,11 +5,11 @@
 glu = gated_unit_layer(
     size=512,
     input=data,
     act=TanhActivation(),
-    gate_param_attr=ParamAttr(initial_std=1e-4),
     gate_attr=ExtraLayerAttribute(error_clipping_threshold=100.0),
+    gate_param_attr=ParamAttr(initial_std=1e-4),
     gate_bias_attr=ParamAttr(initial_std=1),
+    inproj_attr=ExtraLayerAttribute(error_clipping_threshold=100.0),
     inproj_param_attr=ParamAttr(initial_std=1e-4),
-    inproj_layer_attr=ExtraLayerAttribute(error_clipping_threshold=100.0),
     inproj_bias_attr=ParamAttr(initial_std=1),
     layer_attr=ExtraLayerAttribute(error_clipping_threshold=100.0))