@layer_support(ERROR_CLIPPING, DROPOUT)
@wrap_name_default()
@wrap_act_default(act=LinearActivation())
def gated_unit_layer(input,
                     size,
                     act=None,
                     name=None,
                     gate_attr=None,
                     gate_bias_attr=True,
                     gate_param_attr=None,
                     inproj_param_attr=None,
                     inproj_bias_attr=True,
                     inproj_layer_attr=None,
                     layer_attr=None):
    """
    The gated unit layer implements a simple gating mechanism over the input.
    The input :math:`X` is first projected into a new space :math:`X'`, and
    it is also used to produce a gate weight :math:`\sigma`. The element-wise
    product between :math:`X'` and :math:`\sigma` is finally returned.

    Reference:
        Language Modeling with Gated Convolutional Networks
        https://arxiv.org/abs/1612.08083

    ..  math::
        y=\\text{act}(X \cdot W + b)\otimes \sigma(X \cdot V + c)

    The example usage is:

    .. code-block:: python

        gated_unit = gated_unit_layer(size=128, input=input_layer)

    :param input: input for this layer.
    :type input: LayerOutput
    :param size: output size of the gated unit.
    :type size: int
    :param act: activation type of the projected input.
    :type act: BaseActivation
    :param name: name of this layer.
    :type name: basestring
    :param gate_attr: Attributes to tune the gate output, for example, error
        clipping threshold, dropout and so on. See ExtraLayerAttribute for
        more details.
    :type gate_attr: ExtraLayerAttribute|None
    :param gate_bias_attr: Attributes to tune the learnable bias of the gate.
    :type gate_bias_attr: ParameterAttribute|None
    :param gate_param_attr: Attributes to tune the learnable projected matrix
        parameter of the gate.
    :type gate_param_attr: ParameterAttribute|None
    :param inproj_param_attr: Attributes to tune the learnable parameter of
        the projection of input.
    :type inproj_param_attr: ParameterAttribute|None
    :param inproj_layer_attr: Attributes to tune the projected input, for
        example, error clipping threshold, dropout and so on. See
        ExtraLayerAttribute for more details.
    :type inproj_layer_attr: ExtraLayerAttribute|None
    :param inproj_bias_attr: Attributes to tune the learnable bias of
        projection of the input.
    :type inproj_bias_attr: ParameterAttribute|None
    :param layer_attr: Attributes to tune the final output of the gated unit,
        for example, error clipping threshold, dropout and so on. See
        ExtraLayerAttribute for more details.
    :type layer_attr: ExtraLayerAttribute|None
    :return: LayerOutput object.
    :rtype: LayerOutput
    """

    assert isinstance(
        input, LayerOutput), 'The gated linear unit accepts only one input.'

    # Project the input into the target space; this branch carries the data.
    input_proj = fc_layer(
        input=input,
        name="%s_input_proj" % name,
        size=size,
        act=act,
        param_attr=inproj_param_attr,
        layer_attr=inproj_layer_attr,
        bias_attr=inproj_bias_attr)

    # A second projection squashed by sigmoid produces the per-unit gate.
    gate = fc_layer(
        size=size,
        name="%s_gate" % name,
        act=SigmoidActivation(),
        input=input,
        param_attr=gate_param_attr,
        layer_attr=gate_attr,
        bias_attr=gate_bias_attr)
    # Element-wise product of the projected input and the gate.
    return mixed_layer(
        name="%s_gated_act" % name,
        input=dotmul_operator(input_proj, gate),
        layer_attr=layer_attr)
"___gated_unit_layer_0___input_proj.w0" + } + bias_parameter_name: "___gated_unit_layer_0___input_proj.wbias" + error_clipping_threshold: 100.0 +} +layers { + name: "__gated_unit_layer_0___gate" + type: "fc" + size: 512 + active_type: "sigmoid" + inputs { + input_layer_name: "input" + input_parameter_name: "___gated_unit_layer_0___gate.w0" + } + bias_parameter_name: "___gated_unit_layer_0___gate.wbias" + error_clipping_threshold: 100.0 +} +layers { + name: "__gated_unit_layer_0___gated_act" + type: "mixed" + size: 512 + active_type: "" + inputs { + input_layer_name: "__gated_unit_layer_0___input_proj" + } + inputs { + input_layer_name: "__gated_unit_layer_0___gate" + } + error_clipping_threshold: 100.0 + operator_confs { + type: "dot_mul" + input_indices: 0 + input_indices: 1 + input_sizes: 512 + input_sizes: 512 + output_size: 512 + dotmul_scale: 1 + } +} +parameters { + name: "___gated_unit_layer_0___input_proj.w0" + size: 131072 + initial_mean: 0.0 + initial_std: 0.0001 + dims: 256 + dims: 512 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___gated_unit_layer_0___input_proj.wbias" + size: 512 + initial_mean: 0.0 + initial_std: 1 + dims: 1 + dims: 512 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___gated_unit_layer_0___gate.w0" + size: 131072 + initial_mean: 0.0 + initial_std: 0.0001 + dims: 256 + dims: 512 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___gated_unit_layer_0___gate.wbias" + size: 512 + initial_mean: 0.0 + initial_std: 1 + dims: 1 + dims: 512 + initial_strategy: 0 + initial_smart: false +} +input_layer_names: "input" +output_layer_names: "__gated_unit_layer_0___gated_act" +sub_models { + name: "root" + layer_names: "input" + layer_names: "__gated_unit_layer_0___input_proj" + layer_names: "__gated_unit_layer_0___gate" + layer_names: "__gated_unit_layer_0___gated_act" + input_layer_names: "input" + output_layer_names: "__gated_unit_layer_0___gated_act" + 
from paddle.trainer_config_helpers import *

# Regression config: a single gated unit on top of a 256-d data layer.
# The generated protostr is compared against the checked-in golden file.
din = data_layer(name='input', size=256)

outputs(
    gated_unit_layer(
        input=din,
        size=512,
        act=TanhActivation(),
        gate_attr=ExtraLayerAttribute(error_clipping_threshold=100.0),
        gate_bias_attr=ParamAttr(initial_std=1),
        gate_param_attr=ParamAttr(initial_std=1e-4),
        inproj_param_attr=ParamAttr(initial_std=1e-4),
        inproj_bias_attr=ParamAttr(initial_std=1),
        inproj_layer_attr=ExtraLayerAttribute(error_clipping_threshold=100.0),
        layer_attr=ExtraLayerAttribute(error_clipping_threshold=100.0)))