diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index d45e34b83c9049f37b52ce71bc16719fa6e68f9c..b50b73c7e169f3e8ae75322d9a0a3cad5072a9c7 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -497,8 +497,6 @@ message LayerConfig { repeated uint32 offset = 55; repeated uint32 shape = 56; - // for sub_nest_seq layer to select top k sequence with highest scores - optional uint32 top_k = 57 [default = 1]; } message EvaluatorConfig { diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 43a6914a5090ad53a33c1c60672c9edeb237afa9..c8fc49e20da2e212330e0cccc10fbeb4e25b87a8 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -2659,22 +2659,28 @@ class SubSequenceLayer(LayerBase): @config_layer('sub_nested_seq') class SubNestedSequenceLayer(LayerBase): - def __init__(self, name, inputs, top_k=1, bias=False, **xargs): + def __init__(self, name, inputs, selected_indices, bias=False, **xargs): + if isinstance(inputs, list): + assert len(inputs) == 1, ('the first input of sub_nested_seq ' + 'layer is a single nested sequence.') + inputs = inputs[0] + if isinstance(selected_indices, list): + assert len(selected_indices) == 1, ( + 'the second input of ' + 'sub_nested_seq layer is a single layer which is a ' + 'set of selected indices.') + selected_indices = selected_indices[0] + super(SubNestedSequenceLayer, self).__init__( - name, 'sub_nested_seq', 0, inputs=inputs, **xargs) - config_assert( - len(inputs) == 2, - ('SubNestSequenceLayer must have 2 inputs: ' - 'input1 is a nested sequence; input2 is a learnable distribution ' - 'or scores over each sentence in the nested sequence. 
')) + name, + 'sub_nested_seq', + 0, + inputs=[inputs, selected_indices], + **xargs) input_layer0 = self.get_input_layer(0) size = input_layer0.size self.set_layer_size(size) - self.config.top_k = top_k - input_layer1 = self.get_input_layer(1) - assert (input_layer1.size == 1) - @config_layer('out_prod') class OuterProdLayer(LayerBase): diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 179a009c3d97a45d5974f922cbe5b437c6232400..ebbe95a0c72b9d4e137df1ca3e66add3b4d8b4ed 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -6092,37 +6092,41 @@ def crop_layer(input, offset, axis=2, shape=None, name=None, layer_attr=None): @wrap_name_default() @layer_support() -def sub_nested_seq_layer(input, name=None, top_k=1): +def sub_nested_seq_layer(input, selected_indices, name=None): """ The sub_nested_seq_layer accepts two inputs: the first one is a nested - sequence in PaddlePaddle; the second one is a learnable score or - distribution over each sequence in the nested sequence. + sequence; the second one is a set of selected indices in the nested sequence. - Then sub_nest_seq_layer selects top k sentences with highest scores or - probabilites according to the second input. + + Then sub_nested_seq_layer trims the first input according to the + selected indices to give a new output. This layer is used in beam training. The example usage is: .. code-block:: python - prob = fc_layer(input=data, size=1, act=SequenceSoftmaxActivation()) - sub_nest_seq = sub_nested_seq_layer(input=[data, prob], top_k=3) + sub_nest_seq = sub_nested_seq_layer(input=data, selected_indices=selected_ids) + - :param input: The two input layers. The first input must be a nested - sequence. The second input is a learnable scores, whose size must be 1. + :param input: A nested sequence. + :type input: LayerOutput + :param selected_indices: a set of sequence indices in the nested sequence. 
- :type input: LayerOutput + :type selected_indices: LayerOutput :param name: name of this layer. :type name: basestring - :param top_k: number of sequences with highest probabilies to select. - :type top_k: int :return: LayerOutput object. :rtype: LayerOutput """ - assert isinstance(input, collections.Sequence) and len(input) == 2, ( - 'sub_nest_seq_layer has exactly two inputs.') + assert isinstance(input, LayerOutput), ( + 'The first input of ' + 'sub_nested_seq_layer must be a Paddle layer.') + assert isinstance(selected_indices, LayerOutput), ( + 'The second input of ' + 'sub_nested_seq_layer must be a Paddle layer.') + l = Layer( - inputs=[x.name for x in input], + inputs=input.name, + selected_indices=selected_indices.name, name=name, - top_k=top_k, type=LayerType.SUB_NESTED_SEQ) return LayerOutput( name=name, diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_select_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_select_layers.protostr index 8f41be104293fcafc74209c8c761764389e8fb47..4b906b113e3c0569d5576127e100d097e4923436 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_select_layers.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_select_layers.protostr @@ -1,20 +1,15 @@ type: "nn" layers { - name: "input" + name: "input_seq" type: "data" size: 300 active_type: "" } layers { - name: "__fc_layer_0__" - type: "fc" - size: 1 - active_type: "sequence_softmax" - inputs { - input_layer_name: "input" - input_parameter_name: "___fc_layer_0__.w0" - } - bias_parameter_name: "___fc_layer_0__.wbias" + name: "input" + type: "data" + size: 5 + active_type: "" } layers { name: "__sub_nested_seq_layer_0__" @@ -22,41 +17,20 @@ layers { size: 300 active_type: "" inputs { - input_layer_name: "input" + input_layer_name: "input_seq" } inputs { - input_layer_name: "__fc_layer_0__" + input_layer_name: "input" } - top_k: 1 -} -parameters { - name: "___fc_layer_0__.w0" - size: 300 - 
initial_mean: 0.0 - initial_std: 0.057735026919 - dims: 300 - dims: 1 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___fc_layer_0__.wbias" - size: 1 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 1 - initial_strategy: 0 - initial_smart: false } -input_layer_names: "input" +input_layer_names: "input_seq" output_layer_names: "__sub_nested_seq_layer_0__" sub_models { name: "root" + layer_names: "input_seq" layer_names: "input" - layer_names: "__fc_layer_0__" layer_names: "__sub_nested_seq_layer_0__" - input_layer_names: "input" + input_layer_names: "input_seq" output_layer_names: "__sub_nested_seq_layer_0__" is_recurrent_layer_group: false } diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_seq_select_layers.py b/python/paddle/trainer_config_helpers/tests/configs/test_seq_select_layers.py index f2553f6b6aff349179ff6786376c569622eece01..6d1c3175ba9801d69f3f9cb9e754858253192270 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/test_seq_select_layers.py +++ b/python/paddle/trainer_config_helpers/tests/configs/test_seq_select_layers.py @@ -2,8 +2,10 @@ #coding=utf-8 from paddle.trainer_config_helpers import * -data = data_layer(name='input', size=300) -prob = fc_layer(input=data, size=1, act=SequenceSoftmaxActivation()) -sub_nest_seq = sub_nested_seq_layer(input=[data, prob], top_k=1) +beam_size = 5 + +data = data_layer(name='input_seq', size=300) +selected_ids = data_layer(name='input', size=beam_size) +sub_nest_seq = sub_nested_seq_layer(input=data, selected_indices=selected_ids) outputs(sub_nest_seq)