@config_layer('sub_nested_seq')
class SubNestedSequenceLayer(LayerBase):
    """Config-parser class registered for the ``sub_nested_seq`` layer type.

    The layer takes exactly two inputs: the first is a nested sequence, the
    second is a score (or distribution) over each sequence in it and must
    have size 1. ``top_k`` is recorded in the layer config.
    """

    def __init__(self, name, inputs, top_k=1, bias=False, **xargs):
        """
        :param name: layer name, forwarded to LayerBase.
        :param inputs: the two inputs described in the class docstring.
        :param top_k: value stored in ``self.config.top_k``.
        :param bias: accepted for signature compatibility; this layer does
                     not read it.
        :param xargs: extra keyword arguments forwarded to LayerBase.
        """
        super(SubNestedSequenceLayer, self).__init__(
            name, 'sub_nested_seq', 0, inputs=inputs, **xargs)
        config_assert(
            len(inputs) == 2,
            ('SubNestedSequenceLayer must have 2 inputs: '
             'input1 is a nested sequence; input2 is a learnable distribution '
             'or scores over each sentence in the nested sequence. '))

        # The layer size is taken from the first (nested sequence) input.
        input_layer0 = self.get_input_layer(0)
        self.set_layer_size(input_layer0.size)

        self.config.top_k = top_k

        # Use config_assert rather than a bare assert so the check is not
        # stripped under ``python -O`` and reports a readable message, the
        # same way the input-count check above does.
        input_layer1 = self.get_input_layer(1)
        config_assert(
            input_layer1.size == 1,
            ('The second input of SubNestedSequenceLayer (the scores over '
             'each sequence) must have size 1.'))
'convex_comb_layer', - 'ctc_layer', - 'warp_ctc_layer', - 'crf_layer', - 'crf_decoding_layer', - 'nce_layer', - 'cross_entropy_with_selfnorm', - 'cross_entropy', - 'multi_binary_label_cross_entropy', - 'sum_cost', - 'rank_cost', - 'lambda_cost', - 'huber_cost', - 'block_expand_layer', - 'maxout_layer', - 'out_prod_layer', - 'printer_layer', - 'print_layer', - 'priorbox_layer', - 'cross_channel_norm_layer', - 'multibox_loss_layer', - 'detection_output_layer', - 'spp_layer', - 'pad_layer', - 'eos_layer', - 'smooth_l1_cost', - 'layer_support', - 'multiplex_layer', - 'row_conv_layer', - 'dropout_layer', - 'prelu_layer', - 'gated_unit_layer', - 'crop_layer', + 'full_matrix_projection', 'AggregateLevel', 'ExpandLevel', + 'identity_projection', 'dotmul_projection', 'dotmul_operator', + 'repeat_layer', 'seq_reshape_layer', 'table_projection', 'mixed_layer', + 'data_layer', 'embedding_layer', 'fc_layer', 'grumemory', 'pooling_layer', + 'lstmemory', 'last_seq', 'first_seq', 'cos_sim', 'hsigmoid', + 'conv_projection', 'mse_cost', 'regression_cost', 'classification_cost', + 'LayerOutput', 'img_conv_layer', 'img_pool_layer', 'batch_norm_layer', + 'img_cmrnorm_layer', 'addto_layer', 'concat_layer', 'seq_concat_layer', + 'lstm_step_layer', 'recurrent_group', 'memory', 'StaticInput', + 'expand_layer', 'scaling_layer', 'scaling_projection', 'power_layer', + 'interpolation_layer', 'bilinear_interp_layer', 'trans_layer', + 'rotate_layer', 'sum_to_one_norm_layer', 'get_output_layer', 'LayerType', + 'context_projection', 'beam_search', 'maxid_layer', 'GeneratedInput', + 'SubsequenceInput', 'gru_step_layer', 'gru_step_naive_layer', + 'recurrent_layer', 'BaseGeneratedInput', 'conv_operator', + 'conv_shift_layer', 'tensor_layer', 'selective_fc_layer', + 'sampling_id_layer', 'slope_intercept_layer', + 'trans_full_matrix_projection', 'linear_comb_layer', 'convex_comb_layer', + 'ctc_layer', 'warp_ctc_layer', 'crf_layer', 'crf_decoding_layer', + 'nce_layer', 'cross_entropy_with_selfnorm', 
@wrap_name_default()
@layer_support()
def sub_nested_seq_layer(input, name=None, top_k=1):
    """
    The sub_nested_seq_layer accepts two inputs: the first one is a nested
    sequence; the second one is a learnable score or distribution over each
    sequence in the nested sequence.

    Then sub_nested_seq_layer selects the top k sequences with the highest
    scores or probabilities according to the second input.

    The example usage is:

    .. code-block:: python

        prob = fc_layer(input=data, size=1, act=SequenceSoftmaxActivation())
        sub_nest_seq = sub_nested_seq_layer(input=[data, prob], top_k=3)

    :param input: The two input layers, given as a sequence of length 2.
                  The first input must be a nested sequence. The second
                  input is a learnable score, whose size must be 1.
    :type input: list | tuple of LayerOutput
    :param name: name of this layer.
    :type name: basestring
    :param top_k: number of sequences with the highest probabilities to
                  select.
    :type top_k: int
    :return: LayerOutput object.
    :rtype: LayerOutput
    """
    assert isinstance(input, collections.Sequence) and len(input) == 2, (
        'sub_nested_seq_layer has exactly two inputs.')
    # Build the underlying config layer; its computed size is reused for
    # the LayerOutput returned to the caller.
    layer = Layer(
        inputs=[x.name for x in input],
        name=name,
        top_k=top_k,
        type=LayerType.SUB_NESTED_SEQ)
    return LayerOutput(
        name=name,
        layer_type=LayerType.SUB_NESTED_SEQ,
        parents=input,
        size=layer.config.size)
#!/usr/bin/env python
#coding=utf-8
from paddle.trainer_config_helpers import *

# Config used to generate the expected protostr for sub_nested_seq_layer:
# a 300-wide data layer, a size-1 fc layer with sequence-softmax activation
# that produces the scores, and the selection layer fed with both.
seq_input = data_layer(name='input', size=300)
seq_scores = fc_layer(
    input=seq_input, size=1, act=SequenceSoftmaxActivation())

outputs(sub_nested_seq_layer(input=[seq_input, seq_scores], top_k=1))