diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index da99e5bd53458aa0cb201a3525e28c66ab63c52d..a24299787bfd6d9d1a9b01ba3117c3ec863f9552 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1602,6 +1602,21 @@ class MultiClassCrossEntropySelfNormCostLayer(LayerBase): self.config.softmax_selfnorm_alpha = softmax_selfnorm_alpha +@config_layer('cross_entropy_over_beam') +class CrossEntropyOverBeamLayer(LayerBase): + def __init__(self, name, inputs, **xargs): + config_assert(len(inputs) % 3 == 0, "Error input numbers.") + super(CrossEntropyOverBeamLayer, self).__init__( + name, 'cross_entropy_over_beam', 0, inputs, **xargs) + input_num = len(inputs) / 3 + for i in range(input_num): + input_layer = self.get_input_layer(i * 2) + config_assert( + input_layer.size == 1, "Inputs for this layer are made up of " + "several pairs and the first one in a pair is scores for " + "all the candidates, so its size should be equal to 1.") + + @config_layer('fc') class FCLayer(LayerBase): layer_type = 'fc' @@ -2249,6 +2264,6 @@ def define_cost(class_name, cost_type): define_cost('MultiClassCrossEntropy', 'multi-class-cross-entropy') define_cost('RankingCost', 'rank-cost') define_cost('AucValidation', 'auc-validation') define_cost('PnpairValidation', 'pnpair-validation') diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 1bc55c869601551aff5fc0311458f906385522d2..2b01b6ad4d79031aa16a583937eb8444d91cbf3a 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- import functools import collections import inspect @@ -104,6 +103,7 @@ __all__ = [ 'nce_layer', 'cross_entropy_with_selfnorm', 'cross_entropy', + 'cross_entropy_over_beam', 'multi_binary_label_cross_entropy', 'sum_cost', 'rank_cost', @@ -219,6 +219,7 @@ class LayerType(object): HUBER = 'huber' CROSS_ENTROPY = 'multi-class-cross-entropy' CROSS_ENTROPY_WITH_SELFNORM = 'multi_class_cross_entropy_with_selfnorm' + CROSS_ENTROPY_OVER_BEAM = 'cross_entropy_over_beam' SOFT_BIN_CLASS_CROSS_ENTROPY = 'soft_binary_class_cross_entropy' MULTI_BIN_LABEL_CROSS_ENTROPY = 'multi_binary_label_cross_entropy' SUM_COST = 'sum_cost' @@ -4028,8 +4029,12 @@ def __cost_input__(input, label, weight=None): """ inputs and parents for cost layers. """ - ipts = [Input(input.name), Input(label.name)] - parents = [input, label] + if isinstance(input, LayerOutput): + input = [input] + if isinstance(label, LayerOutput): + label = [label] + ipts = [Input(ipt.name) for ipt in (input + label)] + parents = [ipt for ipt in (input + label)] if weight is not None: assert weight.size == 1 ipts.append(Input(weight.name)) @@ -5692,6 +5697,29 @@ def multi_binary_label_cross_entropy(input, size=1) +@wrap_name_default() +@layer_support() +def cross_entropy_over_beam(input, label, name=None, coeff=1.0, weight=None): + """ + A cost layer that computes cross entropy over beam search candidates: inputs are (score, candidate-indices) pairs for each beam expansion, labels give the gold candidates. + """ + + assert len(input) / 2 == len(label), "Error input numbers." 
+ for i in range(0, len(input), 2): + assert (input[i].size == 1), ( + "Inputs for this layer are made up of " + "several pairs and the first one in a pair is scores for " + "all the candidates, so its size should be equal to 1.") + + ipts, parents = __cost_input__(input, label, weight) + Layer( + name=name, + type=LayerType.CROSS_ENTROPY_OVER_BEAM, + inputs=ipts, + coeff=coeff) + return LayerOutput(name, LayerType.CROSS_ENTROPY_OVER_BEAM, parents=parents, size=1) + + @wrap_name_default() @layer_support() def smooth_l1_cost(input, label, name=None, coeff=1.0, layer_attr=None): diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh index a61beb871ad064c617fa141451afcb2a5ac64854..130e6332a7cf58d0fe54dddcaf05eedd161fd112 100755 --- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh +++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh @@ -8,6 +8,6 @@ test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer -test_kmax_seq_socre_layer test_seq_select_layers) +test_kmax_seq_socre_layer test_seq_select_layers test_cross_entropy_over_beam) export whole_configs=(test_split_datasource) diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr new file mode 100644 index 0000000000000000000000000000000000000000..e44478ec2ba1fbbcc935f418540441f99fda6d4e --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr @@ -0,0 +1,208 @@ +type: "nn" +layers { + name: "sentence_states" + type: "data" + size: 32 + 
active_type: "" +} +layers { + name: "sentence_scores" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "__kmax_sequence_score_layer_0__" + type: "kmax_seq_score" + active_type: "" + inputs { + input_layer_name: "sentence_scores" + } + beam_size: 5 +} +layers { + name: "__sub_nested_seq_layer_0__" + type: "sub_nested_seq" + size: 32 + active_type: "" + inputs { + input_layer_name: "sentence_states" + } + inputs { + input_layer_name: "__kmax_sequence_score_layer_0__" + } +} +layers { + name: "__fc_layer_0__" + type: "fc" + size: 1 + active_type: "" + inputs { + input_layer_name: "__sub_nested_seq_layer_0__" + input_parameter_name: "___fc_layer_0__.w0" + } + bias_parameter_name: "___fc_layer_0__.wbias" +} +layers { + name: "__kmax_sequence_score_layer_1__" + type: "kmax_seq_score" + active_type: "" + inputs { + input_layer_name: "sentence_scores" + } + beam_size: 5 +} +layers { + name: "__seq_slice_layer_0__" + type: "seq_slice" + size: 32 + active_type: "" + inputs { + input_layer_name: "__sub_nested_seq_layer_0__" + } + inputs { + input_layer_name: "__kmax_sequence_score_layer_1__" + } + select_first: true +} +layers { + name: "__fc_layer_1__" + type: "fc" + size: 1 + active_type: "" + inputs { + input_layer_name: "__seq_slice_layer_0__" + input_parameter_name: "___fc_layer_1__.w0" + } + bias_parameter_name: "___fc_layer_1__.wbias" +} +layers { + name: "__kmax_sequence_score_layer_2__" + type: "kmax_seq_score" + active_type: "" + inputs { + input_layer_name: "__fc_layer_1__" + } + beam_size: 5 +} +layers { + name: "sentences_ids" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "start_ids" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "end_ids" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "__cross_entropy_over_beam_0__" + type: "cross_entropy_over_beam" + active_type: "" + inputs { + input_layer_name: "sentence_scores" + } + inputs { + input_layer_name: "__kmax_sequence_score_layer_0__" + } + inputs 
{ + input_layer_name: "__fc_layer_0__" + } + inputs { + input_layer_name: "__kmax_sequence_score_layer_1__" + } + inputs { + input_layer_name: "__fc_layer_1__" + } + inputs { + input_layer_name: "__kmax_sequence_score_layer_2__" + } + inputs { + input_layer_name: "sentences_ids" + } + inputs { + input_layer_name: "start_ids" + } + inputs { + input_layer_name: "end_ids" + } + coeff: 1.0 +} +parameters { + name: "___fc_layer_0__.w0" + size: 32 + initial_mean: 0.0 + initial_std: 0.176776695297 + dims: 32 + dims: 1 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___fc_layer_0__.wbias" + size: 1 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 1 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___fc_layer_1__.w0" + size: 32 + initial_mean: 0.0 + initial_std: 0.176776695297 + dims: 32 + dims: 1 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___fc_layer_1__.wbias" + size: 1 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 1 + initial_strategy: 0 + initial_smart: false +} +input_layer_names: "sentence_scores" +input_layer_names: "sentence_states" +input_layer_names: "sentences_ids" +input_layer_names: "start_ids" +input_layer_names: "end_ids" +output_layer_names: "__cross_entropy_over_beam_0__" +sub_models { + name: "root" + layer_names: "sentence_states" + layer_names: "sentence_scores" + layer_names: "__kmax_sequence_score_layer_0__" + layer_names: "__sub_nested_seq_layer_0__" + layer_names: "__fc_layer_0__" + layer_names: "__kmax_sequence_score_layer_1__" + layer_names: "__seq_slice_layer_0__" + layer_names: "__fc_layer_1__" + layer_names: "__kmax_sequence_score_layer_2__" + layer_names: "sentences_ids" + layer_names: "start_ids" + layer_names: "end_ids" + layer_names: "__cross_entropy_over_beam_0__" + input_layer_names: "sentence_scores" + input_layer_names: "sentence_states" + input_layer_names: "sentences_ids" + input_layer_names: "start_ids" + input_layer_names: "end_ids" + 
output_layer_names: "__cross_entropy_over_beam_0__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py b/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py new file mode 100644 index 0000000000000000000000000000000000000000..edc2d32fca1c911ad72277b5175578565443b783 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python +#coding=utf-8 + +from paddle.trainer_config_helpers import * +beam_size = 5 + +# the first beam expansion. +sentence_states = data_layer(name="sentence_states", size=32) +sentence_scores = data_layer(name="sentence_scores", size=1) +topk_sentence_ids = kmax_sequence_score_layer( + input=sentence_scores, beam_size=beam_size) + +# the second beam expansion. +topk_sen = sub_nested_seq_layer( + input=sentence_states, selected_indices=topk_sentence_ids) +start_pos_scores = fc_layer(input=topk_sen, size=1, act=LinearActivation()) +topk_start_pos_ids = kmax_sequence_score_layer( + input=sentence_scores, beam_size=beam_size) + +# the final beam expansion. +topk_start_spans = seq_slice_layer( + input=topk_sen, starts=topk_start_pos_ids, ends=None) +end_pos_scores = fc_layer( + input=topk_start_spans, size=1, act=LinearActivation()) +topk_end_pos_ids = kmax_sequence_score_layer( + input=end_pos_scores, beam_size=beam_size) + +# define the cost +sentence_idx = data_layer(name="sentences_ids", size=1) +start_idx = data_layer(name="start_ids", size=1) +end_idx = data_layer(name="end_ids", size=1) +cost = cross_entropy_over_beam( + input=[ + sentence_scores, topk_sentence_ids, start_pos_scores, + topk_start_pos_ids, end_pos_scores, topk_end_pos_ids + ], + label=[sentence_idx, start_idx, end_idx]) + +outputs(cost)