diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 31652613fb3a55636b32babbc4bde60d65776c61..2af7c9c9c4487b6123ffe91dd1f920aad31df763 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -4921,12 +4921,14 @@ def crf_decoding_layer(input, @wrap_act_default(act=SigmoidActivation()) @wrap_bias_attr_default(has_bias=True) +@wrap_param_attr_default() @wrap_name_default() @layer_support() def nce_layer(input, label, - num_classes, + num_classes=None, act=None, + param_attr=None, weight=None, num_neg_samples=10, neg_distribution=None, @@ -4942,7 +4944,8 @@ def nce_layer(input, .. code-block:: python - cost = nce_layer(input=layer1, label=layer2, weight=layer3, + cost = nce_layer(input=[layer1, layer2], label=layer2, + param_attr=[attr1, attr2], weight=layer3, num_classes=3, neg_distribution=[0.1,0.3,0.6]) :param name: layer name @@ -4957,6 +4960,8 @@ def nce_layer(input, :type num_classes: int :param act: Activation, default is Sigmoid. :type act: BaseActivation + :param param_attr: The Parameter Attribute|list. + :type param_attr: ParameterAttribute :param num_neg_samples: number of negative samples. Default is 10. :type num_neg_samples: int :param neg_distribution: The distribution for generating the random negative labels. @@ -4972,9 +4977,20 @@ def nce_layer(input, """ if isinstance(input, LayerOutput): input = [input] + assert not isinstance(param_attr, collections.Sequence) + param_attr = [param_attr] + else: + if isinstance(param_attr, collections.Sequence): + assert len(input) == len(param_attr) + else: + param_attr = [copy.deepcopy(param_attr) for _ in range(len(input))] + assert isinstance(input, collections.Sequence) + assert isinstance(label, LayerOutput) assert label.layer_type == LayerType.DATA + if num_classes is None: + num_classes = label.size if neg_distribution is not None: assert isinstance(neg_distribution, collections.Sequence) assert len(neg_distribution) == num_classes @@ -4984,9 +5000,9 @@ def nce_layer(input, ipts_for_layer = [] parents = [] - for each_input in input: + for each_input, attr in zip(input, param_attr): assert isinstance(each_input, LayerOutput) - ipts_for_layer.append(each_input.name) + ipts_for_layer.append(Input(each_input.name, **attr.attr)) parents.append(each_input) ipts_for_layer.append(label.name) parents.append(label) diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr index 05fd1c99d2db6e9faa3b3884ec9baf051791f9fe..05847344be60b4de42a7dd709914fd3da524d1ae 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr @@ -215,6 +215,22 @@ layers { } coeff: 1.0 } +layers { + name: "__nce_layer_0__" + type: "nce" + size: 1 + active_type: "sigmoid" + inputs { + input_layer_name: "__fc_layer_0__" + input_parameter_name: "___nce_layer_0__.w0" + } + inputs { + input_layer_name: "labels" + } + bias_parameter_name: "___nce_layer_0__.wbias" + num_classes: 5000 + num_neg_samples: 10 +} parameters { name: "___fc_layer_0__.w0" size: 800 @@ -245,6 +261,26 @@ parameters { initial_strategy: 0 initial_smart: true } +parameters { + name: "___nce_layer_0__.w0" + size: 20000 + initial_mean: 0.0 + initial_std: 0.0141421356237 + dims: 5000 + dims: 4 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___nce_layer_0__.wbias" + size: 5000 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 5000 + initial_strategy: 0 + initial_smart: false +} input_layer_names: "input" input_layer_names: "labels" input_layer_names: "crf_label" @@ -267,6 +303,7 @@ output_layer_names: "__cross_entropy_with_selfnorm_0__" output_layer_names: "__huber_cost_0__" output_layer_names: "__multi_binary_label_cross_entropy_0__" output_layer_names: "__sum_cost_0__" +output_layer_names: "__nce_layer_0__" sub_models { name: "root" layer_names: "input" @@ -292,6 +329,7 @@ sub_models { layer_names: "__huber_cost_0__" layer_names: "__multi_binary_label_cross_entropy_0__" layer_names: "__sum_cost_0__" + layer_names: "__nce_layer_0__" input_layer_names: "input" input_layer_names: "labels" input_layer_names: "crf_label" @@ -314,6 +352,7 @@ sub_models { output_layer_names: "__huber_cost_0__" output_layer_names: "__multi_binary_label_cross_entropy_0__" output_layer_names: "__sum_cost_0__" + output_layer_names: "__nce_layer_0__" is_recurrent_layer_group: false } diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers_with_weight.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers_with_weight.protostr index 3244181a63109335c4fba6ca4dd04ac8f0446313..b7d74f85ab4ca3f434dfa45516dfee7227b6ceee 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers_with_weight.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers_with_weight.protostr @@ -60,6 +60,31 @@ layers { } coeff: 1.0 } +layers { + name: "multi_class_label" + type: "data" + size: 500 + active_type: "" +} +layers { + name: "__nce_layer_0__" + type: "nce" + size: 1 + active_type: "sigmoid" + inputs { + input_layer_name: "__fc_layer_0__" + input_parameter_name: "___nce_layer_0__.w0" + } + inputs { + input_layer_name: "multi_class_label" + } + inputs { + input_layer_name: "weight" + } + bias_parameter_name: "___nce_layer_0__.wbias" + num_classes: 500 + num_neg_samples: 10 +} parameters { name: "___fc_layer_0__.w0" size: 3000 @@ -80,9 +105,30 @@ parameters { initial_strategy: 0 initial_smart: false } +parameters { + name: "___nce_layer_0__.w0" + size: 5000 + initial_mean: 0.0 + initial_std: 0.04472135955 + dims: 500 + dims: 10 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___nce_layer_0__.wbias" + size: 500 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 500 + initial_strategy: 0 + initial_smart: false +} input_layer_names: "input" input_layer_names: "label" input_layer_names: "weight" +input_layer_names: "multi_class_label" output_layer_names: "__cost_0__" output_layer_names: "__mse_cost_0__" evaluators { @@ -100,9 +146,12 @@ sub_models { layer_names: "__fc_layer_0__" layer_names: "__cost_0__" layer_names: "__mse_cost_0__" + layer_names: "multi_class_label" + layer_names: "__nce_layer_0__" input_layer_names: "input" input_layer_names: "label" input_layer_names: "weight" + input_layer_names: "multi_class_label" output_layer_names: "__cost_0__" output_layer_names: "__mse_cost_0__" evaluator_names: "classification_error_evaluator" diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py index 18ff6b48c495b7a9d61595916ade1a54b1fa6a10..d2a3b702a1d7b650947b344e4719098f68d4dd73 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py +++ b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py @@ -40,4 +40,6 @@ outputs( name='huber_label', size=1)), multi_binary_label_cross_entropy( input=probs, label=xe_label), - sum_cost(input=hidden)) + sum_cost(input=hidden), + nce_layer( + input=hidden, label=labels)) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers_with_weight.py b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers_with_weight.py index 1c0aa7f9b9ee45b9eaf82dc46a2648d834dcd4ad..c369062930e2b067ceab0dc3b25ba6c1eabe2450 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers_with_weight.py +++ b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers_with_weight.py @@ -11,4 +11,9 @@ outputs( classification_cost( input=fc, label=lbl, weight=wt), mse_cost( - input=fc, label=lbl, weight=wt)) + input=fc, label=lbl, weight=wt), + nce_layer( + input=fc, + label=data_layer( + name='multi_class_label', size=500), + weight=wt))