From 34410eb8221a5842fdee7d359889e342f676851a Mon Sep 17 00:00:00 2001 From: caoying03 Date: Wed, 8 Nov 2017 13:49:31 +0800 Subject: [PATCH] nce does not need activation. --- .../paddle/trainer_config_helpers/layers.py | 66 ++++++++++--------- 1 file changed, 35 insertions(+), 31 deletions(-) diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 169e201046a..eb4ff702191 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -5494,7 +5494,11 @@ def crf_decoding_layer(input, return LayerOutput(name, LayerType.CRF_DECODING_LAYER, parents, size=1) -@wrap_act_default(act=SigmoidActivation()) +""" +Following are cost Layers. +""" + + @wrap_bias_attr_default(has_bias=True) @wrap_param_attr_default() @wrap_name_default() @@ -5502,7 +5506,6 @@ def crf_decoding_layer(input, def nce_layer(input, label, num_classes=None, - act=None, param_attr=None, weight=None, num_neg_samples=10, @@ -5511,9 +5514,12 @@ def nce_layer(input, bias_attr=None, layer_attr=None): """ - Noise-contrastive estimation. - Implements the method in the following paper: - A fast and simple algorithm for training neural probabilistic language models. + Noise-contrastive estimation. This layer implements the method in the + following paper: + + Reference: + A fast and simple algorithm for training neural probabilistic language + models. https://www.cs.toronto.edu/~amnih/papers/ncelm.pdf The example usage is: @@ -5525,32 +5531,37 @@ def nce_layer(input, :param name: The name of this layer. It is optional. :type name: basestring - :param input: The input layers. It could be a LayerOutput of list/tuple of LayerOutput. + :param input: The input layers. It should be a LayerOutput or a list/tuple + of LayerOutput. :type input: LayerOutput | list | tuple | collections.Sequence - :param label: label layer + :param label: The ground truth. 
:type label: LayerOutput - :param weight: weight layer, can be None(default) + :param weight: The weight layer defines a weight for each sample in the + mini-batch. The default value is None. :type weight: LayerOutput - :param num_classes: number of classes. + :param num_classes: The number of classes. :type num_classes: int - :param act: Activation type. SigmoidActivation is the default. - :type act: BaseActivation - :param param_attr: The Parameter Attribute|list. - :type param_attr: ParameterAttribute - :param num_neg_samples: number of negative samples. Default is 10. + :param param_attr: The parameter attributes. + :type param_attr: ParameterAttribute|list + :param num_neg_samples: The number of sampled negative labels. The default + value is 10. :type num_neg_samples: int - :param neg_distribution: The distribution for generating the random negative labels. - A uniform distribution will be used if not provided. - If not None, its length must be equal to num_classes. + :param neg_distribution: The discrete noise distribution over the output + space from which num_neg_samples negative labels + are sampled. If this parameter is not set, a + uniform distribution will be used. A user-defined + distribution is a list whose length must be equal + to num_classes. Each member of the list defines + the probability of a class given input x. :type neg_distribution: list | tuple | collections.Sequence | None - :param bias_attr: The Bias Attribute. If the parameter is set to - False or something not type of ParameterAttribute, - no bias is defined. If the parameter is set to - True, the bias is initialized to zero. + :param bias_attr: The attribute for bias. If this parameter is set to False or + any object whose type is not ParameterAttribute, no bias + is added. If this parameter is set to True, the bias is + initialized to zero. :type bias_attr: ParameterAttribute | None | bool | Any :param layer_attr: Extra Layer Attribute. 
:type layer_attr: ExtraLayerAttribute - :return: layer name. + :return: The LayerOutput object. :rtype: LayerOutput """ if isinstance(input, LayerOutput): @@ -5573,8 +5584,6 @@ def nce_layer(input, assert isinstance(neg_distribution, collections.Sequence) assert len(neg_distribution) == num_classes assert abs(sum(neg_distribution) - 1.0) < 1e-5 - if not isinstance(act, BaseActivation): - raise TypeError() ipts_for_layer = [] parents = [] @@ -5596,7 +5605,7 @@ def nce_layer(input, type=LayerType.NCE_LAYER, num_classes=num_classes, neg_sampling_dist=neg_distribution, - active_type=act.name, + active_type=SigmoidActivation().name, num_neg_samples=num_neg_samples, inputs=ipts_for_layer, bias=ParamAttr.to_bias(bias_attr), @@ -5606,12 +5615,7 @@ def nce_layer(input, LayerType.NCE_LAYER, parents=parents, size=l.config.size, - activation=act) - - -""" -following are cost Layers. -""" + activation=SigmoidActivation()) @wrap_name_default() -- GitLab