diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 4b1d80d3db924bfa2ad0e081f785d8f5dd719fce..d4d165e5d96cef1854d987acec6bc0eac5f6e395 100644
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -5457,9 +5457,9 @@ def nce_layer(input,
                          param_attr=[attr1, attr2], weight=layer3,
                          num_classes=3, neg_distribution=[0.1,0.3,0.6])
 
-    :param name: layer name
+    :param name: The name of this layer.
     :type name: basestring
-    :param input: input layers. It could be a LayerOutput of list/tuple of LayerOutput.
+    :param input: The input layers. It could be a LayerOutput or a list/tuple of LayerOutput.
     :type input: LayerOutput|list|tuple|collections.Sequence
     :param label: label layer
     :type label: LayerOutput
@@ -5477,7 +5477,9 @@ def nce_layer(input,
                              A uniform distribution will be used if not provided.
                              If not None, its length must be equal to num_classes.
     :type neg_distribution: list|tuple|collections.Sequence|None
-    :param bias_attr: Bias parameter attribute. True if no bias.
+    :param bias_attr: The bias attribute. If no bias is needed, pass False or
+                      anything that is not a ParameterAttribute. If None, a
+                      default bias is created.
     :type bias_attr: ParameterAttribute|None|False
     :param layer_attr: Extra Layer Attribute.
     :type layer_attr: ExtraLayerAttribute
@@ -5593,7 +5595,7 @@ def rank_cost(left,
     :param weight: The weight affects the cost, namely the scale of cost.
                    It is an optional argument.
     :type weight: LayerOutput
-    :param name: The name of this layers. It is not necessary.
+    :param name: The name of this layer. It is not necessary.
     :type name: None|basestring
     :param coeff: The coefficient affects the gradient in the backward.
     :type coeff: float
@@ -5647,7 +5649,7 @@ def lambda_cost(input,
     :param score: The 2nd input. Score of each sample.
     :type input: LayerOutput
     :param NDCG_num: The size of NDCG (Normalized Discounted Cumulative Gain),
-                     e.g., 5 for NDCG@5. It must be less than for equal to the
+                     e.g., 5 for NDCG@5. It must be less than or equal to the
                      minimum size of lists.
     :type NDCG_num: int
     :param max_sort_size: The size of partial sorting in calculating gradient.
@@ -5658,7 +5660,7 @@ def lambda_cost(input,
                           than the size of a list, the algorithm will sort the
                           entire list of get gradient.
     :type max_sort_size: int
-    :param name: The name of this layers. It is not necessary.
+    :param name: The name of this layer. It is not necessary.
     :type name: None|basestring
     :param layer_attr: Extra Layer Attribute.
     :type layer_attr: ExtraLayerAttribute
@@ -5702,7 +5704,7 @@ def cross_entropy(input,
     :type input: LayerOutput.
     :param label: The input label.
     :type input: LayerOutput.
-    :param name: The name of this layers. It is not necessary.
+    :param name: The name of this layer. It is not necessary.
     :type name: None|basestring.
     :param coeff: The cost is multiplied with coeff.
                   The coefficient affects the gradient in the backward.
     :type coeff: float.
@@ -5750,7 +5752,7 @@ def cross_entropy_with_selfnorm(input,
     :type input: LayerOutput.
     :param label: The input label.
     :type input: LayerOutput.
-    :param name: The name of this layers. It is not necessary.
+    :param name: The name of this layer. It is not necessary.
     :type name: None|basestring.
     :param coeff: The coefficient affects the gradient in the backward.
     :type coeff: float.
@@ -5790,7 +5792,7 @@ def sum_cost(input, name=None, layer_attr=None):
 
     :param input: The first input layer.
     :type input: LayerOutput.
-    :param name: The name of this layers. It is not necessary.
+    :param name: The name of this layer. It is not necessary.
     :type name: None|basestring.
     :param layer_attr: Extra Layer Attribute.
     :type layer_attr: ExtraLayerAttribute
@@ -5835,7 +5837,7 @@ def huber_regression_cost(input,
     :type input: LayerOutput.
     :param label: The input label.
     :type input: LayerOutput.
-    :param name: The name of this layers. It is not necessary.
+    :param name: The name of this layer. It is not necessary.
     :type name: None|basestring.
     :param delta: The difference between the observed and predicted values.
     :type delta: float.
@@ -5885,7 +5887,7 @@ def huber_classification_cost(input,
     :type input: LayerOutput.
     :param label: The input label.
     :type input: LayerOutput.
-    :param name: The name of this layers. It is not necessary.
+    :param name: The name of this layer. It is not necessary.
     :type name: None|basestring.
     :param coeff: The coefficient affects the gradient in the backward.
     :type coeff: float.
@@ -5928,7 +5930,7 @@ def multi_binary_label_cross_entropy(input,
     :type input: LayerOutput
     :param label: The input label.
     :type input: LayerOutput
-    :param name: The name of this layers. It is not necessary.
+    :param name: The name of this layer. It is not necessary.
     :type name: None|basestring
     :param coeff: The coefficient affects the gradient in the backward.
     :type coeff: float
@@ -6033,9 +6035,9 @@ def cross_entropy_over_beam(input, name=None):
         ])
 
-    :param input: input beams for this layer.
+    :param input: Input beams for this layer.
     :type input: BeamInput
-    :param name: input beams for this layer.
+    :param name: The name of this layer.
     :type name: basestring
     :return: LayerOutput object.
     :rtype: LayerOutput
@@ -6175,12 +6177,21 @@ def multiplex_layer(input, name=None, layer_attr=None):
 
 @wrap_name_default("dropout")
 def dropout_layer(input, dropout_rate, name=None):
     """
-    @TODO(yuyang18): Add comments.
-    :param name:
-    :param input:
-    :param dropout_rate:
-    :return:
+    The example usage is:
+
+    .. code-block:: python
+
+        dropout = dropout_layer(input=input_layer, dropout_rate=0.5)
+
+    :param name: The name of this layer.
+    :type name: basestring
+    :param input: The input layer.
+    :type input: LayerOutput
+    :param dropout_rate: The probability that an input element is dropped.
+    :type dropout_rate: float
+    :return: LayerOutput object.
+    :rtype: LayerOutput
     """
     return addto_layer(
         name=name,
@@ -6203,7 +6214,7 @@ def row_conv_layer(input,
     """
     The row convolution is called lookahead convolution. It is firstly
-    introduced in paper of `Deep Speech 2: End-toEnd Speech Recognition
+    introduced in paper of `Deep Speech 2: End-to-End Speech Recognition
     in English and Mandarin <https://arxiv.org/pdf/1512.02595v1.pdf>`_ .
 
     The bidirectional RNN that learns representation for a sequence by
@@ -6211,9 +6222,9 @@ def row_conv_layer(input,
     However, unlike unidirectional RNNs, bidirectional RNNs are challenging
     to deploy in an online and low-latency setting. The lookahead convolution
     incorporates information from future subsequences in a computationally
-    efficient manner to improve unidirectional recurrent neural networks.
+    efficient manner to improve unidirectional RNNs.
 
-    The connection of row convolution is different form the 1D sequence
+    The connection of row convolution is different from the 1D sequence
     convolution. Assumed that, the future context-length is k, that is to say,
     it can get the output at timestep t by using the the input feature from
     t-th timestep to (t+k+1)-th timestep. Assumed that the hidden dim of input
@@ -6242,7 +6253,7 @@ def row_conv_layer(input,
     :param act: Activation Type. Default is linear activation.
     :type act: BaseActivation
     :param param_attr: The Parameter Attribute. If None, the parameter will be
-                       initialized smartly. It's better set it by yourself.
+                       initialized smartly. It's better to set it by yourself.
     :type param_attr: ParameterAttribute
     :param layer_attr: Extra Layer config.
     :type layer_attr: ExtraLayerAttribute|None
@@ -6342,7 +6353,7 @@ def gated_unit_layer(input,
     The gated unit layer implements a simple gating mechanism over the input.
     The input :math:`X` is first projected into a new space :math:`X'`, and it
     is also used to produce a gate weight :math:`\sigma`. Element-wise
-    prodict between :match:`X'` and :math:`\sigma` is finally returned.
+    product between :math:`X'` and :math:`\sigma` is finally returned.
 
     Reference:
         Language Modeling with Gated Convolutional Networks
@@ -6440,8 +6451,8 @@ def switch_order_layer(input,
     :type input: LayerOutput
     :param name: Name of this layer.
     :type name: basestring
-    :param reshape: reshape matrix by axises.
-    :type reshape: Dict
+    :param reshape_axis: Specify the axes of 'height'. Its value should be positive and less than 4.
+    :type reshape_axis: int
     :return: LayerOutput object.
     :rtype: LayerOutput
     """
@@ -6869,7 +6880,9 @@ def scale_shift_layer(input, name=None, param_attr=None, bias_attr=None):
     :type input: LayerOutput.
     :param param_attr: The parameter attribute of scaling.
     :type param_attr: ParameterAttribute
-    :param bias_attr: The parameter attribute of shifting.
+    :param bias_attr: The bias attribute. If no bias is needed, pass False or
+                      anything that is not a ParameterAttribute. If None, a
+                      default bias is created.
     :type bias_attr: ParameterAttribute
     :return: LayerOutput object.
     :rtype: LayerOutput
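
A note on the bias_attr convention adopted in the nce_layer and scale_shift_layer hunks above: None, False, and a ParameterAttribute select three different behaviors. A minimal sketch of the three cases, assuming the usual trainer_config_helpers wildcard import; the layer names and sizes are illustrative, not taken from the patch:

.. code-block:: python

    from paddle.trainer_config_helpers import *

    data = data_layer(name='features', size=128)

    # bias_attr=None (the default): a bias is created with default attributes.
    with_default_bias = scale_shift_layer(input=data)

    # bias_attr=False (or anything that is not a ParameterAttribute): no bias.
    without_bias = scale_shift_layer(input=data, bias_attr=False)

    # A ParameterAttribute configures the bias explicitly.
    with_custom_bias = scale_shift_layer(
        input=data, bias_attr=ParamAttr(initial_mean=0.0, initial_std=0.01))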
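The dropout_layer hunk ends just as the body begins, showing only return addto_layer( and name=name,. For readers of the new docstring, a sketch of how that delegation is typically completed in this codebase; every argument after name=name here is an assumption, not part of the patch:

.. code-block:: python

    # Sketch: dropout in this API is not a standalone layer. It is an identity
    # addto_layer whose extra attribute carries the drop rate.
    return addto_layer(
        name=name,
        input=input,
        act=LinearActivation(),  # identity transform; only the dropout acts
        bias_attr=False,         # a pure pass-through needs no bias
        layer_attr=ExtraAttr(drop_rate=dropout_rate))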
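Since the row_conv_layer hunks above describe the lookahead mechanism only in prose, a short usage sketch may help. It assumes the layer's context window is set through its context_len argument; the encoder choice, sizes, and the context length value are illustrative:

.. code-block:: python

    from paddle.trainer_config_helpers import *

    feats = data_layer(name='acoustic_features', size=512)

    # A unidirectional recurrent encoder, matching the online, low-latency
    # setting the docstring describes.
    encoder = simple_gru(input=feats, size=256)

    # Lookahead (row) convolution: each output at timestep t mixes the input
    # features from timestep t up to a fixed number of future timesteps,
    # so the unidirectional encoder can peek a bounded distance ahead.
    lookahead = row_conv_layer(input=encoder, context_len=19)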