未验证 提交 374e1685 编写于 作者: C Cao Ying 提交者: GitHub

Merge pull request #5517 from ranqiu92/doc

Update the annotations of layers.py.
...@@ -888,7 +888,7 @@ def mixed_layer(size=0, ...@@ -888,7 +888,7 @@ def mixed_layer(size=0,
:type size: int :type size: int
:param input: The input of this layer. It is an optional parameter. If set, :param input: The input of this layer. It is an optional parameter. If set,
then this function will just return layer's name. then this function will just return layer's name.
:param act: Activation Type. LinearActivation is the default. :param act: Activation Type. LinearActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param bias_attr: The bias attribute. If the parameter is set to False or an object :param bias_attr: The bias attribute. If the parameter is set to False or an object
whose type is not ParameterAttribute, no bias is defined. If the whose type is not ParameterAttribute, no bias is defined. If the
...@@ -1030,7 +1030,7 @@ def fc_layer(input, ...@@ -1030,7 +1030,7 @@ def fc_layer(input,
:type input: LayerOutput | list | tuple :type input: LayerOutput | list | tuple
:param size: The layer dimension. :param size: The layer dimension.
:type size: int :type size: int
:param act: Activation Type. TanhActivation is the default. :param act: Activation Type. TanhActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param param_attr: The Parameter Attribute|list. :param param_attr: The Parameter Attribute|list.
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
...@@ -1527,7 +1527,7 @@ def lstmemory(input, ...@@ -1527,7 +1527,7 @@ def lstmemory(input,
:type input: LayerOutput :type input: LayerOutput
:param reverse: is sequence process reversed or not. :param reverse: is sequence process reversed or not.
:type reverse: bool :type reverse: bool
:param act: Activation type. TanhActivation is the default. :math:`h_t` :param act: Activation type. TanhActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param gate_act: gate activation type, SigmoidActivation by default. :param gate_act: gate activation type, SigmoidActivation by default.
:type gate_act: BaseActivation :type gate_act: BaseActivation
...@@ -1920,7 +1920,7 @@ def repeat_layer(input, ...@@ -1920,7 +1920,7 @@ def repeat_layer(input,
False for treating input as column vector and repeating False for treating input as column vector and repeating
in the row direction. in the row direction.
:type as_row_vector: bool :type as_row_vector: bool
:param act: Activation type. IdentityActivation is the default. :param act: Activation type. IdentityActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:type name: basestring :type name: basestring
:param layer_attr: extra layer attributes. :param layer_attr: extra layer attributes.
...@@ -1974,7 +1974,7 @@ def seq_reshape_layer(input, ...@@ -1974,7 +1974,7 @@ def seq_reshape_layer(input,
:type reshape_size: int :type reshape_size: int
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param act: Activation type. IdentityActivation is the default. :param act: Activation type. IdentityActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param layer_attr: extra layer attributes. :param layer_attr: extra layer attributes.
:type layer_attr: ExtraLayerAttribute. :type layer_attr: ExtraLayerAttribute.
...@@ -2487,7 +2487,7 @@ def img_conv_layer(input, ...@@ -2487,7 +2487,7 @@ def img_conv_layer(input,
shape will be (filter_size, filter_size_y). shape will be (filter_size, filter_size_y).
:type filter_size_y: int | None :type filter_size_y: int | None
:param num_filters: Each filter group's number of filter :param num_filters: Each filter group's number of filter
:param act: Activation type. ReluActivation is the default. :param act: Activation type. ReluActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param groups: Group size of filters. :param groups: Group size of filters.
:type groups: int :type groups: int
...@@ -3255,7 +3255,7 @@ def addto_layer(input, act=None, name=None, bias_attr=None, layer_attr=None): ...@@ -3255,7 +3255,7 @@ def addto_layer(input, act=None, name=None, bias_attr=None, layer_attr=None):
:param input: Input layers. It could be a LayerOutput or list/tuple of :param input: Input layers. It could be a LayerOutput or list/tuple of
LayerOutput. LayerOutput.
:type input: LayerOutput | list | tuple :type input: LayerOutput | list | tuple
:param act: Activation Type. LinearActivation is the default. :param act: Activation Type. LinearActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param bias_attr: The bias attribute. If the parameter is set to False or an object :param bias_attr: The bias attribute. If the parameter is set to False or an object
whose type is not ParameterAttribute, no bias is defined. If the whose type is not ParameterAttribute, no bias is defined. If the
...@@ -3313,7 +3313,7 @@ def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None): ...@@ -3313,7 +3313,7 @@ def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None):
:type name: basestring :type name: basestring
:param input: input layers or projections :param input: input layers or projections
:type input: list | tuple | collections.Sequence :type input: list | tuple | collections.Sequence
:param act: Activation type. IdentityActivation is the default. :param act: Activation type. IdentityActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param layer_attr: Extra Layer Attribute. :param layer_attr: Extra Layer Attribute.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
...@@ -3408,7 +3408,7 @@ def seq_concat_layer(a, b, act=None, name=None, layer_attr=None, ...@@ -3408,7 +3408,7 @@ def seq_concat_layer(a, b, act=None, name=None, layer_attr=None,
:type a: LayerOutput :type a: LayerOutput
:param b: input sequence layer :param b: input sequence layer
:type b: LayerOutput :type b: LayerOutput
:param act: Activation type. IdentityActivation is the default. :param act: Activation type. IdentityActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param layer_attr: Extra Layer Attribute. :param layer_attr: Extra Layer Attribute.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
...@@ -3574,30 +3574,32 @@ def lstm_step_layer(input, ...@@ -3574,30 +3574,32 @@ def lstm_step_layer(input,
... ...
This layer has two outputs. Default output is :math:`h_t`. The other This layer has two outputs. The default output is :math:`h_t`. The other
output is :math:`o_t`, whose name is 'state' and can use output is :math:`o_t`, whose name is 'state' and users can use
:code:`get_output_layer` to extract this output. :code:`get_output_layer` to extract this output.
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param size: Layer's size. NOTE: lstm layer's size, should be equal to :param size: The dimension of this layer's output, which must be
:code:`input.size/4`, and should be equal to equal to the dimension of the state.
:code:`state.size`.
:type size: int :type size: int
:param input: input layer. :math:`Wx_t + Wh_{t-1}` :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param state: State Layer. :math:`c_{t-1}` :param state: The state of the LSTM unit.
:type state: LayerOutput :type state: LayerOutput
:param act: Activation type. TanhActivation is the default. :param act: Activation type. TanhActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param gate_act: Gate Activation Type. SigmoidActivation is the default. :param gate_act: Activation type of the gate. SigmoidActivation is the
default activation.
:type gate_act: BaseActivation :type gate_act: BaseActivation
:param state_act: State Activation Type. TanhActivation is the default. :param state_act: Activation type of the state. TanhActivation is the
default activation.
:type state_act: BaseActivation :type state_act: BaseActivation
:param bias_attr: The parameter attribute for bias. If this parameter is :param bias_attr: The bias attribute. If the parameter is set to False or an object
set to True or None, the bias is initialized to zero. whose type is not ParameterAttribute, no bias is defined. If the
:type bias_attr: ParameterAttribute | None | True parameter is set to True, the bias is initialized to zero.
:param layer_attr: layer's extra attribute. :type bias_attr: ParameterAttribute | None | bool | Any
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for details.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -3642,23 +3644,31 @@ def gru_step_layer(input, ...@@ -3642,23 +3644,31 @@ def gru_step_layer(input,
layer_attr=None): layer_attr=None):
""" """
:param input: :param input: The input of this layer, whose dimension can be divided by 3.
:type input: LayerOutput :type input: LayerOutput
:param output_mem: :param output_mem: A memory which memorizes the output of this layer at previous
:param size: time step.
:param act: :type output_mem: LayerOutput
:param size: The dimension of this layer's output. If it is not set or set to None,
it will be set to one-third of the dimension of the input automatically.
:type size: int
:param act: Activation type of this layer's output. TanhActivation
is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:param gate_act: Activation type of this layer's two gates. Default is Sigmoid. :type name: basestring
:param gate_act: Activation type of this layer's two gates. SigmoidActivation is
the default activation.
:type gate_act: BaseActivation :type gate_act: BaseActivation
:param bias_attr: The parameter attribute for bias. If this parameter is set to :param bias_attr: The parameter attribute for bias. If this parameter is set to
False or an object whose type is not ParameterAttribute, no bias False or an object whose type is not ParameterAttribute, no bias
is defined. If this parameter is set to True, is defined. If this parameter is set to True,
the bias is initialized to zero. the bias is initialized to zero.
:type bias_attr: ParameterAttribute | None | bool | Any :type bias_attr: ParameterAttribute | None | bool | Any
:param param_attr: the parameter_attribute for transforming the output_mem :param param_attr: The parameter attribute. See ParameterAttribute for details.
from previous step. :type param_attr: ParameterAttribute
:param layer_attr: :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -3703,25 +3713,34 @@ def gru_step_naive_layer(input, ...@@ -3703,25 +3713,34 @@ def gru_step_naive_layer(input,
param_attr=None, param_attr=None,
layer_attr=None): layer_attr=None):
""" """
GRU Step Layer, but using MixedLayer to generate. It support ERROR_CLIPPING GRU Step Layer, which is realized using PaddlePaddle API. It supports ERROR_CLIPPING
and DROPOUT. and DROPOUT.
:param input: :param input: The input of this layer, whose dimensionality can be divided by 3.
:param output_mem: :param output_mem: A memory which memorizes the output of this layer at previous
:param size: time step.
:type output_mem: LayerOutput
:param size: The dimension of this layer's output. If it is not set or set to None,
it will be set to one-third of the dimension of the input automatically.
:type size: int
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:param act: :type name: basestring
:param act: Activation type of this layer's output. TanhActivation
is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param gate_act: Activation type of this layer's two gates. Default is Sigmoid. :param gate_act: Activation type of this layer's two gates. SigmoidActivation
is the default activation.
:type gate_act: BaseActivation :type gate_act: BaseActivation
:param bias_attr: The parameter attribute for bias. If this parameter is set to :param bias_attr: The parameter attribute for bias. If this parameter is set to
False or an object whose type is not ParameterAttribute, no bias False or an object whose type is not ParameterAttribute, no bias
is defined. If this parameter is set to True, is defined. If this parameter is set to True,
the bias is initialized to zero. the bias is initialized to zero.
:type bias_attr: ParameterAttribute | None | bool | Any :type bias_attr: ParameterAttribute | None | bool | Any
:param param_attr: :param param_attr: The parameter attribute. See ParameterAttribute for details.
:param layer_attr: :type param_attr: ParameterAttribute
:return: :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
if input.size % 3 != 0: if input.size % 3 != 0:
...@@ -3783,12 +3802,13 @@ def get_output_layer(input, arg_name, name=None, layer_attr=None): ...@@ -3783,12 +3802,13 @@ def get_output_layer(input, arg_name, name=None, layer_attr=None):
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param input: get output layer's input. And this layer should contains :param input: The input layer. And this layer should contain
multiple outputs. multiple outputs.
:type input: LayerOutput :type input: LayerOutput
:param arg_name: Output name from input. :param arg_name: The name of the output to be extracted from the input layer.
:type arg_name: basestring :type arg_name: basestring
:param layer_attr: Layer's extra attribute. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -3845,18 +3865,20 @@ def recurrent_layer(input, ...@@ -3845,18 +3865,20 @@ def recurrent_layer(input,
:param input: The input of this layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param act: Activation type. TanhActivation is the default. :param act: Activation type. TanhActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param bias_attr: The parameter attribute for bias. If this parameter is set to :param bias_attr: The parameter attribute for bias. If this parameter is set to
False or an object whose type is not ParameterAttribute, False or an object whose type is not ParameterAttribute,
no bias is defined. If the parameter is set to True, no bias is defined. If the parameter is set to True,
the bias is initialized to zero. the bias is initialized to zero.
:type bias_attr: ParameterAttribute | None | bool | Any :type bias_attr: ParameterAttribute | None | bool | Any
:param param_attr: parameter attribute. :param param_attr: The parameter attribute. See ParameterAttribute for
details.
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param layer_attr: Layer Attribute. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -3881,7 +3903,7 @@ def recurrent_layer(input, ...@@ -3881,7 +3903,7 @@ def recurrent_layer(input,
class StaticInput(object): class StaticInput(object):
""" """
StaticInput is only used in recurrent_group which defines a read-only memory StaticInput is only used in recurrent_group which defines a read-only memory
that can be a sequence or non-sequence. and can be a sequence or non-sequence.
:param size: DEPRECATED :param size: DEPRECATED
:param is_seq: DEPRECATED :param is_seq: DEPRECATED
""" """
...@@ -3914,8 +3936,8 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None): ...@@ -3914,8 +3936,8 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None):
Recurrent layer group is an extremely flexible recurrent unit in Recurrent layer group is an extremely flexible recurrent unit in
PaddlePaddle. As long as the user defines the calculation done within a PaddlePaddle. As long as the user defines the calculation done within a
time step, PaddlePaddle will iterate such a recurrent calculation over time step, PaddlePaddle will iterate such a recurrent calculation over
sequence input. This is extremely usefull for attention based model, or sequence input. This is useful for attention-based models, or Neural
Neural Turning Machine like models. Turing Machine like models.
The basic usage (time steps) is: The basic usage (time steps) is:
...@@ -3937,18 +3959,17 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None): ...@@ -3937,18 +3959,17 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None):
demo/seqToseq/seqToseq_net.py demo/seqToseq/seqToseq_net.py
- sequence steps: paddle/gserver/tests/sequence_nest_layer_group.conf - sequence steps: paddle/gserver/tests/sequence_nest_layer_group.conf
:param step: recurrent one time step function.The input of this function is :param step: A step function which takes the input of recurrent_group as its own
input of the group. The return of this function will be input and returns values as recurrent_group's output every time step.
recurrent group's return value.
The recurrent group scatter a sequence into time steps. And The recurrent group scatters a sequence into time steps. And
for each time step, will invoke step function, and return for each time step, it will invoke step function, and return
a time step result. Then gather each time step of output into a time step result. Then gather outputs of each time step into
layer group's output. layer group's output.
:type step: callable :type step: callable
:param name: recurrent_group's name. :param name: The recurrent_group's name. It is optional.
:type name: basestring :type name: basestring
:param input: Input links array. :param input: Input links array.
...@@ -3956,11 +3977,11 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None): ...@@ -3956,11 +3977,11 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None):
LayerOutput will be scattered into time steps. LayerOutput will be scattered into time steps.
SubsequenceInput will be scattered into sequence steps. SubsequenceInput will be scattered into sequence steps.
StaticInput will be imported to each time step, and doesn't change StaticInput will be imported to each time step, and doesn't change
through time. It's a mechanism to access layer outside step function. over time. It's a mechanism to access layer outside step function.
:type input: LayerOutput | StaticInput | SubsequenceInput | list | tuple :type input: LayerOutput | StaticInput | SubsequenceInput | list | tuple
:param reverse: If reverse is set true, the recurrent unit will process the :param reverse: If reverse is set to True, the recurrent unit will process the
input sequence in a reverse order. input sequence in a reverse order.
:type reverse: bool :type reverse: bool
...@@ -4095,7 +4116,8 @@ def maxid_layer(input, name=None, layer_attr=None): ...@@ -4095,7 +4116,8 @@ def maxid_layer(input, name=None, layer_attr=None):
:type input: LayerOutput :type input: LayerOutput
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param layer_attr: extra layer attributes. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute. :type layer_attr: ExtraLayerAttribute.
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -4128,11 +4150,12 @@ def out_prod_layer(input1, input2, name=None, layer_attr=None): ...@@ -4128,11 +4150,12 @@ def out_prod_layer(input1, input2, name=None, layer_attr=None):
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param input1: The first input layer name. :param input1: The first input layer.
:type input1: LayerOutput :type input1: LayerOutput
:param input2: The second input layer name. :param input2: The second input layer.
:type input2: LayerOutput :type input2: LayerOutput
:param layer_attr: extra layer attributes. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute. :type layer_attr: ExtraLayerAttribute.
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -4171,9 +4194,10 @@ def eos_layer(input, eos_id, name=None, layer_attr=None): ...@@ -4171,9 +4194,10 @@ def eos_layer(input, eos_id, name=None, layer_attr=None):
:type name: basestring :type name: basestring
:param input: The input of this layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param eos_id: end id of sequence :param eos_id: End id of sequence
:type eos_id: int :type eos_id: int
:param layer_attr: extra layer attributes. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute. :type layer_attr: ExtraLayerAttribute.
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -4234,8 +4258,9 @@ def beam_search(step, ...@@ -4234,8 +4258,9 @@ def beam_search(step,
- machine translation : demo/seqToseq/translation/gen.conf \ - machine translation : demo/seqToseq/translation/gen.conf \
demo/seqToseq/seqToseq_net.py demo/seqToseq/seqToseq_net.py
:param name: Name of the recurrent unit that generates sequences. :param name: The name of the recurrent unit that is responsible for
:type name: base string generating sequences. It is optional.
:type name: basestring
:param step: A callable function that defines the calculation in a time :param step: A callable function that defines the calculation in a time
step, and it is applied to sequences with arbitrary length by step, and it is applied to sequences with arbitrary length by
sharing a same set of weights. sharing a same set of weights.
...@@ -4360,16 +4385,18 @@ def square_error_cost(input, ...@@ -4360,16 +4385,18 @@ def square_error_cost(input,
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param input: Network prediction. :param input: The first input layer.
:type input: LayerOutput :type input: LayerOutput
:param label: Data label. :param label: The input label.
:type label: LayerOutput :type label: LayerOutput
:param weight: The weight affects the cost, namely the scale of cost. :param weight: The weight layer defines a weight for each sample in the
It is an optional argument. mini-batch. It is optional.
:type weight: LayerOutput :type weight: LayerOutput
:param coeff: The coefficient affects the gradient in the backward. :param coeff: The weight of the gradient in the back propagation.
1.0 is the default value.
:type coeff: float :type coeff: float
:param layer_attr: layer's extra attribute. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -4402,17 +4429,20 @@ def classification_cost(input, ...@@ -4402,17 +4429,20 @@ def classification_cost(input,
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param input: input layer name. network output. :param input: The first input layer.
:type input: LayerOutput :type input: LayerOutput
:param label: label layer name. data_layer often. :param label: The input label.
:type label: LayerOutput :type label: LayerOutput
:param weight: The weight affects the cost, namely the scale of cost. :param weight: The weight layer defines a weight for each sample in the
It is an optional argument. mini-batch. It is optional.
:type weight: LayerOutput :type weight: LayerOutput
:param evaluator: Evaluator method. :param evaluator: Evaluator method. classification_error_evaluator is the default.
:param layer_attr: layer's extra attribute. :type evaluator: Evaluator method
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:param coeff: The coefficient affects the gradient in the backward. :param coeff: The weight of the gradient in the back propagation.
1.0 is the default value.
:type coeff: float :type coeff: float
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -4465,7 +4495,7 @@ def conv_operator(img, ...@@ -4465,7 +4495,7 @@ def conv_operator(img,
Different from img_conv_layer, conv_op is an Operator, which can be used Different from img_conv_layer, conv_op is an Operator, which can be used
in mixed_layer. And conv_op takes two inputs to perform convolution. in mixed_layer. And conv_op takes two inputs to perform convolution.
The first input is the image and the second is filter kernel. It only The first input is the image and the second is filter kernel. It only
support GPU mode. supports GPU mode.
The example usage is: The example usage is:
...@@ -4477,27 +4507,31 @@ def conv_operator(img, ...@@ -4477,27 +4507,31 @@ def conv_operator(img,
num_filters=64, num_filters=64,
num_channels=64) num_channels=64)
:param img: input image :param img: The input image.
:type img: LayerOutput :type img: LayerOutput
:param filter: input filter :param filter: The input filter.
:type filter: LayerOutput :type filter: LayerOutput
:param filter_size: The x dimension of a filter kernel. :param filter_size: The dimension of the filter kernel on the x axis.
:type filter_size: int :type filter_size: int
:param filter_size_y: The y dimension of a filter kernel. Since :param filter_size_y: The dimension of the filter kernel on the y axis.
PaddlePaddle now supports rectangular filters, If the parameter is not set or set to None, it will
the filter's shape can be (filter_size, filter_size_y). be set to 'filter_size' automatically.
:type filter_size_y: int :type filter_size_y: int
:param num_filters: channel of output data. :param num_filters: The number of the output channels.
:type num_filters: int :type num_filters: int
:param num_channels: channel of input data. :param num_channels: The number of the input channels. If the parameter is not set
or set to None, it will be automatically set to the channel
number of the 'img'.
:type num_channels: int :type num_channels: int
:param stride: The x dimension of the stride. :param stride: The stride on the x axis.
:type stride: int :type stride: int
:param stride_y: The y dimension of the stride. :param stride_y: The stride on the y axis. If the parameter is not set or
set to None, it will be set to 'stride' automatically.
:type stride_y: int :type stride_y: int
:param padding: The x dimension of padding. :param padding: The padding size on the x axis.
:type padding: int :type padding: int
:param padding_y: The y dimension of padding. :param padding_y: The padding size on the y axis. If the parameter is not set
or set to None, it will be set to 'padding' automatically.
:type padding_y: int :type padding_y: int
:return: A ConvOperator Object. :return: A ConvOperator Object.
:rtype: ConvOperator :rtype: ConvOperator
...@@ -4548,9 +4582,9 @@ def conv_projection(input, ...@@ -4548,9 +4582,9 @@ def conv_projection(input,
param_attr=None, param_attr=None,
trans=False): trans=False):
""" """
Different from img_conv_layer and conv_op, conv_projection is an Projection, Different from img_conv_layer and conv_op, conv_projection is a Projection,
which can be used in mixed_layer and conat_layer. It use cudnn to implement which can be used in mixed_layer and concat_layer. It uses cudnn to implement
conv and only support GPU mode. convolution and only supports GPU mode.
The example usage is: The example usage is:
...@@ -4563,32 +4597,45 @@ def conv_projection(input, ...@@ -4563,32 +4597,45 @@ def conv_projection(input,
:param input: The input of this layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param filter_size: The x dimension of a filter kernel. :param filter_size: The dimensions of the filter kernel. If the parameter is
:type filter_size: int set to one integer, the two dimensions on x and y axes
:param filter_size_y: The y dimension of a filter kernel. Since will be same when filter_size_y is not set. If it is set
PaddlePaddle now supports rectangular filters, to a list, the first element indicates the dimension on
the filter's shape can be (filter_size, filter_size_y). the x axis, and the second is used to specify the dimension
on the y axis when filter_size_y is not provided.
:type filter_size: int | tuple | list
:param filter_size_y: The dimension of the filter kernel on the y axis. If the parameter
is not set, it will be set automatically according to filter_size.
:type filter_size_y: int :type filter_size_y: int
:param num_filters: channel of output data. :param num_filters: The number of filters.
:type num_filters: int :type num_filters: int
:param num_channels: channel of input data. :param num_channels: The number of the input channels.
:type num_channels: int :type num_channels: int
:param stride: The x dimension of the stride. :param stride: The strides. If the parameter is set to one integer, the strides
:type stride: int on x and y axes will be same when stride_y is not set. If it is
:param stride_y: The y dimension of the stride. set to a list, the first element indicates the stride on the x axis,
and the second is used to specify the stride on the y axis when
stride_y is not provided.
:type stride: int | tuple | list
:param stride_y: The stride on the y axis.
:type stride_y: int :type stride_y: int
:param padding: The x dimension of padding. :param padding: The padding sizes. If the parameter is set to one integer, the padding
:type padding: int sizes on x and y axes will be same when padding_y is not set. If it
:param padding_y: The y dimension of padding. is set to a list, the first element indicates the padding size on the
x axis, and the second is used to specify the padding size on the y axis
when padding_y is not provided.
:type padding: int | tuple | list
:param padding_y: The padding size on the y axis.
:type padding_y: int :type padding_y: int
:param groups: The group number. :param groups: The group number.
:type groups: int :type groups: int
:param param_attr: Convolution param attribute. None means default attribute :param param_attr: The parameter attribute of the convolution. See ParameterAttribute for
details.
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
:param trans: whether it is convTrans or conv :param trans: Whether it is ConvTransProjection or ConvProjection
:type trans: bool :type trans: bool
:return: A DotMulProjection Object. :return: A Projection Object.
:rtype: DotMulProjection :rtype: ConvTransProjection | ConvProjection
""" """
if num_channels is None: if num_channels is None:
assert input.num_filters is not None assert input.num_filters is not None
...@@ -4653,13 +4700,13 @@ def pad_layer(input, ...@@ -4653,13 +4700,13 @@ def pad_layer(input,
layer_attr=None): layer_attr=None):
""" """
This operation pads zeros to the input data according to pad_c,pad_h This operation pads zeros to the input data according to pad_c,pad_h
and pad_w. pad_c, pad_h, pad_w specifies the which dimension and size and pad_w. pad_c, pad_h, pad_w specify the size in the corresponding
of padding. And the input data shape is NCHW. dimension. And the input data shape is NCHW.
For example, pad_c=[2,3] means padding 2 zeros before the For example, pad_c=[2,3] means padding 2 zeros before the input data
input data and 3 zeros after the input data in channel dimension. and 3 zeros after the input data in the channel dimension. pad_h means
pad_h means padding zeros in height dimension. pad_w means padding zeros padding zeros in the height dimension. pad_w means padding zeros in the
in width dimension. width dimension.
For example, For example,
...@@ -4696,13 +4743,14 @@ def pad_layer(input, ...@@ -4696,13 +4743,14 @@ def pad_layer(input,
:param input: The input of this layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param pad_c: padding size in channel dimension. :param pad_c: The padding size in the channel dimension.
:type pad_c: list | None :type pad_c: list | None
:param pad_h: padding size in height dimension. :param pad_h: The padding size in the height dimension.
:type pad_h: list | None :type pad_h: list | None
:param pad_w: padding size in width dimension. :param pad_w: The padding size in the width dimension.
:type pad_w: list | None :type pad_w: list | None
:param layer_attr: Extra Layer Attribute. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
...@@ -4751,7 +4799,7 @@ def pad_layer(input, ...@@ -4751,7 +4799,7 @@ def pad_layer(input,
@layer_support() @layer_support()
def conv_shift_layer(a, b, name=None, layer_attr=None): def conv_shift_layer(a, b, name=None, layer_attr=None):
""" """
This layer performs cyclic convolution for two input. For example: This layer performs cyclic convolution on two inputs. For example:
- a[in]: contains M elements. - a[in]: contains M elements.
- b[in]: contains N elements (N should be odd). - b[in]: contains N elements (N should be odd).
- c[out]: contains M elements. - c[out]: contains M elements.
...@@ -4760,7 +4808,7 @@ def conv_shift_layer(a, b, name=None, layer_attr=None): ...@@ -4760,7 +4808,7 @@ def conv_shift_layer(a, b, name=None, layer_attr=None):
c[i] = \sum_{j=-(N-1)/2}^{(N-1)/2}a_{i+j} * b_{j} c[i] = \sum_{j=-(N-1)/2}^{(N-1)/2}a_{i+j} * b_{j}
In this formular: In this formula:
- a's index is computed modulo M. When it is negative, then get item from - a's index is computed modulo M. When it is negative, then get item from
the right side (which is the end of array) to the left. the right side (which is the end of array) to the left.
- b's index is computed modulo N. When it is negative, then get item from - b's index is computed modulo N. When it is negative, then get item from
...@@ -4774,11 +4822,12 @@ def conv_shift_layer(a, b, name=None, layer_attr=None): ...@@ -4774,11 +4822,12 @@ def conv_shift_layer(a, b, name=None, layer_attr=None):
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param a: Input layer a. :param a: The first input of this layer.
:type a: LayerOutput :type a: LayerOutput
:param b: input layer b. :param b: The second input of this layer.
:type b: LayerOutput :type b: LayerOutput
:param layer_attr: layer's extra attribute. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -4809,8 +4858,8 @@ def tensor_layer(a, ...@@ -4809,8 +4858,8 @@ def tensor_layer(a,
bias_attr=None, bias_attr=None,
layer_attr=None): layer_attr=None):
""" """
This layer performs tensor operation for two input. This layer performs tensor operation on two inputs.
For example, each sample: For example:
.. math:: .. math::
y_{i} = a * W_{i} * {b^\mathrm{T}}, i=0,1,...,K-1 y_{i} = a * W_{i} * {b^\mathrm{T}}, i=0,1,...,K-1
...@@ -4830,22 +4879,24 @@ def tensor_layer(a, ...@@ -4830,22 +4879,24 @@ def tensor_layer(a,
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param a: Input layer a. :param a: The first input of this layer.
:type a: LayerOutput :type a: LayerOutput
:param b: input layer b. :param b: The second input of this layer.
:type b: LayerOutput :type b: LayerOutput
:param size: the layer dimension. :param size: The dimension of this layer.
:type size: int. :type size: int
:param act: Activation type. LinearActivation is the default. :param act: Activation type. LinearActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param param_attr: The Parameter Attribute. :param param_attr: The parameter attribute. See ParameterAttribute for
details.
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
:param bias_attr: The parameter attribute for bias. If this parameter is set to :param bias_attr: The parameter attribute for bias. If this parameter is set to
False or an object whose type is not ParameterAttribute, False or an object whose type is not ParameterAttribute,
no bias is defined. If this parameter is set to True, no bias is defined. If this parameter is set to True,
the bias is initialized to zero. the bias is initialized to zero.
:type bias_attr: ParameterAttribute | None | bool | Any :type bias_attr: ParameterAttribute | None | bool | Any
:param layer_attr: Extra Layer config. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute | None :type layer_attr: ExtraLayerAttribute | None
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -4881,7 +4932,7 @@ def selective_fc_layer(input, ...@@ -4881,7 +4932,7 @@ def selective_fc_layer(input,
layer_attr=None): layer_attr=None):
""" """
Selective fully connected layer. Different from fc_layer, the output Selective fully connected layer. Different from fc_layer, the output
of this layer maybe sparse. It requires an additional input to indicate of this layer can be sparse. It requires an additional input to indicate
several selected columns for output. If the selected columns are not several selected columns for output. If the selected columns are not
specified, selective_fc_layer acts exactly like fc_layer. specified, selective_fc_layer acts exactly like fc_layer.
...@@ -4895,22 +4946,34 @@ def selective_fc_layer(input, ...@@ -4895,22 +4946,34 @@ def selective_fc_layer(input,
:type name: basestring :type name: basestring
:param input: The input of this layer. :param input: The input of this layer.
:type input: LayerOutput | list | tuple :type input: LayerOutput | list | tuple
:param select: The select layer. The output of select layer should be a :param select: The layer to select columns to output. It should be a sparse
sparse binary matrix, and treat as the mask of selective fc. binary matrix, and is treated as the mask of selective fc. If
If is None, acts exactly like fc_layer. it is not set or set to None, selective_fc_layer acts exactly
like fc_layer.
:type select: LayerOutput :type select: LayerOutput
:param size: The layer dimension. :param size: The dimension of this layer, which should be equal to that of
the layer 'select'.
:type size: int :type size: int
:param act: Activation type. TanhActivation is the default. :param act: Activation type. TanhActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param param_attr: The Parameter Attribute. :param pass_generation: The flag which indicates whether it is during generation.
:type pass_generation: bool
:param has_selected_colums: The flag which indicates whether the parameter 'select'
has been set. True is the default.
:type has_selected_colums: bool
:param mul_ratio: A ratio helps to judge how sparse the output is and determine
the computation method for speed consideration.
:type mul_ratio: float
:param param_attr: The parameter attribute. See ParameterAttribute for
details.
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
:param bias_attr: The parameter attribute for bias. If this parameter is set to :param bias_attr: The parameter attribute for bias. If this parameter is set to
False or an object whose type is not ParameterAttribute, False or an object whose type is not ParameterAttribute,
no bias is defined. If this parameter is set to True, no bias is defined. If this parameter is set to True,
the bias is initialized to zero. the bias is initialized to zero.
:type bias_attr: ParameterAttribute | None | bool | Any :type bias_attr: ParameterAttribute | None | bool | Any
:param layer_attr: Extra Layer config. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute | None :type layer_attr: ExtraLayerAttribute | None
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -4961,7 +5024,7 @@ def selective_fc_layer(input, ...@@ -4961,7 +5024,7 @@ def selective_fc_layer(input,
@layer_support() @layer_support()
def sampling_id_layer(input, name=None, layer_attr=None): def sampling_id_layer(input, name=None, layer_attr=None):
""" """
A layer for sampling id from multinomial distribution from the input layer. A layer for sampling id from a multinomial distribution from the input layer.
Sampling one id for one sample. Sampling one id for one sample.
The simple usage is: The simple usage is:
...@@ -4974,8 +5037,9 @@ def sampling_id_layer(input, name=None, layer_attr=None): ...@@ -4974,8 +5037,9 @@ def sampling_id_layer(input, name=None, layer_attr=None):
:type input: LayerOutput :type input: LayerOutput
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param layer_attr: Extra Layer config. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
:type layer_attr: ExtraLayerAttribute | None details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -4996,8 +5060,7 @@ def slope_intercept_layer(input, ...@@ -4996,8 +5060,7 @@ def slope_intercept_layer(input,
intercept=0.0, intercept=0.0,
layer_attr=None): layer_attr=None):
""" """
This layer is for applying a slope and an intercept to the input This layer is for applying a slope and an intercept to the input.
element-wise. There is no activation and weight.
.. math:: .. math::
y = slope * x + intercept y = slope * x + intercept
...@@ -5012,12 +5075,13 @@ def slope_intercept_layer(input, ...@@ -5012,12 +5075,13 @@ def slope_intercept_layer(input,
:type input: LayerOutput :type input: LayerOutput
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param slope: the scale factor. :param slope: The scale factor.
:type slope: float. :type slope: float
:param intercept: the offset. :param intercept: The offset.
:type intercept: float. :type intercept: float
:param layer_attr: Extra Layer config. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
:type layer_attr: ExtraLayerAttribute | None details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -5072,12 +5136,13 @@ def linear_comb_layer(weights, vectors, size=None, name=None, layer_attr=None): ...@@ -5072,12 +5136,13 @@ def linear_comb_layer(weights, vectors, size=None, name=None, layer_attr=None):
:type weights: LayerOutput :type weights: LayerOutput
:param vectors: The vector layer. :param vectors: The vector layer.
:type vectors: LayerOutput :type vectors: LayerOutput
:param size: the dimension of this layer. :param size: The dimension of this layer.
:type size: int :type size: int
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param layer_attr: Extra Layer config. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
:type layer_attr: ExtraLayerAttribute | None details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -5124,11 +5189,11 @@ def block_expand_layer(input, ...@@ -5124,11 +5189,11 @@ def block_expand_layer(input,
outputW = 1 + (2 * padding_x + imgSizeW - block_x + stride_x - 1) / stride_x outputW = 1 + (2 * padding_x + imgSizeW - block_x + stride_x - 1) / stride_x
The expand method is the same as ExpandConvLayer, but saves the transposed The expanding method is the same as ExpandConvLayer, but saves the transposed
value. After expanding, output.sequenceStartPositions will store timeline. value. After expanding, output.sequenceStartPositions will store timeline.
The number of time steps are outputH * outputW and the dimension of each The number of time steps is outputH * outputW and the dimension of each
time step is block_y * block_x * num_channels. This layer can be used after time step is block_y * block_x * num_channels. This layer can be used after
convolution neural network, and before recurrent neural network. convolutional neural network, and before recurrent neural network.
The simple usage is: The simple usage is:
...@@ -5143,8 +5208,10 @@ def block_expand_layer(input, ...@@ -5143,8 +5208,10 @@ def block_expand_layer(input,
:param input: The input of this layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param num_channels: The channel number of input layer. :param num_channels: The number of input channels. If the parameter is not set or
:type num_channels: int | None set to None, its actual value will be automatically set to
the channels number of the input.
:type num_channels: int
:param block_x: The width of sub block. :param block_x: The width of sub block.
:type block_x: int :type block_x: int
:param block_y: The height of sub block. :param block_y: The height of sub block.
...@@ -5158,9 +5225,10 @@ def block_expand_layer(input, ...@@ -5158,9 +5225,10 @@ def block_expand_layer(input,
:param padding_y: The padding size in vertical direction. :param padding_y: The padding size in vertical direction.
:type padding_y: int :type padding_y: int
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: None | basestring :type name: basestring
:param layer_attr: Extra Layer config. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
:type layer_attr: ExtraLayerAttribute | None details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -5190,12 +5258,19 @@ def block_expand_layer(input, ...@@ -5190,12 +5258,19 @@ def block_expand_layer(input,
@layer_support() @layer_support()
def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None): def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None):
""" """
A layer to do max out on conv layer output. A layer to do max out on convolutional layer output.
- Input: output of a conv layer. - Input: the output of a convolutional layer.
- Output: feature map size same as input. Channel is (input channel) / groups. - Output: feature map size same as the input's, and its channel number is
(input channel) / groups.
So groups should be larger than 1, and the num of channels should be able So groups should be larger than 1, and the num of channels should be able
to divided by groups. to be divided by groups.
Reference:
Maxout Networks
http://www.jmlr.org/proceedings/papers/v28/goodfellow13.pdf
Multi-digit Number Recognition from Street View Imagery using Deep Convolutional Neural Networks
https://arxiv.org/pdf/1312.6082v4.pdf
.. math:: .. math::
y_{si+j} = \max_k x_{gsi + sk + j} y_{si+j} = \max_k x_{gsi + sk + j}
...@@ -5205,12 +5280,6 @@ def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None): ...@@ -5205,12 +5280,6 @@ def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None):
0 \le j < s 0 \le j < s
0 \le k < groups 0 \le k < groups
Please refer to Paper:
- Maxout Networks: http://www.jmlr.org/proceedings/papers/v28/goodfellow13.pdf
- Multi-digit Number Recognition from Street View \
Imagery using Deep Convolutional Neural Networks: \
https://arxiv.org/pdf/1312.6082v4.pdf
The simple usage is: The simple usage is:
.. code-block:: python .. code-block:: python
...@@ -5221,14 +5290,16 @@ def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None): ...@@ -5221,14 +5290,16 @@ def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None):
:param input: The input of this layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param num_channels: The channel number of input layer. If None will be set :param num_channels: The number of input channels. If the parameter is not set or
automatically from previous output. set to None, its actual value will be automatically set to
:type num_channels: int | None the channels number of the input.
:type num_channels: int
:param groups: The group number of input layer. :param groups: The group number of input layer.
:type groups: int :type groups: int
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: None | basestring. :type name: basestring
:param layer_attr: Extra Layer attribute. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -5260,20 +5331,20 @@ def ctc_layer(input, ...@@ -5260,20 +5331,20 @@ def ctc_layer(input,
layer_attr=None): layer_attr=None):
""" """
Connectionist Temporal Classification (CTC) is designed for temporal Connectionist Temporal Classification (CTC) is designed for temporal
classification task. That is, for sequence labeling problems where the classification task. e.g. sequence labeling problems where the
alignment between the inputs and the target labels is unknown. alignment between the inputs and the target labels is unknown.
More details can be found by referring to `Connectionist Temporal Reference:
Classification: Labelling Unsegmented Sequence Data with Recurrent Connectionist Temporal Classification: Labelling Unsegmented Sequence Data
Neural Networks <http://machinelearning.wustl.edu/mlpapers/paper_files/ with Recurrent Neural Networks
icml2006_GravesFGS06.pdf>`_ http://machinelearning.wustl.edu/mlpapers/paper_files/icml2006_GravesFGS06.pdf
Note: Note:
Considering the 'blank' label needed by CTC, you need to use Considering the 'blank' label needed by CTC, you need to use (num_classes + 1)
(num_classes + 1) as the input size. num_classes is the category number. as the size of the input, where num_classes is the category number.
And the 'blank' is the last category index. So the size of 'input' layer, such as And the 'blank' is the last category index. So the size of 'input' layer (e.g.
fc_layer with softmax activation, should be num_classes + 1. The size of ctc_layer fc_layer with softmax activation) should be (num_classes + 1). The size of
should also be num_classes + 1. ctc_layer should also be (num_classes + 1).
The example usage is: The example usage is:
...@@ -5286,16 +5357,17 @@ def ctc_layer(input, ...@@ -5286,16 +5357,17 @@ def ctc_layer(input,
:param input: The input of this layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param label: The data layer of label with variable length. :param label: The input label.
:type label: LayerOutput :type label: LayerOutput
:param size: category numbers + 1. :param size: The dimension of this layer, which must be equal to (category number + 1).
:type size: int :type size: int
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring | None :type name: basestring
:param norm_by_times: Whether to normalization by times. False by default. :param norm_by_times: Whether to do normalization by times. False is the default.
:type norm_by_times: bool :type norm_by_times: bool
:param layer_attr: Extra Layer config. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
:type layer_attr: ExtraLayerAttribute | None details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -5336,20 +5408,19 @@ def warp_ctc_layer(input, ...@@ -5336,20 +5408,19 @@ def warp_ctc_layer(input,
building process, PaddlePaddle will clone the source codes, build and building process, PaddlePaddle will clone the source codes, build and
install it to :code:`third_party/install/warpctc` directory. install it to :code:`third_party/install/warpctc` directory.
More details of CTC can be found by referring to `Connectionist Temporal Reference:
Classification: Labelling Unsegmented Sequence Data with Recurrent Connectionist Temporal Classification: Labelling Unsegmented Sequence Data
Neural Networks <http://machinelearning.wustl.edu/mlpapers/paper_files/ with Recurrent Neural Networks
icml2006_GravesFGS06.pdf>`_. http://machinelearning.wustl.edu/mlpapers/paper_files/icml2006_GravesFGS06.pdf
Note: Note:
- Let num_classes represent the category number. Considering the 'blank' - Let num_classes represent the category number. Considering the 'blank'
label needed by CTC, you need to use (num_classes + 1) as the input size. label needed by CTC, you need to use (num_classes + 1) as the size of
Thus, the size of both warp_ctc layer and 'input' layer should be set to warp_ctc layer.
num_classes + 1.
- You can set 'blank' to any value ranged in [0, num_classes], which - You can set 'blank' to any value ranged in [0, num_classes], which
should be consistent as that used in your labels. should be consistent with those used in your labels.
- As a native 'softmax' activation is integrated to the warp-ctc library, - As a native 'softmax' activation is integrated to the warp-ctc library,
'linear' activation is expected instead in the 'input' layer. 'linear' activation is expected to be used instead in the 'input' layer.
The example usage is: The example usage is:
...@@ -5363,18 +5434,19 @@ def warp_ctc_layer(input, ...@@ -5363,18 +5434,19 @@ def warp_ctc_layer(input,
:param input: The input of this layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param label: The data layer of label with variable length. :param label: The input label.
:type label: LayerOutput :type label: LayerOutput
:param size: category numbers + 1. :param size: The dimension of this layer, which must be equal to (category number + 1).
:type size: int :type size: int
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring | None :type name: basestring
:param blank: the 'blank' label used in ctc :param blank: The 'blank' label used in ctc.
:type blank: int :type blank: int
:param norm_by_times: Whether to normalization by times. False by default. :param norm_by_times: Whether to do normalization by times. False is the default.
:type norm_by_times: bool :type norm_by_times: bool
:param layer_attr: Extra Layer config. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
:type layer_attr: ExtraLayerAttribute | None details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -5420,23 +5492,26 @@ def crf_layer(input, ...@@ -5420,23 +5492,26 @@ def crf_layer(input,
label=label, label=label,
size=label_dim) size=label_dim)
:param input: The first input layer is the feature. :param input: The first input layer.
:type input: LayerOutput :type input: LayerOutput
:param label: The second input layer is label. :param label: The input label.
:type label: LayerOutput :type label: LayerOutput
:param size: The category number. :param size: The category number.
:type size: int :type size: int
:param weight: The third layer is "weight" of each sample, which is an :param weight: The weight layer defines a weight for each sample in the
optional argument. mini-batch. It is optional.
:type weight: LayerOutput :type weight: LayerOutput
:param param_attr: Parameter attribute. None means default attribute :param param_attr: The parameter attribute. See ParameterAttribute for
details.
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: None | basestring :type name: basestring
:param coeff: The coefficient affects the gradient in the backward. :param coeff: The weight of the gradient in the back propagation.
1.0 is the default value.
:type coeff: float :type coeff: float
:param layer_attr: Extra Layer config. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
:type layer_attr: ExtraLayerAttribute | None details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -5482,9 +5557,9 @@ def crf_decoding_layer(input, ...@@ -5482,9 +5557,9 @@ def crf_decoding_layer(input,
""" """
A layer for calculating the decoding sequence of sequential conditional A layer for calculating the decoding sequence of sequential conditional
random field model. The decoding sequence is stored in output.ids. random field model. The decoding sequence is stored in output.ids.
If a second input is provided, it is treated as the ground-truth label, and If the input 'label' is provided, it is treated as the ground-truth label, and
this layer will also calculate error. output.value[i] is 1 for incorrect this layer will also calculate error. output.value[i] is 1 for an incorrect
decoding or 0 for correct decoding. decoding and 0 for the correct.
The example usage is: The example usage is:
...@@ -5495,16 +5570,18 @@ def crf_decoding_layer(input, ...@@ -5495,16 +5570,18 @@ def crf_decoding_layer(input,
:param input: The first input layer. :param input: The first input layer.
:type input: LayerOutput :type input: LayerOutput
:param size: size of this layer. :param size: The dimension of this layer.
:type size: int :type size: int
:param label: None or ground-truth label. :param label: The input label.
:type label: LayerOutput or None :type label: LayerOutput | None
:param param_attr: Parameter attribute. None means default attribute :param param_attr: The parameter attribute. See ParameterAttribute for
details.
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: None | basestring :type name: basestring
:param layer_attr: Extra Layer config. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
:type layer_attr: ExtraLayerAttribute | None details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -5551,8 +5628,7 @@ def nce_layer(input, ...@@ -5551,8 +5628,7 @@ def nce_layer(input,
bias_attr=None, bias_attr=None,
layer_attr=None): layer_attr=None):
""" """
Noise-contrastive estimation. This layer implements the method in the Noise-contrastive estimation.
following paper:
Reference: Reference:
A fast and simple algorithm for training neural probabilistic language A fast and simple algorithm for training neural probabilistic language
...@@ -5568,25 +5644,27 @@ def nce_layer(input, ...@@ -5568,25 +5644,27 @@ def nce_layer(input,
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param input: The input layers. It should be a LayerOutput or a list/tuple :param input: The first input of this layer.
of LayerOutput.
:type input: LayerOutput | list | tuple | collections.Sequence :type input: LayerOutput | list | tuple | collections.Sequence
:param label: The ground truth. :param label: The input label.
:type label: LayerOutput :type label: LayerOutput
:param weight: The weight layer defines a weight for each sample in the :param weight: The weight layer defines a weight for each sample in the
mini-batch. The default value is None. mini-batch. It is optional.
:type weight: LayerOutput :type weight: LayerOutput
:param num_classes: The class number. :param num_classes: The number of classes.
:type num_classes: int :type num_classes: int
:param param_attr: The parameter attributes. :param act: Activation type. SigmoidActivation is the default activation.
:type param_attr: ParameterAttribute|list :type act: BaseActivation
:param num_neg_samples: The number of sampled negative labels. The default :param param_attr: The parameter attribute. See ParameterAttribute for
value is 10. details.
:type param_attr: ParameterAttribute
:param num_neg_samples: The number of sampled negative labels. 10 is the
default value.
:type num_neg_samples: int :type num_neg_samples: int
:param neg_distribution: The discrete noisy distribution over the output :param neg_distribution: The discrete noisy distribution over the output
space from which num_neg_samples negative labels space from which num_neg_samples negative labels
are sampled. If this parameter is not set, a are sampled. If this parameter is not set, a
uniform distribution will be used. A user defined uniform distribution will be used. A user-defined
distribution is a list whose length must be equal distribution is a list whose length must be equal
to the num_classes. Each member of the list defines to the num_classes. Each member of the list defines
the probability of a class given input x. the probability of a class given input x.
...@@ -5596,9 +5674,10 @@ def nce_layer(input, ...@@ -5596,9 +5674,10 @@ def nce_layer(input,
no bias is defined. If this parameter is set to True, no bias is defined. If this parameter is set to True,
the bias is initialized to zero. the bias is initialized to zero.
:type bias_attr: ParameterAttribute | None | bool | Any :type bias_attr: ParameterAttribute | None | bool | Any
:param layer_attr: Extra Layer Attribute. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:return: The LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
if isinstance(input, LayerOutput): if isinstance(input, LayerOutput):
...@@ -5665,11 +5744,11 @@ def rank_cost(left, ...@@ -5665,11 +5744,11 @@ def rank_cost(left,
coeff=1.0, coeff=1.0,
layer_attr=None): layer_attr=None):
""" """
A cost Layer for learning to rank using gradient descent. Details can refer A cost Layer for learning to rank using gradient descent.
to `papers <http://research.microsoft.com/en-us/um/people/cburges/papers/
ICML_ranking.pdf>`_. Reference:
This layer contains at least three inputs. The weight is an optional Learning to Rank using Gradient Descent
argument, which affects the cost. http://research.microsoft.com/en-us/um/people/cburges/papers/ICML_ranking.pdf
.. math:: .. math::
...@@ -5700,14 +5779,16 @@ def rank_cost(left, ...@@ -5700,14 +5779,16 @@ def rank_cost(left,
:type right: LayerOutput :type right: LayerOutput
:param label: Label is 1 or 0, means positive order and reverse order. :param label: Label is 1 or 0, means positive order and reverse order.
:type label: LayerOutput :type label: LayerOutput
:param weight: The weight affects the cost, namely the scale of cost. :param weight: The weight layer defines a weight for each sample in the
It is an optional argument. mini-batch. It is optional.
:type weight: LayerOutput :type weight: LayerOutput
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: None | basestring :type name: basestring
:param coeff: The coefficient affects the gradient in the backward. :param coeff: The weight of the gradient in the back propagation.
1.0 is the default value.
:type coeff: float :type coeff: float
:param layer_attr: Extra Layer Attribute. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -5752,25 +5833,25 @@ def lambda_cost(input, ...@@ -5752,25 +5833,25 @@ def lambda_cost(input,
NDCG_num=8, NDCG_num=8,
max_sort_size=-1) max_sort_size=-1)
:param input: Samples of the same query should be loaded as sequence. :param input: The first input of this layer, which is often a document
samples list of the same query and whose type must be sequence.
:type input: LayerOutput :type input: LayerOutput
:param score: The 2nd input. Score of each sample. :param score: The scores of the samples.
:type score: LayerOutput :type score: LayerOutput
:param NDCG_num: The size of NDCG (Normalized Discounted Cumulative Gain), :param NDCG_num: The size of NDCG (Normalized Discounted Cumulative Gain),
e.g., 5 for NDCG@5. It must be less than or equal to the e.g., 5 for NDCG@5. It must be less than or equal to the
minimum size of lists. minimum size of the list.
:type NDCG_num: int :type NDCG_num: int
:param max_sort_size: The size of partial sorting in calculating gradient. :param max_sort_size: The size of partial sorting in calculating gradient. If
If max_sort_size = -1, then for each list, the max_sort_size is equal to -1 or greater than the number
algorithm will sort the entire list to get gradient. of the samples in the list, then the algorithm will sort
In other cases, max_sort_size must be greater than or the entire list to compute the gradient. In other cases,
equal to NDCG_num. And if max_sort_size is greater max_sort_size must be greater than or equal to NDCG_num.
than the size of a list, the algorithm will sort the
entire list of get gradient.
:type max_sort_size: int :type max_sort_size: int
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: None | basestring :type name: basestring
:param layer_attr: Extra Layer Attribute. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -5815,11 +5896,10 @@ def cross_entropy(input, ...@@ -5815,11 +5896,10 @@ def cross_entropy(input,
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param coeff: The weight of the gradient in the back propagation. :param coeff: The weight of the gradient in the back propagation.
1.0 is the default. 1.0 is the default value.
:type coeff: float :type coeff: float
:param weight: The cost of each sample is multiplied with each weight. :param weight: The weight layer defines a weight for each sample in the
The weight should be a layer with size=1. Note that gradient mini-batch. It is optional.
will not be calculated for weight.
:type weight: LayerOutput :type weight: LayerOutput
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details. details.
...@@ -5864,7 +5944,7 @@ def cross_entropy_with_selfnorm(input, ...@@ -5864,7 +5944,7 @@ def cross_entropy_with_selfnorm(input,
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param coeff: The weight of the gradient in the back propagation. :param coeff: The weight of the gradient in the back propagation.
1.0 is the default. 1.0 is the default value.
:type coeff: float :type coeff: float
:param softmax_selfnorm_alpha: The scale factor affects the cost. :param softmax_selfnorm_alpha: The scale factor affects the cost.
:type softmax_selfnorm_alpha: float :type softmax_selfnorm_alpha: float
...@@ -5954,7 +6034,7 @@ def huber_regression_cost(input, ...@@ -5954,7 +6034,7 @@ def huber_regression_cost(input,
:param delta: The difference between the observed and predicted values. :param delta: The difference between the observed and predicted values.
:type delta: float :type delta: float
:param coeff: The weight of the gradient in the back propagation. :param coeff: The weight of the gradient in the back propagation.
1.0 is the default. 1.0 is the default value.
:type coeff: float :type coeff: float
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details. details.
...@@ -6004,7 +6084,7 @@ def huber_classification_cost(input, ...@@ -6004,7 +6084,7 @@ def huber_classification_cost(input,
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param coeff: The weight of the gradient in the back propagation. :param coeff: The weight of the gradient in the back propagation.
1.0 is the default. 1.0 is the default value.
:type coeff: float :type coeff: float
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details. details.
...@@ -6049,7 +6129,7 @@ def multi_binary_label_cross_entropy(input, ...@@ -6049,7 +6129,7 @@ def multi_binary_label_cross_entropy(input,
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param coeff: The weight of the gradient in the back propagation. :param coeff: The weight of the gradient in the back propagation.
1.0 is the default. 1.0 is the default value.
:type coeff: float :type coeff: float
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details. details.
...@@ -6220,7 +6300,7 @@ def smooth_l1_cost(input, label, name=None, coeff=1.0, layer_attr=None): ...@@ -6220,7 +6300,7 @@ def smooth_l1_cost(input, label, name=None, coeff=1.0, layer_attr=None):
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param coeff: The weight of the gradient in the back propagation. :param coeff: The weight of the gradient in the back propagation.
1.0 is the default. 1.0 is the default value.
:type coeff: float :type coeff: float
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details. details.
...@@ -6372,7 +6452,7 @@ def row_conv_layer(input, ...@@ -6372,7 +6452,7 @@ def row_conv_layer(input,
:param context_len: The context length equals the lookahead step number :param context_len: The context length equals the lookahead step number
plus one. plus one.
:type context_len: int :type context_len: int
:param act: Activation Type. LinearActivation is the default. :param act: Activation Type. LinearActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param param_attr: The parameter attribute. See ParameterAttribute for :param param_attr: The parameter attribute. See ParameterAttribute for
details. details.
...@@ -6494,7 +6574,8 @@ def gated_unit_layer(input, ...@@ -6494,7 +6574,8 @@ def gated_unit_layer(input,
:type input: LayerOutput :type input: LayerOutput
:param size: The dimension of this layer's output. :param size: The dimension of this layer's output.
:type size: int :type size: int
:param act: Activation type of the projection. LinearActivation is the default. :param act: Activation type of the projection. LinearActivation is the default
activation.
:type act: BaseActivation :type act: BaseActivation
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
...@@ -6875,7 +6956,7 @@ def img_conv3d_layer(input, ...@@ -6875,7 +6956,7 @@ def img_conv3d_layer(input,
:type filter_size: int | tuple | list :type filter_size: int | tuple | list
:param num_filters: The number of filters in each group. :param num_filters: The number of filters in each group.
:type num_filters: int :type num_filters: int
:param act: Activation type. ReluActivation is the default. :param act: Activation type. ReluActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param groups: The number of the filter groups. :param groups: The number of the filter groups.
:type groups: int :type groups: int
...@@ -6890,8 +6971,8 @@ def img_conv3d_layer(input, ...@@ -6890,8 +6971,8 @@ def img_conv3d_layer(input,
parameter is set to True, the bias is initialized to zero. parameter is set to True, the bias is initialized to zero.
:type bias_attr: ParameterAttribute | None | bool | Any :type bias_attr: ParameterAttribute | None | bool | Any
:param num_channels: The number of input channels. If the parameter is not set or :param num_channels: The number of input channels. If the parameter is not set or
set to None, its actual value will be automatically set to set to None, its actual value will be automatically set to
the channels number of the input . the channels number of the input.
:type num_channels: int :type num_channels: int
:param param_attr: The parameter attribute of the convolution. See ParameterAttribute for :param param_attr: The parameter attribute of the convolution. See ParameterAttribute for
details. details.
...@@ -7067,7 +7148,7 @@ def sub_seq_layer(input, offsets, sizes, act=None, bias_attr=None, name=None): ...@@ -7067,7 +7148,7 @@ def sub_seq_layer(input, offsets, sizes, act=None, bias_attr=None, name=None):
:type offsets: LayerOutput :type offsets: LayerOutput
:param sizes: The sizes of the sub-sequences, which should be sequence type. :param sizes: The sizes of the sub-sequences, which should be sequence type.
:type sizes: LayerOutput :type sizes: LayerOutput
:param act: Activation type, LinearActivation is the default. :param act: Activation type, LinearActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param bias_attr: The bias attribute. If the parameter is set to False or an object :param bias_attr: The bias attribute. If the parameter is set to False or an object
whose type is not ParameterAttribute, no bias is defined. If the whose type is not ParameterAttribute, no bias is defined. If the
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册