From 297a16986d3ed700c2d02b6a00b2012ab5bdba3b Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Thu, 14 Jun 2018 20:14:08 +0800 Subject: [PATCH] Fix doc of warpctc, array_read, edit_distance and sequence_reshape. --- python/paddle/fluid/layers/control_flow.py | 63 +- python/paddle/fluid/layers/nn.py | 742 ++++++++++++++------- 2 files changed, 522 insertions(+), 283 deletions(-) diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py index d1ea9f14856..8cd389910c8 100644 --- a/python/paddle/fluid/layers/control_flow.py +++ b/python/paddle/fluid/layers/control_flow.py @@ -13,7 +13,7 @@ # limitations under the License. import contextlib -from layer_function_generator import autodoc +from layer_function_generator import autodoc, templatedoc from tensor import assign, fill_constant from .. import core from ..framework import Program, Variable, Operator @@ -721,26 +721,22 @@ def lod_rank_table(x, level=0): return table +@templatedoc() def max_sequence_len(rank_table): - """Max Sequence Len Operator. Given a LoDRankTable object, this layer - returns the max length of a batch of sequences. In fact, a LoDRankTable - object contains a list of tuples() and - the list is already sorted by sequence length in descending order, so the - operator just returns the sequence length of the first tuple element. + """ + ${comment} + + >>> import paddle.fluid as fluid + >>> x = fluid.layers.data(name='x', shape=[10], dtype='float32', + >>> lod_level=1) + >>> rank_table = layers.lod_rank_table(x=x, level=0) + >>> max_seq_len = layers.max_sequence_len(rank_table) Args: - rank_table (Variable): Input variable which is a LoDRankTable object. + rank_table(${rank_table_type}): ${rank_table_comment}. Returns: - Variable: The max length of sequence. - - Examples: - .. code-block:: python - - x = fluid.layers.data(name='x', shape=[10], - dtype='float32', lod_level=1) - rank_table = layers.lod_rank_table(x=x, level=0) - max_seq_len = layers.max_sequence_len(rank_table) + ${out_comment}. """ helper = LayerHelper("max_seqence_len", **locals()) res = helper.create_tmp_variable(dtype="int64") @@ -978,19 +974,38 @@ def equal(x, y, cond=None, **ignored): def array_read(array, i): - """This function performs the operation to read the data in as an + """ + This function performs the operation to read the data in as an LOD_TENSOR_ARRAY. + + .. code-block:: text + + Given: + + array = [0.6, 0.1, 0.3, 0.1] + + And: + + i = 2 + + Then: + + output = 0.3 + Args: - array (Variable|list): The input tensor that will be written to an array. - i (Variable|list): The subscript index in tensor array, that points the - place where data will be written to. + array (Variable|list): The input tensor that store data to be read. + i (Variable|list): The index of the data to be read from input array. + Returns: Variable: The tensor type variable that has the data written to it. + Examples: - .. code-block::python - tmp = fluid.layers.zeros(shape=[10], dtype='int32') - i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10) - arr = layers.array_read(tmp, i=i) + .. code-block:: python + + tmp = fluid.layers.zeros(shape=[10], dtype='int32') + i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10) + arr = layers.array_read(tmp, i=i) + """ helper = LayerHelper('array_read', **locals()) if not isinstance( diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index bd6ed0f30e4..eba22f59623 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -12,78 +12,33 @@ # See the License for the specific language governing permissions and # limitations under the License. """ -All layers just related to the neural network. +All layers just related to the neural network. """ from ..layer_helper import LayerHelper from ..initializer import Normal, Constant from ..framework import Variable from ..param_attr import ParamAttr -from layer_function_generator import autodoc +from layer_function_generator import autodoc, templatedoc from tensor import concat import utils +import random __all__ = [ - 'fc', - 'embedding', - 'dynamic_lstm', - 'dynamic_lstmp', - 'dynamic_gru', - 'gru_unit', - 'linear_chain_crf', - 'crf_decoding', - 'cos_sim', - 'cross_entropy', - 'square_error_cost', - 'chunk_eval', - 'sequence_conv', - 'conv2d', - 'sequence_pool', - 'sequence_softmax', - 'softmax', - 'pool2d', - 'batch_norm', - 'beam_search_decode', - 'conv2d_transpose', - 'sequence_expand', - 'lstm_unit', - 'reduce_sum', - 'reduce_mean', - 'reduce_max', - 'reduce_min', - 'reduce_prod', - 'sequence_first_step', - 'sequence_last_step', - 'dropout', - 'split', - 'ctc_greedy_decoder', - 'edit_distance', - 'l2_normalize', - 'matmul', - 'topk', - 'warpctc', - 'sequence_reshape', - 'transpose', - 'im2sequence', - 'nce', - 'beam_search', - 'row_conv', - 'multiplex', - 'layer_norm', - 'softmax_with_cross_entropy', - 'smooth_l1', - 'one_hot', - 'autoincreased_step_counter', - 'reshape', - 'lod_reset', - 'lrn', - 'pad', - 'label_smooth', - 'roi_pool', - 'dice_loss', - 'resize_bilinear', - 'gather', - 'random_crop', + 'fc', 'embedding', 'dynamic_lstm', 'dynamic_lstmp', 'dynamic_gru', + 'gru_unit', 'linear_chain_crf', 'crf_decoding', 'cos_sim', 'cross_entropy', + 'square_error_cost', 'chunk_eval', 'sequence_conv', 'conv2d', + 'sequence_pool', 'sequence_softmax', 'softmax', 'pool2d', 'batch_norm', + 'beam_search_decode', 'conv2d_transpose', 'sequence_expand', 'lstm_unit', + 'reduce_sum', 'reduce_mean', 'reduce_max', 'reduce_min', 'reduce_prod', + 'sequence_first_step', 'sequence_last_step', 'dropout', 'split', + 'ctc_greedy_decoder', 'edit_distance', 'l2_normalize', 'matmul', 'topk', + 'warpctc', 'sequence_reshape', 'transpose', 'im2sequence', 'nce', + 'beam_search', 'row_conv', 'multiplex', 'layer_norm', + 'softmax_with_cross_entropy', 'smooth_l1', 'one_hot', + 'autoincreased_step_counter', 'reshape', 'lod_reset', 'lrn', 'pad', + 'label_smooth', 'roi_pool', 'dice_loss', 'image_resize', + 'image_resize_short', 'resize_bilinear', 'gather', 'random_crop', 'mean_iou' ] @@ -92,7 +47,6 @@ def fc(input, num_flatten_dims=1, param_attr=None, bias_attr=None, - use_cudnn=False, use_mkldnn=False, act=None, is_test=False, @@ -219,6 +173,7 @@ def embedding(input, have two elements which indicate the size of the dictionary of embeddings and the size of each embedding vector respectively. is_sparse(bool): The flag indicating whether to use sparse update. + is_distributed (bool): Whether to run lookup table from remote parameter server. padding_idx(int|long|None): If :attr:`None`, it makes no effect to lookup. Otherwise the given :attr:`padding_idx` indicates padding the output with zeros whenever lookup encounters it in :attr:`input`. If @@ -258,9 +213,10 @@ def embedding(input, return tmp -# TODO(qijun): expose H0 and C0 def dynamic_lstm(input, size, + h_0=None, + c_0=None, param_attr=None, bias_attr=None, use_peepholes=True, @@ -321,6 +277,13 @@ def dynamic_lstm(input, (T X 4D), where T is the total time steps in this mini-batch, D is the hidden size. size(int): 4 * hidden size. + h_0(Variable): The initial hidden state is an optional input, default is zero. + This is a tensor with shape (N x D), where N is the + batch size and D is the hidden size. + c_0(Variable): The initial cell state is an optional input, default is zero. + This is a tensor with shape (N x D), where N is the + batch size. `h_0` and `c_0` can be NULL but only at the same time. + param_attr(ParamAttr|None): The parameter attribute for the learnable hidden-hidden weights. @@ -384,12 +347,20 @@ def dynamic_lstm(input, cell = helper.create_tmp_variable(dtype) batch_gate = helper.create_tmp_variable(dtype) batch_cell_pre_act = helper.create_tmp_variable(dtype) + inputs = {'Input': input, 'Weight': weight, 'Bias': bias} + batch_size = input.shape[0] + if h_0: + assert h_0.shape == (batch_size, size), \ + 'The shape of h0 should be (batch_size, %d)' % size + inputs['H0'] = h_0 + if c_0: + assert c_0.shape == (batch_size, size), \ + 'The shape of c0 should be (batch_size, %d)' % size + inputs['C0'] = c_0 helper.append_op( type='lstm', - inputs={'Input': input, - 'Weight': weight, - 'Bias': bias}, + inputs=inputs, outputs={ 'Hidden': hidden, 'Cell': cell, @@ -651,8 +622,9 @@ def dynamic_gru(input, :attr:`False`. gate_activation(str): The activation for update gate and reset gate. Choices = ["sigmoid", "tanh", "relu", "identity"], default "sigmoid". - activation(str): The activation for candidate hidden state. + candidate_activation(str): The activation for candidate hidden state. Choices = ["sigmoid", "tanh", "relu", "identity"], default "tanh". + h_0 (Variable): The hidden output of the first time step. Returns: Variable: The hidden state of GRU. The shape is :math:`(T \\times D)`, \ @@ -673,11 +645,13 @@ def dynamic_gru(input, attr=helper.param_attr, shape=[size, 3 * size], dtype=dtype) bias = helper.create_parameter( attr=helper.bias_attr, shape=[1, 3 * size], dtype=dtype, is_bias=True) + batch_size = input.shape[0] inputs = {'Input': input, 'Weight': weight, 'Bias': bias} if h_0 != None: assert h_0.shape == ( - size, size), 'The shape of h0 should be(%d, %d)' % (size, size) - inputs['h0'] = h_0 + batch_size, size + ), 'The shape of h0 should be(batch_size, %d)' % size + inputs['H0'] = h_0 hidden = helper.create_tmp_variable(dtype) batch_gate = helper.create_tmp_variable(dtype) @@ -799,7 +773,22 @@ def gru_unit(input, return updated_hidden, reset_hidden_pre, gate +@templatedoc() def linear_chain_crf(input, label, param_attr=None): + """ + Linear Chain CRF. + + ${comment} + + Args: + input(${emission_type}): ${emission_comment} + label(${label_type}): ${label_comment} + param_attr(ParamAttr): The attribute of the learnable parameter. + + Returns: + ${log_likelihood_comment} + + """ helper = LayerHelper('linear_chain_crf', **locals()) size = input.shape[1] transition = helper.create_parameter( @@ -825,7 +814,19 @@ def linear_chain_crf(input, label, param_attr=None): return log_likelihood +@templatedoc() def crf_decoding(input, param_attr, label=None): + """ + ${comment} + + Args: + input(${emission_type}): ${emission_comment} + param_attr(ParamAttr): The parameter attribute for training. + label(${label_type}): ${label_comment} + + Returns: + ${viterbi_path_comment} + """ helper = LayerHelper('crf_decoding', **locals()) transition = helper.get_parameter(param_attr.name) viterbi_path = helper.create_tmp_variable(dtype=helper.input_dtype()) @@ -843,6 +844,13 @@ def cos_sim(X, Y): """ This function performs the cosine similarity between two tensors X and Y and returns that as the output. + + Args: + X (Variable): The input X. + Y (Variable): The input Y. + + Returns: + Variable: the output of cosine(X, Y). """ helper = LayerHelper('cos_sim', **locals()) out = helper.create_tmp_variable(dtype=X.dtype) @@ -869,15 +877,15 @@ def dropout(x, dropout_prob, is_test=False, seed=None, name=None): unchanged. Args: - x(variable): The input tensor. - dropout_prob(float): Probability of setting units to zero. - is_test(bool): A flag indicating whether it is in test phrase or not. - seed(int): A Python integer used to create random seeds. If this - parameter is set to None, a random seed is used. - NOTE: If an integer seed is given, always the same output - units will be dropped. DO NOT use a fixed seed in training. - name(str|None): A name for this layer(optional). If set None, the layer - will be named automatically. + x (Variable): The input tensor. + dropout_prob (float): Probability of setting units to zero. + is_test (bool): A flag indicating whether it is in test phrase or not. + seed (int): A Python integer used to create random seeds. If this + parameter is set to None, a random seed is used. + NOTE: If an integer seed is given, always the same output + units will be dropped. DO NOT use a fixed seed in training. + name (str|None): A name for this layer(optional). If set None, the layer + will be named automatically. Returns: Variable: A tensor variable. @@ -999,8 +1007,8 @@ def square_error_cost(input, label): * :math:`Out`: Output value, same shape with :math:`X`. Args: - input(Variable): Input tensor, has predictions. - label(Variable): Label tensor, has target labels. + input (Variable): Input tensor, has predictions. + label (Variable): Label tensor, has target labels. Returns: Variable: The tensor variable storing the element-wise squared error \ @@ -1029,6 +1037,7 @@ def square_error_cost(input, label): return square_out +@templatedoc() def chunk_eval(input, label, chunk_scheme, @@ -1037,6 +1046,18 @@ def chunk_eval(input, """ This function computes and outputs the precision, recall and F1-score of chunk detection. + + Args: + input (Variable): prediction output of the network. + label (Variable): label of the test data set. + chunk_scheme (str): ${chunk_scheme_comment} + num_chunk_types (int): ${num_chunk_types_comment} + excluded_chunk_types (list): ${excluded_chunk_types_comment} + + Returns: + tuple: tuple containing: (precision, recall, f1_score, + num_infer_chunks, num_label_chunks, + num_correct_chunks) """ helper = LayerHelper("chunk_eval", **locals()) @@ -1069,6 +1090,7 @@ def chunk_eval(input, num_correct_chunks) +@templatedoc() def sequence_conv(input, num_filters, filter_size=3, @@ -1081,6 +1103,19 @@ def sequence_conv(input, This function creates the op for sequence_conv, using the inputs and other convolutional configurations for the filters and stride as given in the input parameters to the function. + + Args: + input (Variable): ${x_comment} + num_filters (int): number of filters. + filter_size (int): the filter size (H and W). + filter_stride (int): stride of the filter. + padding (bool): if True, add paddings. + bias_attr (ParamAttr|None): attributes for bias + param_attr (ParamAttr|None): attributes for parameter + act (str): the activation type + + Returns: + Variable: output of sequence_conv """ # FIXME(dzh) : want to unify the argument of python layer @@ -1180,48 +1215,49 @@ def conv2d(input, - Input: - Input shape: $(N, C_{in}, H_{in}, W_{in})$ + Input shape: :math:`(N, C_{in}, H_{in}, W_{in})` - Filter shape: $(C_{out}, C_{in}, H_f, W_f)$ + Filter shape: :math:`(C_{out}, C_{in}, H_f, W_f)` - Output: - Output shape: $(N, C_{out}, H_{out}, W_{out})$ + Output shape: :math:`(N, C_{out}, H_{out}, W_{out})` Where .. math:: - H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]} + 1 \\\\ - W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1 + H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]} + 1 \\\\ + W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1 Args: - input(Variable): The input image with [N, C, H, W] format. - num_filters(int): The number of filter. It is as same as the output - image channel. - filter_size(int|tuple|None): The filter size. If filter_size is a tuple, - it must contain two integers, (filter_size_H, filter_size_W). - Otherwise, the filter will be a square. - stride(int|tuple): The stride size. If stride is a tuple, it must - contain two integers, (stride_H, stride_W). Otherwise, the - stride_H = stride_W = stride. Default: stride = 1. - padding(int|tuple): The padding size. If padding is a tuple, it must - contain two integers, (padding_H, padding_W). Otherwise, the - padding_H = padding_W = padding. Default: padding = 0. - dilation(int|tuple): The dilation size. If dilation is a tuple, it must - contain two integers, (dilation_H, dilation_W). Otherwise, the - dilation_H = dilation_W = dilation. Default: dilation = 1. - groups(int): The groups number of the Conv2d Layer. According to grouped - convolution in Alex Krizhevsky's Deep CNN paper: when group=2, - the first half of the filters is only connected to the first half - of the input channels, while the second half of the filters is only - connected to the second half of the input channels. Default: groups=1 - param_attr(ParamAttr): The parameters to the Conv2d Layer. Default: None - bias_attr(ParamAttr): Bias parameter for the Conv2d layer. Default: None - use_cudnn(bool): Use cudnn kernel or not, it is valid only when the cudnn - library is installed. Default: True - act(str): Activation type. Default: None - name(str|None): A name for this layer(optional). If set None, the layer - will be named automatically. + input (Variable): The input image with [N, C, H, W] format. + num_filters(int): The number of filter. It is as same as the output + image channel. + filter_size (int|tuple|None): The filter size. If filter_size is a tuple, + it must contain two integers, (filter_size_H, filter_size_W). + Otherwise, the filter will be a square. + stride (int|tuple): The stride size. If stride is a tuple, it must + contain two integers, (stride_H, stride_W). Otherwise, the + stride_H = stride_W = stride. Default: stride = 1. + padding (int|tuple): The padding size. If padding is a tuple, it must + contain two integers, (padding_H, padding_W). Otherwise, the + padding_H = padding_W = padding. Default: padding = 0. + dilation (int|tuple): The dilation size. If dilation is a tuple, it must + contain two integers, (dilation_H, dilation_W). Otherwise, the + dilation_H = dilation_W = dilation. Default: dilation = 1. + groups (int): The groups number of the Conv2d Layer. According to grouped + convolution in Alex Krizhevsky's Deep CNN paper: when group=2, + the first half of the filters is only connected to the first half + of the input channels, while the second half of the filters is only + connected to the second half of the input channels. Default: groups=1 + param_attr (ParamAttr): The parameters to the Conv2d Layer. Default: None + bias_attr (ParamAttr): Bias parameter for the Conv2d layer. Default: None + use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn + library is installed. Default: True + use_mkldnn (bool): Use mkldnn kernels or not. + act (str): Activation type. Default: None + name (str|None): A name for this layer(optional). If set None, the layer + will be named automatically. Returns: Variable: The tensor variable storing the convolution and \ @@ -1379,7 +1415,7 @@ def sequence_pool(input, pool_type): def sequence_first_step(input): """ - This funciton get the first step of sequence. + This function gets the first step of sequence. .. code-block:: text @@ -1412,7 +1448,7 @@ def sequence_first_step(input): def sequence_last_step(input): """ - This funciton get the last step of sequence. + This function gets the last step of sequence. .. code-block:: text @@ -1456,6 +1492,22 @@ def pool2d(input, """ This function adds the operator for pooling in 2 dimensions, using the pooling configurations mentioned in input parameters. + + Args: + input (Variable): ${input_comment} + pool_size (int): ${ksize_comment} + pool_type (str): ${pooling_type_comment} + pool_stride (int): stride of the pooling layer. + pool_padding (int): padding size. + global_pooling (bool): ${global_pooling_comment} + use_cudnn (bool): ${use_cudnn_comment} + ceil_mode (bool): ${ceil_mode_comment} + use_mkldnn (bool): ${use_mkldnn_comment} + name (str): A name for this layer(optional). If set None, the layer + will be named automatically. + + Returns: + Variable: output of pool2d layer. """ if pool_type not in ["max", "avg"]: raise ValueError( @@ -1513,6 +1565,25 @@ def batch_norm(input, """ This function helps create an operator to implement the BatchNorm layer using the configurations from the input parameters. + + Args: + input (Variable): the input variable. + act (str): activation type + is_test (bool): whether to run batch_norm as test mode. + momentum (float): momentum + epsilon (float): epsilon, default 1e-05 + param_attr (ParamAttr|None): attributes for parameter + bias_attr (ParamAttr|None): attributes for bias + data_layout (str): data layout, default NCHW + in_place (bool): if True, do not create tmp variable + use_mkldnn (bool): ${use_mkldnn_comment} + name (str): The name of this layer. It is optional. + moving_mean_name (str): The name of moving mean variable name, optional. + moving_variance_name (str): The name of moving variance name, optional. + do_model_average_for_mean_and_var (bool): + + Returns: + Variable: output of batch_norm layer. """ helper = LayerHelper('batch_norm', **locals()) dtype = helper.input_dtype() @@ -1640,6 +1711,7 @@ def layer_norm(input, bias_attr(ParamAttr|None): The parameter attribute for the learnable bias :math:`b`. act(str): Activation to be applied to the output of layer normalizaiton. + name (str): The name of this layer. It is optional. Returns: Variable: A tensor variable with the same shape as the input. @@ -1691,6 +1763,17 @@ def layer_norm(input, def beam_search_decode(ids, scores, name=None): + """ + ${beam_search_decode} + + Args: + ids (Variable): ${ids_comment} + scores (Variable): ${scores_comment} + name (str): The name of this layer. It is optional. + + Returns: + tuple: a tuple of two output variable: sentence_ids, sentence_scores + """ helper = LayerHelper('beam_search_decode', **locals()) sentence_ids = helper.create_tmp_variable(dtype=ids.dtype) sentence_scores = helper.create_tmp_variable(dtype=ids.dtype) @@ -1766,46 +1849,46 @@ def conv2d_transpose(input, W_{out} &= (W_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (W_f - 1) + 1 Args: - input(Variable): The input image with [N, C, H, W] format. - num_filters(int): The number of the filter. It is as same as the output - image channel. - output_size(int|tuple|None): The output image size. If output size is a - tuple, it must contain two integers, (image_H, image_W). This - parameter only works when filter_size is None. - filter_size(int|tuple|None): The filter size. If filter_size is a tuple, - it must contain two integers, (filter_size_H, filter_size_W). - Otherwise, the filter will be a square. None if use output size to - calculate filter_size. - padding(int|tuple): The padding size. If padding is a tuple, it must - contain two integers, (padding_H, padding_W). Otherwise, the - padding_H = padding_W = padding. Default: padding = 0. - stride(int|tuple): The stride size. If stride is a tuple, it must - contain two integers, (stride_H, stride_W). Otherwise, the - stride_H = stride_W = stride. Default: stride = 1. - dilation(int|tuple): The dilation size. If dilation is a tuple, it must - contain two integers, (dilation_H, dilation_W). Otherwise, the - dilation_H = dilation_W = dilation. Default: dilation = 1. - groups(int): The groups number of the Conv2d transpose layer. Inspired by - grouped convolution in Alex Krizhevsky's Deep CNN paper, in which - when group=2, the first half of the filters is only connected to the - first half of the input channels, while the second half of the - filters is only connected to the second half of the input channels. - Default: groups=1 - param_attr(ParamAttr): The parameters to the Conv2d_transpose Layer. - Default: None - bias_attr(ParamAttr): Bias parameter for the Conv2d layer. Default: None - use_cudnn(bool): Use cudnn kernel or not, it is valid only when the cudnn - library is installed. Default: True - act(str): Activation type. Default: None - name(str|None): A name for this layer(optional). If set None, the layer - will be named automatically. + input(Variable): The input image with [N, C, H, W] format. + num_filters(int): The number of the filter. It is as same as the output + image channel. + output_size(int|tuple|None): The output image size. If output size is a + tuple, it must contain two integers, (image_H, image_W). This + parameter only works when filter_size is None. + filter_size(int|tuple|None): The filter size. If filter_size is a tuple, + it must contain two integers, (filter_size_H, filter_size_W). + Otherwise, the filter will be a square. None if use output size to + calculate filter_size. + padding(int|tuple): The padding size. If padding is a tuple, it must + contain two integers, (padding_H, padding_W). Otherwise, the + padding_H = padding_W = padding. Default: padding = 0. + stride(int|tuple): The stride size. If stride is a tuple, it must + contain two integers, (stride_H, stride_W). Otherwise, the + stride_H = stride_W = stride. Default: stride = 1. + dilation(int|tuple): The dilation size. If dilation is a tuple, it must + contain two integers, (dilation_H, dilation_W). Otherwise, the + dilation_H = dilation_W = dilation. Default: dilation = 1. + groups(int): The groups number of the Conv2d transpose layer. Inspired by + grouped convolution in Alex Krizhevsky's Deep CNN paper, in which + when group=2, the first half of the filters is only connected to the + first half of the input channels, while the second half of the + filters is only connected to the second half of the input channels. + Default: groups=1 + param_attr(ParamAttr): The parameters to the Conv2d_transpose Layer. + Default: None + bias_attr(ParamAttr): Bias parameter for the Conv2d layer. Default: None + use_cudnn(bool): Use cudnn kernel or not, it is valid only when the cudnn + library is installed. Default: True + act(str): Activation type. Default: None + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. Returns: - Variable: The tensor variable storing the convolution transpose result. + Variable: The tensor variable storing the convolution transpose result. Raises: - ValueError: If the shapes of input, filter_size, stride, padding and - groups mismatch. + ValueError: If the shapes of input, filter_size, stride, padding and + groups mismatch. Examples: .. code-block:: python @@ -1942,6 +2025,17 @@ def sequence_expand(x, y, ref_level=-1, name=None): def beam_search(pre_ids, ids, scores, beam_size, end_id, level=0): ''' This function implements the beam search algorithm. + + Args: + pre_ids (Variable): ${pre_ids_comment} + ids (Variable): ${ids_comment} + scores (Variable): ${scores_comment} + beam_size (int): ${beam_size_comment} + end_id (int): ${end_id_comment} + level (int): ${level_comment} + + Returns: + tuple: a tuple of beam_search output variables: selected_ids, selected_scores ''' helper = LayerHelper('beam_search', **locals()) score_type = scores.dtype @@ -2437,19 +2531,21 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None): The l2 normalize layer normalizes `x` along dimension `axis` using an L2 norm. For a 1-D tensor (`dim` is fixed to 0), this layer computes - output = x / sqrt(max(sum(x**2), epsilon)) + .. math:: + y = \frac{x}{ \sqrt{\sum {x^2} + epsion }} For `x` with more dimensions, this layer independently normalizes each 1-D slice along dimension `axis`. Args: - x(Variable|list): The input tensor to l2_normalize layer. - axis(int): Dimension along which to normalize the input. - epsilon(float): A lower bound value for `x`'s l2 norm. sqrt(epsilon) will - be used as the divisor if the l2 norm of `x` is less than - sqrt(epsilon). - name(str|None): A name for this layer(optional). If set None, the layer - will be named automatically. + x(Variable|list): The input tensor to l2_normalize layer. + axis(int): The axis on which to apply normalization. If `axis < 0`, + the dimension to normalization is rank(X) + axis. -1 is the + last dimension. + epsilon(float): The epsilon value is used to avoid division by zero, + the defalut value is 1e-10. + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. Returns: @@ -2468,46 +2564,17 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None): axis = 0 helper = LayerHelper("l2_normalize", **locals()) - square = helper.create_tmp_variable(dtype=x.dtype) - helper.append_op(type="square", inputs={"X": x}, outputs={"Out": square}) - - reduced_sum = helper.create_tmp_variable(dtype=x.dtype) + out = helper.create_tmp_variable(dtype=x.dtype) + norm = helper.create_tmp_variable(dtype=x.dtype) helper.append_op( - type="reduce_sum", - inputs={"X": square}, - outputs={"Out": reduced_sum}, + type="norm", + inputs={"X": x}, + outputs={"Out": out, + "Norm": norm}, attrs={ - "dim": [1] if axis is None else [axis], - "keep_dim": True, - "reduce_all": False + "axis": 1 if axis is None else axis, + "epsilon": epsilon, }) - - # TODO(caoying) A lower bound value epsilon for the norm is needed to - # imporve the numeric stability of reciprocal. This requires a maximum_op. - rsquare = helper.create_tmp_variable(dtype=x.dtype) - helper.append_op( - type="reciprocal", inputs={"X": reduced_sum}, outputs={"Out": rsquare}) - - # TODO(caoying) the current elementwise_mul operator does not support a - # general broadcast rule which broadcasts input(Y) to have the same - # dimension with Input(X) starting from a specified dimension. So this - # exanpsion is requred. Once a general broadcast rule is spported, this - # expanding canbe removed. - rsquare_expanded = helper.create_tmp_variable(dtype=x.dtype) - expand_times = [1] * len(x.shape) - expand_times[axis] = int(x.shape[axis]) - helper.append_op( - type="expand", - inputs={"X": rsquare}, - outputs={"Out": rsquare_expanded}, - attrs={"expand_times": expand_times}) - - out = helper.create_tmp_variable(dtype=x.dtype) - helper.append_op( - type="elementwise_mul", - inputs={"X": x, - "Y": rsquare_expanded}, - outputs={"Out": out}) return out @@ -2666,8 +2733,7 @@ def topk(input, k, name=None): return values, indices -def edit_distance(input, label, normalized=True, ignored_tokens=None, - name=None): +def edit_distance(input, label, normalized=True, ignored_tokens=None): """ EditDistance operator computes the edit distances between a batch of hypothesis strings and their references. Edit distance, also called @@ -2681,26 +2747,23 @@ def edit_distance(input, label, normalized=True, ignored_tokens=None, "kitten" -> "sitten" -> "sittin" -> "sitting" - Input(Hyps) is a LoDTensor consisting of all the hypothesis strings with + The input is a LoDTensor consisting of all the hypothesis strings with the total number denoted by `batch_size`, and the separation is specified by the LoD information. And the `batch_size` reference strings are arranged - in order in the same way in the LoDTensor Input(Refs). + in order in the same way in the input LoDTensor. - Output(Out) contains the `batch_size` results and each stands for the edit + The output contains the `batch_size` results and each stands for the edit distance for a pair of strings respectively. If Attr(normalized) is true, the edit distance will be divided by the length of reference string. Args: - input(Variable): The indices for hypothesis strings. - label(Variable): The indices for reference strings. - - normalized(bool): Indicated whether to normalize the edit distance by + normalized(bool, default True): Indicated whether to normalize the edit distance by the length of reference string. - - ignored_tokens(list of int): Tokens that should be removed before + ignored_tokens(list, default None): Tokens that should be removed before calculating edit distance. + name (str): The name of this layer. It is optional. Returns: Variable: sequence-to-sequence edit distance in shape [batch_size, 1]. @@ -2710,7 +2773,6 @@ def edit_distance(input, label, normalized=True, ignored_tokens=None, x = fluid.layers.data(name='x', shape=[8], dtype='float32') y = fluid.layers.data(name='y', shape=[7], dtype='float32') - cost = fluid.layers.edit_distance(input=x,label=y) """ helper = LayerHelper("edit_distance", **locals()) @@ -2790,10 +2852,10 @@ def ctc_greedy_decoder(input, blank, name=None): where Lp is the sum of all input sequences' length and num_classes is the true number of classes. (not including the blank label). - blank(int): the blank label index of Connectionist Temporal Classification (CTC) loss, which is in thehalf-opened interval [0, num_classes + 1). + name (str): The name of this layer. It is optional. Returns: Variable: CTC greedy decode result. If all the sequences in result were @@ -2830,35 +2892,33 @@ def warpctc(input, label, blank=0, norm_by_times=False): input tensor. Args: - input(Variable): (LodTensor, default: LoDTensor), - the unscaled probabilities of variable-length sequences, + input (Variable): The unscaled probabilities of variable-length sequences, which is a 2-D Tensor with LoD information. It's shape is [Lp, num_classes + 1], where Lp is the sum of all input sequences' length and num_classes is the true number of classes. (not including the blank label). - label(Variable): (LodTensor, default: LoDTensor), the ground truth - of variable-length sequence, which is a 2-D Tensor with LoD - information. It is of the shape [Lg, 1], where Lg is th sum of - all labels' length. - blank: (int, default: 0), the blank label index of Connectionist + label (Variable): The ground truth of variable-length sequence, + which is a 2-D Tensor with LoD information. It is of the shape [Lg, 1], + where Lg is th sum of all labels' length. + blank (int, default 0): The blank label index of Connectionist Temporal Classification (CTC) loss, which is in the half-opened interval [0, num_classes + 1). - norm_by_times: (bool, default: false), whether to normalize - the gradients by the number of time-step, which is also the - sequence's length. There is no need to normalize the gradients - if warpctc layer was follewed by a mean_op. + norm_by_times(bool, default false): Whether to normalize the gradients + by the number of time-step, which is also the sequence's length. + There is no need to normalize the gradients if warpctc layer was + follewed by a mean_op. Returns: Variable: The Connectionist Temporal Classification (CTC) loss, which is a 2-D Tensor of the shape [batch_size, 1]. Examples: + .. code-block:: python - y = layers.data( - name='y', shape=[11, 8], dtype='float32', lod_level=1) - y_predict = layers.data( - name='y_predict', shape=[11, 1], dtype='float32') - cost = layers.warpctc(input=y_predict, label=y) + + label = layers.data(shape=[11, 8], dtype='float32', lod_level=1) + predict = layers.data(shape=[11, 1], dtype='float32') + cost = layers.warpctc(input=predict, label=label) """ helper = LayerHelper('warpctc', **locals()) @@ -2888,16 +2948,21 @@ def sequence_reshape(input, new_dim): x is a LoDTensor: x.lod = [[0, 2, 6]] - x.data = [[1, 2], [3, 4], - [5, 6], [7, 8], [9, 10], [11, 12]] + x.data = [[1, 2], [3, 4], + [5, 6], [7, 8], + [9, 10], [11, 12]] x.dims = [6, 2] set new_dim = 4 then out is a LoDTensor: + out.lod = [[0, 1, 3]] - out.data = [[1, 2, 3, 4], - [5, 6, 7, 8], [9, 10, 11, 12]] + + out.data = [[1, 2, 3, 4], + [5, 6, 7, 8], + [9, 10, 11, 12]] + out.dims = [3, 4] Currently, only 1-level LoDTensor is supported and please make sure @@ -2905,18 +2970,18 @@ def sequence_reshape(input, new_dim): no remainder for each sequence. Args: - input (Variable): (LodTensor, default: LoDTensor), a 2-D LoDTensor - with shape being [N, M] where M for dimension. - new_dim (int): New dimension which the input LoDTensor is reshaped to. + + input (Variable): A 2-D LoDTensor with shape being [N, M] where M for dimension. + new_dim (int): New dimension that the input LoDTensor is reshaped to. Returns: + Variable: Reshaped LoDTensor according to new dimension. Examples: .. code-block:: python - x = fluid.layers.data(name='x', shape=[5, 20], - dtype='float32', lod_level=1) + x = fluid.layers.data(shape=[5, 20], dtype='float32', lod_level=1) x_reshaped = layers.sequence_reshape(input=x, new_dim=10) """ helper = LayerHelper('sequence_reshape', **locals()) @@ -2929,7 +2994,10 @@ def sequence_reshape(input, new_dim): return out -@autodoc() +# FIXME(wuyi): let docstring_checker.py understand @autodoc. +# For now, the comments in c++ use types like Tensor, but in python side +# the type is often "Variable", and arguments may vary. +@templatedoc(op_type="nce") def nce(input, label, num_total_classes, @@ -2937,6 +3005,21 @@ def nce(input, param_attr=None, bias_attr=None, num_neg_samples=None): + """ + ${comment} + + Args: + input (Variable): input variable. + label (Variable): label. + num_total_classes (int):${num_total_classes_comment} + sample_weight (int): ${sample_weight_comment} + param_attr (ParamAttr|None): attributes for parameter + bias_attr (ParamAttr|None): attributes for bias + num_neg_samples (int): ${num_neg_samples_comment} + + Returns: + Variable: output of nce layer. + """ helper = LayerHelper('nce', **locals()) assert isinstance(input, Variable) dim = input.shape[1] @@ -2994,8 +3077,9 @@ def transpose(x, perm, name=None): perm[i]-th dimension of `input`. Args: - input (Variable): (Tensor), A Tensor. - perm (list): A permutation of the dimensions of `input`. + x (Variable): The input Tensor. + perm (list): A permutation of the dimensions of `input`. + name (str): The name of this layer. It is optional. Returns: Variable: A transposed Tensor. @@ -3228,9 +3312,9 @@ def multiplex(inputs, index): row of the matrix, then `O[i]` is equal to :math:`I_{ID[i]}[i]`. Args: - inputs (list): A list of variables to gather from. All variables have the + inputs (list): A list of variables to gather from. All variables have the same shape and the rank is at least 2. - index (Variable): Tensor, index variable which is a 2-D tensor + index (Variable): Tensor, index variable which is a 2-D tensor with shape [M, 1] where M is the batch size. Returns: @@ -3429,7 +3513,8 @@ def autoincreased_step_counter(counter_name=None, begin=1, step=1): begin(int): The first value of this counter. step(int): The increment step between each execution. - Returns(Variable): The global run counter. + Returns: + Variable: The global run counter. """ helper = LayerHelper('global_step_counter') if counter_name is None: @@ -3490,7 +3575,7 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None): the corresponding dimension of x. Args: - input(variable): The input tensor. + x(variable): The input tensor. shape(list): The new shape. At most one dimension of the new shape can be -1. actual_shape(variable): An optional input. If provided, reshape @@ -3502,8 +3587,10 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None): inplace(bool): If this flag is set true, a new output tensor is created whose data is copied from input x, otherwise the output shares data with input without copying. + name (str): The name of this layer. It is optional. - Returns(variable): The output tensor. + Returns: + Variable: The output tensor. Examples: .. code-block:: python @@ -3929,22 +4016,25 @@ def dice_loss(input, label, epsilon=0.00001): return reduce_mean(dice_score) -def resize_bilinear(input, out_shape=None, scale=None, name=None): +def image_resize(input, + out_shape=None, + scale=None, + name=None, + resample='BILINEAR'): """ - The mathematical meaning of resize bilinear layer is - Bilinear interpolation. - Bilinear interpolation is an extension of linear interpolation for - interpolating functions of two variables (e.g. H-direction and - W-direction in this layer) on a rectilinear 2D grid. + Resize a batch of images. - For details, please refer to Wikipedia: - https://en.wikipedia.org/wiki/Bilinear_interpolation + The input must be a tensor of the shape (num_batches, channels, in_h, in_w), + and the resizing only applies on the last two dimensions(hight and width). + + Supporting resample methods: + 'BILINEAR' : Bilinear interpolation Args: - input (Variable): The input tensor of resize bilinear layer, + input (Variable): The input tensor of image resize layer, This is a 4-D tensor of the shape (num_batches, channels, in_h, in_w). - out_shape(list|tuple|Variable|None): Output shape of resize bilinear + out_shape(list|tuple|Variable|None): Output shape of image resize layer, the shape is (out_h, out_w). Default: None scale(float|None): The multiplier for the input height or width. @@ -3953,6 +4043,8 @@ def resize_bilinear(input, out_shape=None, scale=None, name=None): Default: None name(str|None): A name for this layer(optional). If set None, the layer will be named automatically. + resample(str): The resample method. It can only be 'BILINEAR' currently. + Default: 'BILINEAR' Returns: out (Variable): The output is a 4-D tensor of the shape @@ -3961,8 +4053,12 @@ def resize_bilinear(input, out_shape=None, scale=None, name=None): Examples: .. code-block:: python - out = fluid.layers.resize_bilinear(input, out_shape=[12, 12]) + out = fluid.layers.image_resize(input, out_shape=[12, 12]) """ + resample_methods = {'BILINEAR': 'bilinear_interp'} + if resample not in resample_methods: + raise ValueError( + "The 'resample' of image_resize can only be 'BILINEAR' currently.") if out_shape is None and scale is None: raise ValueError("One of out_shape and scale must not be None") helper = LayerHelper('bilinear_interp', **locals()) @@ -3990,7 +4086,7 @@ def resize_bilinear(input, out_shape=None, scale=None, name=None): out = helper.create_tmp_variable(dtype) helper.append_op( - type="bilinear_interp", + type=resample_methods[resample], inputs=inputs, outputs={"Out": out}, attrs={"out_h": out_h, @@ -3998,6 +4094,62 @@ def resize_bilinear(input, out_shape=None, scale=None, name=None): return out +@templatedoc(op_type="bilinear_interp") +def resize_bilinear(input, out_shape=None, scale=None, name=None): + """ + ${comment} + + Args: + input(${x_type}): ${x_comment}. + + out_shape(${out_size_type}): ${out_size_comment}. + + scale(float|None): The multiplier for the input height or width. At + least one of out_shape or scale must be set. And out_shape has + a higher priority than scale. Default: None. + + name(str|None): The output variable name. + + Returns: + ${out_comment}. + """ + + return image_resize(input, out_shape, scale, name, 'BILINEAR') + + +def image_resize_short(input, out_short_len, resample='BILINEAR'): + """ + Resize a batch of images. The short edge of input images will be + resized to the given 'out_short_len'. The long edge of input images + will be resized proportionately to make images' length-width ratio + constant. + + Args: + input (Variable): The input tensor of image resize layer, + This is a 4-D tensor of the shape + (num_batches, channels, in_h, in_w). + out_short_len(int): The length of output images' short edge. + resample (str): resample method, default: BILINEAR. + + Returns: + out (Variable): The output is a 4-D tensor of the shape + (num_batches, channls, out_h, out_w). + """ + in_shape = input.shape + if len(in_shape) != 4: + raise ValueError( + "The rank of input must be 4 (num_batches, channels, in_h, in_w).") + hw = in_shape[2:4] + short_idx = hw.index(min(hw)) + long_idx = 1 - short_idx + out_shape = list(hw) + out_shape[short_idx] = out_short_len + out_shape[long_idx] = int( + float(out_shape[long_idx]) * (float(out_short_len) / float(hw[ + short_idx])) + 0.5) + return image_resize(input=input, out_shape=out_shape, resample=resample) + + def gather(input, index): """ Output is obtained by gathering entries of the outer-most dimension @@ -4005,7 +4157,7 @@ def gather(input, index): .. math:: - Out = X[Index] + Out = X[Index] .. code-block:: text @@ -4013,8 +4165,8 @@ def gather(input, index): Given: - X = [[1, 2], - [3, 4], + X = [[1, 2], + [3, 4], [5, 6]] Index = [1, 2] @@ -4032,6 +4184,7 @@ def gather(input, index): output (Variable): The output is a tensor with the same rank as input. Examples: + .. code-block:: python output = fluid.layers.gather(x, index) @@ -4047,10 +4200,31 @@ def gather(input, index): return out -def random_crop(input, shape, seed=1): +@templatedoc() +def random_crop(x, shape, seed=None): + """ + ${comment} + + Examples: + >>> img = fluid.layers.data("img", [3, 256, 256]) + >>> cropped_img = fluid.layers.random_crop(img, shape=[3, 224, 224]) + + Args: + x(${x_type}): ${x_comment} + shape(${shape_type}): ${shape_comment} + seed(int|${seed_type}|None): ${seed_comment} By default, the seed will + get from `random.randint(-65536, 65535)`. + + Returns: + ${out_comment} + + """ helper = LayerHelper("random_crop", **locals()) dtype = helper.input_dtype() out = helper.create_tmp_variable(dtype) + if seed is None: + seed = random.randint(-65536, 65535) + if isinstance(seed, int): seed_value = seed seed = helper.create_tmp_variable(dtype="int64") @@ -4069,9 +4243,59 @@ def random_crop(input, shape, seed=1): seed_out = helper.create_tmp_variable(dtype="int64") helper.append_op( type="random_crop", - inputs={"X": input, + inputs={"X": x, "Seed": seed}, outputs={"Out": out, "SeedOut": seed_out}, attrs={"shape": shape}) return out + + +def mean_iou(input, label, num_classes): + """ + Mean Intersection-Over-Union is a common evaluation metric for + semantic image segmentation, which first computes the IOU for each + semantic class and then computes the average over classes. + IOU is defined as follows: + + .. math:: + + IOU = true_positive / (true_positive + false_positive + false_negative). + + The predictions are accumulated in a confusion matrix and mean-IOU + is then calculated from it. + + + Args: + input (Variable): A Tensor of prediction results for semantic labels with type int32 or int64. + label (Variable): A Tensor of ground truth labels with type int32 or int64. + Its shape should be the same as input. + + Returns: + mean_iou (Variable): A Tensor representing the mean intersection-over-union with shape [1]. + out_wrong(Variable): A Tensor with shape [num_classes]. The wrong numbers of each class. + out_correct(Variable): A Tensor with shape [num_classes]. The correct numbers of each class. + + + Examples: + + .. code-block:: python + + iou, wrongs, corrects = fluid.layers.mean_iou(predict, label, num_classes) + """ + helper = LayerHelper('mean_iou', **locals()) + dtype = helper.input_dtype() + out_mean_iou = helper.create_tmp_variable(dtype='float32') + out_wrong = helper.create_tmp_variable(dtype='int32') + out_correct = helper.create_tmp_variable(dtype='int32') + helper.append_op( + type="mean_iou", + inputs={"predictions": input, + "labels": label}, + outputs={ + "out_mean_iou": out_mean_iou, + "out_wrong": out_wrong, + "out_correct": out_correct + }, + attrs={"num_classes": num_classes}) + return out_mean_iou, out_wrong, out_correct -- GitLab