提交 297a1698 编写于 作者: W wanghaoshuang

Fix doc of warpctc, array_read, edit_distance and sequence_reshape.

上级 e0a8c584
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
# limitations under the License. # limitations under the License.
import contextlib import contextlib
from layer_function_generator import autodoc from layer_function_generator import autodoc, templatedoc
from tensor import assign, fill_constant from tensor import assign, fill_constant
from .. import core from .. import core
from ..framework import Program, Variable, Operator from ..framework import Program, Variable, Operator
...@@ -721,26 +721,22 @@ def lod_rank_table(x, level=0): ...@@ -721,26 +721,22 @@ def lod_rank_table(x, level=0):
return table return table
@templatedoc()
def max_sequence_len(rank_table): def max_sequence_len(rank_table):
"""Max Sequence Len Operator. Given a LoDRankTable object, this layer """
returns the max length of a batch of sequences. In fact, a LoDRankTable ${comment}
object contains a list of tuples(<sequence index, sequence length>) and
the list is already sorted by sequence length in descending order, so the >>> import paddle.fluid as fluid
operator just returns the sequence length of the first tuple element. >>> x = fluid.layers.data(name='x', shape=[10], dtype='float32',
>>> lod_level=1)
>>> rank_table = layers.lod_rank_table(x=x, level=0)
>>> max_seq_len = layers.max_sequence_len(rank_table)
Args: Args:
rank_table (Variable): Input variable which is a LoDRankTable object. rank_table(${rank_table_type}): ${rank_table_comment}.
Returns: Returns:
Variable: The max length of sequence. ${out_comment}.
Examples:
.. code-block:: python
x = fluid.layers.data(name='x', shape=[10],
dtype='float32', lod_level=1)
rank_table = layers.lod_rank_table(x=x, level=0)
max_seq_len = layers.max_sequence_len(rank_table)
""" """
helper = LayerHelper("max_seqence_len", **locals()) helper = LayerHelper("max_seqence_len", **locals())
res = helper.create_tmp_variable(dtype="int64") res = helper.create_tmp_variable(dtype="int64")
...@@ -978,19 +974,38 @@ def equal(x, y, cond=None, **ignored): ...@@ -978,19 +974,38 @@ def equal(x, y, cond=None, **ignored):
def array_read(array, i): def array_read(array, i):
"""This function performs the operation to read the data in as an """
This function performs the operation to read the data in as an
LOD_TENSOR_ARRAY. LOD_TENSOR_ARRAY.
.. code-block:: text
Given:
array = [0.6, 0.1, 0.3, 0.1]
And:
i = 2
Then:
output = 0.3
Args: Args:
array (Variable|list): The input tensor that will be written to an array. array (Variable|list): The input tensor that stores the data to be read.
i (Variable|list): The subscript index in tensor array, that points the i (Variable|list): The index of the data to be read from input array.
place where data will be written to.
Returns: Returns:
Variable: The tensor type variable that holds the data read from the array. Variable: The tensor type variable that holds the data read from the array.
Examples: Examples:
.. code-block::python .. code-block:: python
tmp = fluid.layers.zeros(shape=[10], dtype='int32')
i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10) tmp = fluid.layers.zeros(shape=[10], dtype='int32')
arr = layers.array_read(tmp, i=i) i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10)
arr = layers.array_read(tmp, i=i)
""" """
helper = LayerHelper('array_read', **locals()) helper = LayerHelper('array_read', **locals())
if not isinstance( if not isinstance(
......
...@@ -12,78 +12,33 @@ ...@@ -12,78 +12,33 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" """
All layers just related to the neural network. All layers just related to the neural network.
""" """
from ..layer_helper import LayerHelper from ..layer_helper import LayerHelper
from ..initializer import Normal, Constant from ..initializer import Normal, Constant
from ..framework import Variable from ..framework import Variable
from ..param_attr import ParamAttr from ..param_attr import ParamAttr
from layer_function_generator import autodoc from layer_function_generator import autodoc, templatedoc
from tensor import concat from tensor import concat
import utils import utils
import random
__all__ = [ __all__ = [
'fc', 'fc', 'embedding', 'dynamic_lstm', 'dynamic_lstmp', 'dynamic_gru',
'embedding', 'gru_unit', 'linear_chain_crf', 'crf_decoding', 'cos_sim', 'cross_entropy',
'dynamic_lstm', 'square_error_cost', 'chunk_eval', 'sequence_conv', 'conv2d',
'dynamic_lstmp', 'sequence_pool', 'sequence_softmax', 'softmax', 'pool2d', 'batch_norm',
'dynamic_gru', 'beam_search_decode', 'conv2d_transpose', 'sequence_expand', 'lstm_unit',
'gru_unit', 'reduce_sum', 'reduce_mean', 'reduce_max', 'reduce_min', 'reduce_prod',
'linear_chain_crf', 'sequence_first_step', 'sequence_last_step', 'dropout', 'split',
'crf_decoding', 'ctc_greedy_decoder', 'edit_distance', 'l2_normalize', 'matmul', 'topk',
'cos_sim', 'warpctc', 'sequence_reshape', 'transpose', 'im2sequence', 'nce',
'cross_entropy', 'beam_search', 'row_conv', 'multiplex', 'layer_norm',
'square_error_cost', 'softmax_with_cross_entropy', 'smooth_l1', 'one_hot',
'chunk_eval', 'autoincreased_step_counter', 'reshape', 'lod_reset', 'lrn', 'pad',
'sequence_conv', 'label_smooth', 'roi_pool', 'dice_loss', 'image_resize',
'conv2d', 'image_resize_short', 'resize_bilinear', 'gather', 'random_crop', 'mean_iou'
'sequence_pool',
'sequence_softmax',
'softmax',
'pool2d',
'batch_norm',
'beam_search_decode',
'conv2d_transpose',
'sequence_expand',
'lstm_unit',
'reduce_sum',
'reduce_mean',
'reduce_max',
'reduce_min',
'reduce_prod',
'sequence_first_step',
'sequence_last_step',
'dropout',
'split',
'ctc_greedy_decoder',
'edit_distance',
'l2_normalize',
'matmul',
'topk',
'warpctc',
'sequence_reshape',
'transpose',
'im2sequence',
'nce',
'beam_search',
'row_conv',
'multiplex',
'layer_norm',
'softmax_with_cross_entropy',
'smooth_l1',
'one_hot',
'autoincreased_step_counter',
'reshape',
'lod_reset',
'lrn',
'pad',
'label_smooth',
'roi_pool',
'dice_loss',
'resize_bilinear',
'gather',
'random_crop',
] ]
...@@ -92,7 +47,6 @@ def fc(input, ...@@ -92,7 +47,6 @@ def fc(input,
num_flatten_dims=1, num_flatten_dims=1,
param_attr=None, param_attr=None,
bias_attr=None, bias_attr=None,
use_cudnn=False,
use_mkldnn=False, use_mkldnn=False,
act=None, act=None,
is_test=False, is_test=False,
...@@ -219,6 +173,7 @@ def embedding(input, ...@@ -219,6 +173,7 @@ def embedding(input,
have two elements which indicate the size of the dictionary of have two elements which indicate the size of the dictionary of
embeddings and the size of each embedding vector respectively. embeddings and the size of each embedding vector respectively.
is_sparse(bool): The flag indicating whether to use sparse update. is_sparse(bool): The flag indicating whether to use sparse update.
is_distributed (bool): Whether to run lookup table from remote parameter server.
padding_idx(int|long|None): If :attr:`None`, it makes no effect to lookup. padding_idx(int|long|None): If :attr:`None`, it makes no effect to lookup.
Otherwise the given :attr:`padding_idx` indicates padding the output Otherwise the given :attr:`padding_idx` indicates padding the output
with zeros whenever lookup encounters it in :attr:`input`. If with zeros whenever lookup encounters it in :attr:`input`. If
...@@ -258,9 +213,10 @@ def embedding(input, ...@@ -258,9 +213,10 @@ def embedding(input,
return tmp return tmp
# TODO(qijun): expose H0 and C0
def dynamic_lstm(input, def dynamic_lstm(input,
size, size,
h_0=None,
c_0=None,
param_attr=None, param_attr=None,
bias_attr=None, bias_attr=None,
use_peepholes=True, use_peepholes=True,
...@@ -321,6 +277,13 @@ def dynamic_lstm(input, ...@@ -321,6 +277,13 @@ def dynamic_lstm(input,
(T X 4D), where T is the total time steps in this (T X 4D), where T is the total time steps in this
mini-batch, D is the hidden size. mini-batch, D is the hidden size.
size(int): 4 * hidden size. size(int): 4 * hidden size.
h_0(Variable): The initial hidden state is an optional input, default is zero.
This is a tensor with shape (N x D), where N is the
batch size and D is the hidden size.
c_0(Variable): The initial cell state is an optional input, default is zero.
This is a tensor with shape (N x D), where N is the
batch size. `h_0` and `c_0` can be NULL but only at the same time.
param_attr(ParamAttr|None): The parameter attribute for the learnable param_attr(ParamAttr|None): The parameter attribute for the learnable
hidden-hidden weights. hidden-hidden weights.
...@@ -384,12 +347,20 @@ def dynamic_lstm(input, ...@@ -384,12 +347,20 @@ def dynamic_lstm(input,
cell = helper.create_tmp_variable(dtype) cell = helper.create_tmp_variable(dtype)
batch_gate = helper.create_tmp_variable(dtype) batch_gate = helper.create_tmp_variable(dtype)
batch_cell_pre_act = helper.create_tmp_variable(dtype) batch_cell_pre_act = helper.create_tmp_variable(dtype)
inputs = {'Input': input, 'Weight': weight, 'Bias': bias}
batch_size = input.shape[0]
if h_0:
assert h_0.shape == (batch_size, size), \
'The shape of h0 should be (batch_size, %d)' % size
inputs['H0'] = h_0
if c_0:
assert c_0.shape == (batch_size, size), \
'The shape of c0 should be (batch_size, %d)' % size
inputs['C0'] = c_0
helper.append_op( helper.append_op(
type='lstm', type='lstm',
inputs={'Input': input, inputs=inputs,
'Weight': weight,
'Bias': bias},
outputs={ outputs={
'Hidden': hidden, 'Hidden': hidden,
'Cell': cell, 'Cell': cell,
...@@ -651,8 +622,9 @@ def dynamic_gru(input, ...@@ -651,8 +622,9 @@ def dynamic_gru(input,
:attr:`False`. :attr:`False`.
gate_activation(str): The activation for update gate and reset gate. gate_activation(str): The activation for update gate and reset gate.
Choices = ["sigmoid", "tanh", "relu", "identity"], default "sigmoid". Choices = ["sigmoid", "tanh", "relu", "identity"], default "sigmoid".
activation(str): The activation for candidate hidden state. candidate_activation(str): The activation for candidate hidden state.
Choices = ["sigmoid", "tanh", "relu", "identity"], default "tanh". Choices = ["sigmoid", "tanh", "relu", "identity"], default "tanh".
h_0 (Variable): The hidden output of the first time step.
Returns: Returns:
Variable: The hidden state of GRU. The shape is :math:`(T \\times D)`, \ Variable: The hidden state of GRU. The shape is :math:`(T \\times D)`, \
...@@ -673,11 +645,13 @@ def dynamic_gru(input, ...@@ -673,11 +645,13 @@ def dynamic_gru(input,
attr=helper.param_attr, shape=[size, 3 * size], dtype=dtype) attr=helper.param_attr, shape=[size, 3 * size], dtype=dtype)
bias = helper.create_parameter( bias = helper.create_parameter(
attr=helper.bias_attr, shape=[1, 3 * size], dtype=dtype, is_bias=True) attr=helper.bias_attr, shape=[1, 3 * size], dtype=dtype, is_bias=True)
batch_size = input.shape[0]
inputs = {'Input': input, 'Weight': weight, 'Bias': bias} inputs = {'Input': input, 'Weight': weight, 'Bias': bias}
if h_0 != None: if h_0 != None:
assert h_0.shape == ( assert h_0.shape == (
size, size), 'The shape of h0 should be(%d, %d)' % (size, size) batch_size, size
inputs['h0'] = h_0 ), 'The shape of h0 should be(batch_size, %d)' % size
inputs['H0'] = h_0
hidden = helper.create_tmp_variable(dtype) hidden = helper.create_tmp_variable(dtype)
batch_gate = helper.create_tmp_variable(dtype) batch_gate = helper.create_tmp_variable(dtype)
...@@ -799,7 +773,22 @@ def gru_unit(input, ...@@ -799,7 +773,22 @@ def gru_unit(input,
return updated_hidden, reset_hidden_pre, gate return updated_hidden, reset_hidden_pre, gate
@templatedoc()
def linear_chain_crf(input, label, param_attr=None): def linear_chain_crf(input, label, param_attr=None):
"""
Linear Chain CRF.
${comment}
Args:
input(${emission_type}): ${emission_comment}
label(${label_type}): ${label_comment}
param_attr(ParamAttr): The attribute of the learnable parameter.
Returns:
${log_likelihood_comment}
"""
helper = LayerHelper('linear_chain_crf', **locals()) helper = LayerHelper('linear_chain_crf', **locals())
size = input.shape[1] size = input.shape[1]
transition = helper.create_parameter( transition = helper.create_parameter(
...@@ -825,7 +814,19 @@ def linear_chain_crf(input, label, param_attr=None): ...@@ -825,7 +814,19 @@ def linear_chain_crf(input, label, param_attr=None):
return log_likelihood return log_likelihood
@templatedoc()
def crf_decoding(input, param_attr, label=None): def crf_decoding(input, param_attr, label=None):
"""
${comment}
Args:
input(${emission_type}): ${emission_comment}
param_attr(ParamAttr): The parameter attribute for training.
label(${label_type}): ${label_comment}
Returns:
${viterbi_path_comment}
"""
helper = LayerHelper('crf_decoding', **locals()) helper = LayerHelper('crf_decoding', **locals())
transition = helper.get_parameter(param_attr.name) transition = helper.get_parameter(param_attr.name)
viterbi_path = helper.create_tmp_variable(dtype=helper.input_dtype()) viterbi_path = helper.create_tmp_variable(dtype=helper.input_dtype())
...@@ -843,6 +844,13 @@ def cos_sim(X, Y): ...@@ -843,6 +844,13 @@ def cos_sim(X, Y):
""" """
This function performs the cosine similarity between two tensors This function performs the cosine similarity between two tensors
X and Y and returns that as the output. X and Y and returns that as the output.
Args:
X (Variable): The input X.
Y (Variable): The input Y.
Returns:
Variable: the output of cosine(X, Y).
""" """
helper = LayerHelper('cos_sim', **locals()) helper = LayerHelper('cos_sim', **locals())
out = helper.create_tmp_variable(dtype=X.dtype) out = helper.create_tmp_variable(dtype=X.dtype)
...@@ -869,15 +877,15 @@ def dropout(x, dropout_prob, is_test=False, seed=None, name=None): ...@@ -869,15 +877,15 @@ def dropout(x, dropout_prob, is_test=False, seed=None, name=None):
unchanged. unchanged.
Args: Args:
x(variable): The input tensor. x (Variable): The input tensor.
dropout_prob(float): Probability of setting units to zero. dropout_prob (float): Probability of setting units to zero.
is_test(bool): A flag indicating whether it is in test phase or not. is_test (bool): A flag indicating whether it is in test phase or not.
seed(int): A Python integer used to create random seeds. If this seed (int): A Python integer used to create random seeds. If this
parameter is set to None, a random seed is used. parameter is set to None, a random seed is used.
NOTE: If an integer seed is given, always the same output NOTE: If an integer seed is given, always the same output
units will be dropped. DO NOT use a fixed seed in training. units will be dropped. DO NOT use a fixed seed in training.
name(str|None): A name for this layer(optional). If set None, the layer name (str|None): A name for this layer(optional). If set None, the layer
will be named automatically. will be named automatically.
Returns: Returns:
Variable: A tensor variable. Variable: A tensor variable.
...@@ -999,8 +1007,8 @@ def square_error_cost(input, label): ...@@ -999,8 +1007,8 @@ def square_error_cost(input, label):
* :math:`Out`: Output value, same shape with :math:`X`. * :math:`Out`: Output value, same shape with :math:`X`.
Args: Args:
input(Variable): Input tensor, has predictions. input (Variable): Input tensor, has predictions.
label(Variable): Label tensor, has target labels. label (Variable): Label tensor, has target labels.
Returns: Returns:
Variable: The tensor variable storing the element-wise squared error \ Variable: The tensor variable storing the element-wise squared error \
...@@ -1029,6 +1037,7 @@ def square_error_cost(input, label): ...@@ -1029,6 +1037,7 @@ def square_error_cost(input, label):
return square_out return square_out
@templatedoc()
def chunk_eval(input, def chunk_eval(input,
label, label,
chunk_scheme, chunk_scheme,
...@@ -1037,6 +1046,18 @@ def chunk_eval(input, ...@@ -1037,6 +1046,18 @@ def chunk_eval(input,
""" """
This function computes and outputs the precision, recall and This function computes and outputs the precision, recall and
F1-score of chunk detection. F1-score of chunk detection.
Args:
input (Variable): prediction output of the network.
label (Variable): label of the test data set.
chunk_scheme (str): ${chunk_scheme_comment}
num_chunk_types (int): ${num_chunk_types_comment}
excluded_chunk_types (list): ${excluded_chunk_types_comment}
Returns:
tuple: tuple containing: (precision, recall, f1_score,
num_infer_chunks, num_label_chunks,
num_correct_chunks)
""" """
helper = LayerHelper("chunk_eval", **locals()) helper = LayerHelper("chunk_eval", **locals())
...@@ -1069,6 +1090,7 @@ def chunk_eval(input, ...@@ -1069,6 +1090,7 @@ def chunk_eval(input,
num_correct_chunks) num_correct_chunks)
@templatedoc()
def sequence_conv(input, def sequence_conv(input,
num_filters, num_filters,
filter_size=3, filter_size=3,
...@@ -1081,6 +1103,19 @@ def sequence_conv(input, ...@@ -1081,6 +1103,19 @@ def sequence_conv(input,
This function creates the op for sequence_conv, using the inputs and This function creates the op for sequence_conv, using the inputs and
other convolutional configurations for the filters and stride as given other convolutional configurations for the filters and stride as given
in the input parameters to the function. in the input parameters to the function.
Args:
input (Variable): ${x_comment}
num_filters (int): number of filters.
filter_size (int): the filter size (H and W).
filter_stride (int): stride of the filter.
padding (bool): if True, add paddings.
bias_attr (ParamAttr|None): attributes for bias
param_attr (ParamAttr|None): attributes for parameter
act (str): the activation type
Returns:
Variable: output of sequence_conv
""" """
# FIXME(dzh) : want to unify the argument of python layer # FIXME(dzh) : want to unify the argument of python layer
...@@ -1180,48 +1215,49 @@ def conv2d(input, ...@@ -1180,48 +1215,49 @@ def conv2d(input,
- Input: - Input:
Input shape: $(N, C_{in}, H_{in}, W_{in})$ Input shape: :math:`(N, C_{in}, H_{in}, W_{in})`
Filter shape: $(C_{out}, C_{in}, H_f, W_f)$ Filter shape: :math:`(C_{out}, C_{in}, H_f, W_f)`
- Output: - Output:
Output shape: $(N, C_{out}, H_{out}, W_{out})$ Output shape: :math:`(N, C_{out}, H_{out}, W_{out})`
Where Where
.. math:: .. math::
H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]} + 1 \\\\ H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]} + 1 \\\\
W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1 W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1
Args: Args:
input(Variable): The input image with [N, C, H, W] format. input (Variable): The input image with [N, C, H, W] format.
num_filters(int): The number of filter. It is as same as the output num_filters(int): The number of filter. It is as same as the output
image channel. image channel.
filter_size(int|tuple|None): The filter size. If filter_size is a tuple, filter_size (int|tuple|None): The filter size. If filter_size is a tuple,
it must contain two integers, (filter_size_H, filter_size_W). it must contain two integers, (filter_size_H, filter_size_W).
Otherwise, the filter will be a square. Otherwise, the filter will be a square.
stride(int|tuple): The stride size. If stride is a tuple, it must stride (int|tuple): The stride size. If stride is a tuple, it must
contain two integers, (stride_H, stride_W). Otherwise, the contain two integers, (stride_H, stride_W). Otherwise, the
stride_H = stride_W = stride. Default: stride = 1. stride_H = stride_W = stride. Default: stride = 1.
padding(int|tuple): The padding size. If padding is a tuple, it must padding (int|tuple): The padding size. If padding is a tuple, it must
contain two integers, (padding_H, padding_W). Otherwise, the contain two integers, (padding_H, padding_W). Otherwise, the
padding_H = padding_W = padding. Default: padding = 0. padding_H = padding_W = padding. Default: padding = 0.
dilation(int|tuple): The dilation size. If dilation is a tuple, it must dilation (int|tuple): The dilation size. If dilation is a tuple, it must
contain two integers, (dilation_H, dilation_W). Otherwise, the contain two integers, (dilation_H, dilation_W). Otherwise, the
dilation_H = dilation_W = dilation. Default: dilation = 1. dilation_H = dilation_W = dilation. Default: dilation = 1.
groups(int): The groups number of the Conv2d Layer. According to grouped groups (int): The groups number of the Conv2d Layer. According to grouped
convolution in Alex Krizhevsky's Deep CNN paper: when group=2, convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
the first half of the filters is only connected to the first half the first half of the filters is only connected to the first half
of the input channels, while the second half of the filters is only of the input channels, while the second half of the filters is only
connected to the second half of the input channels. Default: groups=1 connected to the second half of the input channels. Default: groups=1
param_attr(ParamAttr): The parameters to the Conv2d Layer. Default: None param_attr (ParamAttr): The parameters to the Conv2d Layer. Default: None
bias_attr(ParamAttr): Bias parameter for the Conv2d layer. Default: None bias_attr (ParamAttr): Bias parameter for the Conv2d layer. Default: None
use_cudnn(bool): Use cudnn kernel or not, it is valid only when the cudnn use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn
library is installed. Default: True library is installed. Default: True
act(str): Activation type. Default: None use_mkldnn (bool): Use mkldnn kernels or not.
name(str|None): A name for this layer(optional). If set None, the layer act (str): Activation type. Default: None
will be named automatically. name (str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns: Returns:
Variable: The tensor variable storing the convolution and \ Variable: The tensor variable storing the convolution and \
...@@ -1379,7 +1415,7 @@ def sequence_pool(input, pool_type): ...@@ -1379,7 +1415,7 @@ def sequence_pool(input, pool_type):
def sequence_first_step(input): def sequence_first_step(input):
""" """
This funciton get the first step of sequence. This function gets the first step of sequence.
.. code-block:: text .. code-block:: text
...@@ -1412,7 +1448,7 @@ def sequence_first_step(input): ...@@ -1412,7 +1448,7 @@ def sequence_first_step(input):
def sequence_last_step(input): def sequence_last_step(input):
""" """
This funciton get the last step of sequence. This function gets the last step of sequence.
.. code-block:: text .. code-block:: text
...@@ -1456,6 +1492,22 @@ def pool2d(input, ...@@ -1456,6 +1492,22 @@ def pool2d(input,
""" """
This function adds the operator for pooling in 2 dimensions, using the This function adds the operator for pooling in 2 dimensions, using the
pooling configurations mentioned in input parameters. pooling configurations mentioned in input parameters.
Args:
input (Variable): ${input_comment}
pool_size (int): ${ksize_comment}
pool_type (str): ${pooling_type_comment}
pool_stride (int): stride of the pooling layer.
pool_padding (int): padding size.
global_pooling (bool): ${global_pooling_comment}
use_cudnn (bool): ${use_cudnn_comment}
ceil_mode (bool): ${ceil_mode_comment}
use_mkldnn (bool): ${use_mkldnn_comment}
name (str): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
Variable: output of pool2d layer.
""" """
if pool_type not in ["max", "avg"]: if pool_type not in ["max", "avg"]:
raise ValueError( raise ValueError(
...@@ -1513,6 +1565,25 @@ def batch_norm(input, ...@@ -1513,6 +1565,25 @@ def batch_norm(input,
""" """
This function helps create an operator to implement This function helps create an operator to implement
the BatchNorm layer using the configurations from the input parameters. the BatchNorm layer using the configurations from the input parameters.
Args:
input (Variable): the input variable.
act (str): activation type
is_test (bool): whether to run batch_norm as test mode.
momentum (float): momentum
epsilon (float): epsilon, default 1e-05
param_attr (ParamAttr|None): attributes for parameter
bias_attr (ParamAttr|None): attributes for bias
data_layout (str): data layout, default NCHW
in_place (bool): if True, do not create tmp variable
use_mkldnn (bool): ${use_mkldnn_comment}
name (str): The name of this layer. It is optional.
moving_mean_name (str): The name of moving mean variable name, optional.
moving_variance_name (str): The name of moving variance name, optional.
do_model_average_for_mean_and_var (bool):
Returns:
Variable: output of batch_norm layer.
""" """
helper = LayerHelper('batch_norm', **locals()) helper = LayerHelper('batch_norm', **locals())
dtype = helper.input_dtype() dtype = helper.input_dtype()
...@@ -1640,6 +1711,7 @@ def layer_norm(input, ...@@ -1640,6 +1711,7 @@ def layer_norm(input,
bias_attr(ParamAttr|None): The parameter attribute for the learnable bias_attr(ParamAttr|None): The parameter attribute for the learnable
bias :math:`b`. bias :math:`b`.
act(str): Activation to be applied to the output of layer normalization. act(str): Activation to be applied to the output of layer normalization.
name (str): The name of this layer. It is optional.
Returns: Returns:
Variable: A tensor variable with the same shape as the input. Variable: A tensor variable with the same shape as the input.
...@@ -1691,6 +1763,17 @@ def layer_norm(input, ...@@ -1691,6 +1763,17 @@ def layer_norm(input,
def beam_search_decode(ids, scores, name=None): def beam_search_decode(ids, scores, name=None):
"""
${beam_search_decode}
Args:
ids (Variable): ${ids_comment}
scores (Variable): ${scores_comment}
name (str): The name of this layer. It is optional.
Returns:
tuple: a tuple of two output variable: sentence_ids, sentence_scores
"""
helper = LayerHelper('beam_search_decode', **locals()) helper = LayerHelper('beam_search_decode', **locals())
sentence_ids = helper.create_tmp_variable(dtype=ids.dtype) sentence_ids = helper.create_tmp_variable(dtype=ids.dtype)
sentence_scores = helper.create_tmp_variable(dtype=ids.dtype) sentence_scores = helper.create_tmp_variable(dtype=ids.dtype)
...@@ -1766,46 +1849,46 @@ def conv2d_transpose(input, ...@@ -1766,46 +1849,46 @@ def conv2d_transpose(input,
W_{out} &= (W_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (W_f - 1) + 1 W_{out} &= (W_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (W_f - 1) + 1
Args: Args:
input(Variable): The input image with [N, C, H, W] format. input(Variable): The input image with [N, C, H, W] format.
num_filters(int): The number of the filter. It is as same as the output num_filters(int): The number of the filter. It is as same as the output
image channel. image channel.
output_size(int|tuple|None): The output image size. If output size is a output_size(int|tuple|None): The output image size. If output size is a
tuple, it must contain two integers, (image_H, image_W). This tuple, it must contain two integers, (image_H, image_W). This
parameter only works when filter_size is None. parameter only works when filter_size is None.
filter_size(int|tuple|None): The filter size. If filter_size is a tuple, filter_size(int|tuple|None): The filter size. If filter_size is a tuple,
it must contain two integers, (filter_size_H, filter_size_W). it must contain two integers, (filter_size_H, filter_size_W).
Otherwise, the filter will be a square. None if use output size to Otherwise, the filter will be a square. None if use output size to
calculate filter_size. calculate filter_size.
padding(int|tuple): The padding size. If padding is a tuple, it must padding(int|tuple): The padding size. If padding is a tuple, it must
contain two integers, (padding_H, padding_W). Otherwise, the contain two integers, (padding_H, padding_W). Otherwise, the
padding_H = padding_W = padding. Default: padding = 0. padding_H = padding_W = padding. Default: padding = 0.
stride(int|tuple): The stride size. If stride is a tuple, it must stride(int|tuple): The stride size. If stride is a tuple, it must
contain two integers, (stride_H, stride_W). Otherwise, the contain two integers, (stride_H, stride_W). Otherwise, the
stride_H = stride_W = stride. Default: stride = 1. stride_H = stride_W = stride. Default: stride = 1.
dilation(int|tuple): The dilation size. If dilation is a tuple, it must dilation(int|tuple): The dilation size. If dilation is a tuple, it must
contain two integers, (dilation_H, dilation_W). Otherwise, the contain two integers, (dilation_H, dilation_W). Otherwise, the
dilation_H = dilation_W = dilation. Default: dilation = 1. dilation_H = dilation_W = dilation. Default: dilation = 1.
groups(int): The groups number of the Conv2d transpose layer. Inspired by groups(int): The groups number of the Conv2d transpose layer. Inspired by
grouped convolution in Alex Krizhevsky's Deep CNN paper, in which grouped convolution in Alex Krizhevsky's Deep CNN paper, in which
when group=2, the first half of the filters is only connected to the when group=2, the first half of the filters is only connected to the
first half of the input channels, while the second half of the first half of the input channels, while the second half of the
filters is only connected to the second half of the input channels. filters is only connected to the second half of the input channels.
Default: groups=1 Default: groups=1
param_attr(ParamAttr): The parameters to the Conv2d_transpose Layer. param_attr(ParamAttr): The parameters to the Conv2d_transpose Layer.
Default: None Default: None
bias_attr(ParamAttr): Bias parameter for the Conv2d layer. Default: None bias_attr(ParamAttr): Bias parameter for the Conv2d layer. Default: None
use_cudnn(bool): Use cudnn kernel or not, it is valid only when the cudnn use_cudnn(bool): Use cudnn kernel or not, it is valid only when the cudnn
library is installed. Default: True library is installed. Default: True
act(str): Activation type. Default: None act(str): Activation type. Default: None
name(str|None): A name for this layer(optional). If set None, the layer name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically. will be named automatically.
Returns: Returns:
Variable: The tensor variable storing the convolution transpose result. Variable: The tensor variable storing the convolution transpose result.
Raises: Raises:
ValueError: If the shapes of input, filter_size, stride, padding and ValueError: If the shapes of input, filter_size, stride, padding and
groups mismatch. groups mismatch.
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -1942,6 +2025,17 @@ def sequence_expand(x, y, ref_level=-1, name=None): ...@@ -1942,6 +2025,17 @@ def sequence_expand(x, y, ref_level=-1, name=None):
def beam_search(pre_ids, ids, scores, beam_size, end_id, level=0): def beam_search(pre_ids, ids, scores, beam_size, end_id, level=0):
''' '''
This function implements the beam search algorithm. This function implements the beam search algorithm.
Args:
pre_ids (Variable): ${pre_ids_comment}
ids (Variable): ${ids_comment}
scores (Variable): ${scores_comment}
beam_size (int): ${beam_size_comment}
end_id (int): ${end_id_comment}
level (int): ${level_comment}
Returns:
tuple: a tuple of beam_search output variables: selected_ids, selected_scores
''' '''
helper = LayerHelper('beam_search', **locals()) helper = LayerHelper('beam_search', **locals())
score_type = scores.dtype score_type = scores.dtype
...@@ -2437,19 +2531,21 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None): ...@@ -2437,19 +2531,21 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None):
The l2 normalize layer normalizes `x` along dimension `axis` using an L2 The l2 normalize layer normalizes `x` along dimension `axis` using an L2
norm. For a 1-D tensor (`dim` is fixed to 0), this layer computes norm. For a 1-D tensor (`dim` is fixed to 0), this layer computes
output = x / sqrt(max(sum(x**2), epsilon)) .. math::
y = \frac{x}{ \sqrt{\sum {x^2} + epsilon }}
For `x` with more dimensions, this layer independently normalizes each 1-D For `x` with more dimensions, this layer independently normalizes each 1-D
slice along dimension `axis`. slice along dimension `axis`.
Args: Args:
x(Variable|list): The input tensor to l2_normalize layer. x(Variable|list): The input tensor to l2_normalize layer.
axis(int): Dimension along which to normalize the input. axis(int): The axis on which to apply normalization. If `axis < 0`,
epsilon(float): A lower bound value for `x`'s l2 norm. sqrt(epsilon) will the dimension to normalization is rank(X) + axis. -1 is the
be used as the divisor if the l2 norm of `x` is less than last dimension.
sqrt(epsilon). epsilon(float): The epsilon value is used to avoid division by zero,
name(str|None): A name for this layer(optional). If set None, the layer the default value is 1e-12.
will be named automatically. name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns: Returns:
...@@ -2468,46 +2564,17 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None): ...@@ -2468,46 +2564,17 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None):
axis = 0 axis = 0
helper = LayerHelper("l2_normalize", **locals()) helper = LayerHelper("l2_normalize", **locals())
square = helper.create_tmp_variable(dtype=x.dtype) out = helper.create_tmp_variable(dtype=x.dtype)
helper.append_op(type="square", inputs={"X": x}, outputs={"Out": square}) norm = helper.create_tmp_variable(dtype=x.dtype)
reduced_sum = helper.create_tmp_variable(dtype=x.dtype)
helper.append_op( helper.append_op(
type="reduce_sum", type="norm",
inputs={"X": square}, inputs={"X": x},
outputs={"Out": reduced_sum}, outputs={"Out": out,
"Norm": norm},
attrs={ attrs={
"dim": [1] if axis is None else [axis], "axis": 1 if axis is None else axis,
"keep_dim": True, "epsilon": epsilon,
"reduce_all": False
}) })
# TODO(caoying) A lower bound value epsilon for the norm is needed to
# imporve the numeric stability of reciprocal. This requires a maximum_op.
rsquare = helper.create_tmp_variable(dtype=x.dtype)
helper.append_op(
type="reciprocal", inputs={"X": reduced_sum}, outputs={"Out": rsquare})
# TODO(caoying) the current elementwise_mul operator does not support a
# general broadcast rule which broadcasts input(Y) to have the same
# dimension with Input(X) starting from a specified dimension. So this
# exanpsion is requred. Once a general broadcast rule is spported, this
# expanding canbe removed.
rsquare_expanded = helper.create_tmp_variable(dtype=x.dtype)
expand_times = [1] * len(x.shape)
expand_times[axis] = int(x.shape[axis])
helper.append_op(
type="expand",
inputs={"X": rsquare},
outputs={"Out": rsquare_expanded},
attrs={"expand_times": expand_times})
out = helper.create_tmp_variable(dtype=x.dtype)
helper.append_op(
type="elementwise_mul",
inputs={"X": x,
"Y": rsquare_expanded},
outputs={"Out": out})
return out return out
...@@ -2666,8 +2733,7 @@ def topk(input, k, name=None): ...@@ -2666,8 +2733,7 @@ def topk(input, k, name=None):
return values, indices return values, indices
def edit_distance(input, label, normalized=True, ignored_tokens=None, def edit_distance(input, label, normalized=True, ignored_tokens=None):
name=None):
""" """
EditDistance operator computes the edit distances between a batch of EditDistance operator computes the edit distances between a batch of
hypothesis strings and their references. Edit distance, also called hypothesis strings and their references. Edit distance, also called
...@@ -2681,26 +2747,23 @@ def edit_distance(input, label, normalized=True, ignored_tokens=None, ...@@ -2681,26 +2747,23 @@ def edit_distance(input, label, normalized=True, ignored_tokens=None,
"kitten" -> "sitten" -> "sittin" -> "sitting" "kitten" -> "sitten" -> "sittin" -> "sitting"
Input(Hyps) is a LoDTensor consisting of all the hypothesis strings with The input is a LoDTensor consisting of all the hypothesis strings with
the total number denoted by `batch_size`, and the separation is specified the total number denoted by `batch_size`, and the separation is specified
by the LoD information. And the `batch_size` reference strings are arranged by the LoD information. And the `batch_size` reference strings are arranged
in order in the same way in the LoDTensor Input(Refs). in order in the same way in the input LoDTensor.
Output(Out) contains the `batch_size` results and each stands for the edit The output contains the `batch_size` results and each stands for the edit
distance for a pair of strings respectively. If Attr(normalized) is true, distance for a pair of strings respectively. If Attr(normalized) is true,
the edit distance will be divided by the length of reference string. the edit distance will be divided by the length of reference string.
Args: Args:
input(Variable): The indices for hypothesis strings. input(Variable): The indices for hypothesis strings.
label(Variable): The indices for reference strings. label(Variable): The indices for reference strings.
normalized(bool, default True): Indicated whether to normalize the edit distance by
normalized(bool): Indicated whether to normalize the edit distance by
the length of reference string. the length of reference string.
ignored_tokens(list<int>, default None): Tokens that should be removed before
ignored_tokens(list of int): Tokens that should be removed before
calculating edit distance. calculating edit distance.
name (str): The name of this layer. It is optional.
Returns: Returns:
Variable: sequence-to-sequence edit distance in shape [batch_size, 1]. Variable: sequence-to-sequence edit distance in shape [batch_size, 1].
...@@ -2710,7 +2773,6 @@ def edit_distance(input, label, normalized=True, ignored_tokens=None, ...@@ -2710,7 +2773,6 @@ def edit_distance(input, label, normalized=True, ignored_tokens=None,
x = fluid.layers.data(name='x', shape=[8], dtype='float32') x = fluid.layers.data(name='x', shape=[8], dtype='float32')
y = fluid.layers.data(name='y', shape=[7], dtype='float32') y = fluid.layers.data(name='y', shape=[7], dtype='float32')
cost = fluid.layers.edit_distance(input=x,label=y) cost = fluid.layers.edit_distance(input=x,label=y)
""" """
helper = LayerHelper("edit_distance", **locals()) helper = LayerHelper("edit_distance", **locals())
...@@ -2790,10 +2852,10 @@ def ctc_greedy_decoder(input, blank, name=None): ...@@ -2790,10 +2852,10 @@ def ctc_greedy_decoder(input, blank, name=None):
where Lp is the sum of all input sequences' length and where Lp is the sum of all input sequences' length and
num_classes is the true number of classes. (not num_classes is the true number of classes. (not
including the blank label). including the blank label).
blank(int): the blank label index of Connectionist Temporal blank(int): the blank label index of Connectionist Temporal
Classification (CTC) loss, which is in the half-opened Classification (CTC) loss, which is in the half-opened
interval [0, num_classes + 1). interval [0, num_classes + 1).
name (str): The name of this layer. It is optional.
Returns: Returns:
Variable: CTC greedy decode result. If all the sequences in result were Variable: CTC greedy decode result. If all the sequences in result were
...@@ -2830,35 +2892,33 @@ def warpctc(input, label, blank=0, norm_by_times=False): ...@@ -2830,35 +2892,33 @@ def warpctc(input, label, blank=0, norm_by_times=False):
input tensor. input tensor.
Args: Args:
input(Variable): (LodTensor, default: LoDTensor<float>), input (Variable): The unscaled probabilities of variable-length sequences,
the unscaled probabilities of variable-length sequences,
which is a 2-D Tensor with LoD information. which is a 2-D Tensor with LoD information.
Its shape is [Lp, num_classes + 1], where Lp is the sum of all input Its shape is [Lp, num_classes + 1], where Lp is the sum of all input
sequences' length and num_classes is the true number of classes. sequences' length and num_classes is the true number of classes.
(not including the blank label). (not including the blank label).
label(Variable): (LodTensor, default: LoDTensor<int>), the ground truth label (Variable): The ground truth of variable-length sequence,
of variable-length sequence, which is a 2-D Tensor with LoD which is a 2-D Tensor with LoD information. It is of the shape [Lg, 1],
information. It is of the shape [Lg, 1], where Lg is the sum of where Lg is the sum of all labels' length.
all labels' length. blank (int, default 0): The blank label index of Connectionist
blank: (int, default: 0), the blank label index of Connectionist
Temporal Classification (CTC) loss, which is in the Temporal Classification (CTC) loss, which is in the
half-opened interval [0, num_classes + 1). half-opened interval [0, num_classes + 1).
norm_by_times: (bool, default: false), whether to normalize norm_by_times(bool, default false): Whether to normalize the gradients
the gradients by the number of time-step, which is also the by the number of time-step, which is also the sequence's length.
sequence's length. There is no need to normalize the gradients There is no need to normalize the gradients if warpctc layer was
if warpctc layer was followed by a mean_op. followed by a mean_op.
Returns: Returns:
Variable: The Connectionist Temporal Classification (CTC) loss, Variable: The Connectionist Temporal Classification (CTC) loss,
which is a 2-D Tensor of the shape [batch_size, 1]. which is a 2-D Tensor of the shape [batch_size, 1].
Examples: Examples:
.. code-block:: python .. code-block:: python
y = layers.data(
name='y', shape=[11, 8], dtype='float32', lod_level=1) label = layers.data(shape=[11, 8], dtype='float32', lod_level=1)
y_predict = layers.data( predict = layers.data(shape=[11, 1], dtype='float32')
name='y_predict', shape=[11, 1], dtype='float32') cost = layers.warpctc(input=predict, label=label)
cost = layers.warpctc(input=y_predict, label=y)
""" """
helper = LayerHelper('warpctc', **locals()) helper = LayerHelper('warpctc', **locals())
...@@ -2888,16 +2948,21 @@ def sequence_reshape(input, new_dim): ...@@ -2888,16 +2948,21 @@ def sequence_reshape(input, new_dim):
x is a LoDTensor: x is a LoDTensor:
x.lod = [[0, 2, 6]] x.lod = [[0, 2, 6]]
x.data = [[1, 2], [3, 4], x.data = [[1, 2], [3, 4],
[5, 6], [7, 8], [9, 10], [11, 12]] [5, 6], [7, 8],
[9, 10], [11, 12]]
x.dims = [6, 2] x.dims = [6, 2]
set new_dim = 4 set new_dim = 4
then out is a LoDTensor: then out is a LoDTensor:
out.lod = [[0, 1, 3]] out.lod = [[0, 1, 3]]
out.data = [[1, 2, 3, 4],
[5, 6, 7, 8], [9, 10, 11, 12]] out.data = [[1, 2, 3, 4],
[5, 6, 7, 8],
[9, 10, 11, 12]]
out.dims = [3, 4] out.dims = [3, 4]
Currently, only 1-level LoDTensor is supported and please make sure Currently, only 1-level LoDTensor is supported and please make sure
...@@ -2905,18 +2970,18 @@ def sequence_reshape(input, new_dim): ...@@ -2905,18 +2970,18 @@ def sequence_reshape(input, new_dim):
no remainder for each sequence. no remainder for each sequence.
Args: Args:
input (Variable): (LodTensor, default: LoDTensor<float>), a 2-D LoDTensor
with shape being [N, M] where M for dimension. input (Variable): A 2-D LoDTensor with shape being [N, M] where M for dimension.
new_dim (int): New dimension which the input LoDTensor is reshaped to. new_dim (int): New dimension that the input LoDTensor is reshaped to.
Returns: Returns:
Variable: Reshaped LoDTensor according to new dimension. Variable: Reshaped LoDTensor according to new dimension.
Examples: Examples:
.. code-block:: python .. code-block:: python
x = fluid.layers.data(name='x', shape=[5, 20], x = fluid.layers.data(shape=[5, 20], dtype='float32', lod_level=1)
dtype='float32', lod_level=1)
x_reshaped = layers.sequence_reshape(input=x, new_dim=10) x_reshaped = layers.sequence_reshape(input=x, new_dim=10)
""" """
helper = LayerHelper('sequence_reshape', **locals()) helper = LayerHelper('sequence_reshape', **locals())
...@@ -2929,7 +2994,10 @@ def sequence_reshape(input, new_dim): ...@@ -2929,7 +2994,10 @@ def sequence_reshape(input, new_dim):
return out return out
@autodoc() # FIXME(wuyi): let docstring_checker.py understand @autodoc.
# For now, the comments in c++ use types like Tensor, but in python side
# the type is often "Variable", and arguments may vary.
@templatedoc(op_type="nce")
def nce(input, def nce(input,
label, label,
num_total_classes, num_total_classes,
...@@ -2937,6 +3005,21 @@ def nce(input, ...@@ -2937,6 +3005,21 @@ def nce(input,
param_attr=None, param_attr=None,
bias_attr=None, bias_attr=None,
num_neg_samples=None): num_neg_samples=None):
"""
${comment}
Args:
input (Variable): input variable.
label (Variable): label.
num_total_classes (int):${num_total_classes_comment}
sample_weight (int): ${sample_weight_comment}
param_attr (ParamAttr|None): attributes for parameter
bias_attr (ParamAttr|None): attributes for bias
num_neg_samples (int): ${num_neg_samples_comment}
Returns:
Variable: output of nce layer.
"""
helper = LayerHelper('nce', **locals()) helper = LayerHelper('nce', **locals())
assert isinstance(input, Variable) assert isinstance(input, Variable)
dim = input.shape[1] dim = input.shape[1]
...@@ -2994,8 +3077,9 @@ def transpose(x, perm, name=None): ...@@ -2994,8 +3077,9 @@ def transpose(x, perm, name=None):
perm[i]-th dimension of `input`. perm[i]-th dimension of `input`.
Args: Args:
input (Variable): (Tensor), A Tensor. x (Variable): The input Tensor.
perm (list): A permutation of the dimensions of `input`. perm (list): A permutation of the dimensions of `input`.
name (str): The name of this layer. It is optional.
Returns: Returns:
Variable: A transposed Tensor. Variable: A transposed Tensor.
...@@ -3228,9 +3312,9 @@ def multiplex(inputs, index): ...@@ -3228,9 +3312,9 @@ def multiplex(inputs, index):
row of the matrix, then `O[i]` is equal to :math:`I_{ID[i]}[i]`. row of the matrix, then `O[i]` is equal to :math:`I_{ID[i]}[i]`.
Args: Args:
inputs (list): A list of variables to gather from. All variables have the inputs (list): A list of variables to gather from. All variables have the
same shape and the rank is at least 2. same shape and the rank is at least 2.
index (Variable): Tensor<int32>, index variable which is a 2-D tensor index (Variable): Tensor<int32>, index variable which is a 2-D tensor
with shape [M, 1] where M is the batch size. with shape [M, 1] where M is the batch size.
Returns: Returns:
...@@ -3429,7 +3513,8 @@ def autoincreased_step_counter(counter_name=None, begin=1, step=1): ...@@ -3429,7 +3513,8 @@ def autoincreased_step_counter(counter_name=None, begin=1, step=1):
begin(int): The first value of this counter. begin(int): The first value of this counter.
step(int): The increment step between each execution. step(int): The increment step between each execution.
Returns(Variable): The global run counter. Returns:
Variable: The global run counter.
""" """
helper = LayerHelper('global_step_counter') helper = LayerHelper('global_step_counter')
if counter_name is None: if counter_name is None:
...@@ -3490,7 +3575,7 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None): ...@@ -3490,7 +3575,7 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None):
the corresponding dimension of x. the corresponding dimension of x.
Args: Args:
input(variable): The input tensor. x(variable): The input tensor.
shape(list): The new shape. At most one dimension of the new shape can shape(list): The new shape. At most one dimension of the new shape can
be -1. be -1.
actual_shape(variable): An optional input. If provided, reshape actual_shape(variable): An optional input. If provided, reshape
...@@ -3502,8 +3587,10 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None): ...@@ -3502,8 +3587,10 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None):
inplace(bool): If this flag is set true, a new output tensor is created inplace(bool): If this flag is set true, a new output tensor is created
whose data is copied from input x, otherwise the output whose data is copied from input x, otherwise the output
shares data with input without copying. shares data with input without copying.
name (str): The name of this layer. It is optional.
Returns(variable): The output tensor. Returns:
Variable: The output tensor.
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -3929,22 +4016,25 @@ def dice_loss(input, label, epsilon=0.00001): ...@@ -3929,22 +4016,25 @@ def dice_loss(input, label, epsilon=0.00001):
return reduce_mean(dice_score) return reduce_mean(dice_score)
def resize_bilinear(input, out_shape=None, scale=None, name=None): def image_resize(input,
out_shape=None,
scale=None,
name=None,
resample='BILINEAR'):
""" """
The mathematical meaning of resize bilinear layer is Resize a batch of images.
Bilinear interpolation.
Bilinear interpolation is an extension of linear interpolation for
interpolating functions of two variables (e.g. H-direction and
W-direction in this layer) on a rectilinear 2D grid.
For details, please refer to Wikipedia: The input must be a tensor of the shape (num_batches, channels, in_h, in_w),
https://en.wikipedia.org/wiki/Bilinear_interpolation and the resizing only applies on the last two dimensions(height and width).
Supporting resample methods:
'BILINEAR' : Bilinear interpolation
Args: Args:
input (Variable): The input tensor of resize bilinear layer, input (Variable): The input tensor of image resize layer,
This is a 4-D tensor of the shape This is a 4-D tensor of the shape
(num_batches, channels, in_h, in_w). (num_batches, channels, in_h, in_w).
out_shape(list|tuple|Variable|None): Output shape of resize bilinear out_shape(list|tuple|Variable|None): Output shape of image resize
layer, the shape is (out_h, out_w). layer, the shape is (out_h, out_w).
Default: None Default: None
scale(float|None): The multiplier for the input height or width. scale(float|None): The multiplier for the input height or width.
...@@ -3953,6 +4043,8 @@ def resize_bilinear(input, out_shape=None, scale=None, name=None): ...@@ -3953,6 +4043,8 @@ def resize_bilinear(input, out_shape=None, scale=None, name=None):
Default: None Default: None
name(str|None): A name for this layer(optional). If set None, the layer name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically. will be named automatically.
resample(str): The resample method. It can only be 'BILINEAR' currently.
Default: 'BILINEAR'
Returns: Returns:
out (Variable): The output is a 4-D tensor of the shape out (Variable): The output is a 4-D tensor of the shape
...@@ -3961,8 +4053,12 @@ def resize_bilinear(input, out_shape=None, scale=None, name=None): ...@@ -3961,8 +4053,12 @@ def resize_bilinear(input, out_shape=None, scale=None, name=None):
Examples: Examples:
.. code-block:: python .. code-block:: python
out = fluid.layers.resize_bilinear(input, out_shape=[12, 12]) out = fluid.layers.image_resize(input, out_shape=[12, 12])
""" """
resample_methods = {'BILINEAR': 'bilinear_interp'}
if resample not in resample_methods:
raise ValueError(
"The 'resample' of image_resize can only be 'BILINEAR' currently.")
if out_shape is None and scale is None: if out_shape is None and scale is None:
raise ValueError("One of out_shape and scale must not be None") raise ValueError("One of out_shape and scale must not be None")
helper = LayerHelper('bilinear_interp', **locals()) helper = LayerHelper('bilinear_interp', **locals())
...@@ -3990,7 +4086,7 @@ def resize_bilinear(input, out_shape=None, scale=None, name=None): ...@@ -3990,7 +4086,7 @@ def resize_bilinear(input, out_shape=None, scale=None, name=None):
out = helper.create_tmp_variable(dtype) out = helper.create_tmp_variable(dtype)
helper.append_op( helper.append_op(
type="bilinear_interp", type=resample_methods[resample],
inputs=inputs, inputs=inputs,
outputs={"Out": out}, outputs={"Out": out},
attrs={"out_h": out_h, attrs={"out_h": out_h,
...@@ -3998,6 +4094,62 @@ def resize_bilinear(input, out_shape=None, scale=None, name=None): ...@@ -3998,6 +4094,62 @@ def resize_bilinear(input, out_shape=None, scale=None, name=None):
return out return out
@templatedoc(op_type="bilinear_interp")
def resize_bilinear(input, out_shape=None, scale=None, name=None):
    """
    ${comment}

    Args:
        input(${x_type}): ${x_comment}.

        out_shape(${out_size_type}): ${out_size_comment}.

        scale(float|None): The multiplier for the input height or width. At
            least one of out_shape or scale must be set. And out_shape has
            a higher priority than scale. Default: None.

        name(str|None): A name for this layer(optional). It is forwarded
            unchanged to image_resize.

    Returns:
        ${out_comment}.
    """
    # Thin wrapper: delegate to the generic resize layer with the resample
    # method pinned to bilinear interpolation.
    return image_resize(
        input=input,
        out_shape=out_shape,
        scale=scale,
        name=name,
        resample='BILINEAR')
def image_resize_short(input, out_short_len, resample='BILINEAR'):
    """
    Resize a batch of images so that their short edge equals
    'out_short_len'.

    The long edge is scaled by the same factor as the short edge and rounded
    to the nearest integer, so the length-width ratio of the images stays
    (approximately) constant.

    Args:
        input (Variable): The input tensor of image resize layer,
                          This is a 4-D tensor of the shape
                          (num_batches, channels, in_h, in_w).
        out_short_len(int): The length of output images' short edge.
        resample (str): resample method forwarded to image_resize,
                        default: BILINEAR.

    Returns:
        out (Variable): The output is a 4-D tensor of the shape
                        (num_batches, channels, out_h, out_w).

    Raises:
        ValueError: If the rank of input is not 4.
    """
    dims = input.shape
    if len(dims) != 4:
        raise ValueError(
            "The rank of input must be 4 (num_batches, channels, in_h, in_w).")

    # Only the spatial dimensions (in_h, in_w) take part in the resize.
    hw = dims[2:4]
    short_idx = hw.index(min(hw))
    long_idx = 1 - short_idx

    # Factor that maps the current short edge onto the requested length.
    ratio = float(out_short_len) / float(hw[short_idx])

    out_shape = list(hw)
    out_shape[short_idx] = out_short_len
    # Adding 0.5 before truncation rounds the long edge to the nearest integer.
    out_shape[long_idx] = int(float(hw[long_idx]) * ratio + 0.5)
    return image_resize(input=input, out_shape=out_shape, resample=resample)
def gather(input, index): def gather(input, index):
""" """
Output is obtained by gathering entries of the outer-most dimension Output is obtained by gathering entries of the outer-most dimension
...@@ -4005,7 +4157,7 @@ def gather(input, index): ...@@ -4005,7 +4157,7 @@ def gather(input, index):
.. math:: .. math::
Out = X[Index] Out = X[Index]
.. code-block:: text .. code-block:: text
...@@ -4013,8 +4165,8 @@ def gather(input, index): ...@@ -4013,8 +4165,8 @@ def gather(input, index):
Given: Given:
X = [[1, 2], X = [[1, 2],
[3, 4], [3, 4],
[5, 6]] [5, 6]]
Index = [1, 2] Index = [1, 2]
...@@ -4032,6 +4184,7 @@ def gather(input, index): ...@@ -4032,6 +4184,7 @@ def gather(input, index):
output (Variable): The output is a tensor with the same rank as input. output (Variable): The output is a tensor with the same rank as input.
Examples: Examples:
.. code-block:: python .. code-block:: python
output = fluid.layers.gather(x, index) output = fluid.layers.gather(x, index)
...@@ -4047,10 +4200,31 @@ def gather(input, index): ...@@ -4047,10 +4200,31 @@ def gather(input, index):
return out return out
def random_crop(input, shape, seed=1): @templatedoc()
def random_crop(x, shape, seed=None):
"""
${comment}
Examples:
>>> img = fluid.layers.data("img", [3, 256, 256])
>>> cropped_img = fluid.layers.random_crop(img, shape=[3, 224, 224])
Args:
x(${x_type}): ${x_comment}
shape(${shape_type}): ${shape_comment}
seed(int|${seed_type}|None): ${seed_comment} By default, the seed will
get from `random.randint(-65536, 65535)`.
Returns:
${out_comment}
"""
helper = LayerHelper("random_crop", **locals()) helper = LayerHelper("random_crop", **locals())
dtype = helper.input_dtype() dtype = helper.input_dtype()
out = helper.create_tmp_variable(dtype) out = helper.create_tmp_variable(dtype)
if seed is None:
seed = random.randint(-65536, 65535)
if isinstance(seed, int): if isinstance(seed, int):
seed_value = seed seed_value = seed
seed = helper.create_tmp_variable(dtype="int64") seed = helper.create_tmp_variable(dtype="int64")
...@@ -4069,9 +4243,59 @@ def random_crop(input, shape, seed=1): ...@@ -4069,9 +4243,59 @@ def random_crop(input, shape, seed=1):
seed_out = helper.create_tmp_variable(dtype="int64") seed_out = helper.create_tmp_variable(dtype="int64")
helper.append_op( helper.append_op(
type="random_crop", type="random_crop",
inputs={"X": input, inputs={"X": x,
"Seed": seed}, "Seed": seed},
outputs={"Out": out, outputs={"Out": out,
"SeedOut": seed_out}, "SeedOut": seed_out},
attrs={"shape": shape}) attrs={"shape": shape})
return out return out
def mean_iou(input, label, num_classes):
    """
    Mean Intersection-Over-Union is a common evaluation metric for
    semantic image segmentation, which first computes the IOU for each
    semantic class and then computes the average over classes.
    IOU is defined as follows:

    .. math::

        IOU = true_positive / (true_positive + false_positive + false_negative).

    The predictions are accumulated in a confusion matrix and mean-IOU
    is then calculated from it.

    Args:
        input (Variable): A Tensor of prediction results for semantic labels
                          with type int32 or int64.
        label (Variable): A Tensor of ground truth labels with type int32 or
                          int64. Its shape should be the same as input.
        num_classes (int): Passed to the operator as its `num_classes`
                           attribute; out_wrong and out_correct have shape
                           [num_classes].

    Returns:
        mean_iou (Variable): A Tensor representing the mean
                             intersection-over-union with shape [1].
        out_wrong(Variable): A Tensor with shape [num_classes]. The wrong
                             numbers of each class.
        out_correct(Variable): A Tensor with shape [num_classes]. The correct
                               numbers of each class.

    Examples:

        .. code-block:: python

            iou, wrongs, corrects = fluid.layers.mean_iou(predict, label, num_classes)
    """
    # Must stay the first statement: locals() is forwarded to the helper and
    # should contain only the function arguments.
    helper = LayerHelper('mean_iou', **locals())
    # The returned dtype is not used below; the call is kept in case it
    # validates the inputs -- TODO confirm against LayerHelper.input_dtype.
    helper.input_dtype()

    iou_var = helper.create_tmp_variable(dtype='float32')
    wrong_var = helper.create_tmp_variable(dtype='int32')
    correct_var = helper.create_tmp_variable(dtype='int32')

    op_inputs = {"predictions": input, "labels": label}
    op_outputs = {
        "out_mean_iou": iou_var,
        "out_wrong": wrong_var,
        "out_correct": correct_var,
    }
    helper.append_op(
        type="mean_iou",
        inputs=op_inputs,
        outputs=op_outputs,
        attrs={"num_classes": num_classes})
    return iou_var, wrong_var, correct_var
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册