diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc index b93400c4274e54d2812bb3d8774a93fa14570dd6..a06ca7952f8556671fa0662329be4eb7dfefc984 100644 --- a/paddle/fluid/operators/activation_op.cc +++ b/paddle/fluid/operators/activation_op.cc @@ -443,7 +443,7 @@ class SwishOpMaker : public framework::OpProtoAndCheckerMaker { AddComment(R"DOC( Swish Activation Operator. -$$out = \frac{x}{1 + e^{- \beta x}}$$ +$$out = \\frac{x}{1 + e^{- \beta x}}$$ )DOC"); } diff --git a/paddle/fluid/operators/clip_by_norm_op.cc b/paddle/fluid/operators/clip_by_norm_op.cc index c87bded034e382c981d119e8499d6780e288031f..eae86a373be278cbb3ea9425b2ff0169f8faa99e 100644 --- a/paddle/fluid/operators/clip_by_norm_op.cc +++ b/paddle/fluid/operators/clip_by_norm_op.cc @@ -54,10 +54,19 @@ be linearly scaled to make the L2 norm of $Out$ equal to $max\_norm$, as shown in the following formula: $$ -Out = \frac{max\_norm * X}{norm(X)}, +Out = \\frac{max\\_norm * X}{norm(X)}, $$ where $norm(X)$ represents the L2 norm of $X$. + +Examples: + .. code-block:: python + + data = fluid.layer.data( + name='data', shape=[2, 4, 6], dtype='float32') + reshaped = fluid.layers.clip_by_norm( + x=data, max_norm=0.5) + )DOC"); } }; diff --git a/paddle/fluid/operators/pool_op.cc b/paddle/fluid/operators/pool_op.cc index 6707cdded4020fe3e2b01ba399dfc279a9da677d..f8ad63690e84339da0390d4ddd2db45f25db385a 100644 --- a/paddle/fluid/operators/pool_op.cc +++ b/paddle/fluid/operators/pool_op.cc @@ -204,8 +204,6 @@ void Pool2dOpMaker::Make() { // TODO(dzhwinter): need to registered layout transform function AddComment(R"DOC( -Pool2d Operator. - The pooling2d operation calculates the output based on the input, pooling_type and ksize, strides, paddings parameters. Input(X) and output(Out) are in NCHW format, where N is batch size, C is the @@ -215,19 +213,28 @@ These two elements represent height and width, respectively. The input(X) size and output(Out) size may be different. Example: + Input: + X shape: $(N, C, H_{in}, W_{in})$ + Output: + Out shape: $(N, C, H_{out}, W_{out})$ + For ceil_mode = false: $$ - H_{out} = \frac{(H_{in} - ksize[0] + 2 * paddings[0])}{strides[0]} + 1 \\ - W_{out} = \frac{(W_{in} - ksize[1] + 2 * paddings[1])}{strides[1]} + 1 + H_{out} = \\frac{(H_{in} - ksize[0] + 2 * paddings[0])}{strides[0]} + 1 + $$ + $$ + W_{out} = \\frac{(W_{in} - ksize[1] + 2 * paddings[1])}{strides[1]} + 1 $$ For ceil_mode = true: $$ - H_{out} = \frac{(H_{in} - ksize[0] + 2 * paddings[0] + strides[0] - 1)}{strides[0]} + 1 \\ - W_{out} = \frac{(W_{in} - ksize[1] + 2 * paddings[1] + strides[1] - 1)}{strides[1]} + 1 + H_{out} = \\frac{(H_{in} - ksize[0] + 2 * paddings[0] + strides[0] - 1)}{strides[0]} + 1 + $$ + $$ + W_{out} = \\frac{(W_{in} - ksize[1] + 2 * paddings[1] + strides[1] - 1)}{strides[1]} + 1 $$ )DOC"); diff --git a/paddle/fluid/operators/uniform_random_batch_size_like_op.cc b/paddle/fluid/operators/uniform_random_batch_size_like_op.cc index 78fee77df8151221459b0afa0d6789bfe82cfda5..75d6181749e4e9bd81a3c02de69caf0acd81eef9 100644 --- a/paddle/fluid/operators/uniform_random_batch_size_like_op.cc +++ b/paddle/fluid/operators/uniform_random_batch_size_like_op.cc @@ -35,10 +35,10 @@ class UniformRandomBatchSizeLikeOpMaker : public BatchSizeLikeOpMaker { protected: void Apply() override { AddComment(R"DOC( -Uniform random operator +UniformRandomBatchSizeLike operator. This operator initializes a tensor with the same batch_size as the Input tensor - with random values sampled from a uniform distribution. +with random values sampled from a uniform distribution. )DOC"); AddAttr("min", diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index f6438c82ac207d0e38d8be5e9d6252b28e72826e..df0625649d2cf897e103131739aaa4d48f8a097c 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -1034,6 +1034,37 @@ class Block(object): class Program(object): + """ + Python Program. Beneath it is a ProgramDesc, which is used for + create c++ Program. A program is a self-contained programing + language like container. It has at least one Block, when the + control flow op like conditional_block, while_op is included, + it will contains nested block. + Please reference the framework.proto for details. + + Notes: we have default_startup_program and default_main_program + by default, a pair of them will shared the parameters. + The default_startup_program only run once to initialize parameters, + default_main_program run in every minibatch and adjust the weights. + + Args: + None + + Returns: + Python Program + + Examples: + .. code-block:: python + + main_program = Program() + startup_program = Program() + with fluid.program_guard(main_program=main_program, startup_program=startup_program): + fluid.layers.data(name="x", shape=[-1, 784], dtype='float32') + fluid.layers.data(name="y", shape=[-1, 1], dtype='int32') + fluid.layers.fc(name="fc", shape=[10], dtype='float32', act="relu") + + """ + def __init__(self): self.desc = core.ProgramDesc() self.blocks = [Block(self, 0)] @@ -1099,6 +1130,8 @@ class Program(object): def clone(self, for_test=False): """Clone the Program object + Args: + for_test(bool): indicate whether clone for test. Set for_test to False when we want to clone the program for training. Set for_test to True when we want to clone the program for testing. @@ -1109,8 +1142,9 @@ class Program(object): the is_test attributes in these operators will be set to True for testing purposes, otherwise, they remain unchanged. - Returns(Program): - The cloned Program object. + Returns: + Program: The cloned Program object. + """ if for_test: p = self.inference_optimize() @@ -1228,6 +1262,7 @@ class Program(object): def copy_param_info_from(self, other): """ Copy the information of parameters from other program. + Args: other(Program): Other program @@ -1246,6 +1281,7 @@ class Program(object): def copy_data_info_from(self, other): """ Copy the information of data variables from other program. + Args: other(Program): Other program @@ -1299,6 +1335,7 @@ class Parameter(Variable): def to_string(self, throw_on_error, with_details=False): """ To debug string. + Args: throw_on_error(bool): raise exception when self is not initialized when throw_on_error is True diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py index 204486a0453fa7952e697f540487ef0e3f91f3aa..3d6cf95b3de45b406ff93270caa0669a8cb42441 100644 --- a/python/paddle/fluid/layers/control_flow.py +++ b/python/paddle/fluid/layers/control_flow.py @@ -822,17 +822,25 @@ def max_sequence_len(rank_table): def lod_tensor_to_array(x, table): - """ Convert a LOD_TENSOR to an LOD_TENSOR_ARRAY. + """ + Convert a LoDTensor to a LoDTensorArray. + + This function split a LoDTesnor to a LoDTensorArray according to its LoD + information. LoDTensorArray is an alias of C++ std::vector in + PaddlePaddle. The generated LoDTensorArray of this function can be further read + or written by `read_from_array()` and `write_to_array()` operators. However, + this function is generally an internal component of PaddlePaddle `DynamicRNN`. + Users should not use it directly. Args: - x (Variable|list): The LOD tensor to be converted to a LOD tensor array. + x (Variable|list): The LoDTensor to be converted to a LoDTensorArray. table (ParamAttr|list): The variable that stores the level of lod which is ordered by sequence length in - descending order. + descending order. It is generally generated + by `layers.lod_rank_table()` API. Returns: - Variable: The variable of type array that has been converted from a - tensor. + Variable: The LoDTensorArray that has been converted from the input tensor. Examples: .. code-block:: python @@ -897,8 +905,7 @@ def increment(x, value=1.0, in_place=True): in_place (bool): If the increment should be performed in-place. Returns: - Variable: The tensor variable storing the transformation of - element-wise increment of each value in the input. + Variable: The elementwise-incremented object. Examples: .. code-block:: python @@ -940,7 +947,7 @@ def array_write(x, i, array=None): Variable: The output LOD_TENSOR_ARRAY where the input tensor is written. Examples: - .. code-block::python + .. code-block:: python tmp = fluid.layers.zeros(shape=[10], dtype='int32') i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10) @@ -1054,14 +1061,31 @@ def equal(x, y, cond=None, **ignored): def array_read(array, i): - """This function performs the operation to read the data in as an + """ + This function performs the operation to read the data in as an LOD_TENSOR_ARRAY. + + .. code-block:: text + + Given: + + array = [0.6, 0.1, 0.3, 0.1] + + And: + + i = 2 + + Then: + + output = 0.3 + Args: - array (Variable|list): The input tensor that will be written to an array. - i (Variable|list): The subscript index in tensor array, that points the - place where data will be written to. + array (Variable|list): The input tensor that store data to be read. + i (Variable|list): The index of the data to be read from input array. + Returns: Variable: The tensor type variable that has the data written to it. + Examples: .. code-block:: python @@ -1154,6 +1178,13 @@ def array_length(array): class ConditionalBlockGuard(BlockGuard): + """ + ConditionalBlockGuard is derived from BlockGuard. It is dedicated for + holding a ConditionalBlock, and helping users entering and exiting the + ConditionalBlock via Python's 'with' keyword. However, ConditionalBlockGuard + is generally an internal component of IfElse, users should not use it directly. + """ + def __init__(self, block): if not isinstance(block, ConditionalBlock): raise TypeError("block should be conditional block") @@ -1875,26 +1906,26 @@ def reorder_lod_tensor_by_rank(x, rank_table): def is_empty(x, cond=None, **ignored): """ - **Is Empty** - - This layer returns the truth value of whether the variable is empty. + Test whether a Variable is empty. Args: - x(Variable): Operand of *is_empty* - cond(Variable|None): Optional output variable to store the result - of *is_empty* + x (Variable): The Variable to be tested. + cond (Variable|None): Output parameter. Returns the test result + of given 'x'. Default: None Returns: - Variable: The tensor variable storing the output of *is_empty*. + Variable: A bool scalar. True if 'x' is an empty Variable. Raises: TypeError: If input cond is not a variable, or cond's dtype is - not bool + not bool. Examples: .. code-block:: python - less = fluid.layers.is_empty(x=input) + res = fluid.layers.is_empty(x=input) + # or: + fluid.layers.is_empty(x=input, cond=res) """ helper = LayerHelper("is_empty", **locals()) if cond is None: diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py index 5dc18c633e809faeb949260cf2ba8dcda49e2fca..62faa4f6ec92dc35d33c7c6a3de4715028789236 100644 --- a/python/paddle/fluid/layers/io.py +++ b/python/paddle/fluid/layers/io.py @@ -544,6 +544,41 @@ def shuffle(reader, buffer_size): def batch(reader, batch_size): + """ + This layer is a reader decorator. It takes a reader and adds + 'batching' decoration on it. When reading with the result + decorated reader, output data will be automatically organized + to the form of batches. + + Args: + reader(Variable): The reader to be decorated with 'batching'. + batch_size(int): The batch size. + + Returns: + Variable: The reader which has been decorated with 'batching'. + + Examples: + .. code-block:: python + + raw_reader = fluid.layers.io.open_files(filenames=['./data1.recordio', + './data2.recordio'], + shapes=[(3,224,224), (1)], + lod_levels=[0, 0], + dtypes=['float32', 'int64'], + thread_num=2, + buffer_size=2) + batch_reader = fluid.layers.batch(reader=raw_reader, batch_size=5) + + # If we read data with the raw_reader: + # data = fluid.layers.read_file(raw_reader) + # We can only get data instance by instance. + # + # However, if we read data with the batch_reader: + # data = fluid.layers.read_file(batch_reader) + # Each 5 adjacent instances will be automatically combined together + # to become a batch. So what we get('data') is a batch data instead + # of an instance. + """ return __create_unshared_decorated_reader__( 'create_batch_reader', reader, {'batch_size': int(batch_size)}) @@ -589,15 +624,41 @@ def parallel(reader): {}) -def read_file(file_obj): +def read_file(reader): + """ + Execute the given reader and get data via it. + + A reader is also a Variable. It can be a raw reader generated by + `fluid.layers.open_files()` or a decorated one generated by + `fluid.layers.double_buffer()` and so on. + + Args: + + reader(Variable): The reader to execute. + + Returns: + Tuple[Variable]: Data read via the given reader. + + Examples: + .. code-block:: python + + data_file = fluid.layers.open_files( + filenames=['mnist.recordio'], + shapes=[(-1, 748), (-1, 1)], + lod_levels=[0, 0], + dtypes=["float32", "int64"]) + data_file = fluid.layers.double_buffer( + fluid.layers.batch(data_file, batch_size=64)) + input, label = fluid.layers.read_file(data_file) + """ helper = LayerHelper('read_file') out = [ helper.create_tmp_variable( stop_gradient=True, dtype='float32') - for _ in range(len(file_obj.desc.shapes())) + for _ in range(len(reader.desc.shapes())) ] helper.append_op( - type='read', inputs={'Reader': [file_obj]}, outputs={'Out': out}) + type='read', inputs={'Reader': [reader]}, outputs={'Out': out}) if len(out) == 1: return out[0] else: diff --git a/python/paddle/fluid/layers/learning_rate_scheduler.py b/python/paddle/fluid/layers/learning_rate_scheduler.py index 3d81d66ea4b610aae5b1a93f411e97a39bcfec21..6071e3e74218e4db4cddc223818d3a9b7086fd86 100644 --- a/python/paddle/fluid/layers/learning_rate_scheduler.py +++ b/python/paddle/fluid/layers/learning_rate_scheduler.py @@ -71,21 +71,40 @@ def noam_decay(d_model, warmup_steps): def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False): - """Applies exponential decay to the learning rate. + """ + Applies exponential decay to the learning rate. + + When training a model, it is often recommended to lower the learning rate as the + training progresses. By using this function, the learning rate will be decayed by + 'decay_rate' every 'decay_steps' steps. + + >>> if staircase == True: + >>> decayed_learning_rate = learning_rate * decay_rate ^ floor(global_step / decay_steps) + >>> else: + >>> decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps) - ```python - decayed_learning_rate = learning_rate * - decay_rate ^ (global_step / decay_steps) - ``` Args: - learning_rate: A scalar float32 value or a Variable. This - will be the initial learning rate during training - decay_steps: A Python `int32` number. - decay_rate: A Python `float` number. - staircase: Boolean. If set true, decay the learning rate every decay_steps. + learning_rate(Variable|float): The initial learning rate. + decay_steps(int): See the decay computation above. + decay_rate(float): The decay rate. See the decay computation above. + staircase(Boolean): If True, decay the learning rate at discrete intervals. + Default: False Returns: - The decayed learning rate + Variable: The decayed learning rate + + Examples: + .. code-block:: python + + base_lr = 0.1 + sgd_optimizer = fluid.optimizer.SGD( + learning_rate=fluid.layers.exponential_decay( + learning_rate=base_lr, + decay_steps=10000, + decay_rate=0.5, + staircase=True)) + sgd_optimizer.minimize(avg_cost) + """ global_step = _decay_step_counter() @@ -129,22 +148,39 @@ def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False): def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False): - """Applies inverse time decay to the initial learning rate. + """ + Applies inverse time decay to the initial learning rate. - >>> if staircase: + When training a model, it is often recommended to lower the learning rate as the + training progresses. By using this function, an inverse decay function will be + applied to the initial learning rate. + + >>> if staircase == True: >>> decayed_learning_rate = learning_rate / (1 + decay_rate * floor(global_step / decay_step)) >>> else: >>> decayed_learning_rate = learning_rate / (1 + decay_rate * global_step / decay_step) Args: - learning_rate: A scalar float32 value or a Variable. This - will be the initial learning rate during training. - decay_steps: A Python `int32` number. - decay_rate: A Python `float` number. - staircase: Boolean. If set true, decay the learning rate every decay_steps. + learning_rate(Variable|float): The initial learning rate. + decay_steps(int): See the decay computation above. + decay_rate(float): The decay rate. See the decay computation above. + staircase(Boolean): If True, decay the learning rate at discrete intervals. + Default: False Returns: - The decayed learning rate + Variable: The decayed learning rate + + Examples: + .. code-block:: python + + base_lr = 0.1 + sgd_optimizer = fluid.optimizer.SGD( + learning_rate=fluid.layers.inverse_time_decay( + learning_rate=base_lr, + decay_steps=10000, + decay_rate=0.5, + staircase=True)) + sgd_optimizer.minimize(avg_cost) """ global_step = _decay_step_counter() diff --git a/python/paddle/fluid/layers/metric.py b/python/paddle/fluid/layers/metric.py index 0a978eaa3740209f407973480057e2272abe5773..58de1b6b9fe17a24203e93de6780190b9fc6b3e7 100644 --- a/python/paddle/fluid/layers/metric.py +++ b/python/paddle/fluid/layers/metric.py @@ -27,8 +27,32 @@ __all__ = ['accuracy', 'auc'] def accuracy(input, label, k=1, correct=None, total=None): """ + accuracy layer. + Refer to the https://en.wikipedia.org/wiki/Precision_and_recall + This function computes the accuracy using the input and label. - The output is the top k inputs and their indices. + If the correct label occurs in top k predictions, then correct will increment by one. + Note: the dtype of accuracy is determined by input. the input and label dtype can be different. + + Args: + input(Variable): The input of accuracy layer, which is the predictions of network. + Carry LoD information is supported. + label(Variable): The label of dataset. + k(int): The top k predictions for each class will be checked. + correct(Variable): The correct predictions count. + total(Variable): The total entries count. + + Returns: + Variable: The correct rate. + + Examples: + .. code-block:: python + + data = fluid.layers.data(name="data", shape=[-1, 32, 32], dtype="float32") + label = fluid.layers.data(name="data", shape=[-1,1], dtype="int32") + predict = fluid.layers.fc(input=data, size=10) + acc = fluid.layers.accuracy(input=predict, label=label, k=5) + """ helper = LayerHelper("accuracy", **locals()) topk_out, topk_indices = nn.topk(input, k=k) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 40e72aa4884015e5724fd057ea85c58e23a4bd4b..6032573393a69efc5d93dd4baa427572b9c38dd8 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -91,6 +91,8 @@ __all__ = [ 'gather', 'random_crop', 'mean_iou', + 'relu', + 'log', ] @@ -106,14 +108,15 @@ def fc(input, """ **Fully Connected Layer** - The fully connected layer can take multiple tensors as its inputs. It - creates a variable called weights for each input tensor, which represents - a fully connected weight matrix from each input unit to each output unit. - The fully connected layer multiplies each input tensor with its coresponding - weight to produce an output Tensor. If multiple input tensors are given, - the results of multiple multiplications will be sumed up. If bias_attr is - not None, a bias variable will be created and added to the output. Finally, - if activation is not None, it will be applied to the output as well. + This function creates a fully connected layer in the network. It can take + multiple tensors as its inputs. It creates a variable called weights for + each input tensor, which represents a fully connected weight matrix from + each input unit to each output unit. The fully connected layer multiplies + each input tensor with its coresponding weight to produce an output Tensor. + If multiple input tensors are given, the results of multiple multiplications + will be sumed up. If bias_attr is not None, a bias variable will be created + and added to the output. Finally, if activation is not None, it will be applied + to the output as well. This process can be formulated as follows: @@ -154,7 +157,7 @@ def fc(input, name (str, default None): The name of this layer. Returns: - A tensor variable storing the transformation result. + Variable: The transformation result. Raises: ValueError: If rank of the input tensor is less than 2. @@ -162,8 +165,7 @@ def fc(input, Examples: .. code-block:: python - data = fluid.layers.data( - name="data", shape=[32, 32], dtype="float32") + data = fluid.layers.data(name="data", shape=[32, 32], dtype="float32") fc = fluid.layers.fc(input=data, size=1000, act="tanh") """ @@ -845,11 +847,14 @@ def linear_chain_crf(input, label, param_attr=None): Args: input(${emission_type}): ${emission_comment} + input(${transition_type}): ${transition_comment} label(${label_type}): ${label_comment} param_attr(ParamAttr): The attribute of the learnable parameter. Returns: - ${log_likelihood_comment} + output(${emission_exps_type}): ${emission_exps_comment} \n + output(${transition_exps_type}): ${transition_exps_comment} \n + output(${log_likelihood_type}): ${log_likelihood_comment} """ helper = LayerHelper('linear_chain_crf', **locals()) @@ -911,7 +916,7 @@ def cos_sim(X, Y): Args: X (Variable): The input X. Y (Variable): The input Y. - + Returns: Variable: the output of cosine(X, Y). """ @@ -1117,7 +1122,7 @@ def chunk_eval(input, chunk_scheme (str): ${chunk_scheme_comment} num_chunk_types (int): ${num_chunk_types_comment} excluded_chunk_types (list): ${excluded_chunk_types_comment} - + Returns: tuple: tuple containing: (precision, recall, f1_score, num_infer_chunks, num_label_chunks, @@ -1177,15 +1182,11 @@ def sequence_conv(input, bias_attr (ParamAttr|None): attributes for bias param_attr (ParamAttr|None): attributes for parameter act (str): the activation type - + Returns: Variable: output of sequence_conv """ - # FIXME(dzh) : want to unify the argument of python layer - # function. So we ignore some unecessary attributes. - # such as, padding_trainable, context_start. - helper = LayerHelper('sequence_conv', **locals()) dtype = helper.input_dtype() filter_shape = [filter_size * input.shape[1], num_filters] @@ -1740,6 +1741,7 @@ def sequence_last_step(input): return sequence_pool(input=input, pool_type="last") +@templatedoc() def pool2d(input, pool_size=-1, pool_type="max", @@ -1751,24 +1753,45 @@ def pool2d(input, use_mkldnn=False, name=None): """ - This function adds the operator for pooling in 2 dimensions, using the - pooling configurations mentioned in input parameters. + ${comment} Args: - input (Variable): ${input_comment} - pool_size (int): ${ksize_comment} - pool_type (str): ${pooling_type_comment} + input (Variable): The input tensor of pooling operator. The format of + input tensor is NCHW, where N is batch size, C is + the number of channels, H is the height of the + feature, and W is the width of the feature. + pool_size (int): The side length of pooling windows. All pooling + windows are squares with pool_size on a side. + pool_type: ${pooling_type_comment} pool_stride (int): stride of the pooling layer. pool_padding (int): padding size. - global_pooling (bool): ${global_pooling_comment} - use_cudnn (bool): ${use_cudnn_comment} - ceil_mode (bool): ${ceil_mode_comment} - use_mkldnn (bool): ${use_mkldnn_comment} - name (str): A name for this layer(optional). If set None, the layer - will be named automatically. - + global_pooling: ${global_pooling_comment} + use_cudnn: ${use_cudnn_comment} + ceil_mode: ${ceil_mode_comment} + use_mkldnn: ${use_mkldnn_comment} + name (str|None): A name for this layer(optional). If set None, the + layer will be named automatically. + Returns: - Variable: output of pool2d layer. + Variable: The pooling result. + + Raises: + ValueError: If 'pool_type' is not "max" nor "avg" + ValueError: If 'global_pooling' is False and 'pool_size' is -1 + ValueError: If 'use_cudnn' is not a bool value. + + Examples: + + .. code-block:: python + + data = fluid.layers.data( + name='data', shape=[3, 32, 32], dtype='float32') + conv2d = fluid.layers.pool2d( + input=data, + pool_size=2, + pool_type='max', + pool_stride=1, + global_pooling=False) """ if pool_type not in ["max", "avg"]: raise ValueError( @@ -2127,15 +2150,37 @@ def layer_norm(input, def beam_search_decode(ids, scores, name=None): """ - ${beam_search_decode} + Beam Search Decode + + This layers is to pack the output of beam search layer into sentences and + associated scores. It is usually called after the beam search layer. + Typically, the output of beam search layer is a tensor of selected ids, with + a tensor of the score of each id. Beam search layer's output ids, however, + are generated directly during the tree search, and they are stacked by each + level of the search tree. Thus we need to reorganize them into sentences, + based on the score of each id. This layer takes the output of beam search + layer as input and repack them into sentences. Args: - ids (Variable): ${ids_comment} - scores (Variable): ${scores_comment} + ids (Variable): The selected ids, output of beam search layer. + scores (Variable): The associated scores of the ids, out put of beam + search layer. name (str): The name of this layer. It is optional. - + Returns: - tuple: a tuple of two output variable: sentence_ids, sentence_scores + tuple(Variable): a tuple of two output tensors: sentence_ids, sentence_scores. + sentence_ids is a tensor with shape [size, length], where size is the + beam size of beam search, and length is the length of each sentence. + Note that the length of sentences may vary. + sentence_scores is a tensor with the same shape as sentence_ids. + + Examples: + .. code-block:: python + + ids, scores = fluid.layers.beam_search( + pre_ids, ids, scores, beam_size, end_id) + sentence_ids, sentence_scores = fluid.layers.beam_search_decode( + ids, scores) """ helper = LayerHelper('beam_search_decode', **locals()) sentence_ids = helper.create_tmp_variable(dtype=ids.dtype) @@ -2567,7 +2612,7 @@ def beam_search(pre_ids, ids, scores, beam_size, end_id, level=0): beam_size (int): ${beam_size_comment} end_id (int): ${end_id_comment} level (int): ${level_comment} - + Returns: tuple: a tuple of beam_search output variables: selected_ids, selected_scores ''' @@ -3016,7 +3061,7 @@ def split(input, num_or_sections, dim=-1, name=None): will be named automatically. Returns: - List: The list of segmented tensor variables. + list(Variable): The list of segmented tensor variables. Examples: .. code-block:: python @@ -3225,25 +3270,51 @@ def topk(input, k, name=None): This operator is used to find values and indices of the k largest entries for the last dimension. - If the input is a vector (rank=1), finds the k largest entries in the vector + If the input is a vector (1-D Tensor), finds the k largest entries in the vector and outputs their values and indices as vectors. Thus values[j] is the j-th largest entry in input, and its index is indices[j]. If the input is a Tensor with higher rank, this operator computes the top k entries along the last dimension. + For example: + + .. code-block:: text + + If: + input = [[5, 4, 2, 3], + [9, 7, 10, 25], + [6, 2, 10, 1]] + k = 2 + + Then: + The first output: + values = [[5, 4], + [10, 25], + [6, 10]] + + The second output: + indices = [[0, 1], + [2, 3], + [0, 2]] + Args: input(Variable): The input variable which can be a vector or Tensor with higher rank. - k(int): An integer value to specify the top k largest elements. + k(int): The number of top elements to look for along the last dimension + of input. name(str|None): A name for this layer(optional). If set None, the layer - will be named automatically. + will be named automatically. + Default: None Returns: - values(Variable): The k largest elements along each last dimensional - slice. - indices(Variable): The indices of values within the last dimension of - input. + Tuple[Variable]: A tuple with two elements. Each element is a Variable. + The first one is k largest elements along each last + dimensional slice. The second one is indices of values + within the last dimension of input. + + Raises: + ValueError: If k < 1 or k is not less than the last dimension of input Examples: .. code-block:: python @@ -3251,7 +3322,7 @@ def topk(input, k, name=None): top5_values, top5_indices = layers.topk(input, k=5) """ shape = input.shape - if k < 1 and k >= shape[-1]: + if k < 1 or k >= shape[-1]: raise ValueError("k must be greater than 0 and less than %d." % (shape[-1])) @@ -3269,8 +3340,7 @@ def topk(input, k, name=None): return values, indices -def edit_distance(input, label, normalized=True, ignored_tokens=None, - name=None): +def edit_distance(input, label, normalized=True, ignored_tokens=None): """ EditDistance operator computes the edit distances between a batch of hypothesis strings and their references. Edit distance, also called @@ -3284,21 +3354,21 @@ def edit_distance(input, label, normalized=True, ignored_tokens=None, "kitten" -> "sitten" -> "sittin" -> "sitting" - Input(Hyps) is a LoDTensor consisting of all the hypothesis strings with + The input is a LoDTensor consisting of all the hypothesis strings with the total number denoted by `batch_size`, and the separation is specified by the LoD information. And the `batch_size` reference strings are arranged - in order in the same way in the LoDTensor Input(Refs). + in order in the same way in the input LoDTensor. - Output(Out) contains the `batch_size` results and each stands for the edit + The output contains the `batch_size` results and each stands for the edit distance for a pair of strings respectively. If Attr(normalized) is true, the edit distance will be divided by the length of reference string. Args: input(Variable): The indices for hypothesis strings. label(Variable): The indices for reference strings. - normalized(bool): Indicated whether to normalize the edit distance by + normalized(bool, default True): Indicated whether to normalize the edit distance by the length of reference string. - ignored_tokens(list of int): Tokens that should be removed before + ignored_tokens(list, default None): Tokens that should be removed before calculating edit distance. name (str): The name of this layer. It is optional. @@ -3310,7 +3380,6 @@ def edit_distance(input, label, normalized=True, ignored_tokens=None, x = fluid.layers.data(name='x', shape=[8], dtype='float32') y = fluid.layers.data(name='y', shape=[7], dtype='float32') - cost = fluid.layers.edit_distance(input=x,label=y) """ helper = LayerHelper("edit_distance", **locals()) @@ -3430,35 +3499,33 @@ def warpctc(input, label, blank=0, norm_by_times=False): input tensor. Args: - input(Variable): (LodTensor, default: LoDTensor), - the unscaled probabilities of variable-length sequences, - which is a 2-D Tensor with LoD information. - It's shape is [Lp, num_classes + 1], where Lp is the sum of all input - sequences' length and num_classes is the true number of classes. - (not including the blank label). - label(Variable): (LodTensor, default: LoDTensor), the ground truth - of variable-length sequence, which is a 2-D Tensor with LoD - information. It is of the shape [Lg, 1], where Lg is th sum of - all labels' length. - blank (int): default 0, the blank label index of Connectionist - Temporal Classification (CTC) loss, which is in the - half-opened interval [0, num_classes + 1). - norm_by_times (bool): default false, whether to normalize - the gradients by the number of time-step, which is also the - sequence's length. There is no need to normalize the gradients - if warpctc layer was follewed by a mean_op. + input (Variable): The unscaled probabilities of variable-length sequences, + which is a 2-D Tensor with LoD information. + It's shape is [Lp, num_classes + 1], where Lp is the sum of all input + sequences' length and num_classes is the true number of classes. + (not including the blank label). + label (Variable): The ground truth of variable-length sequence, + which is a 2-D Tensor with LoD information. It is of the shape [Lg, 1], + where Lg is th sum of all labels' length. + blank (int, default 0): The blank label index of Connectionist + Temporal Classification (CTC) loss, which is in the + half-opened interval [0, num_classes + 1). + norm_by_times(bool, default false): Whether to normalize the gradients + by the number of time-step, which is also the sequence's length. + There is no need to normalize the gradients if warpctc layer was + follewed by a mean_op. Returns: Variable: The Connectionist Temporal Classification (CTC) loss, which is a 2-D Tensor of the shape [batch_size, 1]. Examples: + .. code-block:: python - y = layers.data( - name='y', shape=[11, 8], dtype='float32', lod_level=1) - y_predict = layers.data( - name='y_predict', shape=[11, 1], dtype='float32') - cost = layers.warpctc(input=y_predict, label=y) + + label = fluid.layers.data(shape=[11, 8], dtype='float32', lod_level=1) + predict = fluid.layers.data(shape=[11, 1], dtype='float32') + cost = fluid.layers.warpctc(input=predict, label=label) """ helper = LayerHelper('warpctc', **locals()) @@ -3487,17 +3554,21 @@ def sequence_reshape(input, new_dim): .. code-block:: text x is a LoDTensor: - x.lod = [[2, 4]] - x.data = [[1, 2], [3, 4], - [5, 6], [7, 8], [9, 10], [11, 12]] + x.lod = [[0, 2, 6]] + x.data = [[1, 2], [3, 4], + [5, 6], [7, 8], + [9, 10], [11, 12]] x.dims = [6, 2] set new_dim = 4 then out is a LoDTensor: - out.lod = [[1, 2]] - out.data = [[1, 2, 3, 4], - [5, 6, 7, 8], [9, 10, 11, 12]] + + out.lod = [[0, 1, 3]] + + out.data = [[1, 2, 3, 4], + [5, 6, 7, 8], + [9, 10, 11, 12]] out.dims = [3, 4] Currently, only 1-level LoDTensor is supported and please make sure @@ -3505,19 +3576,19 @@ def sequence_reshape(input, new_dim): no remainder for each sequence. Args: - input (Variable): (LodTensor, default: LoDTensor), a 2-D LoDTensor - with shape being [N, M] where M for dimension. - new_dim (int): New dimension which the input LoDTensor is reshaped to. + + input (Variable): A 2-D LoDTensor with shape being [N, M] where M for dimension. + new_dim (int): New dimension that the input LoDTensor is reshaped to. Returns: + Variable: Reshaped LoDTensor according to new dimension. Examples: .. code-block:: python - x = fluid.layers.data(name='x', shape=[5, 20], - dtype='float32', lod_level=1) - x_reshaped = layers.sequence_reshape(input=x, new_dim=10) + x = fluid.layers.data(shape=[5, 20], dtype='float32', lod_level=1) + x_reshaped = fluid.layers.sequence_reshape(input=x, new_dim=10) """ helper = LayerHelper('sequence_reshape', **locals()) out = helper.create_tmp_variable(helper.input_dtype()) @@ -3553,7 +3624,7 @@ def nce(input, param_attr (ParamAttr|None): attributes for parameter bias_attr (ParamAttr|None): attributes for bias num_neg_samples (int): ${num_neg_samples_comment} - + Returns: Variable: The output nce loss. @@ -3723,8 +3794,6 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None): Examples: - As an example: - .. code-block:: text Given: @@ -3768,7 +3837,7 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None): output.lod = [[4, 4]] - The simple usage is: + Examples: .. code-block:: python @@ -4253,9 +4322,7 @@ def lrn(input, n=5, k=1.0, alpha=1e-4, beta=0.75, name=None): .. math:: - Output(i, x, y) = Input(i, x, y) / \left( - k + \alpha \sum\limits^{\min(C, c + n/2)}_{j = \max(0, c - n/2)} - (Input(j, x, y))^2 \right)^{\beta} + Output(i, x, y) = Input(i, x, y) / \\left(k + \\alpha \\sum\\limits^{\\min(C, c + n/2)}_{j = \\max(0, c - n/2)}(Input(j, x, y))^2\\right)^{\\beta} In the above equation: @@ -4769,6 +4836,62 @@ def random_crop(x, shape, seed=None): return out +def log(x): + """ + Calculates the natural log of the given input tensor, element-wise. + + .. math:: + + Out = \\ln(x) + + Args: + x (Variable): Input tensor. + + Returns: + Variable: The natural log of the input tensor computed element-wise. + + Examples: + + .. code-block:: python + + output = fluid.layers.log(x) + """ + helper = LayerHelper('log', **locals()) + dtype = helper.input_dtype() + out = helper.create_tmp_variable(dtype) + helper.append_op(type="log", inputs={"X": input}, outputs={"Out": out}) + return out + + +def relu(x): + """ + Relu takes one input data (Tensor) and produces one output data (Tensor) + where the rectified linear function, y = max(0, x), is applied to + the tensor elementwise. + + .. math:: + + Out = \\max(0, x) + + Args: + x (Variable): The input tensor. + + Returns: + Variable: The output tensor with the same shape as input. + + Examples: + + .. code-block:: python + + output = fluid.layers.relu(x) + """ + helper = LayerHelper('relu', **locals()) + dtype = helper.input_dtype() + out = helper.create_tmp_variable(dtype) + helper.append_op(type="relu", inputs={"X": input}, outputs={"Out": out}) + return out + + def mean_iou(input, label, num_classes): """ Mean Intersection-Over-Union is a common evaluation metric for @@ -4795,11 +4918,10 @@ def mean_iou(input, label, num_classes): out_wrong(Variable): A Tensor with shape [num_classes]. The wrong numbers of each class. out_correct(Variable): A Tensor with shape [num_classes]. The correct numbers of each class. - Examples: .. code-block:: python - + iou, wrongs, corrects = fluid.layers.mean_iou(predict, label, num_classes) """ helper = LayerHelper('mean_iou', **locals()) diff --git a/python/paddle/fluid/layers/ops.py b/python/paddle/fluid/layers/ops.py index 6f404c5cc608abda91c1d042d405f109dedc55c9..f4ca8ed3b3666d7fa82124d26a08c26d8eecf028 100644 --- a/python/paddle/fluid/layers/ops.py +++ b/python/paddle/fluid/layers/ops.py @@ -17,7 +17,6 @@ __activations__ = [ 'sigmoid', 'logsigmoid', 'exp', - 'relu', 'tanh', 'tanh_shrink', 'softshrink', @@ -29,7 +28,6 @@ __activations__ = [ 'sin', 'round', 'reciprocal', - 'log', 'square', 'softplus', 'softsign', diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index 18e0fedcc45d8bd405796176305e449bcb46c59f..149e77b52415025e78fbf4ee641151880b415fb0 100644 --- a/python/paddle/fluid/layers/tensor.py +++ b/python/paddle/fluid/layers/tensor.py @@ -108,16 +108,29 @@ def create_global_var(shape, force_cpu=False, name=None): """ - Create a global variable. such as global_step + Create a new variable in the global block(block 0). + Args: shape(list[int]): shape of the variable - value(float): the value of the variable - dtype(string): element type of the parameter - persistable(bool): if this variable is persistable - force_cpu(bool): force this variable to be on CPU + value(float): the value of the variable. The new created + variable will be filled with it. + dtype(string): data type of the variable + persistable(bool): if this variable is persistable. + Default: False + force_cpu(bool): force this variable to be on CPU. + Default: False + name(str|None): The name of the variable. If set to None the variable + name will be generated automatically. + Default: None Returns: Variable: the created Variable + + Examples: + .. code-block:: python + + var = fluid.create_global_var(shape=[2,3], value=1.0, dtype='float32', + persistable=True, force_cpu=True, name='new_var') """ helper = LayerHelper("global_var", **locals()) var = helper.create_global_variable( @@ -175,7 +188,8 @@ def concat(input, axis=0, name=None): Examples: .. code-block:: python - out = fluid.layers.concat(input=[Efirst, Esecond, Ethird, Efourth]) + + out = fluid.layers.concat(input=[Efirst, Esecond, Ethird, Efourth]) """ helper = LayerHelper('concat', **locals()) out = helper.create_tmp_variable(dtype=helper.input_dtype()) @@ -188,19 +202,21 @@ def concat(input, axis=0, name=None): def sums(input, out=None): - """This function performs the sum operation on the input and returns the + """ + This function performs the sum operation on the input and returns the result as the output. Args: input (Variable|list): The input tensor that has the elements that need to be summed up. + out (Variable|None): Output parameter. The sum result. + Default: None Returns: - Variable: The tensor type variable that has the sum of input - written to it. + Variable: the sum of input. The same as the argument 'out' Examples: - .. code-block::python + .. code-block:: python tmp = fluid.layers.zeros(shape=[10], dtype='int32') i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10) @@ -371,13 +387,13 @@ def argmin(x, axis=0): x(Variable): The input to compute the indices of the min elements. axis(int): Axis to compute indices along. - + Returns: Variable: The tensor variable storing the output - + Examples: .. code-block:: python - + out = fluid.layers.argmin(x=in, axis=0) out = fluid.layers.argmin(x=in, axis=-1) """ @@ -402,13 +418,13 @@ def argmax(x, axis=0): x(Variable): The input to compute the indices of the max elements. axis(int): Axis to compute indices along. - + Returns: Variable: The tensor variable storing the output - + Examples: .. code-block:: python - + out = fluid.layers.argmax(x=in, axis=0) out = fluid.layers.argmax(x=in, axis=-1) """ @@ -456,11 +472,12 @@ def zeros(shape, dtype, force_cpu=False): It also sets *stop_gradient* to True. Args: - shape(tuple|list|None): Shape of output tensor - dtype(np.dtype|core.VarDesc.VarType|str): Data type of output tensor + shape(tuple|list|None): Shape of output tensor. + dtype(np.dtype|core.VarDesc.VarType|str): Data type of output tensor. + force_cpu(bool, default False): Whether to make output stay on CPU. Returns: - Variable: The tensor variable storing the output + Variable: The tensor variable storing the output. Examples: .. code-block:: python