diff --git a/doc/api/v2/fluid/layers.rst b/doc/api/v2/fluid/layers.rst index 62c154e65dcff1bdfb00109bf1b724c34731652e..290d4098798f837d966a7a9dd0dea2565d50b870 100644 --- a/doc/api/v2/fluid/layers.rst +++ b/doc/api/v2/fluid/layers.rst @@ -493,3 +493,8 @@ swish ------ .. autofunction:: paddle.v2.fluid.layers.swish :noindex: + +l2_normalize +------------ +.. autofunction:: paddle.v2.fluid.layers.l2_normalize + :noindex: diff --git a/paddle/operators/clip_op.cc b/paddle/operators/clip_op.cc index 573bb9c7dfdac2366c2458dd9f27a035a9f9b813..7adb74eab78dcdd0251b8db60781f6e24e348634 100644 --- a/paddle/operators/clip_op.cc +++ b/paddle/operators/clip_op.cc @@ -51,8 +51,8 @@ class ClipOpMaker : public framework::OpProtoAndCheckerMaker { AddComment(R"DOC( Clip Operator. -The clip operator limits the value of given input within an interval. The interval is -specified with arguments 'min' and 'max': +The clip operator limits the value of given input within an interval. The +interval is specified with arguments 'min' and 'max': $$ Out = \min(\max(X, min), max) diff --git a/paddle/operators/elementwise_op.h b/paddle/operators/elementwise_op.h index a342595b546bfca1a344cf8a549597df6a29adec..1a0131d8b943da3deebd0c461f78cb02b34e6dc2 100644 --- a/paddle/operators/elementwise_op.h +++ b/paddle/operators/elementwise_op.h @@ -26,9 +26,9 @@ class ElementwiseOp : public framework::OperatorWithKernel { using Tensor = framework::Tensor; void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE(ctx->HasInput("X"), - "Input(X) of elementwise op should not be null"); + "Input(X) of elementwise op should not be null."); PADDLE_ENFORCE(ctx->HasInput("Y"), - "Input(Y) of elementwise op should not be null"); + "Input(Y) of elementwise op should not be null."); PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) of elementwise op should not be null."); @@ -45,12 +45,12 @@ class ElementwiseOpMaker : public framework::OpProtoAndCheckerMaker { public: ElementwiseOpMaker(OpProto* proto, OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "(Tensor) The first input tensor of elementwise op"); - AddInput("Y", "(Tensor) The second input tensor of elementwise op"); - AddOutput("Out", "The output of elementwise op"); + AddInput("X", "(Tensor), The first input tensor of elementwise op."); + AddInput("Y", "(Tensor), The second input tensor of elementwise op."); + AddOutput("Out", "The output of elementwise op."); AddAttr("axis", - "(int, default -1) The starting dimension index " - "for broadcasting Y onto X") + "(int, default -1). The start dimension index " + "for broadcasting Y onto X.") .SetDefault(-1) .EqualGreaterThan(-1); comment_ = R"DOC( @@ -58,19 +58,18 @@ Limited Elementwise {name} Operator. The equation is: -.. math:: - {equation} +$${equation}$$ -X is a tensor of any dimension and the dimensions of tensor Y must be smaller than -or equal to the dimensions of X. +$X$ is a tensor of any dimension and the dimensions of tensor $Y$ must be +smaller than or equal to the dimensions of $X$. There are two cases for this operator: -1. The shape of Y is same with X; -2. The shape of Y is a subset of X. +1. The shape of $Y$ is same with $X$; +2. The shape of $Y$ is a subset of $X$. For case 2: -Y will be broadcasted to match the shape of X and axis should be -the starting dimension index for broadcasting Y onto X. +$Y$ will be broadcasted to match the shape of $X$ and axis should be +set to index of the start dimension to broadcast $Y$ onto $X$. For example .. code-block:: python @@ -81,7 +80,8 @@ For example shape(X) = (2, 3, 4, 5), shape(Y) = (3, 4), with axis=1 shape(X) = (2, 3, 4, 5), shape(Y) = (2), with axis=0 -Either of the inputs X and Y or none can carry the LoD (Level of Details) information. However, the output only shares the LoD information with input X. +Either of the inputs $X$ and $Y$ or none can carry the LoD (Level of Details) +information. However, the output only shares the LoD information with input $X$. )DOC"; AddComment(comment_); diff --git a/paddle/operators/expand_op.cc b/paddle/operators/expand_op.cc index 08fa91ed72aa41ed2f513c090b9085410bb5cc47..043c93654d33f7c105c89960e18ec72d3557237d 100644 --- a/paddle/operators/expand_op.cc +++ b/paddle/operators/expand_op.cc @@ -58,21 +58,21 @@ class ExpandOpMaker : public framework::OpProtoAndCheckerMaker { ExpandOpMaker(OpProto* proto, OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", - "(Tensor, default Tensor) A tensor with rank in [1, 6]." - "X is the input tensor to be expanded."); + "(Tensor, default Tensor). A tensor with rank in [1, 6]." + "X is the input to be expanded."); AddOutput("Out", - "(Tensor, default Tensor) A tensor with rank in [1, 6]." - "The rank of Output(Out) is same as Input(X) except that each " - "dimension size of Output(Out) is equal to corresponding " - "dimension size of Input(X) multiplying corresponding value of " - "Attr(expand_times)."); + "(Tensor, default Tensor). A tensor with rank in [1, 6]." + "The rank of Output(Out) have the same with Input(X). " + "After expanding, size of each dimension of Output(Out) is equal " + "to size of the corresponding dimension of Input(X) multiplying " + "the corresponding value given by Attr(expand_times)."); AddAttr>("expand_times", "Expand times number for each dimension."); AddComment(R"DOC( Expand operator tiles the input by given times number. You should set times number for each dimension by providing attribute 'expand_times'. The rank of X -should be in [1, 6]. Please notice that size of 'expand_times' must be same with -X's rank. Following is a using case: +should be in [1, 6]. Please note that size of 'expand_times' must be the same +with X's rank. Following is a using case: Input(X) is a 3-D tensor with shape [2, 3, 1]: diff --git a/python/paddle/trainer_config_helpers/evaluators.py b/python/paddle/trainer_config_helpers/evaluators.py index 95797fba8f67bacb421f5c2813ad6332bc53cbc9..0eeaf7eabb179f19d2af8dafe821f7baa153fead 100644 --- a/python/paddle/trainer_config_helpers/evaluators.py +++ b/python/paddle/trainer_config_helpers/evaluators.py @@ -16,13 +16,22 @@ from paddle.trainer.config_parser import * from default_decorators import * __all__ = [ - "evaluator_base", "classification_error_evaluator", "auc_evaluator", - "pnpair_evaluator", "precision_recall_evaluator", "ctc_error_evaluator", - "chunk_evaluator", "sum_evaluator", "column_sum_evaluator", - "value_printer_evaluator", "gradient_printer_evaluator", - "maxid_printer_evaluator", "maxframe_printer_evaluator", - "seqtext_printer_evaluator", "classification_error_printer_evaluator", - "detection_map_evaluator" + "evaluator_base", + "classification_error_evaluator", + "auc_evaluator", + "pnpair_evaluator", + "precision_recall_evaluator", + "ctc_error_evaluator", + "chunk_evaluator", + "sum_evaluator", + "column_sum_evaluator", + "value_printer_evaluator", + "gradient_printer_evaluator", + "maxid_printer_evaluator", + "maxframe_printer_evaluator", + "seqtext_printer_evaluator", + "classification_error_printer_evaluator", + "detection_map_evaluator", ] diff --git a/python/paddle/v2/fluid/framework.py b/python/paddle/v2/fluid/framework.py index 8042febfed7ed7db2f6d1507142b17079aa00fd8..4f8366b64039b2edcd4c273439c87397bdc33595 100644 --- a/python/paddle/v2/fluid/framework.py +++ b/python/paddle/v2/fluid/framework.py @@ -116,8 +116,8 @@ def _debug_string_(proto, throw_on_error=True): """ error_fields = list() if not proto.IsInitialized(error_fields) and throw_on_error: - raise ValueError("{0} are not initialized\nThe message is {1}".format( - error_fields, proto)) + raise ValueError("{0} are not initialized.\nThe message is {1}:\n". + format(error_fields, proto)) return proto.__str__() @@ -374,12 +374,13 @@ class Operator(object): >>> outputs={"Out": [var1]}) Args: - block(Block): The block has the current operator - desc(core.OpDesc): The protobuf description + block(Block): The block has the current operator. + desc(core.OpDesc): The protobuf description. type(str): The type of operator. inputs(dict): The input dictionary. Key is the input parameter name. Value is a list of variables. - outputs(dict): The output dictionary. Has same format with inputs + outputs(dict): The output dictionary which has the same format with + inputs. attrs(dict): The attributes dictionary. Key is attribute name. Value is the attribute value. The attribute type should be as same as the type registered in C++ @@ -436,10 +437,11 @@ class Operator(object): for m in proto.outputs: need.add(m.name) if not given == need: - raise ValueError( - "Incorrect setting for output(s) of operator \"%s\". Need: [%s] Given: [%s]" - % (type, ", ".join(str(e) for e in need), ", ".join( - str(e) for e in given))) + raise ValueError(("Incorrect setting for output(s) of " + "operator \"%s\". Need: [%s] Given: [%s]") % + (type, ", ".join(str(e) + for e in need), ", ".join( + str(e) for e in given))) for out_proto in proto.outputs: out_args = outputs[out_proto.name] @@ -818,9 +820,8 @@ class Program(object): if isinstance(t, Variable): t = t.op else: - raise ValueError( - "All targets of prune() can only be Variable or Operator." - ) + raise ValueError(("All targets of prune() can only be " + "Variable or Operator.")) targets_idx.append([t.block.idx, t.idx]) res = Program() diff --git a/python/paddle/v2/fluid/layers/io.py b/python/paddle/v2/fluid/layers/io.py index 6177f0b4d71cdda8637fc40e3e65c72842bf7439..a43e0ee4def668bf7033f37cfa1a3f59d10a88d0 100644 --- a/python/paddle/v2/fluid/layers/io.py +++ b/python/paddle/v2/fluid/layers/io.py @@ -28,9 +28,9 @@ def data(name, **Data Layer** This function takes in the input and based on whether data has - to be returned back as a minibatch, it creates the global variable using + to be returned back as a minibatch, it creates the global variable by using the helper functions. The global variables can be accessed by all the - following operations and layers in the graph. + following operators in the graph. All the input variables of this function are passed in as local variables to the LayerHelper constructor. diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py index 4e8fd407c9983e2827d3137fa4f49a8425d5dce2..cfa60d2924a6af4fb8081e2b90853e262d95ff0e 100644 --- a/python/paddle/v2/fluid/layers/nn.py +++ b/python/paddle/v2/fluid/layers/nn.py @@ -50,6 +50,7 @@ __all__ = [ 'sequence_last_step', 'dropout', 'split', + 'l2_normalize', ] @@ -945,7 +946,8 @@ def pool2d(input, pool_type, pool_stride=None, pool_padding=None, - global_pooling=False): + global_pooling=False, + name=None): """ This function adds the operator for pooling in 2 dimensions, using the pooling configurations mentioned in input parameters. @@ -991,7 +993,8 @@ def batch_norm(input, epsilon=1e-05, param_attr=None, bias_attr=None, - data_layout='NCHW'): + data_layout='NCHW', + name=None): """ This function helps create an operator to implement the BatchNorm layer using the configurations from the input parameters. @@ -1067,7 +1070,7 @@ def batch_norm(input, return helper.append_activation(batch_norm_out) -def beam_search_decode(ids, scores): +def beam_search_decode(ids, scores, name=None): helper = LayerHelper('beam_search_decode', **locals()) sentence_ids = helper.create_tmp_variable(dtype=ids.dtype) sentence_scores = helper.create_tmp_variable(dtype=ids.dtype) @@ -1091,7 +1094,8 @@ def conv2d_transpose(input, padding=None, stride=None, dilation=None, - param_attr=None): + param_attr=None, + name=None): """ The transpose of conv2d layer. @@ -1118,8 +1122,8 @@ def conv2d_transpose(input, contain two integers, (dilation_H, dilation_W). Otherwise, the dilation_H = dilation_W = dilation. param_attr: Parameter Attribute. - main_program(Program): the main program - startup_program(Program): the startup program + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. Returns: Variable: Output image. @@ -1183,7 +1187,7 @@ def conv2d_transpose(input, return out -def sequence_expand(x, y): +def sequence_expand(x, y, name=None): """Sequence Expand Layer. This layer will expand the input variable **x** according to LoD information of **y**. And the following examples will explain how sequence_expand works: @@ -1227,6 +1231,8 @@ def sequence_expand(x, y): Args: x (Variable): The input variable which is a Tensor or LoDTensor. y (Variable): The input variable which is a LoDTensor. + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. Returns: Variable: The expanded variable which is a LoDTensor. @@ -1253,7 +1259,8 @@ def lstm_unit(x_t, cell_t_prev, forget_bias=0.0, param_attr=None, - bias_attr=None): + bias_attr=None, + name=None): """Lstm unit layer. The equation of a lstm step is: .. math:: @@ -1300,6 +1307,8 @@ def lstm_unit(x_t, initializer, name etc. bias_attr (ParamAttr): The attributes of bias weights, if not False, bias weights will be created and be set to default value. + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. Returns: tuple: The hidden value and cell value of lstm unit. @@ -1365,7 +1374,7 @@ def lstm_unit(x_t, return h, c -def reduce_sum(input, dim=None, keep_dim=False): +def reduce_sum(input, dim=None, keep_dim=False, name=None): """ Computes the sum of tensor elements over the given dimension. @@ -1379,6 +1388,8 @@ def reduce_sum(input, dim=None, keep_dim=False): keep_dim (bool): Whether to reserve the reduced dimension in the output Tensor. The result tensor will have one fewer dimension than the :attr:`input` unless :attr:`keep_dim` is true. + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. Returns: Variable: The reduced Tensor variable. @@ -1409,7 +1420,7 @@ def reduce_sum(input, dim=None, keep_dim=False): return out -def reduce_mean(input, dim=None, keep_dim=False): +def reduce_mean(input, dim=None, keep_dim=False, name=None): """ Computes the mean of tensor elements over the given dimension. @@ -1423,6 +1434,8 @@ def reduce_mean(input, dim=None, keep_dim=False): keep_dim (bool): Whether to reserve the reduced dimension in the output Tensor. The result tensor will have one fewer dimension than the :attr:`input` unless :attr:`keep_dim` is true. + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. Returns: Variable: The reduced Tensor variable. @@ -1453,7 +1466,7 @@ def reduce_mean(input, dim=None, keep_dim=False): return out -def reduce_max(input, dim=None, keep_dim=False): +def reduce_max(input, dim=None, keep_dim=False, name=None): """ Computes the maximum of tensor elements over the given dimension. @@ -1467,6 +1480,8 @@ def reduce_max(input, dim=None, keep_dim=False): keep_dim (bool): Whether to reserve the reduced dimension in the output Tensor. The result tensor will have one fewer dimension than the :attr:`input` unless :attr:`keep_dim` is true. + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. Returns: Variable: The reduced Tensor variable. @@ -1497,7 +1512,7 @@ def reduce_max(input, dim=None, keep_dim=False): return out -def reduce_min(input, dim=None, keep_dim=False): +def reduce_min(input, dim=None, keep_dim=False, name=None): """ Computes the minimum of tensor elements over the given dimension. @@ -1511,6 +1526,8 @@ def reduce_min(input, dim=None, keep_dim=False): keep_dim (bool): Whether to reserve the reduced dimension in the output Tensor. The result tensor will have one fewer dimension than the :attr:`input` unless :attr:`keep_dim` is true. + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. Returns: Variable: The reduced Tensor variable. @@ -1541,20 +1558,22 @@ def reduce_min(input, dim=None, keep_dim=False): return out -def split(input, num_or_sections, dim=-1): +def split(input, num_or_sections, dim=-1, name=None): """ - Splits the tensor into multiple sub-tensors. + Split the input tensor into multiple sub-tensors. Args: input (Variable): The input variable which is a Tensor or LoDTensor. - num_or_sections (int|list): If :attr:`num_or_sections` is an integer, - then the integer indicates the number of equal sized sub-tensors - that the tensor will be divided into. If :attr:`num_or_sections` - is a list of integers, the length of list indicates the number of - sub-tensors and the integers indicate the sizes of sub-tensors' + num_or_sections (int|list): If :attr:`num_or_sections` is an integer, + then the integer indicates the number of equal sized sub-tensors + that the tensor will be divided into. If :attr:`num_or_sections` + is a list of integers, the length of list indicates the number of + sub-tensors and the integers indicate the sizes of sub-tensors' :attr:`dim` dimension orderly. - dim (int): The dimension along which to split. If :math:`dim < 0`, the + dim (int): The dimension along which to split. If :math:`dim < 0`, the dimension to split along is :math:`rank(input) + dim`. + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. Returns: List: The list of segmented tensor variables. @@ -1597,3 +1616,84 @@ def split(input, num_or_sections, dim=-1): 'axis': dim }) return outs + + +def l2_normalize(x, axis, epsilon=1e-12, name=None): + """ + **L2 normalize Layer** + + The l2 normalize layer normalizes `x` along dimension `axis` using an L2 + norm. For a 1-D tensor (`dim` is fixed to 0), this layer computes + + output = x / sqrt(max(sum(x**2), epsilon)) + + For `x` with more dimensions, this layer independently normalizes each 1-D + slice along dimension `axis`. + + Args: + x(Variable|list): The input tensor to l2_normalize layer. + axis(int): Dimension along which to normalize the input. + epsilon(float): A lower bound value for `x`'s l2 norm. sqrt(epsilon) will + be used as the divisor if the l2 norm of `x` is less than + sqrt(epsilon). + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. + + + Returns: + Variable: The output tensor variable. + + Examples: + .. code-block:: python + + data = fluid.layers.data(name="data", + shape=(3, 17, 13), + dtype="float32") + fc = fluid.layers.l2_normalize(x=data, axis=1) + """ + + if len(x.shape) == 1: axis = 0 + + helper = LayerHelper("l2_normalize", **locals()) + + square = helper.create_tmp_variable(dtype=x.dtype) + helper.append_op(type="square", inputs={"X": x}, outputs={"Out": square}) + + reduced_sum = helper.create_tmp_variable(dtype=x.dtype) + helper.append_op( + type="reduce_sum", + inputs={"X": square}, + outputs={"Out": reduced_sum}, + attrs={ + "dim": 1 if axis is None else axis, + "keep_dim": True, + "reduce_all": False + }) + + # TODO(caoying) A lower bound value epsilon for the norm is needed to + # imporve the numeric stability of reciprocal. This requires a maximum_op. + rsquare = helper.create_tmp_variable(dtype=x.dtype) + helper.append_op( + type="reciprocal", inputs={"X": reduced_sum}, outputs={"Out": rsquare}) + + # TODO(caoying) the current elementwise_mul operator does not support a + # general broadcast rule which broadcasts input(Y) to have the same + # dimension with Input(X) starting from a specified dimension. So this + # exanpsion is requred. Once a general broadcast relu is spported, this + # expanding canbe removed. + rsquare_expanded = helper.create_tmp_variable(dtype=x.dtype) + expand_times = [1] * len(x.shape) + expand_times[axis] = int(x.shape[axis]) + helper.append_op( + type="expand", + inputs={"X": rsquare}, + outputs={"Out": rsquare_expanded}, + attrs={"expand_times": expand_times}) + + out = helper.create_tmp_variable(dtype=x.dtype) + helper.append_op( + type="elementwise_mul", + inputs={"X": x, + "Y": rsquare_expanded}, + outputs={"Out": out}) + return out diff --git a/python/paddle/v2/fluid/tests/test_normalization_wrapper.py b/python/paddle/v2/fluid/tests/test_normalization_wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..caff63011d038d785472cb38a26a51f3f4cc9288 --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_normalization_wrapper.py @@ -0,0 +1,95 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. +import unittest +import paddle.v2.fluid as fluid +import paddle.v2.fluid.core as core +import numpy as np + + +class TestNormalization(unittest.TestCase): + data_desc = {"name": "input", "shape": (2, 3, 7)} + + def gen_random_input(self): + """Generate random input data. + """ + self.data = np.random.random( + size=self.data_desc["shape"]).astype("float32") + + def set_program(self, axis, epsilon): + """Build the test program. + """ + data = fluid.layers.data( + name=self.data_desc["name"], + shape=self.data_desc["shape"], + dtype="float32", + append_batch_size=False) + data.stop_gradient = False + l2_norm = fluid.layers.l2_normalize(x=data, axis=axis, epsilon=epsilon) + out = fluid.layers.reduce_sum(l2_norm, dim=None) + + fluid.backward.append_backward(loss=out) + self.fetch_list = [l2_norm] + + def run_program(self): + """Run the test program. + """ + places = [core.CPUPlace()] + if core.is_compile_gpu(): + places.append(core.CUDAPlace(0)) + + for place in places: + self.set_inputs(place) + exe = fluid.Executor(place) + + output = exe.run(fluid.default_main_program(), + feed=self.inputs, + fetch_list=self.fetch_list, + return_numpy=True) + self.op_output = output + + def set_inputs(self, place): + """Set the randomly generated data to the test program. + """ + self.inputs = {} + tensor = fluid.Tensor() + tensor.set(self.data, place) + self.inputs[self.data_desc["name"]] = tensor + + def l2_normalize(self, data, axis, epsilon): + """ Compute the groundtruth. + """ + output = data * np.reciprocal( + np.sum(np.square(data), axis=axis, keepdims=True)) + return output + + def test_l2_normalize(self): + """ Test the python wrapper for l2_normalize. + """ + axis = 1 + #TODO(caoying) epsilon is not supported due to lack of a maximum_op. + epsilon = 1e-6 + + self.gen_random_input() + + self.set_program(axis, epsilon) + self.run_program() + + expect_output = self.l2_normalize(self.data, axis, epsilon) + + # check output + self.assertTrue(np.allclose(self.op_output, expect_output, atol=0.001)) + + +if __name__ == '__main__': + unittest.main()