diff --git a/paddle/fluid/operators/elementwise_op_function.h b/paddle/fluid/operators/elementwise_op_function.h
index 0ee7291f04a57bb843ffe4c62e489ea9b575f7d0..2a4a611511138377b46102dbe9d956c8beecd1bd 100644
--- a/paddle/fluid/operators/elementwise_op_function.h
+++ b/paddle/fluid/operators/elementwise_op_function.h
@@ -314,7 +314,6 @@ EIGEN_FUNCTOR(Div, EIGEN_DIV);
 template
 void ElementwiseGradCompute(const framework::ExecutionContext& ctx,
-                            const framework::Tensor* x,
                             const framework::Tensor* y,
                             const framework::Tensor* out,
diff --git a/paddle/fluid/operators/print_op.cc b/paddle/fluid/operators/print_op.cc
index a76ba796fe4ed20ebc09d34fcebe564d70c267a5..7fa2b060afd2d3effa4136bd6e6bb376600bdb7e 100644
--- a/paddle/fluid/operators/print_op.cc
+++ b/paddle/fluid/operators/print_op.cc
@@ -46,7 +46,7 @@ struct Formater {
   }
  private:
-  void PrintMessage() { CLOG << std::time(nullptr) << "\t" << message; }
+  void PrintMessage() { CLOG << std::time(nullptr) << "\t" << message << "\t"; }
   void PrintName() {
     if (!name.empty()) {
       CLOG << "Tensor[" << name << "]" << std::endl;
     }
@@ -85,15 +85,16 @@ struct Formater {
     // print float
     if (dtype.hash_code() == typeid(float).hash_code()) {
       Display<float>(size);
-    }
-    if (dtype.hash_code() == typeid(double).hash_code()) {
+    } else if (dtype.hash_code() == typeid(double).hash_code()) {
       Display<double>(size);
-    }
-    if (dtype.hash_code() == typeid(int).hash_code()) {
+    } else if (dtype.hash_code() == typeid(int).hash_code()) {
       Display<int>(size);
-    }
-    if (dtype.hash_code() == typeid(int64_t).hash_code()) {
+    } else if (dtype.hash_code() == typeid(int64_t).hash_code()) {
       Display<int64_t>(size);
+    } else if (dtype.hash_code() == typeid(bool).hash_code()) {
+      Display<bool>(size);
+    } else {
+      CLOG << "\tdata: unprintable type: " << dtype.name() << std::endl;
     }
   }
@@ -182,6 +183,7 @@ class TensorPrintOp : public framework::OperatorBase {
     }

     Formater formater;
+    formater.message = Attr<std::string>("message");
     if (Attr<bool>("print_tensor_name")) {
       formater.name = printed_var_name;
     }
diff --git a/python/paddle/v2/fluid/layers/control_flow.py b/python/paddle/v2/fluid/layers/control_flow.py
index 800c11a53b83bb902276cb2eb5213ba000e403c7..1ca11bb35b0e39d1bc97dbd531c0ebcf62e18e74 100644
--- a/python/paddle/v2/fluid/layers/control_flow.py
+++ b/python/paddle/v2/fluid/layers/control_flow.py
@@ -174,7 +174,7 @@ def Print(input,
         print_tensor_type (bool): Print the tensor type.
         print_tensor_shape (bool): Print the tensor shape.
         print_tensor_lod (bool): Print the tensor lod.
-        print_phase (bool): Which phase to displace, including 'forward',
+        print_phase (str): Which phase to displace, including 'forward',
             'backward' and 'both'. If set to 'backward' or 'both', will
             print the gradients of input tensor.
diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py
index f5b64fee1dc82d0b2088191338cb3cb70f6b6b52..5f1842f5fb95e09d2874caa9e9de4ebeb7a99403 100644
--- a/python/paddle/v2/fluid/layers/nn.py
+++ b/python/paddle/v2/fluid/layers/nn.py
@@ -1579,7 +1579,7 @@ def layer_norm(input,
     """
     **Layer Normalization**

-    Assume feature vectors exist on dimensions 
+    Assume feature vectors exist on dimensions
     :attr:`begin_norm_axis ... rank(input)` and calculate the moment statistics
     along these dimensions for each feature vector :math:`a` with size
     :math:`H`, then normalize each feature vector using the corresponding
@@ -1600,13 +1600,13 @@ def layer_norm(input,

     Args:
         input(Variable): The input tensor variable.
-        scale(bool): Whether to learn the adaptive gain :math:`g` after 
+        scale(bool): Whether to learn the adaptive gain :math:`g` after
            normalization.
-        shift(bool): Whether to learn the adaptive bias :math:`b` after 
+        shift(bool): Whether to learn the adaptive bias :math:`b` after
            normalization.
-        begin_norm_axis(bool): The normalization will be performed along 
+        begin_norm_axis(bool): The normalization will be performed along
            dimensions from :attr:`begin_norm_axis` to :attr:`rank(input)`.
-        epsilon(float): The small value added to the variance to prevent 
+        epsilon(float): The small value added to the variance to prevent
            division by zero.
         param_attr(ParamAttr|None): The parameter attribute for the learnable
            gain :math:`g`.
@@ -2070,7 +2070,7 @@ def reduce_sum(input, dim=None, keep_dim=False, name=None):
             Tensor variable with a single element, otherwise must be in the
             range :math:`[-rank(input), rank(input))`. If :math:`dim < 0`,
             the dimension to reduce is :math:`rank + dim`.
-        keep_dim (bool): Whether to reserve the reduced dimension in the
+        keep_dim (bool|False): Whether to reserve the reduced dimension in the
             output Tensor. The result tensor will have one fewer dimension
             than the :attr:`input` unless :attr:`keep_dim` is true.
         name(str|None): A name for this layer(optional). If set None, the layer
@@ -3098,33 +3098,33 @@ def multiplex(inputs, index):
 def softmax_with_cross_entropy(logits, label, soft_label=False):
     """
     **Softmax With Cross Entropy Operator.**
-    
+
     Cross entropy loss with softmax is used as the output layer extensively.
     This operator computes the softmax normalized values for each row of the
     input tensor, after which cross-entropy loss is computed. This provides
     a more numerically stable gradient.
-    
+
     Because this operator performs a softmax on logits internally, it expects
     unscaled logits. This operator should not be used with the output of
     softmax operator since that would produce incorrect results.
-    
+
     When the attribute soft_label is set false, this operators expects mutually
     exclusive hard labels, each sample in a batch is in exactly one class with a
     probability of 1.0. Each sample in the batch will have a single label.
-    
+
     The equation is as follows:
-    
+
     1) Hard label (one-hot label, so every sample has exactly one class)
-    
+
     .. math::

         loss_j = -\\text{logit}_{label_j} +
        \\log\\left(\\sum_{i=0}^{K}\\exp(\\text{logit}_i)\\right), j = 1,..., K
-    
+
     2) Soft label (each sample can have a distribution over all classes)

     .. math::
-    
+
         loss_j = -\\sum_{i=0}^{K}\\text{label}_i
        \\left(\\text{logit}_i - \\log\\left(\\sum_{i=0}^{K}
        \\exp(\\text{logit}_i)\\right)\\right), j = 1,...,K
@@ -3169,7 +3169,7 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None):
     The operator takes the first dimension of X and Y as batch size.
     For each instance, it computes the smooth l1 loss element by element first
     and then sums all the losses. So the shape of Out is [batch_size, 1].
-    
+
     Args:
         x (Variable): A tensor with rank at least 2. The input value of smooth
             l1 loss op with shape [batch_size, dim1, ..., dimN].
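For reference, a minimal usage sketch of the behaviour this patch touches (an illustration, not part of the diff): the `message` attribute that `TensorPrintOp` now copies into `Formater` is set from the Python `Print` layer in `control_flow.py`, and `print_phase` is the string documented above. The layer and argument names below follow the old `paddle.v2.fluid` API; the surrounding network (a `data` layer feeding an `fc` layer) is assumed for the example.

    import paddle.v2.fluid as fluid

    x = fluid.layers.data(name='x', shape=[4], dtype='float32')
    # The message given here is what the patched Formater::PrintMessage()
    # emits (now followed by a tab); print_phase is a string, per the
    # corrected docstring, and 'forward' prints only the forward-pass values.
    fluid.layers.Print(x, message='after data layer', print_phase='forward')
    y = fluid.layers.fc(input=x, size=2)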