diff --git a/paddle/operators/smooth_l1_loss_op.cc b/paddle/operators/smooth_l1_loss_op.cc index 758481943d463f22eb6c6e0be9a99ad99161da5b..ebf7b43700a7498aa18b5f648b0b8c2c4e7b442b 100644 --- a/paddle/operators/smooth_l1_loss_op.cc +++ b/paddle/operators/smooth_l1_loss_op.cc @@ -77,14 +77,17 @@ class SmoothL1LossOpMaker : public framework::OpProtoAndCheckerMaker { "A float scalar with default value 3.0.") .SetDefault(3.0); AddComment(R"DOC( -Compute smooth l1 loss for input and target. The operator take the 1st -dimension of input as batch size. For each instance, it will compute -smooth l1 loss element by element first and sum all losses to one value. -So the output shape is [batch_size, 1]. +Smooth L1 Loss Operator. + +This operator computes the smooth l1 loss for input and target. +The operator takes the first dimension of input as the batch size. +For each instance, it computes the smooth l1 loss element by element first +and then sums all the losses. So the resulting output shape +is [batch_size, 1]. The equation is: -loss = 0.5 * (sigma * (x-y))^2 if abs(x - y) < 1 / sigma^2 - abs(x - y) - 0.5 / sigma^2 otherwise +loss = $$0.5 * (\sigma * (x-y))^2$$ if $$|x - y| < 1 /({\sigma}^2)$$ + $$\frac{|x - y| - 0.5}{{\sigma}^2}$$ otherwise )DOC"); } diff --git a/paddle/operators/softmax_op.cc b/paddle/operators/softmax_op.cc index 00fd0b32a9b3c0dd9fedf7b7621b1f15e5c4ce93..93f89e33a73c5f4c6c0e5a8793a0abe7c692b656 100644 --- a/paddle/operators/softmax_op.cc +++ b/paddle/operators/softmax_op.cc @@ -44,20 +44,23 @@ class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker { "2-D with shape [batch_size, input_feature_dimensions]."); AddOutput("Y", "The normalized values with the same shape as X."); AddComment(R"DOC( -The input of softmax operator is a 2-D tensor with shape N x K (N is the +Softmax Operator. + +The input of the softmax operator is a 2-D tensor with shape N x K (N is the batch_size, K is the dimension of input feature). The output tensor has the same shape as the input tensor. For each row of the input tensor, the softmax operator squashes the K-dimensional vector of arbitrary real values to a K-dimensional vector of real -values in the range [0, 1] that add up to 1. Specifically, it computes the -exponential of the given dimension and the sum of exponential values of all -the other dimensions in the K-dimensional vector input. Then the ratio of the -exponential of the given dimension and the sum of exponential values of all -the other dimensions is the output of the softmax operator. +values in the range [0, 1] that add up to 1. +It computes the exponential of the given dimension and the sum of exponential +values of all the other dimensions in the K-dimensional vector input. +Then the ratio of the exponential of the given dimension and the sum of +exponential values of all the other dimensions is the output of the softmax +operator. For each row `i` and each column `j` in input X, we have: - Y[i, j] = exp(X[i, j]) / sum_j(exp(X[i, j])) + $$Y[i, j] = \frac{\exp(X[i, j])}{\sum_j(exp(X[i, j])}$$ )DOC"); } diff --git a/paddle/operators/softmax_with_cross_entropy_op.cc b/paddle/operators/softmax_with_cross_entropy_op.cc index 50497da1b70d39d2638240dd91035c9181124af9..a006e0a595057d019b443d22cea4bdf171a6ee0b 100644 --- a/paddle/operators/softmax_with_cross_entropy_op.cc +++ b/paddle/operators/softmax_with_cross_entropy_op.cc @@ -51,32 +51,34 @@ class SoftmaxWithCrossEntropyOpMaker "the given labels as soft labels.") .SetDefault(false); AddComment(R"DOC( -Cross entropy loss with softmax are used as the output layer extensively. This +Softmax With Cross Entropy Operator. + +Cross entropy loss with softmax is used as the output layer extensively. This operator computes the softmax normalized values for each row of the input -tensor, after which cross-entropy loss is then computed. This provides a more +tensor, after which cross-entropy loss is computed. This provides a more numerically stable gradient. -Because this operators performs a softmax on logits internally, it expects -unscaled logits. Please do not call this op with the output of softmax operator, -which will produce incorrect results. +Because this operator performs a softmax on logits internally, it expects +unscaled logits. This operator should not be used with the output of +softmax operator since that would produce incorrect results. When the attribute softLabel is set false, this operators expects mutually -exclusive hard labels, each sample in a batch is in exactly one class with -probabilities 1. Each sample in the batch with one and only one label. +exclusive hard labels, each sample in a batch is in exactly one class with a +probability of 1.0. Each sample in the batch will have a single label. -Equation: +The equation is as follows: -1) hard label (one-hot label) +1) Hard label (one-hot label, so every sample has exactly one class) -Loss_j = \f$ -\text{Logit}_{Label_j} + +$$Loss_j = \f$ -\text{Logit}_{Label_j} + \log\left(\sum_{i=0}^{K}\exp(\text{Logit}_i)\right), -j = 1, ..., K $\f +j = 1, ..., K $\f$$ -2) soft label (a distribution over all classes) +2) Soft label (each sample can have a distribution over all classes) -Loss_j = \f$ -\sum_{i=0}^{K}\text{Label}_i\left(\text{Logit}_i - +$$Loss_j = \f$ -\sum_{i=0}^{K}\text{Label}_i\left(\text{Logit}_i - \log\left(\sum_{i=0}^{K}\exp(\text{Logit}_i)\right)\right), -j = 1,...,K $\f +j = 1,...,K $\f$$ )DOC"); } diff --git a/paddle/operators/transpose_op.cc b/paddle/operators/transpose_op.cc index d785e57c830439ad80005d9a3d4bb77faf1ae1b9..94de3d5069017a7ca818e246ad574c4db92d8006 100644 --- a/paddle/operators/transpose_op.cc +++ b/paddle/operators/transpose_op.cc @@ -32,7 +32,7 @@ class TransposeOp : public framework::OperatorWithKernel { size_t axis_size = axis.size(); PADDLE_ENFORCE_EQ(x_rank, axis_size, - "the input tensor's rank(%d) " + "The input tensor's rank(%d) " "should be equal to the axis's size(%d)", x_rank, axis_size); @@ -64,12 +64,14 @@ class TransposeOpMaker : public framework::OpProtoAndCheckerMaker { AddOutput("Out", "(Tensor)The output tensor"); AddAttr>( "axis", - "(vector)a list of values, and the size of the list should be " + "(vector)A list of values, and the size of the list should be " "the same with the input tensor rank, the tensor will " "permute the axes according the the values given"); AddComment(R"DOC( -The Tensor will be permuted according to the axis values given. -The op is very much like the numpy.transpose function in python +Transpose Operator. + +The input tensor will be permuted according to the axis values given. +The op functions similar to how numpy.transpose works in python. For example: >> input = numpy.arange(6).reshape((2,3)) >> input @@ -83,6 +85,7 @@ For example: [2, 5]]) So, given a input tensor of shape(N, C, H, W) and the axis is {0, 2, 3, 1}, the output tensor shape will be (N, H, W, C) + )DOC"); } };