Add python interface for huber regression loss

test=develop

Add python interface for huber regression loss
test=develop
c550e0ce · minqiyang · 6776e928 · c550e0ce · c550e0ce
隐藏空白更改
内联并排

Showing with 63 addition and 13 deletion

paddle/fluid/operators/huber_loss_op.cc paddle/fluid/operators/huber_loss_op.cc +4 -3

python/paddle/fluid/layers/nn.py python/paddle/fluid/layers/nn.py +59 -10

未找到文件。
--- a/paddle/fluid/operators/huber_loss_op.cc
+++ b/paddle/fluid/operators/huber_loss_op.cc
@@ -124,8 +124,9 @@ REGISTER_OPERATOR(huber_loss, ops::HuberLossOp, ops::HuberLossOpMaker<float>,
                  paddle::framework::DefaultGradOpDescMaker<true>);
 REGISTER_OPERATOR(huber_loss_grad, ops::HuberLossGradOp);
 REGISTER_OP_CPU_KERNEL(
-    huber_loss,
+    huber_loss, ops::HuberLossKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::HuberLossKernel<paddle::platform::CPUDeviceContext, float>);
+    ops::HuberLossKernel<paddle::platform::CPUDeviceContext, double>);
 REGISTER_OP_CPU_KERNEL(
    huber_loss_grad,
-    ops::HuberLossGradKernel<paddle::platform::CPUDeviceContext, float>);
+    ops::HuberLossGradKernel<paddle::platform::CPUDeviceContext, float>,
+    ops::HuberLossGradKernel<paddle::platform::CPUDeviceContext, double>);
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -169,6 +169,7 @@ __all__ = [
    'log_loss',
    'add_position_encoding',
    'bilinear_tensor_product',
+    'huber_regression_loss',
 ]
@@ -4595,7 +4596,7 @@ def hsigmoid(input,
    """
    The hierarchical sigmoid operator is used to accelerate the training
    process of language model. This operator organizes the classes into a
-    complete binary tree, or you can use is_custom to pass your own tree to 
+    complete binary tree, or you can use is_custom to pass your own tree to
    implement hierarchical. Each leaf node represents a class(a word) and each
    internal node acts as a binary classifier. For each word there's a unique
    path from root to it's leaf node, hsigmoid calculate the cost for each
@@ -4611,7 +4612,7 @@ def hsigmoid(input,
        2. build a dict to store word_id -> word's leaf to root path, we call it path_table.
        3. build a dict to store word_id -> code of word's leaf to root path, we call it path_code. Code
         means label of each binary classification, using 1 indicate true, 0 indicate false.
-        4. now, each word should has its path and code along the path, you can pass a batch of path and code 
+        4. now, each word should has its path and code along the path, you can pass a batch of path and code
        related to the same batch of inputs.
@@ -4621,8 +4622,8 @@ def hsigmoid(input,
            and :math:`D` is the feature size.
        label (Variable): The tensor variable contains labels of training data.
            It's a tensor with shape is :math:`[N \\times 1]`.
-        num_classes: (int), The number of classes, must not be less than 2. with default tree this has to be set, 
+        num_classes: (int), The number of classes, must not be less than 2. with default tree this has to be set,
-            it should never be None under is_custom=False, but while is_custom is true, it should be non leaf num 
+            it should never be None under is_custom=False, but while is_custom is true, it should be non leaf num
            which indicates the num of classes using by binary classify.
        param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
             of hsigmoid. If it is set to None or one attribute of ParamAttr, hsigmoid
@@ -4635,15 +4636,15 @@ def hsigmoid(input,
             is not set, the bias is initialized zero. Default: None.
        name (str|None): A name for this layer(optional). If set None, the layer
             will be named automatically. Default: None.
-        path_table: (Variable|None) this variable can store each batch of samples' path to root, 
+        path_table: (Variable|None) this variable can store each batch of samples' path to root,
            it should be in leaf -> root order
-            path_table should have the same shape with path_code, and for each sample i path_table[i] indicates a np.array like 
+            path_table should have the same shape with path_code, and for each sample i path_table[i] indicates a np.array like
-            structure and each element in this array is indexes in parent nodes' Weight Matrix. 
+            structure and each element in this array is indexes in parent nodes' Weight Matrix.
-        path_code:  (Variable|None) this variable can store each batch of samples' code, 
+        path_code:  (Variable|None) this variable can store each batch of samples' code,
            each code consist with every code of parent nodes. it should be in leaf -> root order
-        is_custom: (bool|False)using user defined binary tree instead of default complete binary tree, if costum is 
+        is_custom: (bool|False)using user defined binary tree instead of default complete binary tree, if costum is
             set you need to set path_table/path_code/num_classes, otherwise num_classes should be set
-        is_sparse: (bool|False)using sparse update instead of dense update, if set, the gradient 
+        is_sparse: (bool|False)using sparse update instead of dense update, if set, the gradient
             of W and input will be sparse.
    Returns:
@@ -8770,3 +8771,51 @@ def bilinear_tensor_product(x,
    # add activation
    return helper.append_activation(out)
+def huber_regression_loss(input, label, delta):
+    """
+    Huber regression loss is a loss function used in robust regression.
+    Huber regression loss can evaluate the fitness of input to label.
+    Different from MSE loss, Huber regression loss is more robust for outliers.
+    When the difference between input and label is large than delta
+    .. math::
+        huber\_regression\_loss = delta * (label - input) - 0.5 * delta * delta
+    When the difference between input and label is less than delta
+    .. math::
+        huber\_regression\_loss = 0.5 * (label - input) * (label - input)
+    Args:
+        input (Variable): This input is a probability computed by the previous operator.
+                          The first dimension is batch size, and the last dimension is 1.
+        label (Variable): The groud truth whose first dimension is batch size
+                          and last dimension is 1.
+        delta (float): The parameter of huber regression loss, which controls
+                       the range of outliers
+    Returns:
+        huber\_regression\_loss (Variable): The huber regression loss with shape [batch_size, 1].
+    Examples:
+        .. code-block:: python
+            predictions = fluid.layers.softmax(x)
+            loss = fluid.layers.huber_regression_loss(input=predictions, label=label, 1.0)
+    """
+    helper = LayerHelper('huber_regression_loss', **locals())
+    residual = helper.create_variable_for_type_inference(
+        dtype=helper.input_dtype())
+    out = helper.create_variable_for_type_inference(dtype=helper.input_dtype())
+    helper.append_op(
+        type='huber_loss',
+        inputs={'X': input,
+                'Y': label},
+        outputs={'Out': out,
+                 'Residual': residual},
+        attrs={'delta': delta})
+    return out