SmoothL1 loss

1fd00758 · dangqingqing · 68b958c3 · 1fd00758 · 1fd00758 · 1fd00758
10 changed files
--- a/doc/api/v1/trainer_config_helpers/layers.rst
+++ b/doc/api/v1/trainer_config_helpers/layers.rst
@@ -498,6 +498,12 @@ hsigmoid
    :members: hsigmoid
    :noindex:
+smooth_l1
+---------
+..  automodule:: paddle.trainer_config_helpers.layers
+    :members: smooth_l1
+    :noindex:
 Check Layer 
 ============

--- a/paddle/gserver/layers/CostLayer.cpp
+++ b/paddle/gserver/layers/CostLayer.cpp
@@ -217,7 +217,7 @@ void SmoothL1CostLayer::forwardImp(Matrix& output,
    targetCpu->copyFrom(target);
    outputCpu->copyFrom(output);
    labelCpu->copyFrom(*label.value);
-    targetCpu->smoothL1(*outputCpu, *(labelCpu));
+    targetCpu->smoothL1(*outputCpu, *labelCpu);
    target.copyFrom(*targetCpu);
  } else {
    target.smoothL1(output, *label.value);

--- a/paddle/gserver/layers/CostLayer.h
+++ b/paddle/gserver/layers/CostLayer.h
@@ -91,8 +91,8 @@ public:
 *
 * [1] Jacob Devlin, Rabih Zbib, Zhongqiang Huang, Thomas Lamar,
 *     Richard Schwartz, and John Makhoul. Fast and robust neural
- *     network joint models for statistical machine translation.
+ *     network joint models for statistical machine translation.
- *     In Proceedings of the ACL 2014 Conference.
+ *     In Proceedings of the ACL 2014 Conference.
 */
 class MultiClassCrossEntropyWithSelfNorm : public CostLayer {
 public:
@@ -164,9 +164,11 @@ public:
 * tasks.
 * \f[
 * L =
- *   (output - label)^2 * 0.5  / -1 < (output - label) < 1 /
+ *   0.5 * x^2    / if |x| < 1 /
- *   (output - label) - 0.5    / otherwise  /
+ *   |x| - 0.5    / otherwise /
 * \f]
+ *
+ * x = output - label
 */
 class SmoothL1CostLayer : public CostLayer {
 public:

--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -1685,7 +1685,7 @@ TEST(Layer, smooth_l1) {
  config.layerConfig.add_inputs();
  for (auto useGpu : {false, true}) {
-    testLayerGrad(config, "smooth_l1", 100, false, useGpu, false, 2.0);
+    testLayerGrad(config, "smooth_l1", 100, false, useGpu, false);
  }
 }

--- a/paddle/math/Matrix.cpp
+++ b/paddle/math/Matrix.cpp
@@ -3616,17 +3616,18 @@ void CpuMatrix::smoothL1(Matrix& output, Matrix& label) {
  CHECK_EQ(output.getHeight(), numSamples);
  CHECK_EQ(label.getWidth(), dim);
  CHECK_EQ(getWidth(), (size_t)1);
-  real* out = output.getData();
  real* cost = getData();
+  real* out = output.getData();
  real* lbl = label.getData();
-  for (size_t i = 0; i < numSamples; ++i, out += dim, cost += dim, lbl += dim) {
+  for (size_t i = 0; i < numSamples; ++i, out += dim, lbl += dim) {
    for (size_t j = 0; j < dim; ++j) {
-      cost[j] = std::fabs(out[j] - lbl[j]);
+      real absVal = std::fabs(out[j] - lbl[j]);
-      if (cost[j] < 1.0)
+      if (absVal < 1.0)
-        cost[j] = 0.5 * cost[j] * cost[j];
+        cost[i] += 0.5 * absVal * absVal;
      else
-        cost[j] = cost[j] - 0.5;
+        cost[i] += absVal - 0.5;
    }
  }
 }
@@ -3640,17 +3641,20 @@ void CpuMatrix::smoothL1Bp(Matrix& output, Matrix& label) {
  CHECK_EQ(label.getHeight(), numSamples);
  CHECK_EQ(output.getHeight(), numSamples);
  CHECK_EQ(label.getWidth(), dim);
-  CHECK_EQ(getWidth(), (size_t)1);
+  CHECK_EQ(getWidth(), dim);
  real* out = output.getData();
-  real* cost = getData();
  real* lbl = label.getData();
+  real* grad = getData();
-  // f'(x) = x         if |x| < 1
+  for (size_t i = 0; i < numSamples; ++i, out += dim, grad += dim, lbl += dim) {
-  //       = sign(x)   otherwise
-  for (size_t i = 0; i < numSamples; ++i, out += dim, cost += dim, lbl += dim) {
    for (size_t j = 0; j < dim; ++j) {
-      cost[j] = out[j] - lbl[j];
+      real val = out[j] - lbl[j];
-      if (std::fabs(cost[j]) >= 1) cost[j] = (0 < cost[j]) - (cost[j] < 0);
+      if (std::fabs(val) < 1) {
+        grad[j] += val;
+      } else {
+        grad[j] += (real(0) < val) - (val < real(0));
+      }
    }
  }
 }

--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -2119,6 +2119,7 @@ define_cost('MultiBinaryLabelCrossEntropy', 'multi_binary_label_cross_entropy')
 define_cost('SoftBinaryClassCrossEntropy', 'soft_binary_class_cross_entropy')
 define_cost('HuberTwoClass', 'huber')
 define_cost('SumCost', 'sum_cost')
+define_cost('SmoothL1Cost', 'smooth_l1')
 @config_layer('hsigmoid')

--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -116,6 +116,7 @@ __all__ = [
    'spp_layer',
    'pad_layer',
    'eos_layer',
+    'smooth_l1_cost',
    'layer_support',
 ]
@@ -201,6 +202,7 @@ class LayerType(object):
    SOFT_BIN_CLASS_CROSS_ENTROPY = "soft_binary_class_cross_entropy"
    MULTI_BIN_LABEL_CROSS_ENTROPY = "multi_binary_label_cross_entropy"
    SUM_COST = "sum_cost"
+    SMOOTH_L1 = "smooth_l1"
    @staticmethod
    def is_layer_type(type_name):
@@ -5249,8 +5251,6 @@ def multi_binary_label_cross_entropy(input,
    :type input: LayerOutput
    :param label: The input label.
    :type label: LayerOutput
-    :param type: The type of cost.
-    :type type: basestring
    :param name: The name of this layers. It is not necessary.
    :type name: None|basestring
    :param coeff: The coefficient affects the gradient in the backward.
@@ -5279,3 +5279,56 @@ def multi_binary_label_cross_entropy(input,
        LayerType.MULTI_BIN_LABEL_CROSS_ENTROPY,
        parents=[input, label],
        size=1)
+@wrap_name_default()
+@layer_support()
+def smooth_l1_cost(input, label, name=None, layer_attr=None):
+    """
+    This is a smoothed variant of the L1 loss. It requires that the
+    sizes of input and label are equal.
+    More details can be found by referring to `Fast R-CNN
+    <https://arxiv.org/pdf/1504.08083v2.pdf>`_
+    .. math::
+        L = \sum_{i} smooth_{L1}(input_i - label_i)
+    in which
+    .. math::
+        smooth_{L1}(x) =
+        \begin{cases}
+        0.5x^2& \text{if} |x| < 1 \\
+        |x|-0.5& \text{otherwise}
+        \end{cases}
+    .. code-block:: python
+       cost = smooth_l1_cost(input=input_layer,
+                             label=label_layer)
+    :param input: The input layer.
+    :type input: LayerOutput
+    :param label: The input label.
+    :type label: LayerOutput
+    :param name: The name of this layer. It is optional.
+    :type name: None|basestring
+    :param layer_attr: Extra Layer Attribute.
+    :type layer_attr: ExtraLayerAttribute
+    :return: LayerOutput object.
+    :rtype: LayerOutput
+    """
+    assert isinstance(input, LayerOutput)
+    assert isinstance(label, LayerOutput)
+    assert input.size == label.size
+    Layer(
+        name=name,
+        type=LayerType.SMOOTH_L1,
+        inputs=[input.name, label.name],
+        **ExtraLayerAttribute.to_kwargs(layer_attr))
+    return LayerOutput(
+        name, LayerType.SMOOTH_L1, parents=[input, label], size=1)
--- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
+++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
@@ -5,6 +5,6 @@ last_first_seq test_expand_layer test_ntm_layers test_hsigmoid
 img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cost_layers
 test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight
 test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
-test_seq_concat_reshape)
+test_seq_concat_reshape test_smooth_l1)
 export whole_configs=(test_split_datasource)
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_smooth_l1.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_smooth_l1.protostr
+type: "nn"
+layers {
+  name: "input"
+  type: "data"
+  size: 300
+  active_type: ""
+}
+layers {
+  name: "label"
+  type: "data"
+  size: 300
+  active_type: ""
+}
+layers {
+  name: "__smooth_l1_cost_0__"
+  type: "smooth_l1"
+  size: 1
+  active_type: ""
+  inputs {
+    input_layer_name: "input"
+  }
+  inputs {
+    input_layer_name: "label"
+  }
+  coeff: 1.0
+}
+input_layer_names: "input"
+input_layer_names: "label"
+output_layer_names: "__smooth_l1_cost_0__"
+sub_models {
+  name: "root"
+  layer_names: "input"
+  layer_names: "label"
+  layer_names: "__smooth_l1_cost_0__"
+  input_layer_names: "input"
+  input_layer_names: "label"
+  output_layer_names: "__smooth_l1_cost_0__"
+  is_recurrent_layer_group: false
+}
--- a/python/paddle/trainer_config_helpers/tests/configs/test_smooth_l1.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_smooth_l1.py
+from paddle.trainer_config_helpers import *
+data = data_layer(name='input', size=300)
+lbl = data_layer(name='label', size=300)
+smooth_l1 = smooth_l1_cost(input=data, label=lbl)
+outputs(smooth_l1)