Commit 27a99bfb, authored by: Luo Tao

Add base class for huber_regression_cost and huber_classification_cost

Parent: 7f9af125
@@ -409,9 +409,9 @@ multi_binary_label_cross_entropy_cost
 .. autoclass:: paddle.v2.layer.multi_binary_label_cross_entropy_cost
     :noindex:
-huber_cost
-----------
-.. autoclass:: paddle.v2.layer.huber_cost
+huber_classification_cost
+-------------------------
+.. autoclass:: paddle.v2.layer.huber_classification_cost
     :noindex:
 lambda_cost
......
@@ -572,13 +572,8 @@ void MultiBinaryLabelCrossEntropy::backwardImp(Matrix& output,
   }
 }
-//
-// Huber loss for robust 2-classes classification
-//
-REGISTER_LAYER(huber, HuberTwoClassification);
-bool HuberTwoClassification::init(const LayerMap& layerMap,
-                                  const ParameterMap& parameterMap) {
+bool HuberCost::init(const LayerMap& layerMap,
+                     const ParameterMap& parameterMap) {
   CostLayer::init(layerMap, parameterMap);
   if (useGpu_) {
     tmpCpuInput_.reserve(inputLayers_.size());
@@ -589,9 +584,7 @@ bool HuberTwoClassification::init(const LayerMap& layerMap,
   return true;
 }
-void HuberTwoClassification::forwardImp(Matrix& output,
-                                        Argument& label,
-                                        Matrix& cost) {
+void HuberCost::forwardImp(Matrix& output, Argument& label, Matrix& cost) {
   if (useGpu_) {
     for (size_t i = 0; i < inputLayers_.size(); i++) {
       tmpCpuInput_[i].resizeAndCopyFrom(
@@ -599,12 +592,22 @@ void HuberTwoClassification::forwardImp(Matrix& output,
     }
     hl_stream_synchronize(HPPL_STREAM_DEFAULT);
   }
-  forwardImpIn(output, label, cost);
 }
-void HuberTwoClassification::forwardImpIn(Matrix& output,
-                                          Argument& label,
-                                          Matrix& target) {
+//
+// Huber loss for robust 2-classes classification
+//
+REGISTER_LAYER(huber_classification, HuberTwoClassification);
+bool HuberTwoClassification::init(const LayerMap& layerMap,
+                                  const ParameterMap& parameterMap) {
+  return HuberCost::init(layerMap, parameterMap);
+}
+void HuberTwoClassification::forwardImp(Matrix& output,
+                                        Argument& label,
+                                        Matrix& target) {
+  HuberCost::forwardImp(output, label, target);
   size_t numSamples = target.getHeight();
   CHECK(label.ids);
   CHECK_EQ((*label.ids).getSize(), numSamples);
@@ -627,25 +630,13 @@ void HuberTwoClassification::forwardImpIn(Matrix& output,
   target.copyFrom(cost.data(), numSamples);
 }
-void HuberTwoClassification::backwardImp(Matrix& outputValue,
+void HuberTwoClassification::backwardImp(Matrix& output,
                                          Argument& label,
-                                         Matrix& outputGrad) {
-  if (useGpu_) {
-    backwardImpIn(
-        *tmpCpuInput_[0].value, tmpCpuInput_[1], *tmpCpuInput_[0].grad);
-    outputGrad.copyFrom(*tmpCpuInput_[0].grad);
-  } else {
-    backwardImpIn(outputValue, label, outputGrad);
-  }
-}
-void HuberTwoClassification::backwardImpIn(Matrix& output,
-                                           Argument& label,
-                                           Matrix& outputG) {
+                                         Matrix& outputG) {
   size_t numSamples = output.getHeight();
-  real* out = output.getData();
-  real* grad = outputG.getData();
-  int* lbl = (*label.ids).getData();
+  real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData();
+  int* lbl = useGpu_ ? tmpCpuInput_[1].ids->getData() : (*label.ids).getData();
+  real* grad = useGpu_ ? tmpCpuInput_[0].grad->getData() : outputG.getData();
   for (size_t i = 0; i < numSamples; ++i) {
     int y = 2 * lbl[i] - 1;
     if (y * out[i] < -1)
@@ -653,8 +644,8 @@ void HuberTwoClassification::backwardImpIn(Matrix& output,
     else if (y * out[i] < 1)
       grad[i] += -2 * (1 - y * out[i]) * y;
   }
+  if (useGpu_) outputG.copyFrom(grad, numSamples);
 }
 /**
  * This cost layer compute the sum of its input as loss.
  * \f[
......
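The forward-pass body of HuberTwoClassification is largely collapsed in the hunks above, but the visible gradient line grad[i] += -2 * (1 - y * out[i]) * y, together with the loss cases quoted in the CostLayer.h doc comment below, pins down the piecewise form. Here is a minimal standalone sketch of the per-sample loss and gradient, assuming labels in {0, 1}; the y * f < -1 branch is collapsed in this diff, so its -4 * y * f form (the standard modified-Huber tail) is an assumption here, and both function names are illustrative rather than part of the patch:

// Per-sample modified Huber loss for two-class classification.
// y = 2 * label - 1 maps labels {0, 1} to {-1, +1}.
double huberTwoClassLoss(double f, int label) {
  int y = 2 * label - 1;
  double z = y * f;
  if (z < -1) return -4.0 * z;              // linear tail (assumed; branch collapsed in the diff)
  if (z < 1) return (1.0 - z) * (1.0 - z);  // quadratic zone, as in the doc comment
  return 0.0;                               // correct with margin >= 1: no loss
}

// Gradient w.r.t. f; the quadratic branch matches the visible
// grad[i] += -2 * (1 - y * out[i]) * y line in backwardImp.
double huberTwoClassGrad(double f, int label) {
  int y = 2 * label - 1;
  double z = y * f;
  if (z < -1) return -4.0 * y;              // derivative of the assumed linear tail
  if (z < 1) return -2.0 * (1.0 - z) * y;
  return 0.0;
}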
@@ -304,6 +304,23 @@ public:
                    Matrix& outputGrad) override;
 };
+
+/*
+ * A base layer for HuberRegressionLoss and HuberTwoClassification.
+ */
+class HuberCost : public CostLayer {
+public:
+  std::vector<Argument> tmpCpuInput_;
+
+  explicit HuberCost(const LayerConfig& config) : CostLayer(config) {}
+
+  bool init(const LayerMap& layerMap,
+            const ParameterMap& parameterMap) override;
+
+  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;
+
+  void backwardImp(Matrix& outputValue, Argument& label, Matrix& outputGrad) {}
+};
 /**
  * Huber loss for robust 2-classes classification.
  *
@@ -312,25 +329,19 @@ public:
 *  Loss = (1 - y * f)^2, if -1 < y * f < 1 \\
 *  Loss = 0, otherwise
 */
-class HuberTwoClassification : public CostLayer {
-  std::vector<Argument> tmpCpuInput_;
+class HuberTwoClassification : public HuberCost {
 public:
   explicit HuberTwoClassification(const LayerConfig& config)
-      : CostLayer(config) {}
+      : HuberCost(config) {}
   bool init(const LayerMap& layerMap,
             const ParameterMap& parameterMap) override;
   void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;
-  void forwardImpIn(Matrix& output, Argument& label, Matrix& cost);
   void backwardImp(Matrix& outputValue,
                    Argument& label,
                    Matrix& outputGrad) override;
-  void backwardImpIn(Matrix& outputValue, Argument& label, Matrix& outputGrad);
 };
 typedef std::shared_ptr<CostLayer> CostLayerPtr;
......
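The new base-class comment names HuberRegressionLoss, but only the classification layer is implemented in this commit. As a hypothetical sketch (grounded only in the commit title and the pattern of HuberTwoClassification above; nothing here is from the patch), a regression cost would slot into the hierarchy the same way:

// Hypothetical subclass, mirroring HuberTwoClassification; not part
// of this commit. HuberCost::forwardImp stages GPU inputs into
// tmpCpuInput_, so a subclass calls it first and then computes its
// loss on the CPU copies.
class HuberRegressionLoss : public HuberCost {
public:
  explicit HuberRegressionLoss(const LayerConfig& config)
      : HuberCost(config) {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;

  void backwardImp(Matrix& outputValue,
                   Argument& label,
                   Matrix& outputGrad) override;
};

A real implementation would also register itself with REGISTER_LAYER, as the classification layer does above.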
@@ -141,7 +141,7 @@ class CostLayerTest(unittest.TestCase):
         cost8 = layer.rank_cost(left=score, right=score, label=score)
         cost9 = layer.lambda_cost(input=inference, score=score)
         cost10 = layer.sum_cost(input=inference)
-        cost11 = layer.huber_cost(input=score, label=label)
+        cost11 = layer.huber_classification_cost(input=score, label=label)
         print layer.parse_network([cost1, cost2])
         print layer.parse_network([cost3, cost4])
......