From 984117458ca019335d4ba8cd111f0895800651aa Mon Sep 17 00:00:00 2001
From: yangyaming
Date: Sat, 9 Sep 2017 16:55:19 +0800
Subject: [PATCH] Finish modified huber loss op.

---
 paddle/operators/modified_huber_loss_op.cc    | 26 ++++++---
 paddle/operators/modified_huber_loss_op.cu    | 49 +++++++++++++++--
 paddle/operators/modified_huber_loss_op.h     | 52 ++++++------------
 .../tests/test_modified_huber_loss_op.py      | 55 +++++++++++++++++++
 4 files changed, 134 insertions(+), 48 deletions(-)
 create mode 100644 python/paddle/v2/framework/tests/test_modified_huber_loss_op.py

diff --git a/paddle/operators/modified_huber_loss_op.cc b/paddle/operators/modified_huber_loss_op.cc
index 631464bc84..631d406fd4 100644
--- a/paddle/operators/modified_huber_loss_op.cc
+++ b/paddle/operators/modified_huber_loss_op.cc
@@ -45,11 +45,25 @@ class ModifiedHuberLossOpMaker : public framework::OpProtoAndCheckerMaker {
   ModifiedHuberLossOpMaker(framework::OpProto* proto,
                            framework::OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X", "");
-    AddInput("Y", "");
-    AddOutput("intermediate_val", "").AsIntermediate();
-    AddOutput("Out", "");
-    AddComment("");
+    AddInput("X", "Input value of ModifiedHuberLossOp.");
+    AddInput("Y", "Target labels of ModifiedHuberLossOp.");
+    AddOutput("intermediate_val",
+              "Variable to save the intermediate result, which will be "
+              "reused in backward processing.")
+        .AsIntermediate();
+    AddOutput("Out", "Classification loss for input X.");
+    AddComment(R"DOC(
+Modified Huber loss is used in binary classification problems. The dimensions
+of input X and target Y are both (N, 1), and the output loss has the same
+dimension. Since target Y is not differentiable, calculating the gradient for
+Y is illegal. The formulation of modified Huber loss is:
+
+L(y, f(x)) = max(0, 1 - yf(x))^2  for yf(x) >= -1,
+             -4yf(x)              otherwise.
+
+Make sure the values of target label Y are in {0, 1} here. The operator will
+scale values of Y to {-1, +1} when computing the loss and gradients.
+)DOC");
   }
 };
 
@@ -64,7 +78,6 @@ class ModifiedHuberLossGradOp : public framework::OperatorWithKernel {
     auto* intermediate_val = context.Input<Tensor>("intermediate_val");
     auto* out_grad = context.Input<Tensor>(framework::GradVarName("Out"));
     auto* x_grad = context.Output<Tensor>(framework::GradVarName("X"));
-    auto* y_grad = context.Output<Tensor>(framework::GradVarName("Y"));
 
     PADDLE_ENFORCE_NOT_NULL(x, "Input X must not be null.");
     PADDLE_ENFORCE_NOT_NULL(y, "Target Y must not be null.");
@@ -80,7 +93,6 @@ class ModifiedHuberLossGradOp : public framework::OperatorWithKernel {
         "Dimension of Out gradient and X must be the same (N*1).");
 
     if (x_grad) x_grad->Resize(x->dims());
-    if (y_grad) y_grad->Resize(y->dims());
   }
 };
 
diff --git a/paddle/operators/modified_huber_loss_op.cu b/paddle/operators/modified_huber_loss_op.cu
index 06c710e0c5..f8aa5043dd 100644
--- a/paddle/operators/modified_huber_loss_op.cu
+++ b/paddle/operators/modified_huber_loss_op.cu
@@ -9,24 +9,61 @@
    See the License for the specific language governing permissions and
    limitations under the License. */
 
+#include <thrust/device_ptr.h>
+#include <thrust/for_each.h>
+#include <thrust/iterator/zip_iterator.h>
+#include <thrust/tuple.h>
 #include "paddle/framework/op_registry.h"
 #include "paddle/operators/modified_huber_loss_op.h"
+#include "paddle/platform/hostdevice.h"
 
 namespace paddle {
 namespace operators {
 
 using Tensor = framework::Tensor;
 
+struct ModifiedHuberLossBackward {
+  template <typename Tuple>
+  HOSTDEVICE void operator()(Tuple t) const {
+    auto inter_val = thrust::get<1>(t);
+    auto y_val = thrust::get<2>(t);
+    auto out_grad = thrust::get<3>(t);
+    if (inter_val < -1) {
+      thrust::get<0>(t) = -4 * (2 * y_val - 1) * out_grad;
+    } else if (inter_val < 1) {
+      thrust::get<0>(t) = -2 * (1 - inter_val) * (2 * y_val - 1) * out_grad;
+    } else {
+      thrust::get<0>(t) = 0;
+    }
+  }
+};
+
 template <typename T>
 class ModifiedHuberLossGradGPUKernel : public framework::OpKernel {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    // auto* in0 = context.Input<Tensor>("X");
-    // auto* in1 = context.Input<Tensor>("Y");
-    // auto* in2 = context.Input<Tensor>("intermediate_val");
-    // auto* in3 = context.Input<Tensor>(framework::GradVarName("Out"));
-    // auto* out0 = context.Output<Tensor>(framework::GradVarName("X"));
-    // auto* out1 = context.Output<Tensor>(framework::GradVarName("X"));
+    auto* in0 = context.Input<Tensor>("Y");
+    auto* in1 = context.Input<Tensor>("intermediate_val");
+    auto* in2 = context.Input<Tensor>(framework::GradVarName("Out"));
+    auto* out0 = context.Output<Tensor>(framework::GradVarName("X"));
+
+    if (out0) {
+      auto counts = framework::product(in1->dims());
+      auto y_ptr = thrust::device_pointer_cast(in0->data<T>());
+      auto inter_val_ptr = thrust::device_pointer_cast(in1->data<T>());
+      auto out_grad_ptr = thrust::device_pointer_cast(in2->data<T>());
+      thrust::device_ptr<T> x_grad_ptr(
+          out0->mutable_data<T>(context.GetPlace()));
+
+      auto iter_begin = thrust::make_zip_iterator(
+          thrust::make_tuple(x_grad_ptr, inter_val_ptr, y_ptr, out_grad_ptr));
+
+      auto iter_end = thrust::make_zip_iterator(
+          thrust::make_tuple(x_grad_ptr + counts, inter_val_ptr + counts,
+                             y_ptr + counts, out_grad_ptr + counts));
+
+      thrust::for_each(iter_begin, iter_end, ModifiedHuberLossBackward());
+    }
   }
 };
 
diff --git a/paddle/operators/modified_huber_loss_op.h b/paddle/operators/modified_huber_loss_op.h
index 2a429ab2e4..13c11684af 100644
--- a/paddle/operators/modified_huber_loss_op.h
+++ b/paddle/operators/modified_huber_loss_op.h
@@ -74,49 +74,31 @@ class ModifiedHuberLossKernel : public framework::OpKernel {
   }
 };
 
-// Use thrust lib to unify cpu and gpu
 // CPU backward kernel
 template <typename T>
 class ModifiedHuberLossGradCPUKernel : public framework::OpKernel {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    auto* in0 = context.Input<Tensor>("X");
-    auto* in1 = context.Input<Tensor>("Y");
-    auto* in2 = context.Input<Tensor>("intermediate_val");
-    auto* in3 = context.Input<Tensor>(framework::GradVarName("Out"));
+    auto* in0 = context.Input<Tensor>("Y");
+    auto* in1 = context.Input<Tensor>("intermediate_val");
+    auto* in2 = context.Input<Tensor>(framework::GradVarName("Out"));
     auto* out0 = context.Output<Tensor>(framework::GradVarName("X"));
-    auto* out1 = context.Output<Tensor>(framework::GradVarName("X"));
-    // loop inter_val (x<-1) (x<1) otherwise
-    const T* p_inter_val = in2->data<T>();
-    const T* p_out_grad = in3->data<T>();
-    size_t counts = static_cast<size_t>(framework::product(in2->dims()));
 
     if (out0) {
-      T* p_x_grad = out0->mutable_data<T>(context.GetPlace());
-      const T* p_y = in1->data<T>();
-      ModifiedHuberLossBackward(p_inter_val, p_y, p_out_grad, p_x_grad, counts);
-    }
-
-    if (out1) {
-      T* p_y_grad = out1->mutable_data<T>(context.GetPlace());
-      const T* p_x = in0->data<T>();
-      ModifiedHuberLossBackward(p_inter_val, p_x, p_out_grad,
-                                p_y_grad, counts);
-    }
-  }
-
- protected:
-  void ModifiedHuberLossBackward(const T* p_inter_data, const T* p_in_data,
-                                 const T* p_in_grad, T* p_out_grad,
-                                 size_t counts) const {
-    for (size_t i = 0; i < counts; ++i) {
-      if (p_inter_data[i] < -1) {
-        p_out_grad[i] = -4 * p_in_data[i] * p_in_grad[i];
-      } else if (p_inter_data[i] < 1) {
-        p_out_grad[i] =
-            -2 * (1 - p_inter_data[i]) * p_in_data[i] * p_in_grad[i];
-      } else {
-        p_out_grad[i] = 0;
+      const T* y_ptr = in0->data<T>();
+      const T* inter_val_ptr = in1->data<T>();
+      const T* out_grad_ptr = in2->data<T>();
+      size_t counts = static_cast<size_t>(framework::product(in1->dims()));
+      T* x_grad_ptr = out0->mutable_data<T>(context.GetPlace());
+      for (size_t i = 0; i < counts; ++i) {
+        if (inter_val_ptr[i] < -1) {
+          x_grad_ptr[i] = -4 * (2 * y_ptr[i] - 1) * out_grad_ptr[i];
+        } else if (inter_val_ptr[i] < 1) {
+          x_grad_ptr[i] = -2 * (1 - inter_val_ptr[i]) * (2 * y_ptr[i] - 1) *
+                          out_grad_ptr[i];
+        } else {
+          x_grad_ptr[i] = 0;
+        }
       }
     }
   }
diff --git a/python/paddle/v2/framework/tests/test_modified_huber_loss_op.py b/python/paddle/v2/framework/tests/test_modified_huber_loss_op.py
new file mode 100644
index 0000000000..2b76c53b6e
--- /dev/null
+++ b/python/paddle/v2/framework/tests/test_modified_huber_loss_op.py
@@ -0,0 +1,55 @@
+import unittest
+from op_test_util import OpTestMeta
+from gradient_checker import GradientChecker, create_op
+from paddle.v2.framework.op import Operator
+import numpy as np
+
+
+def modified_huber_loss_forward(val):
+    if val < -1:
+        return -4 * val
+    elif val < 1:
+        return (1 - val) * (1 - val)
+    else:
+        return 0
+
+
+class TestModifiedHuberLossOp_f0(unittest.TestCase):
+    __metaclass__ = OpTestMeta
+
+    def setUp(self):
+        self.type = 'modified_huber_loss'
+        samples_num = 32
+        self.inputs = {
+            'X': np.random.uniform(-1, 1., (samples_num, 1)).astype('float32'),
+            'Y': np.random.choice([0, 1], samples_num).reshape((samples_num, 1))
+        }
+        product_res = self.inputs['X'] * (2 * self.inputs['Y'] - 1)
+        loss = np.vectorize(modified_huber_loss_forward)(product_res)
+
+        self.outputs = {
+            'intermediate_val': product_res,
+            'Out': loss.reshape((samples_num, 1))
+        }
+
+
+class TestModifiedHuberLossGradOp(GradientChecker):
+    def test_modified_huber_loss_b0(self):
+        samples_num = 10
+        inputs = {
+            'X': np.random.uniform(-1, 1, (samples_num, 1)).astype('float32'),
+            'Y': np.random.choice([0, 1], samples_num).reshape((samples_num, 1))
+        }
+        op = Operator(
+            "modified_huber_loss",
+            X='X',
+            Y='Y',
+            intermediate_val='intermediate_val',
+            Out='Out')
+        self.compare_grad(
+            op, inputs, no_grad_set=set(['intermediate_val', 'Y']))
+        self.check_grad(op, inputs, set(["X"]), "Out")
+
+
+if __name__ == '__main__':
+    unittest.main()
--
GitLab
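
Note (not part of the patch): the DOC block in modified_huber_loss_op.cc
states the loss only in prose. As a reading aid, here is a minimal NumPy
sketch of the same forward computation; the function name
modified_huber_loss and the use of NumPy are assumptions for illustration,
not the operator's API. As a worked example, with label y = 0 (scaled to
-1) and prediction f(x) = 0.5, z = -0.5 >= -1, so the loss is
(1 - (-0.5))^2 = 2.25.

    import numpy as np

    def modified_huber_loss(x, y):
        # x: predictions f(x) with shape (N, 1); y: labels in {0, 1}.
        # Scale labels to {-1, +1}; z is the intermediate_val the op caches.
        z = x * (2.0 * y - 1.0)
        # Quadratic branch max(0, 1 - z)^2 for z >= -1, linear -4z otherwise.
        return np.where(z >= -1.0, np.maximum(0.0, 1.0 - z) ** 2, -4.0 * z)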
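
Note (not part of the patch): the CPU kernel and the thrust-based GPU
kernel implement the same piecewise gradient, reusing the cached
intermediate_val. Below is a NumPy sketch of that gradient plus a central
finite-difference check of the kind the GradientChecker test performs. It
reuses np and modified_huber_loss from the previous note;
modified_huber_loss_grad is an illustrative name.

    def modified_huber_loss_grad(x, y, out_grad):
        # dLoss/dX with the same three branches as the kernels.
        y_scaled = 2.0 * y - 1.0           # {0, 1} -> {-1, +1}
        z = x * y_scaled                   # the cached intermediate_val
        dldz = np.where(z < -1.0, -4.0,
                        np.where(z < 1.0, -2.0 * (1.0 - z), 0.0))
        return dldz * y_scaled * out_grad  # chain rule: dz/dx = y_scaled

    # Sanity check: analytic gradient vs. central finite differences.
    # The loss is C1 (the branches agree in value and slope at z = -1
    # and z = 1), so the two should match closely everywhere.
    x = np.random.uniform(-2, 2, (8, 1))
    y = np.random.choice([0.0, 1.0], (8, 1))
    eps = 1e-6
    numeric = (modified_huber_loss(x + eps, y) -
               modified_huber_loss(x - eps, y)) / (2 * eps)
    analytic = modified_huber_loss_grad(x, y, np.ones_like(x))
    assert np.allclose(numeric, analytic, atol=1e-4)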