From f07164912bca60a36a72dc6ce22f8e00caa99301 Mon Sep 17 00:00:00 2001
From: Yancey1989
Date: Wed, 3 Jan 2018 20:00:07 +0800
Subject: [PATCH] fix backward

---
 paddle/operators/hierarchical_sigmoid_op.cc   | 28 +++++++-------
 paddle/operators/hierarchical_sigmoid_op.h    | 38 +++++++++----------
 paddle/operators/math/matrix_bit_code.cc      |  1 -
 paddle/pybind/pybind.cc                       |  2 -
 python/paddle/v2/fluid/executor.py            |  1 -
 python/paddle/v2/fluid/tests/op_test.py       |  2 -
 .../paddle/v2/fluid/tests/test_hsigmoid_op.py | 16 ++------
 7 files changed, 37 insertions(+), 51 deletions(-)

diff --git a/paddle/operators/hierarchical_sigmoid_op.cc b/paddle/operators/hierarchical_sigmoid_op.cc
index 4b3487f8b..bc6ceb987 100644
--- a/paddle/operators/hierarchical_sigmoid_op.cc
+++ b/paddle/operators/hierarchical_sigmoid_op.cc
@@ -61,10 +61,8 @@ class HierarchicalSigmoidOp : public framework::OperatorWithKernel {
   using framework::OperatorWithKernel::OperatorWithKernel;
   void InferShape(framework::InferShapeContext* ctx) const override {
     PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("Parameters"),
-                   "Input(Parameters)"
-                   "should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("Ids"), "Input(Ids) should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("W"), "Input(W) should not be null.");
     PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) should not be null.");
     const int64_t batch_size = ctx->GetInputDim("X")[0];
     std::vector<int64_t> output_shape({batch_size, 1});
@@ -84,15 +82,17 @@ class HierarchicalSigmoidGradOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
   void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE(ctx->HasInput("Parameters"),
-                   "Input(Parameters)"
-                   "should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("Label"),
-                   "Input(Label)"
-                   "should not be null.");
-    PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("Parameters")),
-                   "Input(Parameters@Grad should not be null.)");
+    PADDLE_ENFORCE(ctx->HasInput("W"), "Input(W) should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("Ids"), "Input(Ids) should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("W")),
+                   "Output(W@Grad) should not be null.");
     PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")));
+    if (ctx->HasOutput(framework::GradVarName("Bias"))) {
+      ctx->SetOutputDim(framework::GradVarName("Bias"),
+                        ctx->GetInputDim("Bias"));
+    }
+    ctx->SetOutputDim(framework::GradVarName("W"), ctx->GetInputDim("W"));
+    ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
   }
 
  protected:
@@ -112,11 +112,11 @@ class HierarchicalSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
              "(Tensor, required) The input Tensor, which the shape is"
              "[N * D], which N is the size of mini-batch,"
              "D is the embded size");
-    AddInput("Parameters",
+    AddInput("W",
              "(Tensor, required), The parameters of hierarchical "
              "sigmoid operator, each of them is s a 3-D tensor, the shape is"
              "[N, num_classes - 1, D]");
-    AddInput("Label",
+    AddInput("Ids",
              "(Tensor, required), The labels of training data. It's a"
              "1-D tensor, which the shape is [1, N]");
     AddInput("Bias",
diff --git a/paddle/operators/hierarchical_sigmoid_op.h b/paddle/operators/hierarchical_sigmoid_op.h
index 531fd9f7f..1b8d21c09 100644
--- a/paddle/operators/hierarchical_sigmoid_op.h
+++ b/paddle/operators/hierarchical_sigmoid_op.h
@@ -32,15 +32,14 @@ class HierarchicalSigmoidOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
     auto* in = ctx.Input<framework::Tensor>("X");
-    auto* params = ctx.Input<framework::Tensor>("Parameters");
-    auto* label = ctx.Input<framework::Tensor>("Label");
+    auto* w = ctx.Input<framework::Tensor>("W");
+    auto* ids = ctx.Input<framework::Tensor>("Ids");
     auto* bias = ctx.Input<framework::Tensor>("Bias");
     auto* out = ctx.Output<framework::Tensor>("Out");
     size_t num_classes = static_cast<size_t>(ctx.Attr<int>("num_classes"));
     int64_t code_length = math::FindLastSet(num_classes - 1);
     int64_t batch_size = in->dims()[0];
-    auto* ids = label->data<int64_t>();
     framework::Tensor pre_out;
     framework::Tensor sum;
     auto pre_out_data = pre_out.mutable_data<T>(
@@ -59,18 +58,19 @@ class HierarchicalSigmoidOpKernel : public framework::OpKernel<T> {
     auto out_mat = framework::EigenVector<T>::Flatten(*out);
     if (bias) {
-      bit_code.Add(num_classes, ids, pre_out, *bias);
+      bit_code.Add(num_classes, ids->data<int64_t>(), pre_out, *bias);
     }
     for (int i = 0; i < in->dims()[0]; ++i) {
-      bit_code.Mul(num_classes, ids, pre_out, params->Slice(i, i + 1),
-                   in->Slice(i, i + 1));
+      bit_code.Mul(num_classes, ids->data<int64_t>(), pre_out,
+                   w->Slice(i, i + 1), in->Slice(i, i + 1));
     }
     // clip the matrix with (-40, 40)
     Transform<DeviceContext> trans;
     trans(ctx.template device_context<DeviceContext>(), pre_out_data,
           pre_out_data + pre_out.numel(), pre_out_data,
           ClipFunctor<T>(static_cast<T>(-40.0), static_cast<T>(40.0)));
-    bit_code.Sum(num_classes, ids, pre_out, *out, static_cast<T>(-1));
+    bit_code.Sum(num_classes, ids->data<int64_t>(), pre_out, *out,
+                 static_cast<T>(-1));
     // softrelu with threshold is 40.0
     trans(ctx.template device_context<DeviceContext>(), pre_out_data,
           pre_out_data + pre_out.numel(), pre_out_data,
@@ -88,10 +88,9 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> {
   void Compute(const framework::ExecutionContext& ctx) const override {
     auto* in = ctx.Input<framework::Tensor>("X");
     auto* in_grad = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
-    auto* params =
-        ctx.Output<framework::Tensor>(framework::GradVarName("Parameters"));
+    auto* w = ctx.Output<framework::Tensor>(framework::GradVarName("W"));
     auto* bias = ctx.Output<framework::Tensor>(framework::GradVarName("Bias"));
-    auto* label = ctx.Input<framework::Tensor>("Label");
+    auto* ids = ctx.Input<framework::Tensor>("Ids");
     size_t num_classes = static_cast<size_t>(ctx.Attr<int>("num_classes"));
     int64_t code_length = math::FindLastSet(num_classes - 1);
     int64_t batch_size = in->dims()[0];
@@ -102,8 +101,6 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> {
     auto& place = *ctx.template device_context<DeviceContext>().eigen_device();
     auto& device_ctx = ctx.template device_context<DeviceContext>();
     auto pre_out_mat = EigenMatrix<T>::From(pre_out);
-    auto* ids = label->data<int64_t>();
-
     // init pre_out matrix with {1.0}
     math::SetConstant<DeviceContext, T> one;
     math::MatrixBitCodeFunctor<T> bit_code;
@@ -112,19 +109,22 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> {
     pre_out_mat.device(place) =
         pre_out_mat * (static_cast<T>(1.0) - static_cast<T>(1.0) / pre_out_mat);
-    bit_code.Sub(num_classes, ids, pre_out);
+    bit_code.Sub(num_classes, ids->data<int64_t>(), pre_out);
     if (bias) {
-      bit_code.AddGrad(num_classes, ids, pre_out, *bias);
+      bias->mutable_data<T>(ctx.GetPlace());
+      bit_code.AddGrad(num_classes, ids->data<int64_t>(), pre_out, *bias);
     }
-
+    in_grad->mutable_data<T>(ctx.GetPlace());
+    w->mutable_data<T>(ctx.GetPlace());
     for (int i = 0; i < in_grad->dims()[0]; ++i) {
-      auto p_sliced = params->Slice(i, i + 1);
+      auto p_sliced = w->Slice(i, i + 1);
       auto in_sliced = in->Slice(i, i + 1);
       auto in_grad_sliced = in_grad->Slice(i, i + 1);
-      bit_code.MulGradWeight(num_classes, ids, pre_out, p_sliced, in_sliced);
-      bit_code.MulGradError(num_classes, ids, pre_out, p_sliced,
-                            in_grad_sliced);
+      bit_code.MulGradWeight(num_classes, ids->data<int64_t>(), pre_out,
+                             p_sliced, in_sliced);
+      bit_code.MulGradError(num_classes, ids->data<int64_t>(), pre_out,
+                            p_sliced, in_grad_sliced);
     }
   }
 };
diff --git a/paddle/operators/math/matrix_bit_code.cc b/paddle/operators/math/matrix_bit_code.cc
index 4ad0a0000..b192183b1 100644
--- a/paddle/operators/math/matrix_bit_code.cc
+++ b/paddle/operators/math/matrix_bit_code.cc
@@ -56,7 +56,6 @@ static void AddByBitCodeT(Op op, CodeTable code_table, const int64_t* codes,
                           const framework::Tensor& vec) {
   size_t num_sample = tmat.dims()[0];
   size_t width = vec.dims()[1];
-
   for (size_t i = 0; i < num_sample; ++i) {
     auto code = code_table(static_cast<size_t>(codes[i]));
     int code_length = code.get_length();
diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc
index 921b316a6..de6b24f70 100644
--- a/paddle/pybind/pybind.cc
+++ b/paddle/pybind/pybind.cc
@@ -109,8 +109,6 @@ PYBIND11_PLUGIN(core) {
       .def("shape", [](Tensor &self) { return vectorize(self.dims()); })
       .def("set_float_element", TensorSetElement<float>)
       .def("get_float_element", TensorGetElement<float>)
-      .def("set_int64_element", TensorSetElement<int64_t>)
-      .def("get_int64_element", TensorGetElement<int64_t>)
       .def("set_double_element", TensorSetElement<double>)
       .def("get_double_element", TensorGetElement<double>)
       .def("dtype", [](Tensor &self) { return ToDataType(self.type()); });
diff --git a/python/paddle/v2/fluid/executor.py b/python/paddle/v2/fluid/executor.py
index cdd576294..a054d5eaf 100644
--- a/python/paddle/v2/fluid/executor.py
+++ b/python/paddle/v2/fluid/executor.py
@@ -148,7 +148,6 @@ class Executor(object):
                 inputs={'X': [var]},
                 outputs={'Out': [fetch_var]},
                 attrs={'col': i})
-
         self.executor.run(program.desc, scope, 0, True, True)
         outs = [
             core.get_fetch_variable(scope, fetch_var_name, i)
diff --git a/python/paddle/v2/fluid/tests/op_test.py b/python/paddle/v2/fluid/tests/op_test.py
index 287dc2980..0493a0c20 100644
--- a/python/paddle/v2/fluid/tests/op_test.py
+++ b/python/paddle/v2/fluid/tests/op_test.py
@@ -123,8 +123,6 @@ def get_numeric_gradient(scope,
     def __set_elem__(tensor, i, e):
         if tensor_to_check_dtype == np.float32:
             tensor.set_float_element(i, e)
-        elif tensor_to_check_dtype == np.int64:
-            tensor.set_int64_element(i, e)
         else:
             tensor.set_double_element(i, e)
diff --git a/python/paddle/v2/fluid/tests/test_hsigmoid_op.py b/python/paddle/v2/fluid/tests/test_hsigmoid_op.py
index 194d5e315..b6d961b63 100644
--- a/python/paddle/v2/fluid/tests/test_hsigmoid_op.py
+++ b/python/paddle/v2/fluid/tests/test_hsigmoid_op.py
@@ -10,16 +10,11 @@ class TestHSigmoidOp(OpTest):
         embded_size = 10
         batch_size = 5
         x = np.random.random((batch_size, embded_size)).astype("float32")
-        parameter = np.random.random(
+        w = np.random.random(
             (batch_size, num_classes - 1, embded_size)).astype("float32")
-        label = np.random.randint(0, num_classes, batch_size)
+        ids = np.random.randint(0, num_classes, batch_size)
         bias = np.random.random((1, num_classes - 1)).astype("float32")
-        self.inputs = {
-            'X': x,
-            'Parameters': parameter,
-            'Label': label,
-            'Bias': bias
-        }
+        self.inputs = {'X': x, 'W': w, 'Ids': ids, 'Bias': bias}
         self.attrs = {'num_classes': num_classes}
         self.outputs = {
             'Out': np.random.random((batch_size, 1)).astype("float32")
         }
@@ -29,10 +24,7 @@
         self.check_output()
 
     def test_check_grad(self):
-        self.check_grad(
-            ['X', 'Parameters', 'Label', 'Bias'],
-            'Out',
-            no_grad_set=set(['Label']))
+        self.check_grad(['X', 'W', 'Bias'], 'Out', no_grad_set=set(['Ids']))
 
 
 if __name__ == '__main__':
-- 
GitLab
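
Reviewer note (not part of the patch): a minimal NumPy sketch of the feed data the renamed
inputs expect, mirroring the shapes constructed in test_hsigmoid_op.py above. The variable
names and the value of num_classes below are illustrative only; in the real test
num_classes is defined earlier in setUp().

    import numpy as np

    num_classes, embed_size, batch_size = 6, 10, 5  # illustrative values

    feed = {
        # one embedding row per example: [batch_size, embed_size]
        'X': np.random.random((batch_size, embed_size)).astype("float32"),
        # one (num_classes - 1) x embed_size weight slice per example
        'W': np.random.random(
            (batch_size, num_classes - 1, embed_size)).astype("float32"),
        # integer class ids, one per example; excluded from gradient checking
        'Ids': np.random.randint(0, num_classes, batch_size),
        'Bias': np.random.random((1, num_classes - 1)).astype("float32"),
    }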