Finish adaptation for backward.

bf3f56e8 · yangyaming · 352fa41a · bf3f56e8 · bf3f56e8 · bf3f56e8
4 changed files
--- a/paddle/fluid/operators/math/math_function.cc
+++ b/paddle/fluid/operators/math/math_function.cc
@@ -371,6 +371,8 @@ template struct RowwiseAdd<platform::CPUDeviceContext, double>;
 template struct ColwiseSum<platform::CPUDeviceContext, float>;
 template struct ColwiseSum<platform::CPUDeviceContext, double>;
+template struct ColwiseSum<platform::CPUDeviceContext, int>;
+template struct ColwiseSum<platform::CPUDeviceContext, int64_t>;
 template struct RowwiseSum<platform::CPUDeviceContext, float>;
 template struct RowwiseSum<platform::CPUDeviceContext, double>;

--- a/paddle/fluid/operators/math/math_function.cu
+++ b/paddle/fluid/operators/math/math_function.cu
@@ -422,6 +422,8 @@ struct RowwiseAdd<platform::CUDADeviceContext, T> {
 template struct RowwiseAdd<platform::CUDADeviceContext, float>;
 template struct RowwiseAdd<platform::CUDADeviceContext, double>;
 template struct ColwiseSum<platform::CUDADeviceContext, float>;
+template struct ColwiseSum<platform::CUDADeviceContext, int>;
+template struct ColwiseSum<platform::CUDADeviceContext, int64_t>;
 // template struct ColwiseSum<platform::CUDADeviceContext, double>;
 // The ColwiseSum<platform::CUDADeviceContext, double> failed in debug mode,
 // and only failed for this case. So reimplemented it.

--- a/paddle/fluid/operators/sequence_expand_op.cc
+++ b/paddle/fluid/operators/sequence_expand_op.cc
@@ -33,9 +33,10 @@ class SequenceExpandOp : public framework::OperatorWithKernel {
                   "Output(Out) of SequenceExpandOp should not be null.");
    auto x_dims = ctx->GetInputDim("X");
+    int ref_level = ctx->Attrs().Get<int>("ref_level");
    PADDLE_ENFORCE_EQ(x_dims.size(), 2U,
                      "Dimension number of Input(X) should be 2.");
-    int ref_level = ctx->Attrs().Get<int>("ref_level");
    if (ctx->IsRuntime()) {
      framework::Variable* x_var =
@@ -51,39 +52,37 @@ class SequenceExpandOp : public framework::OperatorWithKernel {
                        "greater than 1.");
      PADDLE_ENFORCE(x_lod.size() == y_lod.size() || x_lod.size() == 0,
-                     "Number of lod level of Input(X) either equal to 0 "
+                     "Level number of Input(X)'s lod should be either equal "
-                     "or equal to that of Input(Y).");
+                     "to 0 or equal to that of Input(Y).");
+      PADDLE_ENFORCE_GT(y_lod.size(), 0,
+                        "Level number of Input(Y)'s lod should be "
+                        "greater than 0.");
+      PADDLE_ENFORCE(
+          ref_level == -1 ||
+              (ref_level >= 0 && ref_level < static_cast<int>(y_lod.size())),
+          "Invlid `ref_level`, which should be either equal to -1 "
+          "or in [0, %d)",
+          y_lod.size());
+      if (ref_level == -1) ref_level = y_lod.size() - 1;
      int64_t out_first_dim = 0;
-      if (y_lod[ref_level].size() < 1) {
+      if (y_lod[ref_level].size() <= 1) {
        out_first_dim = x_dims[0];
      } else {
-        if (x_lod.size() == 1) {  // X is LoDTensor
+        for (size_t i = 1; i < y_lod[ref_level].size(); ++i) {
-          for (size_t i = 1; i < y_lod[ref_level].size(); ++i) {
+          int x_seq_len = 1;
-            int x_seq_len = x_lod[0][i] - x_lod[0][i - 1];
+          if (x_lod.size() == 1) {
-            out_first_dim +=
+            x_seq_len = x_lod[0][i] - x_lod[0][i - 1];
-                (y_lod[ref_level][i] - y_lod[ref_level][i - 1]) * x_seq_len;
-          }
-        } else {  // X is normal Tensor
-          for (size_t i = 1; i < y_lod[ref_level].size(); ++i) {
-            out_first_dim += y_lod[ref_level][i] - y_lod[ref_level][i - 1];
          }
+          out_first_dim +=
+              (y_lod[ref_level][i] - y_lod[ref_level][i - 1]) * x_seq_len;
        }
      }
      ctx->SetOutputDim("Out", {out_first_dim, x_dims[1]});
    } else {
-      framework::VarDesc* in_reader =
-          boost::get<framework::VarDesc*>(ctx->GetInputVarPtrs("Y")[0]);
-      int lod_level_num = in_reader->GetLoDLevels().size();
-      PADDLE_ENFORCE_GE(ref_level, 0,
-                        "Level of referred lod should be greater or "
-                        "equal to 0.");
-      PADDLE_ENFORCE_LT(ref_level, lod_level_num,
-                        "Level of referred lod should be smaller than "
-                        "level number of Input(Y).");
      ctx->SetOutputDim("Out", {-1, x_dims[1]});
    }
  }
@@ -102,7 +101,7 @@ class SequenceExpandOpMaker : public framework::OpProtoAndCheckerMaker {
    AddOutput("Out",
              "(LodTensor, default LoDTensor<float>) Output LoDTensor which is "
              "generated from Input(X) by referring lod of Input(Y).");
-    AddAttr<int>("ref_level", "Specify lod level of Input(Y).");
+    AddAttr<int>("ref_level", "Specify lod level of Input(Y).").SetDefault(-1);
    AddComment(R"DOC(
 Sequence Expand Operator.

--- a/paddle/fluid/operators/sequence_expand_op.h
+++ b/paddle/fluid/operators/sequence_expand_op.h
@@ -16,7 +16,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/memory/memcpy.h"
-#include "unsupported/Eigen/CXX11/Tensor"
+#include "paddle/fluid/operators/math/math_function.h"
 namespace paddle {
 namespace operators {
@@ -32,52 +32,53 @@ class SequenceExpandKernel : public framework::OpKernel<T> {
    auto* out = context.Output<LoDTensor>("Out");
    int ref_level = context.Attr<int>("ref_level");
+    out->mutable_data<T>(context.GetPlace());
    auto& x_lod = x->lod();
    auto& y_lod = y->lod();
-    PADDLE_ENFORCE_GE(ref_level, 0,
+    PADDLE_ENFORCE_GT(y_lod.size(), 0,
-                      "Value of attribute `ref_level` should be greater or "
+                      "Level number of `Y`'s lod should be greater than 0.");
-                      "equal to 0.");
-    PADDLE_ENFORCE_LT(ref_level, y_lod.size(),
+    PADDLE_ENFORCE(
-                      "Value of attribute `ref_level` should be smaller than "
+        ref_level == -1 || (ref_level >= 0 && ref_level < y_lod.size()),
-                      "level number of Y's lod.");
+        "Invlid `ref_level`, which should be either equal to -1 "
+        "or in [0, %d)",
+        y_lod.size());
-    if (y_lod[ref_level].size() < 1) {
+    if (ref_level == -1) ref_level = y_lod.size() - 1;
+    if (y_lod[ref_level].size() <= 1) {
      framework::TensorCopy(*x, context.GetPlace(), out);
      return;
    }
-    if (x_lod.size() == 0) {
+    auto& out_lod = *out->mutable_lod();
-      int out_start = 0;
+    if (x_lod.size() == 1) {
-      for (size_t i = 1; i < y_lod[ref_level].size(); ++i) {
-        int repeat_num = y_lod[ref_level][i] - y_lod[ref_level][i - 1];
-        auto x_sub_tensor = x->Slice(i - 1, i);
-        for (size_t j = 0; j < repeat_num; ++j) {
-          auto out_sub_tensor = out->Slice(out_start, out_start + 1);
-          framework::TensorCopy(x_sub_tensor, context.GetPlace(),
-                                &out_sub_tensor);
-          out_start++;
-        }
-      }
-    } else {
-      auto& out_lod = *out->mutable_lod();
      out_lod.resize(1);
-      out_lod[0].resize(1);
+      out_lod[0] = {0};
-      out_lod[0][0] = 0;
+    }
-      int out_idx = 0;
-      for (size_t i = 1; i < y_lod[ref_level].size(); ++i) {
+    int out_offset = 0;
-        int repeat_num = y_lod[ref_level][i] - y_lod[ref_level][i - 1];
+    for (size_t i = 1; i < y_lod[ref_level].size(); ++i) {
-        int x_seq_len = x_lod[0][i] - x_lod[0][i - 1];
+      int repeat_num = y_lod[ref_level][i] - y_lod[ref_level][i - 1];
-        auto x_sub_tensor = x->Slice(x_lod[0][i], x_lod[0][i - 1]);
+      int x_start = i - 1;
-        for (size_t j = 0; j < repeat_num; ++j) {
+      int x_end = i;
-          auto out_sub_tensor =
+      if (x_lod.size() == 1) {
-              out->Slice(out_lod[0][out_idx], out_lod[0][out_idx] + x_seq_len);
+        x_start = x_lod[0][i - 1];
-          framework::TensorCopy(x_sub_tensor, context.GetPlace(),
+        x_end = x_lod[0][i];
-                                &out_sub_tensor);
+      }
-          out_lod[0].push_back(out_lod[0][out_idx] + x_seq_len);
+      int x_seq_len = x_end - x_start;
-          out_idx++;
+      auto x_sub_tensor = x->Slice(x_start, x_end);
+      for (size_t j = 0; j < repeat_num; ++j) {
+        int out_start = out_offset;
+        if (x_lod.size() == 1) {
+          out_start = out_lod[0][out_offset];
+          out_lod[0].push_back(x_seq_len);
        }
+        auto out_sub_tensor = out->Slice(out_start, out_start + x_seq_len);
+        framework::TensorCopy(x_sub_tensor, context.GetPlace(),
+                              &out_sub_tensor);
+        out_offset++;
      }
    }
  }
@@ -99,27 +100,49 @@ template <typename DeviceContext, typename T>
 class SequenceExpandGradKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
-    auto* d_out = context.Input<LoDTensor>(framework::GradVarName("Out"));
+    auto* g_out = context.Input<LoDTensor>(framework::GradVarName("Out"));
    auto* x = context.Input<LoDTensor>("X");
-    auto* out = context.Input<LoDTensor>("Out");
+    auto* y = context.Input<LoDTensor>("Y");
-    auto* d_x = context.Output<LoDTensor>(framework::GradVarName("X"));
+    auto* g_x = context.Output<LoDTensor>(framework::GradVarName("X"));
-    auto out_last_level = out->lod().back();
+    int ref_level = context.Attr<int>("ref_level");
-    d_x->set_lod(x->lod());
-    const T* d_out_data = d_out->data<T>();
+    g_x->mutable_data<T>(context.GetPlace());
-    T* d_x_data = d_x->mutable_data<T>(context.GetPlace());
+    g_x->set_lod(x->lod());
-    size_t element_len = d_out->numel() / d_out->dims()[0];
-    for (size_t i = 0; i < out_last_level.size() - 1; ++i) {
+    auto& x_lod = x->lod();
-      size_t repeat = out_last_level[i + 1] - out_last_level[i];
+    auto& y_lod = y->lod();
-      Eigen::TensorMap<
-          Eigen::Tensor<const T, 2, Eigen::RowMajor, Eigen::DenseIndex>>
+    if (ref_level == -1) ref_level = y_lod.size() - 1;
-      d_out_t(d_out_data, static_cast<int>(repeat), element_len);
-      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>
+    // just copy the gradient
-      d_x_t(d_x_data, static_cast<int>(element_len));
+    if (y_lod[ref_level].size() <= 1) {
-      auto place =
+      framework::TensorCopy(*g_out, context.GetPlace(), g_x);
-          context.template device_context<DeviceContext>().eigen_device();
+      return;
-      d_x_t.device(*place) = d_out_t.sum(Eigen::array<int, 1>({{0}}));
+    }
-      d_out_data += (repeat * element_len);
-      d_x_data += element_len;
+    auto& dev_ctx = context.template device_context<DeviceContext>();
+    int g_out_offset = 0;
+    for (size_t i = 1; i < y_lod[ref_level].size(); ++i) {
+      int repeat_num = y_lod[ref_level][i] - y_lod[ref_level][i - 1];
+      if (repeat_num > 0) {
+        int x_start = i - 1;
+        int x_end = i;
+        if (x_lod.size() == 1) {
+          x_start = x_lod[0][i - 1];
+          x_end = x_lod[0][i];
+        }
+        int x_seq_len = x_end - x_start;
+        auto column = x_seq_len * x->dims()[1];
+        auto g_x_sub = g_x->Slice(x_start, x_end);
+        g_x_sub = framework::ReshapeToMatrix(g_x_sub, column);
+        int g_out_end = g_out_offset + repeat_num * x_seq_len;
+        auto g_out_sub = g_out->Slice(g_out_offset, g_out_end);
+        g_out_sub = framework::ReshapeToMatrix(g_out_sub, column);
+        math::ColwiseSum<DeviceContext, T> col_sum;
+        col_sum(dev_ctx, g_out_sub, &g_x_sub);
+        g_out_offset += repeat_num * x_seq_len;
+      }
    }
  }
 };