From f14a7966b0f982a703f24940b1e9ae5325ee83c9 Mon Sep 17 00:00:00 2001
From: Liu Yiqun <liuyiqun01@baidu.com>
Date: Thu, 21 Sep 2017 03:18:42 +0000
Subject: [PATCH] Initialize the sequence softmax operator.

---
 paddle/operators/sequence_avg_pool_op.cc      |  2 +-
 paddle/operators/sequence_softmax_op.cc       | 82 +++++++++++++++++++
 paddle/operators/sequence_softmax_op.cu       | 25 ++++++
 paddle/operators/sequence_softmax_op.h        | 62 ++++++++++++++
 .../tests/test_sequence_softmax_op.py         | 35 ++++++++
 5 files changed, 205 insertions(+), 1 deletion(-)
 create mode 100644 paddle/operators/sequence_softmax_op.cc
 create mode 100644 paddle/operators/sequence_softmax_op.cu
 create mode 100644 paddle/operators/sequence_softmax_op.h
 create mode 100644 python/paddle/v2/framework/tests/test_sequence_softmax_op.py

diff --git a/paddle/operators/sequence_avg_pool_op.cc b/paddle/operators/sequence_avg_pool_op.cc
index 9815b8f3a8d..ff354c62b6f 100644
--- a/paddle/operators/sequence_avg_pool_op.cc
+++ b/paddle/operators/sequence_avg_pool_op.cc
@@ -36,7 +36,7 @@ class SequenceAvgPoolOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE_GE(
         dims[0],
         /*batch size = */ static_cast<int64_t>(lod[0].size() - 1),
-        "The first dimension of Input(X) must be large than batch size.");
+        "The first dimension of Input(X) must be larger than batch size.");
     dims[0] = lod[0].size() - 1;
     ctx.Output<framework::LoDTensor>("Out")->Resize({dims});
   }
diff --git a/paddle/operators/sequence_softmax_op.cc b/paddle/operators/sequence_softmax_op.cc
new file mode 100644
index 00000000000..0a99717440e
--- /dev/null
+++ b/paddle/operators/sequence_softmax_op.cc
@@ -0,0 +1,82 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/operators/sequence_softmax_op.h"
+
+namespace paddle {
+namespace operators {
+
+class SequenceSoftmaxOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+ protected:
+  void InferShape(const framework::InferShapeContext &ctx) const override {
+    PADDLE_ENFORCE_NOT_NULL(
+        ctx.InputVar("X"), "Input(X) of SequenceSoftmaxOp should not be null.");
+    PADDLE_ENFORCE_NOT_NULL(
+        ctx.OutputVar("Out"),
+        "Output(Out) of SequenceSoftmaxOp should not be null.");
+
+    auto *x = ctx.Input<framework::LoDTensor>("X");
+    auto dims = x->dims();
+    auto lod = x->lod();
+    PADDLE_ENFORCE_EQ(lod.size(), 1UL, "Only support one level sequence now.");
+    PADDLE_ENFORCE_GE(
+        dims[0],
+        /* batch_size */ static_cast<int64_t>(lod[0].size() - 1),
+        "The first dimension of Input(X) should be larger than batch size.");
+    PADDLE_ENFORCE_EQ(x->numel(), static_cast<int64_t>(lod[0].size() - 1),
+                      "The width of each timestep in Input(X) of "
+                      "SequenceSoftmaxOp should be 1.");
+
+    dims[0] = lod[0].size() - 1;
+    ctx.Output<framework::LoDTensor>("Out")->Resize({dims});
+  }
+};
+
+class SequenceSoftmaxOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  SequenceSoftmaxOpMaker(framework::OpProto *proto,
+                         framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("X", "(LoDTensor)");
+    AddOutput("Out", "(LoDTensor)");
+    AddComment(R"DOC(
+Softmax of Sequence.
+)DOC");
+  }
+};
+
+class SequenceSoftmaxGradOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+ protected:
+  void InferShape(const framework::InferShapeContext &ctx) const override {}
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OP(sequence_softmax, ops::SequenceSoftmaxOp,
+            ops::SequenceSoftmaxOpMaker, sequence_softmax_grad,
+            ops::SequenceSoftmaxGradOp);
+REGISTER_OP_CPU_KERNEL(
+    sequence_softmax,
+    ops::SequenceSoftmaxKernel<paddle::platform::CPUPlace, float>);
+REGISTER_OP_CPU_KERNEL(
+    sequence_softmax_grad,
+    ops::SequenceSoftmaxGradKernel<paddle::platform::GPUPlace, float>);
diff --git a/paddle/operators/sequence_softmax_op.cu b/paddle/operators/sequence_softmax_op.cu
new file mode 100644
index 00000000000..f2a1e3d5e31
--- /dev/null
+++ b/paddle/operators/sequence_softmax_op.cu
@@ -0,0 +1,25 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#define EIGEN_USE_GPU
+
+#include "paddle/operators/sequence_softmax_op.h"
+
+namespace ops = paddle::operators;
+REGISTER_OP_GPU_KERNEL(
+    sequence_softmax,
+    ops::SequenceSoftmaxKernel<paddle::platform::GPUPlace, float>)
+REGISTER_OP_GPU_KERNEL(
+    sequence_softmax_grad,
+    ops::SequenceSoftmaxGradKernel<paddle::platform::GPUPlace, float>);
diff --git a/paddle/operators/sequence_softmax_op.h b/paddle/operators/sequence_softmax_op.h
new file mode 100644
index 00000000000..54d82652711
--- /dev/null
+++ b/paddle/operators/sequence_softmax_op.h
@@ -0,0 +1,62 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/framework/eigen.h"
+#include "paddle/framework/op_registry.h"
+#include "paddle/operators/math/softmax_function.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+using LoDTensor = framework::LoDTensor;
+template <typename T, int MajorType = Eigen::RowMajor,
+          typename IndexType = Eigen::DenseIndex>
+using EigenVector = framework::EigenVector<T, MajorType, IndexType>;
+template <typename T, int MajorType = Eigen::RowMajor,
+          typename IndexType = Eigen::DenseIndex>
+using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
+
+template <typename Place, typename T>
+class SequenceSoftmaxKernel : public framework::OpKernel {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* x = ctx.Input<LoDTensor>("X");
+    auto* out = ctx.Output<LoDTensor>("Out");
+
+    auto lod = x->lod();
+    const size_t level = lod.size();
+
+    out->mutable_data<T>(ctx.GetPlace());
+    for (int i = 0; i < static_cast<int>(lod[level].size()) - 1; ++i) {
+      int start_pos = static_cast<int>(lod[level][i]);
+      int end_pos = static_cast<int>(lod[level][i + 1]);
+      Tensor x_i = x->Slice<T>(start_pos, end_pos);
+      Tensor out_i = out->Slice<T>(start_pos, end_pos);
+
+      math::SoftmaxFunctor<Place, T>()(&x_i, &out_i, ctx);
+    }
+  }
+};
+
+template <typename Place, typename T>
+class SequenceSoftmaxGradKernel : public framework::OpKernel {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {}
+};
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/python/paddle/v2/framework/tests/test_sequence_softmax_op.py b/python/paddle/v2/framework/tests/test_sequence_softmax_op.py
new file mode 100644
index 00000000000..d0667c1308d
--- /dev/null
+++ b/python/paddle/v2/framework/tests/test_sequence_softmax_op.py
@@ -0,0 +1,35 @@
+import unittest
+import numpy as np
+from op_test import OpTest
+
+
+def stable_softmax(x):
+    """Compute the softmax of vector x in a numerically stable way."""
+    shiftx = x - np.max(x)
+    exps = np.exp(shiftx)
+    return exps / np.sum(exps)
+
+
+class TestSequenceSoftmaxOp(OpTest):
+    def setUp(self):
+        self.op_type = "sequence_softmax"
+        x = np.random.uniform(0.1, 1, (11, 1)).astype("float32")
+        lod = [[0, 4, 5, 8, 11]]
+
+        out = np.zeros((11, 1)).astype("float32")
+        for i in range(4):
+            sub_x = x[lod[0][i]:lod[0][i + 1], :]
+            sub_x = sub_x.reshape(1, lod[0][i + 1] - lod[0][i])
+            sub_out = stable_softmax(sub_x)
+            out[lod[0][i]:lod[0][i + 1], :] = sub_out.reshape(
+                lod[0][i + 1] - lod[0][i], 1)
+
+        self.inputs = {"X": (x, lod)}
+        self.outputs = {"Out": out}
+
+    def test_check_output(self):
+        self.check_output()
+
+
+if __name__ == "__main__":
+    unittest.main()
-- 
GitLab