Fix/sequence op (#5264)

* "replace enum with string"
* "fix layers"

Fix/sequence op (#5264)
* "replace enum with string"
* "fix layers"
26492210 · dzhwinter · GitHub · bcdedecb · 26492210 · 26492210
4 changed files
--- a/paddle/operators/sequence_pool_op.cc
+++ b/paddle/operators/sequence_pool_op.cc
@@ -39,15 +39,14 @@ class SequencePoolOpMaker : public framework::OpProtoAndCheckerMaker {
    AddOutput("Out",
              "(Tensor), output of SequencePoolOp, which does not contain LoD "
              "infomation.");
-    AddAttr<int>(
-        "strategy",
-        "(int, default AVERAGE) the pooling strategy of SequencePoolOp.")
-        .SetDefault(AVERAGE)
-        .InEnum({AVERAGE, SUM, SQRT, MAX, LAST, FIRST});
+    AddAttr<std::string>(
+        "pooltype",
+        "(int, default AVERAGE) the pooling pooltype of SequencePoolOp.")
+        .SetDefault("AVERAGE");
    AddComment(R"DOC(
    SequencePoolOp pools features of all time-steps of each instance.

-    It supports six pooling strategy:
+    It supports six pooling pooltype:
    - AVERAGE: Out[i] = average_{for each instance in i-th sequence}{X[i]}
    - SUM:     Out[i] = sum_{for each instance in i-th sequence}{X[i]}
    - SQRT:    Out[i] = sum_{for each instance in i-th sequence}{X[i]} 
@@ -63,7 +62,7 @@ class SequencePoolOpMaker : public framework::OpProtoAndCheckerMaker {
    and the value of X = [[1, 3], [2, 4, 6], [5, 1]].

    Thus, Out is a [3,1,1] Tensor without LoD infomation.
-    And for different strategy, the value of Out is as follows:
+    And for different pooltype, the value of Out is as follows:

    - AVERAGE: [2, 4, 3], where 2=(1+3)/2, 4=(2+4+6)/3, 3=(5+1)/2
    - SUM: [4, 12, 6], where 4=1+3, 12=2+4+6, 6=5+1

--- a/paddle/operators/sequence_pool_op.h
+++ b/paddle/operators/sequence_pool_op.h
@@ -29,22 +29,13 @@ template <typename T, int MajorType = Eigen::RowMajor,
          typename IndexType = Eigen::DenseIndex>
 using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;

-enum SeqPoolType {
-  AVERAGE = 0,
-  SUM = 1,
-  SQRT = 2,  // square_root_n
-  MAX = 3,
-  LAST = 4,
-  FIRST = 5
-};
-
 template <typename Place, typename T>
 class SequencePoolKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* in = context.Input<LoDTensor>("X");
    auto* out = context.Output<LoDTensor>("Out");
-    int strategy = context.Attr<int>("strategy");
+    std::string pooltype = context.Attr<std::string>("pooltype");

    auto dims = in->dims();
    auto lod = in->lod();
@@ -71,28 +62,21 @@ class SequencePoolKernel : public framework::OpKernel<T> {
      auto in_e = EigenMatrix<T>::From(in_t, framework::make_ddim({h, w}));
      auto out_e = EigenVector<T>::Flatten(out_t);

-      switch (strategy) {
-        case AVERAGE:
-          out_e.device(place) = in_e.mean(Eigen::array<int, 1>({{0}}));
-          break;
-        case SUM:
-          out_e.device(place) = in_e.sum(Eigen::array<int, 1>({{0}}));
-          break;
-        case SQRT:
-          out_e.device(place) = in_e.sum(Eigen::array<int, 1>({{0}})) /
-                                std::sqrt(static_cast<T>(h));
-          break;
-        case MAX:
-          out_e.device(place) = in_e.maximum(Eigen::array<int, 1>({{0}}));
-          break;
-        case LAST:
-          out_e.device(place) = in_e.chip(h - 1, 0);
-          break;
-        case FIRST:
-          out_e.device(place) = in_e.chip(0, 0);
-          break;
-        default:
-          PADDLE_THROW("unsupported pooling strategy");
+      if (pooltype == "AVERAGE") {
+        out_e.device(place) = in_e.mean(Eigen::array<int, 1>({{0}}));
+      } else if (pooltype == "SUM") {
+        out_e.device(place) = in_e.sum(Eigen::array<int, 1>({{0}}));
+      } else if (pooltype == "SQRT") {
+        out_e.device(place) = in_e.sum(Eigen::array<int, 1>({{0}})) /
+                              std::sqrt(static_cast<T>(h));
+      } else if (pooltype == "MAX") {
+        out_e.device(place) = in_e.maximum(Eigen::array<int, 1>({{0}}));
+      } else if (pooltype == "LAST") {
+        out_e.device(place) = in_e.chip(h - 1, 0);
+      } else if (pooltype == "FIRST") {
+        out_e.device(place) = in_e.chip(0, 0);
+      } else {
+        PADDLE_THROW("unsupported pooling pooltype");
      }
    }
  }
@@ -105,15 +89,15 @@ class SequencePoolGradKernel : public framework::OpKernel<T> {
    auto* in = context.Input<LoDTensor>("X");
    auto* in_g = context.Output<LoDTensor>(framework::GradVarName("X"));
    auto* out_g = context.Input<LoDTensor>(framework::GradVarName("Out"));
-    int strategy = context.Attr<int>("strategy");
+    std::string pooltype = context.Attr<std::string>("pooltype");

    auto dims = in->dims();
    auto lod = in->lod()[0];
    int64_t w = in->numel() / dims[0];

    in_g->mutable_data<T>(context.GetPlace());
-    if (strategy == LAST || strategy == FIRST) {
-      // set X@Grad be zero at first when strategy is LAST/FIRST
+    if (pooltype == "LAST" || pooltype == "FIRST") {
+      // set X@Grad be zero at first when pooltype is LAST/FIRST
      math::SetConstant<Place, T> functor;
      functor(context.device_context(), in_g, 0);
    }
@@ -127,41 +111,33 @@ class SequencePoolGradKernel : public framework::OpKernel<T> {
      auto out_g_e = EigenMatrix<T>::From(out_g_t, {1, w});
      Eigen::DSizes<int, 2> bcast(h, 1);

-      switch (strategy) {
-        case AVERAGE:
-          in_g_e.device(place) = (out_g_e / static_cast<T>(h)).broadcast(bcast);
-          break;
-        case SUM:
-          in_g_e.device(place) = (out_g_e).broadcast(bcast);
-          break;
-        case SQRT:
-          in_g_e.device(place) =
-              (out_g_e / std::sqrt(static_cast<T>(h))).broadcast(bcast);
-          break;
-        case MAX: {
-          auto in_t =
-              in->Slice(static_cast<int>(lod[i]), static_cast<int>(lod[i + 1]));
-          Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
-              in_t_map(in_t.data<T>(), h, w);
-          int row_id;
-          Eigen::array<int, 2> extents{{1, 1}};
-          for (int col_id = 0; col_id < w; col_id++) {
-            in_t_map.col(col_id).maxCoeff(&row_id);
-            Eigen::array<int, 2> in_offsets{{row_id, col_id}};
-            Eigen::array<int, 2> out_offsets{{0, col_id}};
-            in_g_e.slice(in_offsets, extents).device(place) =
-                out_g_e.slice(out_offsets, extents);
-          }
-          break;
+      if (pooltype == "AVERAGE") {
+        in_g_e.device(place) = (out_g_e / static_cast<T>(h)).broadcast(bcast);
+      } else if (pooltype == "SUM") {
+        in_g_e.device(place) = (out_g_e).broadcast(bcast);
+      } else if (pooltype == "SQRT") {
+        in_g_e.device(place) =
+            (out_g_e / std::sqrt(static_cast<T>(h))).broadcast(bcast);
+      } else if (pooltype == "MAX") {
+        auto in_t =
+            in->Slice(static_cast<int>(lod[i]), static_cast<int>(lod[i + 1]));
+        Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
+            in_t_map(in_t.data<T>(), h, w);
+        int row_id;
+        Eigen::array<int, 2> extents{{1, 1}};
+        for (int col_id = 0; col_id < w; col_id++) {
+          in_t_map.col(col_id).maxCoeff(&row_id);
+          Eigen::array<int, 2> in_offsets{{row_id, col_id}};
+          Eigen::array<int, 2> out_offsets{{0, col_id}};
+          in_g_e.slice(in_offsets, extents).device(place) =
+              out_g_e.slice(out_offsets, extents);
        }
-        case LAST:
-          in_g_e.chip(h - 1, 0).device(place) = out_g_e;
-          break;
-        case FIRST:
-          in_g_e.chip(0, 0).device(place) = out_g_e;
-          break;
-        default:
-          PADDLE_THROW("unsupported pooling strategy");
+      } else if (pooltype == "LAST") {
+        in_g_e.chip(h - 1, 0).device(place) = out_g_e;
+      } else if (pooltype == "FIRST") {
+        in_g_e.chip(0, 0).device(place) = out_g_e;
+      } else {
+        PADDLE_THROW("unsupported pooling pooltype");
      }
    }
  }

--- a/python/paddle/v2/framework/layers.py
+++ b/python/paddle/v2/framework/layers.py
@@ -351,32 +351,21 @@ def conv2d(input,
    return helper.append_activation(pre_act)


-def sequence_pool(input, pool_type, program=None, init_program=None):
-    # FIXME(dzh) : want to unify the argument of python layer
-    # function. So we ignore some unecessary attributes
-
-    ENUM_POOL_TYPE = dict({
-        "AVERAGE": 0,
-        "SUM": 1,
-        "SQRT": 2,
-        "MAX": 3,
-        "LAST": 4,
-        "FIRST": 5
-    })
+def sequence_pool(input, pool_type, **kwargs):
+    ENUM_POOL_TYPE = set(["MAX", "AVG", "SQRT", "LAST", "FIRST"])
    if pool_type.upper() not in ENUM_POOL_TYPE:
        raise ValueError("Unknown pool_type: '%s'. It can only be %s.",
-                         str(pool_type), " ".join(ENUM_POOL_TYPE.keys()))
+                         str(pool_type), " ".join(ENUM_POOL_TYPE))

-    helper = LayerHelper('sequence_pool', **locals())
+    helper = LayerHelper('sequence_pool', **kwargs)
    dtype = helper.input_dtype()
    pool_out = helper.create_tmp_variable(dtype)

-    # FIXME(dzh): strategy
    helper.append_op(
        type="sequence_pool",
        inputs={"X": [input]},
        outputs={"Out": [pool_out]},
-        attrs={"strategy": ENUM_POOL_TYPE[pool_type.upper()]})
+        attrs={"pooltype": pool_type.upper()})

    return pool_out


--- a/python/paddle/v2/framework/tests/test_seq_pool.py
+++ b/python/paddle/v2/framework/tests/test_seq_pool.py
@@ -3,15 +3,6 @@ import numpy as np
 from op_test import OpTest


-class SeqPoolType(OpTest):
-    AVERAGE = 0
-    SUM = 1
-    SQRT = 2
-    MAX = 3
-    LAST = 4
-    FIRST = 5
-
-
 class TestSeqAvgPool(OpTest):
    def set_data(self):
        self.op_type = 'sequence_pool'
@@ -25,7 +16,7 @@ class TestSeqAvgPool(OpTest):
        return x, lod, out

    def compute(self, x, lod, out):
-        self.attrs = {'strategy': SeqPoolType.AVERAGE}
+        self.attrs = {'pooltype': "AVERAGE"}
        for i in range(4):
            sub_x = x[lod[0][i]:lod[0][i + 1], :]
            out[i] = sub_x.mean(axis=0)
@@ -54,7 +45,7 @@ class TestSeqAvgPool2D(TestSeqAvgPool):
        return x, lod, out

    def compute(self, x, lod, out):
-        self.attrs = {'strategy': SeqPoolType.AVERAGE}
+        self.attrs = {'pooltype': "AVERAGE"}
        for i in range(4):
            sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17))
            out[i] = np.reshape(sub_x.mean(axis=0), (3, 17))
@@ -62,7 +53,7 @@ class TestSeqAvgPool2D(TestSeqAvgPool):

 class TestSeqSumPool(TestSeqAvgPool):
    def compute(self, x, lod, out):
-        self.attrs = {'strategy': SeqPoolType.SUM}
+        self.attrs = {'pooltype': "SUM"}
        for i in range(4):
            sub_x = x[lod[0][i]:lod[0][i + 1], :]
            out[i] = sub_x.sum(axis=0)
@@ -70,7 +61,7 @@ class TestSeqSumPool(TestSeqAvgPool):

 class TestSeqSumPool2D(TestSeqAvgPool2D):
    def compute(self, x, lod, out):
-        self.attrs = {'strategy': SeqPoolType.SUM}
+        self.attrs = {'pooltype': "SUM"}
        for i in range(4):
            sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17))
            out[i] = np.reshape(sub_x.sum(axis=0), (3, 17))
@@ -78,7 +69,7 @@ class TestSeqSumPool2D(TestSeqAvgPool2D):

 class TestSeqSqrtPool(TestSeqAvgPool):
    def compute(self, x, lod, out):
-        self.attrs = {'strategy': SeqPoolType.SQRT}
+        self.attrs = {'pooltype': "SQRT"}
        for i in range(4):
            sub_x = x[lod[0][i]:lod[0][i + 1], :]
            len = lod[0][i + 1] - lod[0][i]
@@ -87,7 +78,7 @@ class TestSeqSqrtPool(TestSeqAvgPool):

 class TestSeqSqrtPool2D(TestSeqAvgPool2D):
    def compute(self, x, lod, out):
-        self.attrs = {'strategy': SeqPoolType.SQRT}
+        self.attrs = {'pooltype': "SQRT"}
        for i in range(4):
            sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17))
            len = lod[0][i + 1] - lod[0][i]
@@ -99,7 +90,7 @@ class TestSeqSqrtPool2D(TestSeqAvgPool2D):

 class TestSeqMaxPool(TestSeqAvgPool):
    def compute(self, x, lod, out):
-        self.attrs = {'strategy': SeqPoolType.MAX}
+        self.attrs = {'pooltype': "MAX"}
        for i in range(4):
            sub_x = x[lod[0][i]:lod[0][i + 1], :]
            out[i] = np.amax(sub_x, axis=0)
@@ -111,7 +102,7 @@ class TestSeqMaxPool(TestSeqAvgPool):

 class TestSeqMaxPool2D(TestSeqAvgPool2D):
    def compute(self, x, lod, out):
-        self.attrs = {'strategy': SeqPoolType.MAX}
+        self.attrs = {'pooltype': "MAX"}
        for i in range(4):
            sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17))
            out[i] = np.reshape(np.amax(sub_x, axis=0), (3, 17))
@@ -123,7 +114,7 @@ class TestSeqMaxPool2D(TestSeqAvgPool2D):

 class TestSeqLastPool(TestSeqAvgPool):
    def compute(self, x, lod, out):
-        self.attrs = {'strategy': SeqPoolType.LAST}
+        self.attrs = {'pooltype': "LAST"}
        for i in range(4):
            sub_x = x[lod[0][i]:lod[0][i + 1], :]
            out[i] = sub_x[-1, :]
@@ -131,7 +122,7 @@ class TestSeqLastPool(TestSeqAvgPool):

 class TestSeqLastPool2D(TestSeqAvgPool2D):
    def compute(self, x, lod, out):
-        self.attrs = {'strategy': SeqPoolType.LAST}
+        self.attrs = {'pooltype': "LAST"}
        for i in range(4):
            sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17))
            out[i] = np.reshape(sub_x[-1, :], (3, 17))
@@ -139,7 +130,7 @@ class TestSeqLastPool2D(TestSeqAvgPool2D):

 class TestSeqFirstPool(TestSeqAvgPool):
    def compute(self, x, lod, out):
-        self.attrs = {'strategy': SeqPoolType.FIRST}
+        self.attrs = {'pooltype': "FIRST"}
        for i in range(4):
            sub_x = x[lod[0][i]:lod[0][i + 1], :]
            out[i] = sub_x[0, :]
@@ -147,7 +138,7 @@ class TestSeqFirstPool(TestSeqAvgPool):

 class TestSeqFirstPool2D(TestSeqAvgPool2D):
    def compute(self, x, lod, out):
-        self.attrs = {'strategy': SeqPoolType.FIRST}
+        self.attrs = {'pooltype': "FIRST"}
        for i in range(4):
            sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17))
            out[i] = np.reshape(sub_x[0, :], (3, 17))