From 26492210c02a32cfdb229a4b02ef606335a52ca8 Mon Sep 17 00:00:00 2001
From: dzhwinter
Date: Tue, 31 Oct 2017 16:59:37 -0700
Subject: [PATCH] Fix/sequence op (#5264)

* "replace enum with string"
* "fix layers"
---
 paddle/operators/sequence_pool_op.cc          |  13 +-
 paddle/operators/sequence_pool_op.h           | 114 +++++++-----------
 python/paddle/v2/framework/layers.py          |  21 +---
 .../v2/framework/tests/test_seq_pool.py       |  33 ++---
 4 files changed, 68 insertions(+), 113 deletions(-)

diff --git a/paddle/operators/sequence_pool_op.cc b/paddle/operators/sequence_pool_op.cc
index 6d600c2727..29d19df108 100644
--- a/paddle/operators/sequence_pool_op.cc
+++ b/paddle/operators/sequence_pool_op.cc
@@ -39,15 +39,14 @@ class SequencePoolOpMaker : public framework::OpProtoAndCheckerMaker {
     AddOutput("Out",
               "(Tensor), output of SequencePoolOp, which does not contain LoD "
               "information.");
-    AddAttr<int>(
-        "strategy",
-        "(int, default AVERAGE) the pooling strategy of SequencePoolOp.")
-        .SetDefault(AVERAGE)
-        .InEnum({AVERAGE, SUM, SQRT, MAX, LAST, FIRST});
+    AddAttr<std::string>(
+        "pooltype",
+        "(string, default 'AVERAGE') the pooling type of SequencePoolOp.")
+        .SetDefault("AVERAGE");
     AddComment(R"DOC(
     SequencePoolOp pools features of all time-steps of each instance.

-    It supports six pooling strategy:
+    It supports six pooling types:
     - AVERAGE: Out[i] = average_{for each instance in i-th sequence}{X[i]}
     - SUM: Out[i] = sum_{for each instance in i-th sequence}{X[i]}
     - SQRT: Out[i] = sum_{for each instance in i-th sequence}{X[i]}
@@ -63,7 +62,7 @@ class SequencePoolOpMaker : public framework::OpProtoAndCheckerMaker {
     and the value of X = [[1, 3], [2, 4, 6], [5, 1]].
     Thus, Out is a [3,1,1] Tensor without LoD information.
-    And for different strategy, the value of Out is as follows:
+    For different pooltype values, Out is as follows:

     - AVERAGE: [2, 4, 3], where 2=(1+3)/2, 4=(2+4+6)/3, 3=(5+1)/2
     - SUM: [4, 12, 6], where 4=1+3, 12=2+4+6, 6=5+1
diff --git a/paddle/operators/sequence_pool_op.h b/paddle/operators/sequence_pool_op.h
index 07bf61df45..e0e0493fe0 100644
--- a/paddle/operators/sequence_pool_op.h
+++ b/paddle/operators/sequence_pool_op.h
@@ -29,22 +29,13 @@ template <typename T, int MajorType = Eigen::RowMajor,
 using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;

-enum SeqPoolType {
-  AVERAGE = 0,
-  SUM = 1,
-  SQRT = 2,  // square_root_n
-  MAX = 3,
-  LAST = 4,
-  FIRST = 5
-};
-
 template <typename Place, typename T>
 class SequencePoolKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
     auto* in = context.Input<LoDTensor>("X");
     auto* out = context.Output<LoDTensor>("Out");
-    int strategy = context.Attr<int>("strategy");
+    std::string pooltype = context.Attr<std::string>("pooltype");

     auto dims = in->dims();
     auto lod = in->lod();
@@ -71,28 +62,21 @@ class SequencePoolKernel : public framework::OpKernel<T> {
       auto in_e = EigenMatrix<T>::From(in_t, framework::make_ddim({h, w}));
       auto out_e = EigenVector<T>::Flatten(out_t);

-      switch (strategy) {
-        case AVERAGE:
-          out_e.device(place) = in_e.mean(Eigen::array<int, 1>({{0}}));
-          break;
-        case SUM:
-          out_e.device(place) = in_e.sum(Eigen::array<int, 1>({{0}}));
-          break;
-        case SQRT:
-          out_e.device(place) = in_e.sum(Eigen::array<int, 1>({{0}})) /
-                                std::sqrt(static_cast<T>(h));
-          break;
-        case MAX:
-          out_e.device(place) = in_e.maximum(Eigen::array<int, 1>({{0}}));
-          break;
-        case LAST:
-          out_e.device(place) = in_e.chip(h - 1, 0);
-          break;
-        case FIRST:
-          out_e.device(place) = in_e.chip(0, 0);
-          break;
-        default:
-          PADDLE_THROW("unsupported pooling strategy");
+      if (pooltype == "AVERAGE") {
+        out_e.device(place) = in_e.mean(Eigen::array<int, 1>({{0}}));
+      } else if (pooltype == "SUM") {
+        out_e.device(place) = in_e.sum(Eigen::array<int, 1>({{0}}));
+      } else if (pooltype == "SQRT") {
+        out_e.device(place) = in_e.sum(Eigen::array<int, 1>({{0}})) /
+                              std::sqrt(static_cast<T>(h));
+      } else if (pooltype == "MAX") {
+        out_e.device(place) = in_e.maximum(Eigen::array<int, 1>({{0}}));
+      } else if (pooltype == "LAST") {
+        out_e.device(place) = in_e.chip(h - 1, 0);
+      } else if (pooltype == "FIRST") {
+        out_e.device(place) = in_e.chip(0, 0);
+      } else {
+        PADDLE_THROW("unsupported pooling pooltype");
       }
     }
   }
@@ -105,15 +89,15 @@ class SequencePoolGradKernel : public framework::OpKernel<T> {
     auto* in = context.Input<LoDTensor>("X");
     auto* in_g = context.Output<LoDTensor>(framework::GradVarName("X"));
     auto* out_g = context.Input<LoDTensor>(framework::GradVarName("Out"));
-    int strategy = context.Attr<int>("strategy");
+    std::string pooltype = context.Attr<std::string>("pooltype");

     auto dims = in->dims();
     auto lod = in->lod()[0];
     int64_t w = in->numel() / dims[0];

     in_g->mutable_data<T>(context.GetPlace());
-    if (strategy == LAST || strategy == FIRST) {
-      // set X@Grad be zero at first when strategy is LAST/FIRST
+    if (pooltype == "LAST" || pooltype == "FIRST") {
+      // set X@Grad to zero first when pooltype is LAST/FIRST
       math::SetConstant<Place, T> functor;
       functor(context.device_context(), in_g, 0);
     }
@@ -127,41 +111,33 @@ class SequencePoolGradKernel : public framework::OpKernel<T> {
       auto out_g_e = EigenMatrix<T>::From(out_g_t, {1, w});
       Eigen::DSizes<int, 2> bcast(h, 1);

-      switch (strategy) {
-        case AVERAGE:
-          in_g_e.device(place) = (out_g_e / static_cast<T>(h)).broadcast(bcast);
-          break;
-        case SUM:
-          in_g_e.device(place) = (out_g_e).broadcast(bcast);
-          break;
-        case SQRT:
-          in_g_e.device(place) =
-              (out_g_e / std::sqrt(static_cast<T>(h))).broadcast(bcast);
-          break;
-        case MAX: {
-          auto in_t =
-              in->Slice(static_cast<int>(lod[i]), static_cast<int>(lod[i + 1]));
-          Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
-              in_t_map(in_t.data<T>(), h, w);
-          int row_id;
-          Eigen::array<int, 2> extents{{1, 1}};
-          for (int col_id = 0; col_id < w; col_id++) {
-            in_t_map.col(col_id).maxCoeff(&row_id);
-            Eigen::array<int, 2> in_offsets{{row_id, col_id}};
-            Eigen::array<int, 2> out_offsets{{0, col_id}};
-            in_g_e.slice(in_offsets, extents).device(place) =
-                out_g_e.slice(out_offsets, extents);
-          }
-          break;
+      if (pooltype == "AVERAGE") {
+        in_g_e.device(place) = (out_g_e / static_cast<T>(h)).broadcast(bcast);
+      } else if (pooltype == "SUM") {
+        in_g_e.device(place) = (out_g_e).broadcast(bcast);
+      } else if (pooltype == "SQRT") {
+        in_g_e.device(place) =
+            (out_g_e / std::sqrt(static_cast<T>(h))).broadcast(bcast);
+      } else if (pooltype == "MAX") {
+        auto in_t =
+            in->Slice(static_cast<int>(lod[i]), static_cast<int>(lod[i + 1]));
+        Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
+            in_t_map(in_t.data<T>(), h, w);
+        int row_id;
+        Eigen::array<int, 2> extents{{1, 1}};
+        for (int col_id = 0; col_id < w; col_id++) {
+          in_t_map.col(col_id).maxCoeff(&row_id);
+          Eigen::array<int, 2> in_offsets{{row_id, col_id}};
+          Eigen::array<int, 2> out_offsets{{0, col_id}};
+          in_g_e.slice(in_offsets, extents).device(place) =
+              out_g_e.slice(out_offsets, extents);
         }
-        case LAST:
-          in_g_e.chip(h - 1, 0).device(place) = out_g_e;
-          break;
-        case FIRST:
-          in_g_e.chip(0, 0).device(place) = out_g_e;
-          break;
-        default:
-          PADDLE_THROW("unsupported pooling strategy");
+      } else if (pooltype == "LAST") {
+        in_g_e.chip(h - 1, 0).device(place) = out_g_e;
+      } else if (pooltype == "FIRST") {
+        in_g_e.chip(0, 0).device(place) = out_g_e;
+      } else {
+        PADDLE_THROW("unsupported pooling pooltype");
       }
     }
   }
diff --git a/python/paddle/v2/framework/layers.py b/python/paddle/v2/framework/layers.py
index dab72f0195..86a2c7bf08 100644
--- a/python/paddle/v2/framework/layers.py
+++ b/python/paddle/v2/framework/layers.py
@@ -351,32 +351,21 @@ def conv2d(input,
     return helper.append_activation(pre_act)


-def sequence_pool(input, pool_type, program=None, init_program=None):
-    # FIXME(dzh) : want to unify the argument of python layer
-    # function. So we ignore some unecessary attributes
-
-    ENUM_POOL_TYPE = dict({
-        "AVERAGE": 0,
-        "SUM": 1,
-        "SQRT": 2,
-        "MAX": 3,
-        "LAST": 4,
-        "FIRST": 5
-    })
+def sequence_pool(input, pool_type, **kwargs):
+    ENUM_POOL_TYPE = set(["AVERAGE", "SUM", "SQRT", "MAX", "LAST", "FIRST"])
     if pool_type.upper() not in ENUM_POOL_TYPE:
         raise ValueError("Unknown pool_type: '%s'. It can only be %s.",
-                         str(pool_type), " ".join(ENUM_POOL_TYPE.keys()))
+                         str(pool_type), " ".join(ENUM_POOL_TYPE))

-    helper = LayerHelper('sequence_pool', **locals())
+    helper = LayerHelper('sequence_pool', **kwargs)
     dtype = helper.input_dtype()
     pool_out = helper.create_tmp_variable(dtype)

-    # FIXME(dzh): strategy
     helper.append_op(
         type="sequence_pool",
         inputs={"X": [input]},
         outputs={"Out": [pool_out]},
-        attrs={"strategy": ENUM_POOL_TYPE[pool_type.upper()]})
+        attrs={"pooltype": pool_type.upper()})
     return pool_out

diff --git a/python/paddle/v2/framework/tests/test_seq_pool.py b/python/paddle/v2/framework/tests/test_seq_pool.py
index 56602c57e6..efc4920124 100644
--- a/python/paddle/v2/framework/tests/test_seq_pool.py
+++ b/python/paddle/v2/framework/tests/test_seq_pool.py
@@ -3,15 +3,6 @@ import numpy as np
 from op_test import OpTest


-class SeqPoolType(OpTest):
-    AVERAGE = 0
-    SUM = 1
-    SQRT = 2
-    MAX = 3
-    LAST = 4
-    FIRST = 5
-
-
 class TestSeqAvgPool(OpTest):
     def set_data(self):
         self.op_type = 'sequence_pool'
@@ -25,7 +16,7 @@ class TestSeqAvgPool(OpTest):
         return x, lod, out

     def compute(self, x, lod, out):
-        self.attrs = {'strategy': SeqPoolType.AVERAGE}
+        self.attrs = {'pooltype': "AVERAGE"}
         for i in range(4):
             sub_x = x[lod[0][i]:lod[0][i + 1], :]
             out[i] = sub_x.mean(axis=0)
@@ -54,7 +45,7 @@ class TestSeqAvgPool2D(TestSeqAvgPool):
         return x, lod, out

     def compute(self, x, lod, out):
-        self.attrs = {'strategy': SeqPoolType.AVERAGE}
+        self.attrs = {'pooltype': "AVERAGE"}
         for i in range(4):
             sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17))
             out[i] = np.reshape(sub_x.mean(axis=0), (3, 17))
@@ -62,7 +53,7 @@ class TestSeqAvgPool2D(TestSeqAvgPool):

 class TestSeqSumPool(TestSeqAvgPool):
     def compute(self, x, lod, out):
-        self.attrs = {'strategy': SeqPoolType.SUM}
+        self.attrs = {'pooltype': "SUM"}
         for i in range(4):
             sub_x = x[lod[0][i]:lod[0][i + 1], :]
             out[i] = sub_x.sum(axis=0)
@@ -70,7 +61,7 @@ class TestSeqSumPool(TestSeqAvgPool):

 class TestSeqSumPool2D(TestSeqAvgPool2D):
     def compute(self, x, lod, out):
-        self.attrs = {'strategy': SeqPoolType.SUM}
+        self.attrs = {'pooltype': "SUM"}
         for i in range(4):
             sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17))
             out[i] = np.reshape(sub_x.sum(axis=0), (3, 17))
@@ -78,7 +69,7 @@ class TestSeqSumPool2D(TestSeqAvgPool2D):

 class TestSeqSqrtPool(TestSeqAvgPool):
     def compute(self, x, lod, out):
-        self.attrs = {'strategy': SeqPoolType.SQRT}
+        self.attrs = {'pooltype': "SQRT"}
         for i in range(4):
             sub_x = x[lod[0][i]:lod[0][i + 1], :]
             len = lod[0][i + 1] - lod[0][i]
@@ -87,7 +78,7 @@ class TestSeqSqrtPool(TestSeqAvgPool):

 class TestSeqSqrtPool2D(TestSeqAvgPool2D):
     def compute(self, x, lod, out):
-        self.attrs = {'strategy': SeqPoolType.SQRT}
+        self.attrs = {'pooltype': "SQRT"}
         for i in range(4):
             sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17))
             len = lod[0][i + 1] - lod[0][i]
@@ -99,7 +90,7 @@ class TestSeqSqrtPool2D(TestSeqAvgPool2D):

 class TestSeqMaxPool(TestSeqAvgPool):
     def compute(self, x, lod, out):
-        self.attrs = {'strategy': SeqPoolType.MAX}
+        self.attrs = {'pooltype': "MAX"}
         for i in range(4):
             sub_x = x[lod[0][i]:lod[0][i + 1], :]
             out[i] = np.amax(sub_x, axis=0)
@@ -111,7 +102,7 @@ class TestSeqMaxPool(TestSeqAvgPool):

 class TestSeqMaxPool2D(TestSeqAvgPool2D):
     def compute(self, x, lod, out):
-        self.attrs = {'strategy': SeqPoolType.MAX}
+        self.attrs = {'pooltype': "MAX"}
         for i in range(4):
             sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17))
             out[i] = np.reshape(np.amax(sub_x, axis=0), (3, 17))
@@ -123,7 +114,7 @@ class TestSeqMaxPool2D(TestSeqAvgPool2D):

 class TestSeqLastPool(TestSeqAvgPool):
     def compute(self, x, lod, out):
-        self.attrs = {'strategy': SeqPoolType.LAST}
+        self.attrs = {'pooltype': "LAST"}
         for i in range(4):
             sub_x = x[lod[0][i]:lod[0][i + 1], :]
             out[i] = sub_x[-1, :]
@@ -131,7 +122,7 @@ class TestSeqLastPool(TestSeqAvgPool):

 class TestSeqLastPool2D(TestSeqAvgPool2D):
     def compute(self, x, lod, out):
-        self.attrs = {'strategy': SeqPoolType.LAST}
+        self.attrs = {'pooltype': "LAST"}
         for i in range(4):
             sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17))
             out[i] = np.reshape(sub_x[-1, :], (3, 17))
@@ -139,7 +130,7 @@ class TestSeqLastPool2D(TestSeqAvgPool2D):

 class TestSeqFirstPool(TestSeqAvgPool):
     def compute(self, x, lod, out):
-        self.attrs = {'strategy': SeqPoolType.FIRST}
+        self.attrs = {'pooltype': "FIRST"}
         for i in range(4):
             sub_x = x[lod[0][i]:lod[0][i + 1], :]
             out[i] = sub_x[0, :]
@@ -147,7 +138,7 @@ class TestSeqFirstPool(TestSeqAvgPool):

 class TestSeqFirstPool2D(TestSeqAvgPool2D):
     def compute(self, x, lod, out):
-        self.attrs = {'strategy': SeqPoolType.FIRST}
+        self.attrs = {'pooltype': "FIRST"}
         for i in range(4):
             sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17))
             out[i] = np.reshape(sub_x[0, :], (3, 17))
--
GitLab
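
For reference, a minimal usage sketch of the reworked Python layer interface follows. It is not part of the patch; the layers.data call, including its shape and data_type arguments, is an assumption about the surrounding v2 framework API at this revision and may need adjusting.

    # Hypothetical usage sketch, not part of the patch above.
    # Assumes paddle.v2.framework.layers exposes a data() layer taking
    # (name, shape, data_type); adjust to the actual API if it differs.
    import paddle.v2.framework.layers as layers

    # A variable-length (LoD) sequence input; shape and dtype are illustrative.
    seq = layers.data(name='seq', shape=[23], data_type='float32')

    # sequence_pool now forwards the pooling type as the string attribute
    # "pooltype", so AVERAGE, SUM, SQRT, MAX, LAST and FIRST are all accepted,
    # case-insensitively.
    avg_pooled = layers.sequence_pool(input=seq, pool_type='average')
    last_step = layers.sequence_pool(input=seq, pool_type='LAST')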