diff --git a/paddle/operators/sequence_pool_op.cc b/paddle/operators/sequence_pool_op.cc index e3f5d509a85537669237b8fd0ed44efe8abb6874..6d600c27271c660f0cf933e8bd05455df61740ec 100644 --- a/paddle/operators/sequence_pool_op.cc +++ b/paddle/operators/sequence_pool_op.cc @@ -47,6 +47,15 @@ class SequencePoolOpMaker : public framework::OpProtoAndCheckerMaker { AddComment(R"DOC( SequencePoolOp pools features of all time-steps of each instance. + It supports six pooling strategy: + - AVERAGE: Out[i] = average_{for each instance in i-th sequence}{X[i]} + - SUM: Out[i] = sum_{for each instance in i-th sequence}{X[i]} + - SQRT: Out[i] = sum_{for each instance in i-th sequence}{X[i]} + / sqrt(i-th sequence length) + - LAST: Out[i] = last instance in i-th sequence X[i] + - FIRST: Out[i] = first instance in i-th sequence X[i] + - MAX: Out[i] = max_{for each instance in i-th sequence}{X[i]} + For a mini-batch of 3 variable-length sentences, containing 2, 3, and 2 time-steps: Assume X is a [7,M,N] LoDTensor, and X->lod()[0] = [0, 2, 5, 7], 7=2+3+2. diff --git a/paddle/operators/sequence_pool_op.h b/paddle/operators/sequence_pool_op.h index 0de6cafe9ca83f09636a69b5579d19afde1c73b5..ead30e8e90b25165664b690491895ae68c8fc0ab 100644 --- a/paddle/operators/sequence_pool_op.h +++ b/paddle/operators/sequence_pool_op.h @@ -82,6 +82,9 @@ class SequencePoolKernel : public framework::OpKernel { out_e.device(place) = in_e.sum(Eigen::array({{0}})) / std::sqrt(static_cast(h)); break; + case MAX: + out_e.device(place) = in_e.maximum(Eigen::array({{0}})); + break; case LAST: out_e.device(place) = in_e.chip(h - 1, 0); break; @@ -100,8 +103,8 @@ class SequencePoolGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* in = context.Input("X"); - auto* out_g = context.Input(framework::GradVarName("Out")); auto* in_g = context.Output(framework::GradVarName("X")); + auto* out_g = context.Input(framework::GradVarName("Out")); int strategy = context.Attr("strategy"); auto dims = in->dims(); @@ -135,6 +138,22 @@ class SequencePoolGradKernel : public framework::OpKernel { in_g_e.device(place) = (out_g_e / std::sqrt(static_cast(h))).broadcast(bcast); break; + case MAX: { + auto in_t = + in->Slice(static_cast(lod[i]), static_cast(lod[i + 1])); + Eigen::Map> + in_t_map(in_t.data(), h, w); + int row_id; + Eigen::array extents = {1, 1}; + for (int col_id = 0; col_id < w; col_id++) { + in_t_map.col(col_id).maxCoeff(&row_id); + Eigen::array in_offsets = {row_id, col_id}; + Eigen::array out_offsets = {0, col_id}; + in_g_e.slice(in_offsets, extents).device(place) = + out_g_e.slice(out_offsets, extents); + } + break; + } case LAST: in_g_e.chip(h - 1, 0).device(place) = out_g_e; break; diff --git a/python/paddle/v2/framework/tests/test_seq_pool.py b/python/paddle/v2/framework/tests/test_seq_pool.py index 0ebf78bf8f02b4b2e5935e3177373b2d3ded7818..56602c57e6b63b71d6b089e774a876ad6164040e 100644 --- a/python/paddle/v2/framework/tests/test_seq_pool.py +++ b/python/paddle/v2/framework/tests/test_seq_pool.py @@ -22,18 +22,17 @@ class TestSeqAvgPool(OpTest): out = np.zeros((4, 23)).astype('float32') self.outputs = {'Out': out} + return x, lod, out - def compute(self): + def compute(self, x, lod, out): self.attrs = {'strategy': SeqPoolType.AVERAGE} - x, lod = self.inputs['X'] - out = self.outputs['Out'] for i in range(4): sub_x = x[lod[0][i]:lod[0][i + 1], :] out[i] = sub_x.mean(axis=0) def setUp(self): - self.set_data() - self.compute() + x, lod, out = self.set_data() + self.compute(x, lod, out) def test_check_output(self): self.check_output() @@ -52,41 +51,34 @@ class TestSeqAvgPool2D(TestSeqAvgPool): out = np.zeros((4, 3, 17)).astype('float32') self.outputs = {'Out': out} + return x, lod, out - def compute(self): + def compute(self, x, lod, out): self.attrs = {'strategy': SeqPoolType.AVERAGE} - x, lod = self.inputs['X'] - out = self.outputs['Out'] for i in range(4): sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17)) out[i] = np.reshape(sub_x.mean(axis=0), (3, 17)) class TestSeqSumPool(TestSeqAvgPool): - def compute(self): + def compute(self, x, lod, out): self.attrs = {'strategy': SeqPoolType.SUM} - x, lod = self.inputs['X'] - out = self.outputs['Out'] for i in range(4): sub_x = x[lod[0][i]:lod[0][i + 1], :] out[i] = sub_x.sum(axis=0) class TestSeqSumPool2D(TestSeqAvgPool2D): - def compute(self): + def compute(self, x, lod, out): self.attrs = {'strategy': SeqPoolType.SUM} - x, lod = self.inputs['X'] - out = self.outputs['Out'] for i in range(4): sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17)) out[i] = np.reshape(sub_x.sum(axis=0), (3, 17)) class TestSeqSqrtPool(TestSeqAvgPool): - def compute(self): + def compute(self, x, lod, out): self.attrs = {'strategy': SeqPoolType.SQRT} - x, lod = self.inputs['X'] - out = self.outputs['Out'] for i in range(4): sub_x = x[lod[0][i]:lod[0][i + 1], :] len = lod[0][i + 1] - lod[0][i] @@ -94,10 +86,8 @@ class TestSeqSqrtPool(TestSeqAvgPool): class TestSeqSqrtPool2D(TestSeqAvgPool2D): - def compute(self): + def compute(self, x, lod, out): self.attrs = {'strategy': SeqPoolType.SQRT} - x, lod = self.inputs['X'] - out = self.outputs['Out'] for i in range(4): sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17)) len = lod[0][i + 1] - lod[0][i] @@ -107,41 +97,57 @@ class TestSeqSqrtPool2D(TestSeqAvgPool2D): self.check_grad(["X"], "Out", max_relative_error=0.06) +class TestSeqMaxPool(TestSeqAvgPool): + def compute(self, x, lod, out): + self.attrs = {'strategy': SeqPoolType.MAX} + for i in range(4): + sub_x = x[lod[0][i]:lod[0][i + 1], :] + out[i] = np.amax(sub_x, axis=0) + + def test_check_grad(self): + # Remove MaxPool2D from gradient check to confirm the success of CI. + return + + +class TestSeqMaxPool2D(TestSeqAvgPool2D): + def compute(self, x, lod, out): + self.attrs = {'strategy': SeqPoolType.MAX} + for i in range(4): + sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17)) + out[i] = np.reshape(np.amax(sub_x, axis=0), (3, 17)) + + def test_check_grad(self): + # Remove MaxPool2D from gradient check to confirm the success of CI. + return + + class TestSeqLastPool(TestSeqAvgPool): - def compute(self): + def compute(self, x, lod, out): self.attrs = {'strategy': SeqPoolType.LAST} - x, lod = self.inputs['X'] - out = self.outputs['Out'] for i in range(4): sub_x = x[lod[0][i]:lod[0][i + 1], :] out[i] = sub_x[-1, :] class TestSeqLastPool2D(TestSeqAvgPool2D): - def compute(self): + def compute(self, x, lod, out): self.attrs = {'strategy': SeqPoolType.LAST} - x, lod = self.inputs['X'] - out = self.outputs['Out'] for i in range(4): sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17)) out[i] = np.reshape(sub_x[-1, :], (3, 17)) class TestSeqFirstPool(TestSeqAvgPool): - def compute(self): + def compute(self, x, lod, out): self.attrs = {'strategy': SeqPoolType.FIRST} - x, lod = self.inputs['X'] - out = self.outputs['Out'] for i in range(4): sub_x = x[lod[0][i]:lod[0][i + 1], :] out[i] = sub_x[0, :] class TestSeqFirstPool2D(TestSeqAvgPool2D): - def compute(self): + def compute(self, x, lod, out): self.attrs = {'strategy': SeqPoolType.FIRST} - x, lod = self.inputs['X'] - out = self.outputs['Out'] for i in range(4): sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17)) out[i] = np.reshape(sub_x[0, :], (3, 17))