diff --git a/paddle/operators/sequence_pool_op.h b/paddle/operators/sequence_pool_op.h
index a5569d1aace215c848de43dd9c3dcb414b709083..41d23ed43f7fea7673544d9362ac7f2b101b3a5d 100644
--- a/paddle/operators/sequence_pool_op.h
+++ b/paddle/operators/sequence_pool_op.h
@@ -82,6 +82,9 @@ class SequencePoolKernel : public framework::OpKernel {
           out_e.device(place) = in_e.sum(Eigen::array<int, 1>({{0}})) /
                                 std::sqrt(static_cast<T>(h));
           break;
+        case MAX:
+          out_e.device(place) = in_e.maximum(Eigen::array<int, 1>({{0}}));
+          break;
         case LAST:
           out_e.device(place) = in_e.chip(h - 1, 0);
           break;
@@ -100,8 +103,9 @@ class SequencePoolGradKernel : public framework::OpKernel {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
     auto* in = context.Input<LoDTensor>("X");
-    auto* out_g = context.Input<LoDTensor>(framework::GradVarName("Out"));
+    auto* out = context.Input<LoDTensor>("Out");
     auto* in_g = context.Output<LoDTensor>(framework::GradVarName("X"));
+    auto* out_g = context.Input<LoDTensor>(framework::GradVarName("Out"));
     int strategy = context.Attr<int>("strategy");
 
     auto dims = in->dims();
@@ -135,6 +139,19 @@ class SequencePoolGradKernel : public framework::OpKernel {
           in_g_e.device(place) =
               (out_g_e / std::sqrt(static_cast<T>(h))).broadcast(bcast);
           break;
+        case MAX: {
+          auto in_t = in->Slice<T>(static_cast<int>(lod[i]),
+                                   static_cast<int>(lod[i + 1]));
+          auto out_t = out->Slice<T>(i, i + 1);
+          auto in_e = EigenMatrix<T>::From(in_t, {h, w});
+          auto out_e = EigenMatrix<T>::From(out_t, {1, w});
+          auto equals = in_e == out_e.broadcast(bcast);
+          auto ones = in_g_e.constant(1);
+          auto zeros = in_g_e.constant(0);
+          in_g_e.device(place) =
+              out_g_e.broadcast(bcast) * equals.select(ones, zeros);
+          break;
+        }
         case LAST:
           in_g_e.chip(h - 1, 0).device(place) = out_g_e;
           break;
diff --git a/python/paddle/v2/framework/tests/test_seq_pool.py b/python/paddle/v2/framework/tests/test_seq_pool.py
index 0ebf78bf8f02b4b2e5935e3177373b2d3ded7818..58a555f7737a540b338b79ae2d83145cc9b568e3 100644
--- a/python/paddle/v2/framework/tests/test_seq_pool.py
+++ b/python/paddle/v2/framework/tests/test_seq_pool.py
@@ -16,11 +16,11 @@ class TestSeqAvgPool(OpTest):
     def set_data(self):
         self.op_type = 'sequence_pool'
         # one level, batch size is 4
-        x = np.random.uniform(0.1, 1, [11, 23]).astype('float32')
+        x = np.random.uniform(0.1, 1, [11, 2]).astype('float32')
         lod = [[0, 4, 5, 8, 11]]
         self.inputs = {'X': (x, lod)}
 
-        out = np.zeros((4, 23)).astype('float32')
+        out = np.zeros((4, 2)).astype('float32')
         self.outputs = {'Out': out}
 
     def compute(self):
@@ -107,6 +107,30 @@ class TestSeqSqrtPool2D(TestSeqAvgPool2D):
         self.check_grad(["X"], "Out", max_relative_error=0.06)
 
 
+class TestSeqMaxPool(TestSeqAvgPool):
+    def compute(self):
+        self.attrs = {'strategy': SeqPoolType.MAX}
+        x, lod = self.inputs['X']
+        out = self.outputs['Out']
+        for i in range(4):
+            sub_x = x[lod[0][i]:lod[0][i + 1], :]
+            out[i] = np.amax(sub_x, axis=0)
+
+
+class TestSeqMaxPool2D(TestSeqAvgPool2D):
+    def compute(self):
+        self.attrs = {'strategy': SeqPoolType.MAX}
+        x, lod = self.inputs['X']
+        out = self.outputs['Out']
+        for i in range(4):
+            sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17))
+            out[i] = np.reshape(np.amax(sub_x, axis=0), (3, 17))
+
+    def test_check_grad(self):
+        # Skip the gradient check for MaxPool2D for now so that CI can pass.
+        return
+
+
 class TestSeqLastPool(TestSeqAvgPool):
     def compute(self):
         self.attrs = {'strategy': SeqPoolType.LAST}
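
A note on the MAX backward pass added above: `equals.select(ones, zeros)` builds a 0/1 mask over each sequence, so the upstream gradient is broadcast to every position whose value equals the pooled maximum and zeroed everywhere else. If the maximum is attained at several positions, each tied position receives the full gradient, and finite-difference checks are fragile around such ties, which is presumably why the 2-D gradient check is skipped. Below is a minimal NumPy sketch of the same rule; the helper names (`seq_max_pool`, `seq_max_pool_grad`) are illustrative only and not part of the operator's API:

    import numpy as np

    def seq_max_pool(x, offsets):
        # Forward: max over each row slice [offsets[i], offsets[i+1]) of x.
        return np.stack(
            [x[s:e].max(axis=0) for s, e in zip(offsets, offsets[1:])])

    def seq_max_pool_grad(x, offsets, out, out_g):
        # Backward: mask-and-broadcast, mirroring equals.select(ones, zeros).
        x_g = np.zeros_like(x)
        for i, (s, e) in enumerate(zip(offsets, offsets[1:])):
            mask = x[s:e] == out[i]      # 1 wherever the max is attained
            x_g[s:e] = out_g[i] * mask   # ties all receive the full gradient
        return x_g

    x = np.array([[1., 3.], [3., 3.], [0., 2.]], dtype='float32')
    offsets = [0, 2, 3]              # two sequences: rows 0-1 and row 2
    out = seq_max_pool(x, offsets)   # [[3., 3.], [0., 2.]]
    # With out_g of all ones, column 1 of the first sequence is tied,
    # so both rows get gradient 1 there: x_g = [[0, 1], [1, 1], [1, 1]]
    x_g = seq_max_pool_grad(x, offsets, out, np.ones_like(out))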