From 458b16f42a03fd68af4da05bb93fbc6bf2a75f9e Mon Sep 17 00:00:00 2001 From: Jacek Czaja Date: Tue, 23 Oct 2018 11:41:19 +0200 Subject: [PATCH] Rebase of seqpool-max optimization test=develop - Added rough profiling - Profiled maxpool itself - First draft of max seqpool optimization (is_test added) - Added unit tests to seqpool - Cosmetic fixes - Fix to UT of Seq pool Disabled grad checking for sequence max pool when is_test is set to True -Cosmetic fix to comment test=develop - Fix to GPU build test=develop - yet another GPU fix for sequence max pool - Fix to comment test=develop - Change to API of sequence_pool test=develop - Yet another API spec change test=develop --- paddle/fluid/API.spec | 2 +- .../fluid/operators/math/sequence_pooling.cc | 48 +++++++++++++++++-- .../fluid/operators/math/sequence_pooling.cu | 2 +- .../fluid/operators/math/sequence_pooling.h | 2 +- paddle/fluid/operators/sequence_pool_op.cc | 1 + paddle/fluid/operators/sequence_pool_op.h | 17 ++++--- python/paddle/fluid/layers/nn.py | 6 ++- .../fluid/tests/unittests/test_seq_pool.py | 14 ++++++ 8 files changed, 77 insertions(+), 15 deletions(-) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 2b8b82e74..e0707fdc3 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -64,7 +64,7 @@ paddle.fluid.layers.chunk_eval ArgSpec(args=['input', 'label', 'chunk_scheme', ' paddle.fluid.layers.sequence_conv ArgSpec(args=['input', 'num_filters', 'filter_size', 'filter_stride', 'padding', 'bias_attr', 'param_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(3, 1, None, None, None, None, None)) paddle.fluid.layers.conv2d ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None)) paddle.fluid.layers.conv3d ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 
'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None)) -paddle.fluid.layers.sequence_pool ArgSpec(args=['input', 'pool_type'], varargs=None, keywords=None, defaults=None) +paddle.fluid.layers.sequence_pool ArgSpec(args=['input', 'pool_type', 'is_test'], varargs=None, keywords=None, defaults=(False,)) paddle.fluid.layers.sequence_softmax ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(False, None)) paddle.fluid.layers.softmax ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(True, None)) paddle.fluid.layers.pool2d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None)) diff --git a/paddle/fluid/operators/math/sequence_pooling.cc b/paddle/fluid/operators/math/sequence_pooling.cc index 7be8539a7..6d491dbf1 100644 --- a/paddle/fluid/operators/math/sequence_pooling.cc +++ b/paddle/fluid/operators/math/sequence_pooling.cc @@ -31,7 +31,7 @@ template using EigenMatrix = framework::EigenMatrix; -template +template class MaxSeqPoolFunctor { public: void operator()(const platform::CPUDeviceContext& context, @@ -70,7 +70,41 @@ class MaxSeqPoolFunctor { } } }; +// Specialization of Max Sequence Pooling for test phase i.e. 
no need to fill +// index buffer +template +class MaxSeqPoolFunctor { + public: + void operator()(const platform::CPUDeviceContext& context, + const framework::LoDTensor& input, framework::Tensor* output, + framework::Tensor* index) { + auto in_dims = input.dims(); + auto out_dims = output->dims(); + PADDLE_ENFORCE_GT(in_dims.size(), 1); + PADDLE_ENFORCE_GT(out_dims.size(), 1); + for (int64_t i = 1; i < in_dims.size(); ++i) { + PADDLE_ENFORCE_EQ(in_dims[i], out_dims[i]); + } + + auto starts = input.lod()[0]; + const T* in_data = input.data(); + T* out_data = output->data(); + int64_t num_seq = out_dims[0]; + int64_t dim = output->numel() / num_seq; + for (int64_t i = 0; i < num_seq; ++i) { + std::memcpy(&out_data[i * dim], &in_data[starts[i] * dim], + dim * sizeof(T)); + for (size_t j = starts[i] + 1; j < starts[i + 1]; ++j) { + for (int64_t k = 0; k < dim; ++k) { + if (in_data[j * dim + k] > out_data[i * dim + k]) { + out_data[i * dim + k] = in_data[j * dim + k]; + } + } + } + } + } +}; template class MaxSeqPoolGradFunctor { public: @@ -188,11 +222,16 @@ class SequencePoolFunctor { /* max pool has index output */ void operator()(const platform::CPUDeviceContext& context, const std::string pooltype, const framework::LoDTensor& input, - framework::Tensor* output, + framework::Tensor* output, bool is_test, framework::Tensor* index = nullptr) { if (pooltype == "MAX") { - math::MaxSeqPoolFunctor max_pool; - max_pool(context, input, output, index); + if (is_test) { + math::MaxSeqPoolFunctor max_pool; + max_pool(context, input, output, index); + } else { + math::MaxSeqPoolFunctor max_pool; + max_pool(context, input, output, index); + } return; } if (pooltype == "LAST") { @@ -200,6 +239,7 @@ class SequencePoolFunctor { last_pool(context, input, output); return; } + if (pooltype == "FIRST") { math::FirstSeqPoolFunctor first_pool; first_pool(context, input, output); diff --git a/paddle/fluid/operators/math/sequence_pooling.cu 
b/paddle/fluid/operators/math/sequence_pooling.cu index a92aef805..0015fafbc 100644 --- a/paddle/fluid/operators/math/sequence_pooling.cu +++ b/paddle/fluid/operators/math/sequence_pooling.cu @@ -133,7 +133,7 @@ class SequencePoolFunctor { public: void operator()(const platform::CUDADeviceContext& context, const std::string pooltype, const framework::LoDTensor& input, - framework::Tensor* output, + framework::Tensor* output, bool is_test, framework::Tensor* index = nullptr) { auto& lod = input.lod()[0]; const size_t item_dim = output->numel() / output->dims()[0]; diff --git a/paddle/fluid/operators/math/sequence_pooling.h b/paddle/fluid/operators/math/sequence_pooling.h index 8dcbee65d..a1046ea21 100644 --- a/paddle/fluid/operators/math/sequence_pooling.h +++ b/paddle/fluid/operators/math/sequence_pooling.h @@ -28,7 +28,7 @@ class SequencePoolFunctor { /* max pool has index output */ void operator()(const DeviceContext& context, const std::string pooltype, const framework::LoDTensor& input, framework::Tensor* output, - framework::Tensor* index = nullptr); + bool is_test = false, framework::Tensor* index = nullptr); }; template diff --git a/paddle/fluid/operators/sequence_pool_op.cc b/paddle/fluid/operators/sequence_pool_op.cc index 15d3f064e..217bb1610 100644 --- a/paddle/fluid/operators/sequence_pool_op.cc +++ b/paddle/fluid/operators/sequence_pool_op.cc @@ -47,6 +47,7 @@ class SequencePoolOpMaker : public framework::OpProtoAndCheckerMaker { "(Tensor) This tensor is used for the sequence max-pooling " "to record the max indexes.") .AsIntermediate(); + AddAttr("is_test", "").SetDefault(false); AddAttr( "pooltype", "(string, default 'AVERAGE') the pooling pooltype of SequencePoolOp.") diff --git a/paddle/fluid/operators/sequence_pool_op.h b/paddle/fluid/operators/sequence_pool_op.h index 2aa20792f..f2e4a55de 100644 --- a/paddle/fluid/operators/sequence_pool_op.h +++ b/paddle/fluid/operators/sequence_pool_op.h @@ -32,10 +32,6 @@ class SequencePoolKernel : public 
framework::OpKernel { auto* in = context.Input("X"); auto* out = context.Output("Out"); std::string pooltype = context.Attr("pooltype"); - Tensor* index = nullptr; - if (pooltype == "MAX") { - index = context.Output("MaxIndex"); - } auto dims = in->dims(); auto lod = in->lod(); @@ -48,13 +44,22 @@ class SequencePoolKernel : public framework::OpKernel { dims[0] = lod[0].size() - 1; out->Resize({dims}); out->mutable_data(context.GetPlace()); - if (pooltype == "MAX") { + Tensor* index = nullptr; + + const bool is_test = context.Attr("is_test"); + + // Do not create index buffer for inference (is_test) mode + // TODO(jczaja): Skip index buffer creation for other devices eg. GPU + if (pooltype == "MAX" && + (is_test == false || + platform::is_cpu_place(context.GetPlace()) == false)) { + index = context.Output("MaxIndex"); index->Resize({dims}); index->mutable_data(context.GetPlace()); } math::SequencePoolFunctor pool; pool(context.template device_context(), pooltype, *in, out, - index); + is_test, index); } }; diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 4bfa89d9f..b9d1b7c28 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -1823,7 +1823,7 @@ def conv3d(input, return helper.append_activation(pre_act) -def sequence_pool(input, pool_type): +def sequence_pool(input, pool_type, is_test=False): """ This function add the operator for sequence pooling. It pools features of all time-steps of each instance, and is applied @@ -1860,6 +1860,7 @@ def sequence_pool(input, pool_type): input(variable): The input variable which is a LoDTensor. pool_type (string): The pooling type of sequence_pool. It supports average, sum, sqrt and max. + is_test(bool, Default False): Used to distinguish training from scoring mode. Returns: The sequence pooling variable which is a Tensor. 
@@ -1887,7 +1888,8 @@ def sequence_pool(input, pool_type): inputs={"X": input}, outputs={"Out": pool_out, "MaxIndex": max_index}, - attrs={"pooltype": pool_type.upper()}) + attrs={"pooltype": pool_type.upper(), + "is_test": is_test}) # when pool_type is max, variable max_index is initialized, # so we stop the gradient explicitly here diff --git a/python/paddle/fluid/tests/unittests/test_seq_pool.py b/python/paddle/fluid/tests/unittests/test_seq_pool.py index 641eb03a5..a80ad5b07 100644 --- a/python/paddle/fluid/tests/unittests/test_seq_pool.py +++ b/python/paddle/fluid/tests/unittests/test_seq_pool.py @@ -184,6 +184,20 @@ class TestSeqMaxPool2D(TestSeqAvgPool2D): out[i] = np.reshape(np.amax(sub_x, axis=0), (3, 11)) +class TestSeqMaxPool2DInference(TestSeqMaxPool2D): + def compute(self, x, offset, out): + self.attrs = {'pooltype': "MAX", 'is_test': True} + for i in range(len(offset[0]) - 1): + sub_x = np.reshape(x[offset[0][i]:offset[0][i + 1], :], + (-1, 3 * 11)) + out[i] = np.reshape(np.amax(sub_x, axis=0), (3, 11)) + + def test_check_grad(self): + """Grad computation does not apply to Sequence MAX + Pool executed when is_test is true """ + return + + class TestSeqLastPool2D(TestSeqAvgPool2D): def compute(self, x, offset, out): self.attrs = {'pooltype': "LAST"} -- GitLab