diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec
index 2b8b82e74fc49d454b5331460acbffd0e9404fb5..e0707fdc3ae0c7e4ce9ec07fdde14c1a24dee062 100644
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -64,7 +64,7 @@ paddle.fluid.layers.chunk_eval ArgSpec(args=['input', 'label', 'chunk_scheme', '
 paddle.fluid.layers.sequence_conv ArgSpec(args=['input', 'num_filters', 'filter_size', 'filter_stride', 'padding', 'bias_attr', 'param_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(3, 1, None, None, None, None, None))
 paddle.fluid.layers.conv2d ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None))
 paddle.fluid.layers.conv3d ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None))
-paddle.fluid.layers.sequence_pool ArgSpec(args=['input', 'pool_type'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.layers.sequence_pool ArgSpec(args=['input', 'pool_type', 'is_test'], varargs=None, keywords=None, defaults=(False,))
 paddle.fluid.layers.sequence_softmax ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(False, None))
 paddle.fluid.layers.softmax ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(True, None))
 paddle.fluid.layers.pool2d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None))
diff --git a/paddle/fluid/operators/math/sequence_pooling.cc b/paddle/fluid/operators/math/sequence_pooling.cc
index 7be8539a7b0f1890898fd386a3056601fda8a7c3..6d491dbf1ed162ef07fda4c07e95cc57108486fd 100644
--- a/paddle/fluid/operators/math/sequence_pooling.cc
+++ b/paddle/fluid/operators/math/sequence_pooling.cc
@@ -31,7 +31,7 @@ template <typename T, int MajorType = Eigen::RowMajor,
           typename IndexType = Eigen::DenseIndex>
 using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
 
-template <typename T>
+template <typename T, bool is_test>
 class MaxSeqPoolFunctor {
  public:
   void operator()(const platform::CPUDeviceContext& context,
@@ -70,7 +70,41 @@ class MaxSeqPoolFunctor {
     }
   }
 };
+// Instantisation of Max Sequence Pooling for test phase eg. no need to fill
+// index buffer
+template <typename T>
+class MaxSeqPoolFunctor<T, true> {
+ public:
+  void operator()(const platform::CPUDeviceContext& context,
+                  const framework::LoDTensor& input, framework::Tensor* output,
+                  framework::Tensor* index) {
+    auto in_dims = input.dims();
+    auto out_dims = output->dims();
+    PADDLE_ENFORCE_GT(in_dims.size(), 1);
+    PADDLE_ENFORCE_GT(out_dims.size(), 1);
+    for (int64_t i = 1; i < in_dims.size(); ++i) {
+      PADDLE_ENFORCE_EQ(in_dims[i], out_dims[i]);
+    }
+
+    auto starts = input.lod()[0];
+    const T* in_data = input.data<T>();
+    T* out_data = output->data<T>();
 
+    int64_t num_seq = out_dims[0];
+    int64_t dim = output->numel() / num_seq;
+    for (int64_t i = 0; i < num_seq; ++i) {
+      std::memcpy(&out_data[i * dim], &in_data[starts[i] * dim],
+                  dim * sizeof(T));
+      for (size_t j = starts[i] + 1; j < starts[i + 1]; ++j) {
+        for (int64_t k = 0; k < dim; ++k) {
+          if (in_data[j * dim + k] > out_data[i * dim + k]) {
+            out_data[i * dim + k] = in_data[j * dim + k];
+          }
+        }
+      }
+    }
+  }
+};
 template <typename T>
 class MaxSeqPoolGradFunctor {
  public:
@@ -188,11 +222,16 @@ class SequencePoolFunctor<platform::CPUDeviceContext, T> {
   /* max pool has index output */
   void operator()(const platform::CPUDeviceContext& context,
                   const std::string pooltype, const framework::LoDTensor& input,
-                  framework::Tensor* output,
+                  framework::Tensor* output, bool is_test,
                   framework::Tensor* index = nullptr) {
     if (pooltype == "MAX") {
-      math::MaxSeqPoolFunctor<T> max_pool;
-      max_pool(context, input, output, index);
+      if (is_test) {
+        math::MaxSeqPoolFunctor<T, true> max_pool;
+        max_pool(context, input, output, index);
+      } else {
+        math::MaxSeqPoolFunctor<T, false> max_pool;
+        max_pool(context, input, output, index);
+      }
       return;
     }
     if (pooltype == "LAST") {
@@ -200,6 +239,7 @@ class SequencePoolFunctor<platform::CPUDeviceContext, T> {
       last_pool(context, input, output);
       return;
     }
+
     if (pooltype == "FIRST") {
       math::FirstSeqPoolFunctor<T> first_pool;
       first_pool(context, input, output);
diff --git a/paddle/fluid/operators/math/sequence_pooling.cu b/paddle/fluid/operators/math/sequence_pooling.cu
index a92aef805a0434f2ebcbc62d4e5eaef0cfb21bfa..0015fafbc892912424dfa6dbd1778438d384ca19 100644
--- a/paddle/fluid/operators/math/sequence_pooling.cu
+++ b/paddle/fluid/operators/math/sequence_pooling.cu
@@ -133,7 +133,7 @@ class SequencePoolFunctor<platform::CUDADeviceContext, T> {
  public:
   void operator()(const platform::CUDADeviceContext& context,
                   const std::string pooltype, const framework::LoDTensor& input,
-                  framework::Tensor* output,
+                  framework::Tensor* output, bool is_test,
                   framework::Tensor* index = nullptr) {
     auto& lod = input.lod()[0];
     const size_t item_dim = output->numel() / output->dims()[0];
diff --git a/paddle/fluid/operators/math/sequence_pooling.h b/paddle/fluid/operators/math/sequence_pooling.h
index 8dcbee65d0b63a137e5f422ec8667cc950641b4a..a1046ea2160d0ae9c2251612c97d3f2640b0aad1 100644
--- a/paddle/fluid/operators/math/sequence_pooling.h
+++ b/paddle/fluid/operators/math/sequence_pooling.h
@@ -28,7 +28,7 @@ class SequencePoolFunctor {
   /* max pool has index output */
   void operator()(const DeviceContext& context, const std::string pooltype,
                   const framework::LoDTensor& input, framework::Tensor* output,
-                  framework::Tensor* index = nullptr);
+                  bool is_test = false, framework::Tensor* index = nullptr);
 };
 
 template <typename DeviceContext, typename T>
diff --git a/paddle/fluid/operators/sequence_pool_op.cc b/paddle/fluid/operators/sequence_pool_op.cc
index 15d3f064eb7b025dc9a85b2aabad24186061cbd4..217bb1610fd3f02f0f72d3b7750ebcdfad243f48 100644
--- a/paddle/fluid/operators/sequence_pool_op.cc
+++ b/paddle/fluid/operators/sequence_pool_op.cc
@@ -47,6 +47,7 @@ class SequencePoolOpMaker : public framework::OpProtoAndCheckerMaker {
               "(Tensor<int>) This tensor is used for the sequence max-pooling "
               "to record the max indexes.")
         .AsIntermediate();
+    AddAttr<bool>("is_test", "").SetDefault(false);
     AddAttr<std::string>(
         "pooltype",
         "(string, default 'AVERAGE') the pooling pooltype of SequencePoolOp.")
diff --git a/paddle/fluid/operators/sequence_pool_op.h b/paddle/fluid/operators/sequence_pool_op.h
index 2aa20792f24305a106c500a3d7a6e3d363bc31d8..f2e4a55dee49664b2fc09813f6dba5f68aaf11d5 100644
--- a/paddle/fluid/operators/sequence_pool_op.h
+++ b/paddle/fluid/operators/sequence_pool_op.h
@@ -32,10 +32,6 @@ class SequencePoolKernel : public framework::OpKernel<T> {
     auto* in = context.Input<LoDTensor>("X");
     auto* out = context.Output<Tensor>("Out");
     std::string pooltype = context.Attr<std::string>("pooltype");
-    Tensor* index = nullptr;
-    if (pooltype == "MAX") {
-      index = context.Output<Tensor>("MaxIndex");
-    }
 
     auto dims = in->dims();
     auto lod = in->lod();
@@ -48,13 +44,22 @@ class SequencePoolKernel : public framework::OpKernel<T> {
     dims[0] = lod[0].size() - 1;
     out->Resize({dims});
     out->mutable_data<T>(context.GetPlace());
-    if (pooltype == "MAX") {
+    Tensor* index = nullptr;
+
+    const bool is_test = context.Attr<bool>("is_test");
+
+    // Do not create index buffer for inference (is_test) mode
+    // TODO(jczaja): Skip index buffer creation for other devices eg. GPU
+    if (pooltype == "MAX" &&
+        (is_test == false ||
+         platform::is_cpu_place(context.GetPlace()) == false)) {
+      index = context.Output<Tensor>("MaxIndex");
       index->Resize({dims});
       index->mutable_data<int>(context.GetPlace());
     }
     math::SequencePoolFunctor<DeviceContext, T> pool;
     pool(context.template device_context<DeviceContext>(), pooltype, *in, out,
-         index);
+         is_test, index);
   }
 };
 
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 4bfa89d9facf1d368e3018a248dc090c81c3402e..b9d1b7c28abbd8637928780886195e75e0b2a1bb 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -1823,7 +1823,7 @@ def conv3d(input,
     return helper.append_activation(pre_act)
 
 
-def sequence_pool(input, pool_type):
+def sequence_pool(input, pool_type, is_test=False):
     """
     This function add the operator for sequence pooling.
     It pools features of all time-steps of each instance, and is applied
@@ -1860,6 +1860,7 @@ def sequence_pool(input, pool_type):
         input(variable): The input variable which is a LoDTensor.
         pool_type (string): The pooling type of sequence_pool.
             It supports average, sum, sqrt and max.
+        is_test(bool, Default False): Used distinguish training from scoring mode.
 
     Returns:
         The sequence pooling variable which is a Tensor.
@@ -1887,7 +1888,8 @@ def sequence_pool(input, pool_type):
         inputs={"X": input},
         outputs={"Out": pool_out,
                  "MaxIndex": max_index},
-        attrs={"pooltype": pool_type.upper()})
+        attrs={"pooltype": pool_type.upper(),
+               "is_test": is_test})
 
     # when pool_type is max, variable max_index is initialized,
     # so we stop the gradient explicitly here
diff --git a/python/paddle/fluid/tests/unittests/test_seq_pool.py b/python/paddle/fluid/tests/unittests/test_seq_pool.py
index 641eb03a5fbf1bb140b20cc3518cea83386fa577..a80ad5b079891efe1b0e1222b3c2455d4891d5f5 100644
--- a/python/paddle/fluid/tests/unittests/test_seq_pool.py
+++ b/python/paddle/fluid/tests/unittests/test_seq_pool.py
@@ -184,6 +184,20 @@ class TestSeqMaxPool2D(TestSeqAvgPool2D):
             out[i] = np.reshape(np.amax(sub_x, axis=0), (3, 11))
 
 
+class TestSeqMaxPool2DInference(TestSeqMaxPool2D):
+    def compute(self, x, offset, out):
+        self.attrs = {'pooltype': "MAX", 'is_test': True}
+        for i in range(len(offset[0]) - 1):
+            sub_x = np.reshape(x[offset[0][i]:offset[0][i + 1], :],
+                               (-1, 3 * 11))
+            out[i] = np.reshape(np.amax(sub_x, axis=0), (3, 11))
+
+    def test_check_grad(self):
+        """Grad computation does not apply to Sequence MAX 
+            Pool executed when is_test is true """
+        return
+
+
 class TestSeqLastPool2D(TestSeqAvgPool2D):
     def compute(self, x, offset, out):
         self.attrs = {'pooltype': "LAST"}