diff --git a/demo/sentiment/train_v2.py b/demo/sentiment/train_v2.py
index 1c856556bd0cb32f60eba322469b3621c37e1349..5151444a86290975f07297593f7f8b20b6c8d4c5 100644
--- a/demo/sentiment/train_v2.py
+++ b/demo/sentiment/train_v2.py
@@ -103,7 +103,7 @@ def stacked_lstm_net(input_dim,
 
 if __name__ == '__main__':
     # init
-    paddle.init(use_gpu=False)
+    paddle.init(use_gpu=False, log_clipping=True)
 
     #data
     print 'load dictionary...'
@@ -131,6 +131,7 @@ if __name__ == '__main__':
     # create optimizer
     adam_optimizer = paddle.optimizer.Adam(
         learning_rate=2e-3,
+        gradient_clipping_threshold=0.003,
         regularization=paddle.optimizer.L2Regularization(rate=8e-4),
         model_average=paddle.optimizer.ModelAverage(average_window=0.5))
 
diff --git a/paddle/py_paddle/dataprovider_converter.py b/paddle/py_paddle/dataprovider_converter.py
index 7c6b83541002071d6e9d00c17be97b6ce4bf8528..cfb82e92d51c22467360a67a04d4bed4f6608e2e 100644
--- a/paddle/py_paddle/dataprovider_converter.py
+++ b/paddle/py_paddle/dataprovider_converter.py
@@ -17,6 +17,7 @@
 import collections
 import swig_paddle
 import numpy
 import itertools
+from functools import reduce
 
 __all__ = ['DataProviderConverter']
@@ -59,12 +60,14 @@ class IScanner(object):
         """
         pass
 
-    def finish_pre_scan(self, argument):
+    def finish_pre_scan(self, argument, dat=None):
         """
         Finish first scan pass. Allocate the memory.
 
         :param argument: Output arguments object.
         :type argument: swig_paddle.Arguments
+        :param dat: A sample of the input data, used to calculate its shape.
+        :type dat: numpy.ndarray, list or None
         :return:
         """
         pass
@@ -95,17 +98,27 @@ class DenseScanner(IScanner):
     def __init__(self, input_type, pos):
         IScanner.__init__(self, input_type, pos)
         self.__mat__ = None
+        self.__shape__ = None
         self.__height__ = 0
 
     def pre_scan(self, dat):
         self.__height__ += 1
 
-    def finish_pre_scan(self, argument):
+    def finish_pre_scan(self, argument, dat=None):
+        self.__shape__ = numpy.array(dat).shape
+        if len(self.__shape__) > 3:
+            raise ValueError("The dimension of input is greater than 3.")
+        dim = reduce(lambda x, y: x * y, self.__shape__)
+        if len(self.__shape__) == 1:
+            assert dim == self.input_type.dim
         self.__mat__ = numpy.ndarray(
-            shape=(self.__height__, self.input_type.dim), dtype=numpy.float32)
+            shape=(self.__height__, dim), dtype=numpy.float32)
         self.__height__ = 0
 
     def scan(self, dat):
+        if isinstance(dat, numpy.ndarray):
+            assert self.__shape__ == dat.shape
+            dat = dat.flatten()
         self.__mat__[self.__height__] = dat
         self.__height__ += 1
 
@@ -116,6 +129,13 @@ class DenseScanner(IScanner):
         m = swig_paddle.Matrix.createDenseFromNumpy(self.__mat__, True,
                                                     self.data_in_gpu)
         argument.setSlotValue(self.pos, m)
+        if len(self.__shape__) > 1:
+            # The last two dimensions are the frame height and width. For
+            # example, a 3-D image feature uses the CHW layout, where H and
+            # W are the frame height and width.
+            h, w = self.__shape__[-2:]
+            argument.setSlotFrameHeight(self.pos, h)
+            argument.setSlotFrameWidth(self.pos, w)
 
 
 class SparseBinaryScanner(IScanner):
@@ -166,7 +186,7 @@ class IndexScanner(IScanner):
     def pre_scan(self, dat):
         self.__idx__ += 1
 
-    def finish_pre_scan(self, argument):
+    def finish_pre_scan(self, argument, dat=None):
         self.__ids__ = [0] * self.__idx__
         self.__idx__ = 0
 
@@ -191,8 +211,8 @@ class SequenceScanner(IScanner):
         for each in dat:
             self.__inner_scanner__.pre_scan(each)
 
-    def finish_pre_scan(self, argument):
-        self.__inner_scanner__.finish_pre_scan(argument)
+    def finish_pre_scan(self, argument, dat=None):
+        self.__inner_scanner__.finish_pre_scan(argument, dat)
 
     def scan(self, dat):
         self.__seq__.append(self.__seq__[-1] + self.get_size(dat))
@@ -233,8 +253,11 @@ class DataProviderConverter(object):
             for each_step, scanner in itertools.izip(each_sample, scanners):
                 scanner.pre_scan(each_step)
 
-        for scanner in scanners:
-            scanner.finish_pre_scan(argument)
+        # Some scanners, like the dense scanner, pre-allocate memory for the
+        # mini-batch in finish_pre_scan. dat[0], the first sample, is used to
+        # calculate the size of the input data.
+        for scanner, each_feature in itertools.izip(scanners, dat[0]):
+            scanner.finish_pre_scan(argument, each_feature)
 
         for each_sample in dat:
             for each_step, scanner in itertools.izip(each_sample, scanners):
diff --git a/python/paddle/trainer/PyDataProvider2.py b/python/paddle/trainer/PyDataProvider2.py
index a36f0ebfdcb9f90f54ba2d688f9f4bcee2939ef3..7e305e2cd9fbe306368a44d08f7f66b4185ae2d2 100644
--- a/python/paddle/trainer/PyDataProvider2.py
+++ b/python/paddle/trainer/PyDataProvider2.py
@@ -72,9 +72,16 @@ class InputType(object):
 
 def dense_slot(dim, seq_type=SequenceType.NO_SEQUENCE):
     """
-    Dense Vector. It means the input feature is dense float vector. For example,
-    if the input is an image with 28*28 pixels, the input of Paddle neural
-    network should be a dense vector with dimension 784.
+    Dense Array. It means the input feature is a dense array of floats.
+    For example, if the input is an image with 28*28 pixels, the input of
+    the Paddle neural network could be a dense vector with dimension 784
+    or a numpy array with shape (28, 28).
+
+    For the 2-D convolution operation, every sample in one mini-batch must
+    currently have the same size in PaddlePaddle, but variable-dimension
+    features are supported across mini-batches. For variable-dimension
+    input, the param dim is not used; the data reader must yield numpy
+    arrays, and the data feeder will set the data shape accordingly.
 
     :param dim: dimension of this vector.
     :type dim: int
@@ -135,6 +142,10 @@
 sparse_binary_vector = sparse_non_value_slot
 sparse_vector = sparse_value_slot
 integer_value = index_slot
+
+# dense_array can be used for variable-dimension input features.
+# Each feature is not a vector but a multi-dimensional array.
+dense_array = dense_slot
 
 def dense_vector_sequence(dim):
     """
diff --git a/python/paddle/v2/data_type.py b/python/paddle/v2/data_type.py
index d582f76ddf01ed3430a1d075624bbb8e0bf3f2a9..226997465f2ec97c6224b248427739592e9694df 100644
--- a/python/paddle/v2/data_type.py
+++ b/python/paddle/v2/data_type.py
@@ -16,7 +16,8 @@
 import paddle.trainer.PyDataProvider2 as pydp2
 
 import_list = [
     nm for nm in dir(pydp2)
-    if '_' in nm and nm[0] != '_' and ('value' in nm or 'vector' in nm)
+    if '_' in nm and nm[0] != '_' and ('value' in nm or 'vector' in nm or
+                                       'array' in nm)
 ]
 import_list.extend(['InputType'])
diff --git a/python/paddle/v2/tests/test_data_feeder.py b/python/paddle/v2/tests/test_data_feeder.py
index 71eb3bf31425c22b47accc11c9550042e077ef12..83da678da387ed1c86868847f140c6c09fbec3b5 100644
--- a/python/paddle/v2/tests/test_data_feeder.py
+++ b/python/paddle/v2/tests/test_data_feeder.py
@@ -233,6 +233,30 @@ class DataFeederTest(unittest.TestCase):
             self.assertEqual(out_sparse.getSparseRowCols(i), data[i][1])
             self.assertEqual(out_index[i], data[i][0])
 
+    def test_dense_set_shape(self):
+        # test that frame height/width are set from multi-dimensional data
+        def gen_data(batch_size, shape):
+            data = []
+            for i in xrange(batch_size):
+                each_sample = []
+                each_sample.append(np.random.random(shape))
+                data.append(each_sample)
+            return data
+
+        feeder = DataFeeder([('image', data_type.dense_array(2352))],
+                            {'image': 0})
+        arg = feeder(gen_data(32, (3, 28, 28)))
+        h = arg.getSlotFrameHeight(0)
+        w = arg.getSlotFrameWidth(0)
+        self.assertEqual(h, 28)
+        self.assertEqual(w, 28)
+
+        arg = feeder(gen_data(32, (3, 30, 32)))
+        h = arg.getSlotFrameHeight(0)
+        w = arg.getSlotFrameWidth(0)
+        self.assertEqual(h, 30)
+        self.assertEqual(w, 32)
+
 
 if __name__ == '__main__':
     api.initPaddle("--use_gpu=0")
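
A minimal usage sketch (not part of the patch), mirroring the new test above: the reader yields one CHW numpy array per sample, and the feeder derives the frame height and width from the array's shape rather than from the dim argument. The import path paddle.v2.data_feeder.DataFeeder, the 'image' slot name, and the (3, 32, 48) shape are illustrative assumptions only.

    import numpy as np
    from paddle.v2 import data_type
    from paddle.v2.data_feeder import DataFeeder

    # Each sample is a list holding one CHW numpy array; dim passed to
    # dense_array is not checked for multi-dimensional input (see the
    # dense_slot docstring in this patch).
    def make_batch(batch_size, shape):
        return [[np.random.random(shape)] for _ in xrange(batch_size)]

    feeder = DataFeeder([('image', data_type.dense_array(3 * 32 * 48))],
                        {'image': 0})
    arg = feeder(make_batch(4, (3, 32, 48)))
    print arg.getSlotFrameHeight(0), arg.getSlotFrameWidth(0)  # expected: 32 48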