提交 1220f385 编写于 作者: Q qingqing01 提交者: GitHub

Merge pull request #2215 from qingqing01/variable_input

Support variable-dimension input feature for 2D convolution operation.
...@@ -17,6 +17,7 @@ import collections ...@@ -17,6 +17,7 @@ import collections
import swig_paddle import swig_paddle
import numpy import numpy
import itertools import itertools
from functools import reduce
__all__ = ['DataProviderConverter'] __all__ = ['DataProviderConverter']
...@@ -65,6 +66,8 @@ class IScanner(object): ...@@ -65,6 +66,8 @@ class IScanner(object):
:param argument: Output arguments object. :param argument: Output arguments object.
:type argument: swig_paddle.Arguments :type argument: swig_paddle.Arguments
:param dat: The input data.
:type dat: The Python object, numpy.array or List.
:return: :return:
""" """
pass pass
...@@ -95,17 +98,35 @@ class DenseScanner(IScanner): ...@@ -95,17 +98,35 @@ class DenseScanner(IScanner):
def __init__(self, input_type, pos):
    """
    Create a scanner for one dense input slot.

    :param input_type: Input type descriptor, forwarded to IScanner.
    :param pos: Slot position, forwarded to IScanner.
    """
    IScanner.__init__(self, input_type, pos)
    # Dense buffer for the mini-batch; allocated in finish_pre_scan.
    self.__mat__ = None
    # Shape of the first sample seen in the current mini-batch.
    self.__shape__ = None
    # Row counter: samples counted in pre_scan, then rows written in scan.
    self.__height__ = 0
    # Flattened size of one sample (product of its shape).
    self.__dim__ = 0
def pre_scan(self, dat):
    """
    First-pass scan of one sample: count it and validate its shape.

    While ``self.__shape__`` is None, the sample fixes the shape and the
    flattened size used for the mini-batch. Every later sample must have
    exactly that shape.

    :param dat: One dense sample.
    :type dat: numpy.array or (nested) list
    :raises ValueError: If the sample has more than 3 dimensions, if a 1-D
        sample's length differs from ``self.input_type.dim``, or if the
        shape differs from the recorded one.
    """
    self.__height__ += 1
    # numpy.shape reads the shape without copying an existing ndarray.
    shape = numpy.shape(dat)
    if self.__shape__ is None:
        self.__shape__ = shape
        if len(shape) > 3:
            raise ValueError(
                "The dimension of input cannot be greater than 3.")
        self.__dim__ = reduce(lambda x, y: x * y, shape)
        # Only 1-D samples are checked against the declared dim; 2-D/3-D
        # samples may vary in size from one mini-batch to the next.
        if len(shape) == 1 and self.__dim__ != self.input_type.dim:
            raise ValueError(
                "The data size must be equal to it in data layer.")
    elif self.__shape__ != shape:
        raise ValueError(
            "The data shape must be same in one mini-batch.")
def finish_pre_scan(self, argument):
    """
    Finish the first pass: allocate the mini-batch buffer.

    The buffer has one row per sample counted in pre_scan and one column
    per element of a flattened sample.

    :param argument: Output arguments object (not used here).
    """
    # Uninitialised on purpose: scan() overwrites the rows one by one.
    self.__mat__ = numpy.empty(
        (self.__height__, self.__dim__), dtype=numpy.float32)
    # Reuse the counter as the write cursor for scan().
    self.__height__ = 0
def scan(self, dat):
    """
    Second-pass scan: copy one sample into the next row of the buffer.

    :param dat: One dense sample; it is flattened before being stored.
    :type dat: numpy.array or (nested) list
    """
    # asarray/ravel avoid intermediate copies when dat is already an
    # ndarray; the row assignment below does the only copy (and the cast
    # to the buffer's dtype).
    self.__mat__[self.__height__] = numpy.asarray(dat).ravel()
    self.__height__ += 1
...@@ -116,6 +137,14 @@ class DenseScanner(IScanner): ...@@ -116,6 +137,14 @@ class DenseScanner(IScanner):
m = swig_paddle.Matrix.createDenseFromNumpy(self.__mat__, True, m = swig_paddle.Matrix.createDenseFromNumpy(self.__mat__, True,
self.data_in_gpu) self.data_in_gpu)
argument.setSlotValue(self.pos, m) argument.setSlotValue(self.pos, m)
if len(self.__shape__) > 1:
# The last two dimensions are the frame height and width.
# For example, the layout is CHW for a 3-D image feature.
# H and W are the frame height and width.
h, w = self.__shape__[-2:]
argument.setSlotFrameHeight(self.pos, h)
argument.setSlotFrameWidth(self.pos, w)
self.__shape__ = None
class SparseBinaryScanner(IScanner): class SparseBinaryScanner(IScanner):
......
...@@ -72,9 +72,16 @@ class InputType(object): ...@@ -72,9 +72,16 @@ class InputType(object):
def dense_slot(dim, seq_type=SequenceType.NO_SEQUENCE): def dense_slot(dim, seq_type=SequenceType.NO_SEQUENCE):
""" """
Dense Vector. It means the input feature is dense float vector. For example, Dense Array. It means the input feature is dense array with float type.
if the input is an image with 28*28 pixels, the input of Paddle neural For example, if the input is an image with 28*28 pixels, the input of
network should be a dense vector with dimension 784. Paddle neural network could be a dense vector with dimension 784 or a
numpy array with shape (28, 28).
For the 2-D convolution operation, every sample in one mini-batch must
currently have the same size in PaddlePaddle, but the feature dimensions may
vary across mini-batches. For variable-dimension input, the param dim is not
used; instead, the data reader must yield numpy arrays and the data feeder
will set the data shape correctly.
:param dim: dimension of this vector. :param dim: dimension of this vector.
:type dim: int :type dim: int
...@@ -135,6 +142,10 @@ sparse_binary_vector = sparse_non_value_slot ...@@ -135,6 +142,10 @@ sparse_binary_vector = sparse_non_value_slot
sparse_vector = sparse_value_slot sparse_vector = sparse_value_slot
integer_value = index_slot integer_value = index_slot
# dense_array can be used for variable-length input feature.
# Each feature is not a vector, but a multi-dimensional array.
dense_array = dense_slot
def dense_vector_sequence(dim): def dense_vector_sequence(dim):
""" """
......
...@@ -16,7 +16,8 @@ import paddle.trainer.PyDataProvider2 as pydp2 ...@@ -16,7 +16,8 @@ import paddle.trainer.PyDataProvider2 as pydp2
# Re-export the public PyDataProvider2 names that contain an underscore and
# one of the keywords 'value', 'vector' or 'array', plus InputType itself.
import_list = [
    nm for nm in dir(pydp2)
    if '_' in nm and nm[0] != '_' and any(
        key in nm for key in ('value', 'vector', 'array'))
]
import_list.extend(['InputType'])
......
...@@ -233,6 +233,30 @@ class DataFeederTest(unittest.TestCase): ...@@ -233,6 +233,30 @@ class DataFeederTest(unittest.TestCase):
self.assertEqual(out_sparse.getSparseRowCols(i), data[i][1]) self.assertEqual(out_sparse.getSparseRowCols(i), data[i][1])
self.assertEqual(out_index[i], data[i][0]) self.assertEqual(out_index[i], data[i][0])
def test_dense_set_shape(self):
    """
    Check that feeding 3-D dense samples sets the slot's frame height and
    width, and that the sample shape may change between mini-batches.
    """

    def gen_data(batch_size, shape):
        # One mini-batch: each sample is a one-slot list holding a random
        # array of the given shape.
        return [[np.random.random(shape)] for _ in xrange(batch_size)]

    feeder = DataFeeder([('image', data_type.dense_array(2352))],
                        {'image': 0})

    # The last two dimensions of each sample are the frame height and width.
    arg = feeder(gen_data(32, (3, 28, 28)))
    self.assertEqual(arg.getSlotFrameHeight(0), 28)
    self.assertEqual(arg.getSlotFrameWidth(0), 28)

    # A later mini-batch may use a different sample shape.
    arg = feeder(gen_data(32, (3, 30, 32)))
    self.assertEqual(arg.getSlotFrameHeight(0), 30)
    self.assertEqual(arg.getSlotFrameWidth(0), 32)
if __name__ == '__main__': if __name__ == '__main__':
api.initPaddle("--use_gpu=0") api.initPaddle("--use_gpu=0")
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册