提交 67b8150f 编写于 作者: D dangqingqing

data converter test

上级 29c5c878
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import py_paddle.swig_paddle as api
import numpy as np
import paddle.trainer.PyDataProvider2 as dp2
from paddle.v2.data_converter import DataConverter
class DataConverterTest(unittest.TestCase):
    """Unit tests for ``paddle.v2.data_converter.DataConverter``."""

    def dense_reader(self, shape):
        """Return a random dense array.

        :param shape: shape of the array to generate.
        :return: array of the requested shape filled by ``np.random.random``.
        """
        return np.random.random(shape)

    def sparse_binary_reader(self,
                             high,
                             size_limit,
                             batch_size,
                             non_empty=False):
        """Generate a batch of random index lists.

        :param high: exclusive upper bound of every generated index.
        :param size_limit: exclusive upper bound of the number of indices
            drawn for one sample.
        :param batch_size: number of samples in the batch.
        :param non_empty: when True, redraw the sample size until it is
            non-zero so that every sample holds at least one index.
        :return: list of ``batch_size`` lists of ints.
        """
        data = []
        # ``range`` instead of ``xrange`` keeps the helper usable on both
        # Python 2 and Python 3.
        for _ in range(batch_size):
            num = np.random.randint(size_limit)  # num could be 0
            while non_empty and num == 0:
                num = np.random.randint(size_limit)
            data.append(np.random.randint(high, size=num).tolist())
        return data

    def test_dense_vector(self):
        """A dense batch must come back unchanged from the converter."""

        def compare(batch):
            converter = DataConverter([('image', dp2.dense_vector(784))])
            arg = converter([batch], {'image': 0})
            output = arg.getSlotValue(0).copyToNumpyMat()
            expected = np.array(batch, dtype='float32')
            # Compare element by element. The former check,
            # assertAlmostEqual(input.all(), output.all()), only compared
            # two booleans and therefore passed for nearly any two arrays.
            np.testing.assert_allclose(output, expected, rtol=1e-6)

        # test numpy array
        data = self.dense_reader(shape=[32, 784])
        compare(data)

        # test list
        compare(data.tolist())

    # TODO: the two sparse tests below are disabled work in progress.
    # NOTE(review): test_sparse iterates over ``data`` before assigning it
    # (presumably ``v`` was meant) and builds a sparse_binary_vector rather
    # than a float sparse type -- fix both before enabling.
    #def test_sparse_binary(self):
    #    dim = 100000
    #    data = self.sparse_binary_reader(dim, 5, 2)
    #    converter = DataConverter([('input', dp2.sparse_binary_vector(dim))])
    #    arg = converter([data], {'input':0})
    #    output = arg.getSlotValue(0)

    #def test_sparse(self):
    #    dim = 100000
    #    v = self.sparse_binary_reader(dim, 5, 2)
    #    w = []
    #    for dat in data:
    #        x = self.dense_reader(shape=[1, len(dat)])
    #        w.append(x.tolist())
    #    data = []
    #    for each in zip(v, w):
    #        data.append(zip(each[0], each[1]))
    #
    #    converter = DataConverter([('input', dp2.sparse_binary_vector(dim))])
    #    arg = converter([data], {'input':0})
    #    output = arg.getSlotValue(0)

    def test_integer(self):
        """Integer ids must come back unchanged from the converter."""
        dim = 100
        index = np.random.randint(dim, size=32)
        converter = DataConverter([('input', dp2.integer_value(dim))])
        arg = converter([index], {'input': 0})
        # The index converter stores its result with setSlotIds (an IVector),
        # so it has to be read back through getSlotIds, not getSlotValue.
        # NOTE(review): confirm against the py_paddle Arguments API.
        output = arg.getSlotIds(0).copyToNumpyArray()
        # Assert on the result instead of only printing it.
        np.testing.assert_array_equal(output, index)
# Run every test case of this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
...@@ -53,9 +53,9 @@ class DenseConvert(IDataConverter): ...@@ -53,9 +53,9 @@ class DenseConvert(IDataConverter):
:type argument: Paddle's Arguments :type argument: Paddle's Arguments
""" """
assert isinstance(argument, api.Arguments) assert isinstance(argument, api.Arguments)
if data.dtype != np.float32: # TODO: handle data type (float, double, ...)
data = data.astype(np.float32) data = np.array(data, np.float32)
m = api.Matrix.createDenseFromNumpy(data, True, False) m = api.Matrix.createDenseFromNumpy(data)
argument.setSlotValue(self.pos, m) argument.setSlotValue(self.pos, m)
...@@ -72,12 +72,12 @@ class SparseBinaryConvert(IDataConverter): ...@@ -72,12 +72,12 @@ class SparseBinaryConvert(IDataConverter):
self.__height__ = len(data) self.__height__ = len(data)
for x in data: for x in data:
self.__rows__.append(self.__rows__[-1] + len(x)) self.__rows__.append(self.__rows__[-1] + len(x))
self.__cols__ = data.flatten() self.__cols__.extend(x)
def convert(self, data, argument): def convert(self, data, argument):
assert isinstance(argument, api.Arguments) assert isinstance(argument, api.Arguments)
fill_csr(data) self.fill_csr(data)
m = api.Matrix.createSparse(self.__height__, self.input_type.dim, m = api.Matrix.createSparse(self.__height__, self.input_type.dim,
len(self.__cols__), len(self.__cols__),
len(self.__value__) == 0) len(self.__value__) == 0)
...@@ -94,8 +94,8 @@ class SparseFloatConvert(SparseBinaryConvert): ...@@ -94,8 +94,8 @@ class SparseFloatConvert(SparseBinaryConvert):
self.__height__ = len(data) self.__height__ = len(data)
for x in data: for x in data:
self.__rows__.append(self.__rows__[-1] + len(x)) self.__rows__.append(self.__rows__[-1] + len(x))
self.__cols__.extend((x[0] for x in data)) self.__cols__.extend(x[0])
self.__value__.extend((x[1] for x in data)) self.__value__.extend(x[1])
class IndexConvert(IDataConverter): class IndexConvert(IDataConverter):
...@@ -105,7 +105,10 @@ class IndexConvert(IDataConverter): ...@@ -105,7 +105,10 @@ class IndexConvert(IDataConverter):
def convert(self, data, argument): def convert(self, data, argument):
assert isinstance(argument, api.Arguments) assert isinstance(argument, api.Arguments)
self.__ids__ = data.flatten() #for x in data:
# self.__ids__.append(x)
self.__ids__.extend(x)
ids = api.IVector.create(self.__ids__) ids = api.IVector.create(self.__ids__)
argument.setSlotIds(self.pos, ids) argument.setSlotIds(self.pos, ids)
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import py_paddle.swig_paddle as api
import numpy as np
import paddle.trainer.PyDataProvider2 as dp2
from paddle.v2.data_converter import DataConverter
class DataConverterTest(unittest.TestCase):
    """Unit tests for ``paddle.v2.data_converter.DataConverter``."""

    def dense_reader(self, shape):
        """Return a random dense array.

        :param shape: shape of the array to generate.
        :return: array of the requested shape filled by ``np.random.random``.
        """
        return np.random.random(shape)

    def sparse_binary_reader(self,
                             high,
                             size_limit,
                             batch_size,
                             non_empty=False):
        """Generate a batch of random index lists.

        :param high: exclusive upper bound of every generated index.
        :param size_limit: exclusive upper bound of the number of indices
            drawn for one sample.
        :param batch_size: number of samples in the batch.
        :param non_empty: when True, redraw the sample size until it is
            non-zero so that every sample holds at least one index.
        :return: list of ``batch_size`` lists of ints.
        """
        data = []
        # ``range`` instead of ``xrange`` keeps the helper usable on both
        # Python 2 and Python 3.
        for _ in range(batch_size):
            num = np.random.randint(size_limit)  # num could be 0
            while non_empty and num == 0:
                num = np.random.randint(size_limit)
            data.append(np.random.randint(high, size=num).tolist())
        return data

    def test_dense_vector(self):
        """A dense batch must come back unchanged from the converter."""

        def compare(batch):
            converter = DataConverter([('image', dp2.dense_vector(784))])
            arg = converter([batch], {'image': 0})
            output = arg.getSlotValue(0).copyToNumpyMat()
            expected = np.array(batch, dtype='float32')
            # Compare element by element. The former check,
            # assertAlmostEqual(input.all(), output.all()), only compared
            # two booleans and therefore passed for nearly any two arrays.
            np.testing.assert_allclose(output, expected, rtol=1e-6)

        # test numpy array
        data = self.dense_reader(shape=[32, 784])
        compare(data)

        # test list
        compare(data.tolist())

    # TODO: the two sparse tests below are disabled work in progress.
    # NOTE(review): test_sparse iterates over ``data`` before assigning it
    # (presumably ``v`` was meant) and builds a sparse_binary_vector rather
    # than a float sparse type -- fix both before enabling.
    #def test_sparse_binary(self):
    #    dim = 100000
    #    data = self.sparse_binary_reader(dim, 5, 2)
    #    converter = DataConverter([('input', dp2.sparse_binary_vector(dim))])
    #    arg = converter([data], {'input':0})
    #    output = arg.getSlotValue(0)

    #def test_sparse(self):
    #    dim = 100000
    #    v = self.sparse_binary_reader(dim, 5, 2)
    #    w = []
    #    for dat in data:
    #        x = self.dense_reader(shape=[1, len(dat)])
    #        w.append(x.tolist())
    #    data = []
    #    for each in zip(v, w):
    #        data.append(zip(each[0], each[1]))
    #
    #    converter = DataConverter([('input', dp2.sparse_binary_vector(dim))])
    #    arg = converter([data], {'input':0})
    #    output = arg.getSlotValue(0)

    def test_integer(self):
        """Integer ids must come back unchanged from the converter."""
        dim = 100
        index = np.random.randint(dim, size=32)
        converter = DataConverter([('input', dp2.integer_value(dim))])
        arg = converter([index], {'input': 0})
        # The index converter stores its result with setSlotIds (an IVector),
        # so it has to be read back through getSlotIds, not getSlotValue.
        # NOTE(review): confirm against the py_paddle Arguments API.
        output = arg.getSlotIds(0).copyToNumpyArray()
        # Assert on the result instead of only printing it.
        np.testing.assert_array_equal(output, index)
# Run every test case of this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册