From d73f2bd6bd37282c3b8e281a9343870dfc23e05f Mon Sep 17 00:00:00 2001
From: Kexin Zhao
Date: Tue, 15 May 2018 15:58:40 -0700
Subject: [PATCH] fix data_feeder lod bug

---
Review notes (free text between "---" and the diffstat; not part of the
commit message):

* DataToLoDTensorConverter._feed_impl_ now appends the running offset to
  lod[0] and passes lod[1:] to the recursive call.  Before this patch it
  used lod[-1] and lod[:-1] respectively.
* The new lod_level=2 test pins the resulting order: for paragraphs
  [[1, 2, 3], [4, 5]] and [[6, 7, 8, 9]] it expects
  [[0, 2, 3], [0, 3, 5, 9]], i.e. paragraph->sentence offsets first and
  sentence->word offsets second.
* The old module-level test_converter() function is replaced by a
  unittest.TestCase with one case per lod_level (0, 1 and 2).

 python/paddle/fluid/data_feeder.py            |  4 +-
 python/paddle/fluid/tests/test_data_feeder.py | 61 ++++++++++++++++---
 2 files changed, 56 insertions(+), 9 deletions(-)

diff --git a/python/paddle/fluid/data_feeder.py b/python/paddle/fluid/data_feeder.py
index 0051b698471..a44e078d0c1 100644
--- a/python/paddle/fluid/data_feeder.py
+++ b/python/paddle/fluid/data_feeder.py
@@ -54,9 +54,9 @@ class DataToLoDTensorConverter(object):
             self.data.append(data)
         else:
             cur_lod_len = len(data)
-            lod[-1].append(lod[-1][-1] + cur_lod_len)
+            lod[0].append(lod[0][-1] + cur_lod_len)
             for each_data in data:
-                self._feed_impl_(each_data, lod[:-1], lod_level - 1)
+                self._feed_impl_(each_data, lod[1:], lod_level - 1)
 
     def done(self):
         arr = numpy.array(self.data, dtype=self.dtype).reshape(self.shape)
diff --git a/python/paddle/fluid/tests/test_data_feeder.py b/python/paddle/fluid/tests/test_data_feeder.py
index 861dd3174a2..ce3ba3ebc50 100644
--- a/python/paddle/fluid/tests/test_data_feeder.py
+++ b/python/paddle/fluid/tests/test_data_feeder.py
@@ -13,15 +13,62 @@
 # limitations under the License.
 
 import paddle.fluid as fluid
+import unittest
 
 
-def test_converter():
-    img = fluid.layers.data(name='image', shape=[1, 28, 28])
-    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
-    feeder = fluid.DataFeeder([img, label], fluid.CPUPlace())
-    result = feeder.feed([[[0] * 784, [9]], [[1] * 784, [1]]])
-    print(result)
+class TestDataFeeder(unittest.TestCase):
+    def test_lod_level_0_converter(self):
+        img = fluid.layers.data(name='image', shape=[1, 28, 28])
+        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+        feeder = fluid.DataFeeder([img, label], fluid.CPUPlace())
+        result = feeder.feed([([0] * 784, [9]), ([1] * 784, [1])])
+        print(result)
+
+        self.assertEqual(result['image'].shape(), [2, 1, 28, 28])
+        self.assertEqual(result['label'].shape(), [2, 1])
+        self.assertEqual(result['image'].lod(), [])
+        self.assertEqual(result['label'].lod(), [])
+
+    def test_lod_level_1_converter(self):
+        # lod_level = 1
+        # each sentence has a different number of words
+        sentences = fluid.layers.data(
+            name='sentences', shape=[1], dtype='int64', lod_level=1)
+        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+        feeder = fluid.DataFeeder([sentences, label], fluid.CPUPlace())
+
+        # lod = [[0, 3, 5, 9]]
+        # data = [[1, 2, 3], [4, 5], [6, 7, 8, 9]]
+        # label = [1] * len(data)
+        result = feeder.feed(
+            [([1, 2, 3], [1]), ([4, 5], [1]), ([6, 7, 8, 9], [1])])
+        print(result)
+
+        self.assertEqual(result['sentences'].shape(), [9, 1])
+        self.assertEqual(result['label'].shape(), [3, 1])
+        self.assertEqual(result['sentences'].lod(), [[0, 3, 5, 9]])
+        self.assertEqual(result['label'].lod(), [])
+
+    def test_lod_level_2_converter(self):
+        # lod_level = 2
+        # paragraphs -> sentences -> words
+        paragraphs = fluid.layers.data(
+            name='paragraphs', shape=[1], dtype='int64', lod_level=2)
+        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+        feeder = fluid.DataFeeder([paragraphs, label], fluid.CPUPlace())
+
+        # lod = [[0, 2, 3], [0, 3, 5, 9]]
+        # data = [[[1, 2, 3], [4, 5]], [[6, 7, 8, 9]]]
+        # label = [1] * len(data)
+        result = feeder.feed(
+            [([[1, 2, 3], [4, 5]], [1]), ([[6, 7, 8, 9]], [1])])
+        print(result)
+
+        self.assertEqual(result['paragraphs'].shape(), [9, 1])
+        self.assertEqual(result['label'].shape(), [2, 1])
+        self.assertEqual(result['paragraphs'].lod(), [[0, 2, 3], [0, 3, 5, 9]])
+        self.assertEqual(result['label'].lod(), [])
 
 
 if __name__ == '__main__':
-    test_converter()
+    unittest.main()
-- 
GitLab