From b7c179a87fc370c4a4b176621b5176c0aff5a7d1 Mon Sep 17 00:00:00 2001 From: Kexin Zhao Date: Tue, 26 Jun 2018 23:26:10 -0700 Subject: [PATCH] fix lodtensor.py --- python/paddle/fluid/lod_tensor.py | 57 ++++++++------- python/paddle/fluid/tests/test_lod_tensor.py | 73 +++++++++++--------- 2 files changed, 73 insertions(+), 57 deletions(-) diff --git a/python/paddle/fluid/lod_tensor.py b/python/paddle/fluid/lod_tensor.py index c417ab393..b2b3186c1 100644 --- a/python/paddle/fluid/lod_tensor.py +++ b/python/paddle/fluid/lod_tensor.py @@ -18,15 +18,16 @@ import numpy as np __all__ = ['create_lod_tensor', 'create_random_int_lodtensor'] -def create_lod_tensor(data, lod, place): +def create_lod_tensor(data, recursive_seq_lens, place): """ Create a lod tensor from a numpy array, a list, or an existing lod tensor. Create a lod tensor by doing the following: - 1. Check that the length-based input lod is valid. + 1. Check that the length-based level of detail (LoD) also known as + recursive_sequence_lengths of the input is valid. - 2. Convert the length-based lod to a offset-based LoD. + 2. Convert recursive_sequence_lengths to an offset-based LoD. 3. Copy the data from a numpy array, a list or a existing lod tensor to CPU or GPU device (based on input place). @@ -37,45 +38,47 @@ def create_lod_tensor(data, lod, place): Suppose we want LoDTensor to hold data for sequences of word, where each word is represented by an integer. If we want to create a LoDTensor to - represent two sentences, one of 2 words, and one of 3 words. + represent two sentences, one of 2 words, and one of 3 words. Then :code:`data` can be a numpy array of integers with shape (5, 1). - :code:`lod` will be [[2, 3]], indicating the length(# of words) in each - sentence. This length-based input lod [[2, 3]] will be converted to - offset-based lod [[0, 2, 5]] inside the function call. + :code:`recursive_seq_lens` will be [[2, 3]], indicating the length(# of words) in each + sentence. 
This length-based :code:`recursive_seq_lens` [[2, 3]] will be converted to + offset-based LoD [[0, 2, 5]] inside the function call. Please reference :ref:`api_guide_low_level_lod_tensor` for more details regarding LoD. Args: data(numpy.ndarray|list|LoDTensor): a numpy array or a LoDTensor or a - list holding the data to be copied. - lod(list): a list of lists indicating the length-based LoD info - specified by the user. + list holding the data to be copied. + recursive_seq_lens(list): a list of lists indicating the length-based level of detail + info specified by the user. place(Place): CPU or GPU place indicating where the data in the new LoDTensor will be stored. Returns: - A fluid LoDTensor object with tensor data and lod info. + A fluid LoDTensor object with tensor data and recursive_seq_lens info. """ if isinstance(data, core.LoDTensor): - return create_lod_tensor(np.array(data), lod, place) + return create_lod_tensor(np.array(data), recursive_seq_lens, place) elif isinstance(data, list): # When input data is a list, it only deal with the case where the base element # is an index of shape [1] and dtype int64 (e.g., word id). Hence, the generated # LoDTensor will be of shape [n, 1] and dtype int64, where `n` is the total number # of words or other indexes in the sequence. 
- new_lod = [] + new_recursive_seq_lens = [] for seq in data: - new_lod.append(len(seq)) - assert [new_lod] == lod, "data and lod do not match" + new_recursive_seq_lens.append(len(seq)) + assert [ + new_recursive_seq_lens + ] == recursive_seq_lens, "data and recursive_seq_lens do not match" flattened_data = np.concatenate(data, axis=0).astype("int64") flattened_data = flattened_data.reshape([len(flattened_data), 1]) - return create_lod_tensor(flattened_data, lod, place) + return create_lod_tensor(flattened_data, recursive_seq_lens, place) elif isinstance(data, np.ndarray): tensor = core.LoDTensor() tensor.set(data, place) - tensor.set_recursive_sequence_lengths(lod) + tensor.set_recursive_sequence_lengths(recursive_seq_lens) assert tensor.has_valid_recursive_sequence_lengths( ), "the provided lod info is invalid" return tensor @@ -84,7 +87,8 @@ def create_lod_tensor(data, lod, place): "data should be either a LoDTensor, a Numpy array or a list") -def create_random_int_lodtensor(lod, base_shape, place, low, high): +def create_random_int_lodtensor(recursive_seq_lens, base_shape, place, low, + high): """ Create a LoDTensor containing random integers. @@ -95,7 +99,7 @@ def create_random_int_lodtensor(lod, base_shape, place, low, high): The function does the following: 1. Calculate the overall shape of the LoDTensor based on the length-based - :code:`lod` input and the shape of the basic element in + :code:`recursive_seq_lens` input and the shape of the basic element in :code:`base_shape`. 2. Create a numpy array of this shape. @@ -105,12 +109,13 @@ def create_random_int_lodtensor(lod, base_shape, place, low, high): Suppose we want LoDTensor to hold data for sequences of word, where each word is represented by an integer. If we want to create a LoDTensor to represent two sentences, one of 2 words, and one of 3 words. Then - 'base_shape' is [1], input length-based 'lod' is [[2, 3]]. 
Then the overall - shape of the LoDTensor would be [5, 1], holding 5 words for two sentences. + 'base_shape' is [1], input length-based 'recursive_seq_lens' is [[2, 3]]. + Then the overall shape of the LoDTensor would be [5, 1], holding 5 words + for two sentences. Args: - lod(list): a list of lists indicating the length-based LoD info - specified by the user. + recursive_seq_lens(list): a list of lists indicating the length-based + level of detail info specified by the user. base_shape(list): the shape of the basic element to be held by the LoDTensor. place(Place): CPU or GPU place indicating where the data in the new @@ -119,11 +124,11 @@ def create_random_int_lodtensor(lod, base_shape, place, low, high): high(int): the upper bound of the random integers. Returns: - A fluid LoDTensor object with tensor data and lod info. + A fluid LoDTensor object with tensor data and recursive_seq_lens info. """ assert isinstance(base_shape, list), "base_shape should be a list" # append the total number of basic elements to the front of its shape - overall_shape = [sum(lod[-1])] + base_shape + overall_shape = [sum(recursive_seq_lens[-1])] + base_shape # the range of integer data elements is [low, high] data = np.random.random_integers(low, high, overall_shape).astype("int64") - return create_lod_tensor(data, lod, place) + return create_lod_tensor(data, recursive_seq_lens, place) diff --git a/python/paddle/fluid/tests/test_lod_tensor.py b/python/paddle/fluid/tests/test_lod_tensor.py index b7e7f5801..f7a9dd412 100644 --- a/python/paddle/fluid/tests/test_lod_tensor.py +++ b/python/paddle/fluid/tests/test_lod_tensor.py @@ -19,18 +19,21 @@ import unittest class TestLoDTensor(unittest.TestCase): - def test_pybind_lod(self): + def test_pybind_recursive_seq_lens(self): tensor = fluid.LoDTensor() - lod = [] - tensor.set_recursive_sequence_lengths(lod) - lod = [[], [1], [3]] - self.assertRaises(Exception, tensor.set_recursive_sequence_lengths, lod) - lod = [[0], [2], [3]] - 
self.assertRaises(Exception, tensor.set_recursive_sequence_lengths, lod) + recursive_seq_lens = [] + tensor.set_recursive_sequence_lengths(recursive_seq_lens) + recursive_seq_lens = [[], [1], [3]] + self.assertRaises(Exception, tensor.set_recursive_sequence_lengths, + recursive_seq_lens) + recursive_seq_lens = [[0], [2], [3]] + self.assertRaises(Exception, tensor.set_recursive_sequence_lengths, + recursive_seq_lens) - lod = [[1, 2, 3]] - tensor.set_recursive_sequence_lengths(lod) - self.assertEqual(tensor.recursive_sequence_lengths(), lod) + recursive_seq_lens = [[1, 2, 3]] + tensor.set_recursive_sequence_lengths(recursive_seq_lens) + self.assertEqual(tensor.recursive_sequence_lengths(), + recursive_seq_lens) tensor.set(np.random.random([6, 1]), fluid.CPUPlace()) self.assertTrue(tensor.has_valid_recursive_sequence_lengths()) tensor.set(np.random.random([9, 1]), fluid.CPUPlace()) @@ -38,13 +41,14 @@ class TestLoDTensor(unittest.TestCase): # Each level's sum should be equal to the number of items in the next level # Moreover, last level's sum should be equal to the tensor height - lod = [[2, 3], [1, 3, 1, 2, 2]] - tensor.set_recursive_sequence_lengths(lod) - self.assertEqual(tensor.recursive_sequence_lengths(), lod) + recursive_seq_lens = [[2, 3], [1, 3, 1, 2, 2]] + tensor.set_recursive_sequence_lengths(recursive_seq_lens) + self.assertEqual(tensor.recursive_sequence_lengths(), + recursive_seq_lens) tensor.set(np.random.random([8, 1]), fluid.CPUPlace()) self.assertFalse(tensor.has_valid_recursive_sequence_lengths()) - lod = [[2, 3], [1, 3, 1, 2, 1]] - tensor.set_recursive_sequence_lengths(lod) + recursive_seq_lens = [[2, 3], [1, 3, 1, 2, 1]] + tensor.set_recursive_sequence_lengths(recursive_seq_lens) self.assertTrue(tensor.has_valid_recursive_sequence_lengths()) tensor.set(np.random.random([9, 1]), fluid.CPUPlace()) self.assertFalse(tensor.has_valid_recursive_sequence_lengths()) @@ -52,35 +56,42 @@ class TestLoDTensor(unittest.TestCase): def 
test_create_lod_tensor(self): # Create LoDTensor from a list data = [[1, 2, 3], [3, 4]] - wrong_lod = [[2, 2]] - correct_lod = [[3, 2]] - self.assertRaises(AssertionError, create_lod_tensor, data, wrong_lod, - fluid.CPUPlace()) - tensor = create_lod_tensor(data, correct_lod, fluid.CPUPlace()) - self.assertEqual(tensor.recursive_sequence_lengths(), correct_lod) + wrong_recursive_seq_lens = [[2, 2]] + correct_recursive_seq_lens = [[3, 2]] + self.assertRaises(AssertionError, create_lod_tensor, data, + wrong_recursive_seq_lens, fluid.CPUPlace()) + tensor = create_lod_tensor(data, correct_recursive_seq_lens, + fluid.CPUPlace()) + self.assertEqual(tensor.recursive_sequence_lengths(), + correct_recursive_seq_lens) # Create LoDTensor from numpy array data = np.random.random([10, 1]) - lod = [[2, 1], [3, 3, 4]] - tensor = create_lod_tensor(data, lod, fluid.CPUPlace()) - self.assertEqual(tensor.recursive_sequence_lengths(), lod) + recursive_seq_lens = [[2, 1], [3, 3, 4]] + tensor = create_lod_tensor(data, recursive_seq_lens, fluid.CPUPlace()) + self.assertEqual(tensor.recursive_sequence_lengths(), + recursive_seq_lens) # Create LoDTensor from another LoDTensor, they are differnt instances - new_lod = [[2, 2, 1], [1, 2, 2, 3, 2]] - new_tensor = create_lod_tensor(tensor, new_lod, fluid.CPUPlace()) - self.assertEqual(tensor.recursive_sequence_lengths(), lod) - self.assertEqual(new_tensor.recursive_sequence_lengths(), new_lod) + new_recursive_seq_lens = [[2, 2, 1], [1, 2, 2, 3, 2]] + new_tensor = create_lod_tensor(tensor, new_recursive_seq_lens, + fluid.CPUPlace()) + self.assertEqual(tensor.recursive_sequence_lengths(), + recursive_seq_lens) + self.assertEqual(new_tensor.recursive_sequence_lengths(), + new_recursive_seq_lens) def test_create_random_int_lodtensor(self): # The shape of a word, commonly used in speech and NLP problem, is [1] shape = [1] - lod = [[2, 3, 5]] + recursive_seq_lens = [[2, 3, 5]] dict_size = 10000 low = 0 high = dict_size - 1 - tensor = 
create_random_int_lodtensor(lod, shape, + tensor = create_random_int_lodtensor(recursive_seq_lens, shape, fluid.CPUPlace(), low, high) - self.assertEqual(tensor.recursive_sequence_lengths(), lod) + self.assertEqual(tensor.recursive_sequence_lengths(), + recursive_seq_lens) self.assertEqual(tensor.shape(), [10, 1]) -- GitLab