From 044957087baa0aa58d34fa852b54612660f72561 Mon Sep 17 00:00:00 2001 From: Xin Pan Date: Thu, 28 Jun 2018 17:50:27 +0800 Subject: [PATCH] Merge pull request #11712 from kexinzhao/fix_lod_name Replace lod with recursive_sequence_lengths in book examples --- doc/fluid/design/concepts/lod_tensor.md | 20 +++++ python/paddle/fluid/lod_tensor.py | 57 ++++++++------- .../test_label_semantic_roles_newapi.py | 28 +++---- .../test_machine_translation.py | 12 +-- .../test_recommender_system_newapi.py | 12 +-- .../test_understand_sentiment_conv.py | 10 +-- .../test_understand_sentiment_dynamic_rnn.py | 10 +-- .../test_understand_sentiment_stacked_lstm.py | 10 +-- .../word2vec/test_word2vec_new_api.py | 19 ++--- .../tests/book/notest_understand_sentiment.py | 16 ++-- .../tests/book/test_label_semantic_roles.py | 62 ++++++++++++---- .../tests/book/test_machine_translation.py | 14 ++-- .../tests/book/test_recommender_system.py | 12 +-- .../tests/book/test_rnn_encoder_decoder.py | 14 ++-- .../paddle/fluid/tests/book/test_word2vec.py | 21 +++--- python/paddle/fluid/tests/test_lod_tensor.py | 73 +++++++++++-------- 16 files changed, 236 insertions(+), 154 deletions(-) diff --git a/doc/fluid/design/concepts/lod_tensor.md b/doc/fluid/design/concepts/lod_tensor.md index d606d7a790b..748488f6d5f 100644 --- a/doc/fluid/design/concepts/lod_tensor.md +++ b/doc/fluid/design/concepts/lod_tensor.md @@ -173,6 +173,7 @@ are transformed into offsets of elements/words as follows: ## Slicing of LoD Tensors + When we use the above 2-level LoD Tensor as the input to a nested-RNN, we need to retrieve certain sequences. Here we define the sequence identified by branch as the **-slice**. For example, the <2>-slice of above example is @@ -189,3 +190,22 @@ and the <2,0>-slice of above slice is 10 12 || ``` + +## Length Representation vs Offset Representation + +The offset representation is an implementation-oriented decision and it makes understanding the idea behind LoDTensor difficult. +Hence, we encapsulate this implementation detail in C++ and expose the original length representation in our Python API. +Specifically, we call this length representation `recursive_sequence_lengths` and users can use the following code to set or get the `recursive_sequence_lengths` of a LoDTensor in Python: +```Python +# length representation of lod called recursive_sequence_lengths +recursive_seq_lens = [[3, 1, 2], [2, 2, 1, 3, 1, 2]] +# Create a LoDTensor that has the above recursive_sequence_lengths info. +# This recursive_sequence_lengths will be converted to an offset representation of LoD in the C++ implementation under the hood. +tensor = fluid.LoDTensor(lod) + +# Set/Change the recursive_sequence_lengths info of LoDTensor +tensor.set_recursive_sequence_lengths([[3, 1, 2]]) +# Get the recursive_sequence_lengths info of a LoDTensor (the offset-based LoD representation stored in C++ will be converted +# back to length-based recursive_sequence_lengths), new_recursive_seq_lens = [[3, 1, 2]] +new_recursive_seq_lens = tensor.recursive_sequence_lengths() +``` diff --git a/python/paddle/fluid/lod_tensor.py b/python/paddle/fluid/lod_tensor.py index c417ab393fc..b2b3186c1e8 100644 --- a/python/paddle/fluid/lod_tensor.py +++ b/python/paddle/fluid/lod_tensor.py @@ -18,15 +18,16 @@ import numpy as np __all__ = ['create_lod_tensor', 'create_random_int_lodtensor'] -def create_lod_tensor(data, lod, place): +def create_lod_tensor(data, recursive_seq_lens, place): """ Create a lod tensor from a numpy array, a list, or an existing lod tensor. Create a lod tensor by doing the following: - 1. Check that the length-based input lod is valid. + 1. Check that the length-based level of detail (LoD) also known as + recursive_sequence_lengths of the input is valid. - 2. Convert the length-based lod to a offset-based LoD. + 2. Convert recursive_sequence_lengths to a offset-based LoD. 3. Copy the data from a numpy array, a list or a existing lod tensor to CPU or GPU device (based on input place). @@ -37,45 +38,47 @@ def create_lod_tensor(data, lod, place): Suppose we want LoDTensor to hold data for sequences of word, where each word is represented by an integer. If we want to create a LoDTensor to - represent two sentences, one of 2 words, and one of 3 words. + represent two sentences, one of 2 words, and one of 3 words. Then :code:`data` can be a numpy array of integers with shape (5, 1). - :code:`lod` will be [[2, 3]], indicating the length(# of words) in each - sentence. This length-based input lod [[2, 3]] will be converted to - offset-based lod [[0, 2, 5]] inside the function call. + :code:`recursive_seq_lens` will be [[2, 3]], indicating the length(# of words) in each + sentence. This length-based :code:`recursive_seq_lens` [[2, 3]] will be converted to + offset-based LoD [[0, 2, 5]] inside the function call. Please reference :ref:`api_guide_low_level_lod_tensor` for more details regarding LoD. Args: data(numpy.ndarray|list|LoDTensor): a numpy array or a LoDTensor or a - list holding the data to be copied. - lod(list): a list of lists indicating the length-based LoD info - specified by the user. + list holding the data to be copied. + recursive_seq_lens(list): a list of lists indicating the length-based level of detail + info specified by the user. place(Place): CPU or GPU place indicating where the data in the new LoDTensor will be stored. Returns: - A fluid LoDTensor object with tensor data and lod info. + A fluid LoDTensor object with tensor data and recursive_seq_lens info. """ if isinstance(data, core.LoDTensor): - return create_lod_tensor(np.array(data), lod, place) + return create_lod_tensor(np.array(data), recursive_seq_lens, place) elif isinstance(data, list): # When input data is a list, it only deal with the case where the base element # is an index of shape [1] and dtype int64 (e.g., word id). Hence, the generated # LoDTensor will be of shape [n, 1] and dtype int64, where `n` is the total number # of words or other indexes in the sequence. - new_lod = [] + new_recursive_seq_lens = [] for seq in data: - new_lod.append(len(seq)) - assert [new_lod] == lod, "data and lod do not match" + new_recursive_seq_lens.append(len(seq)) + assert [ + new_recursive_seq_lens + ] == recursive_seq_lens, "data and recursive_seq_lens do not match" flattened_data = np.concatenate(data, axis=0).astype("int64") flattened_data = flattened_data.reshape([len(flattened_data), 1]) - return create_lod_tensor(flattened_data, lod, place) + return create_lod_tensor(flattened_data, recursive_seq_lens, place) elif isinstance(data, np.ndarray): tensor = core.LoDTensor() tensor.set(data, place) - tensor.set_recursive_sequence_lengths(lod) + tensor.set_recursive_sequence_lengths(recursive_seq_lens) assert tensor.has_valid_recursive_sequence_lengths( ), "the provided lod info is invalid" return tensor @@ -84,7 +87,8 @@ def create_lod_tensor(data, lod, place): "data should be either a LoDTensor, a Numpy array or a list") -def create_random_int_lodtensor(lod, base_shape, place, low, high): +def create_random_int_lodtensor(recursive_seq_lens, base_shape, place, low, + high): """ Create a LoDTensor containing random integers. @@ -95,7 +99,7 @@ def create_random_int_lodtensor(lod, base_shape, place, low, high): The function does the following: 1. Calculate the overall shape of the LoDTensor based on the length-based - :code:`lod` input and the shape of the basic element in + :code:`recursive_seq_lens` input and the shape of the basic element in :code:`base_shape`. 2. Create a numpy array of this shape. @@ -105,12 +109,13 @@ def create_random_int_lodtensor(lod, base_shape, place, low, high): Suppose we want LoDTensor to hold data for sequences of word, where each word is represented by an integer. If we want to create a LoDTensor to represent two sentences, one of 2 words, and one of 3 words. Then - 'base_shape' is [1], input length-based 'lod' is [[2, 3]]. Then the overall - shape of the LoDTensor would be [5, 1], holding 5 words for two sentences. + 'base_shape' is [1], input length-based 'recursive_seq_lens' is [[2, 3]]. + Then the overall shape of the LoDTensor would be [5, 1], holding 5 words + for two sentences. Args: - lod(list): a list of lists indicating the length-based LoD info - specified by the user. + recursive_seq_lens(list): a list of lists indicating the length-based + level of detail info specified by the user. base_shape(list): the shape of the basic element to be held by the LoDTensor. place(Place): CPU or GPU place indicating where the data in the new @@ -119,11 +124,11 @@ def create_random_int_lodtensor(lod, base_shape, place, low, high): high(int): the upper bound of the random integers. Returns: - A fluid LoDTensor object with tensor data and lod info. + A fluid LoDTensor object with tensor data and recursive_seq_lens info. """ assert isinstance(base_shape, list), "base_shape should be a list" # append the total number of basic elements to the front of its shape - overall_shape = [sum(lod[-1])] + base_shape + overall_shape = [sum(recursive_seq_lens[-1])] + base_shape # the range of integer data elements is [low, high] data = np.random.random_integers(low, high, overall_shape).astype("int64") - return create_lod_tensor(data, lod, place) + return create_lod_tensor(data, recursive_seq_lens, place) diff --git a/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/test_label_semantic_roles_newapi.py b/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/test_label_semantic_roles_newapi.py index 0ccb3a39e02..67aa21e8c56 100755 --- a/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/test_label_semantic_roles_newapi.py +++ b/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/test_label_semantic_roles_newapi.py @@ -206,35 +206,35 @@ def infer(use_cuda, inference_program, params_dirname): inferencer = fluid.Inferencer( inference_program, param_path=params_dirname, place=place) - # Setup inputs by creating LoDTensors to represent sequences of words. - # Here each word is the basic element of these LoDTensors and the shape of + # Setup input by creating LoDTensor to represent sequence of words. + # Here each word is the basic element of the LoDTensor and the shape of # each word (base_shape) should be [1] since it is simply an index to # look up for the corresponding word vector. - # Suppose the length_based level of detail (lod) info is set to [[3, 4, 2]], - # which has only one lod level. Then the created LoDTensors will have only + # Suppose the recursive_sequence_lengths info is set to [[3, 4, 2]], + # which has only one level of detail. Then the created LoDTensor will have only # one higher level structure (sequence of words, or sentence) than the basic # element (word). Hence the LoDTensor will hold data for three sentences of # length 3, 4 and 2, respectively. - # Note that lod info should be a list of lists. - lod = [[3, 4, 2]] + # Note that recursive_sequence_lengths should be a list of lists. + recursive_seq_lens = [[3, 4, 2]] base_shape = [1] # The range of random integers is [low, high] word = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=WORD_DICT_LEN - 1) + recursive_seq_lens, base_shape, place, low=0, high=WORD_DICT_LEN - 1) ctx_n2 = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=WORD_DICT_LEN - 1) + recursive_seq_lens, base_shape, place, low=0, high=WORD_DICT_LEN - 1) ctx_n1 = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=WORD_DICT_LEN - 1) + recursive_seq_lens, base_shape, place, low=0, high=WORD_DICT_LEN - 1) ctx_0 = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=WORD_DICT_LEN - 1) + recursive_seq_lens, base_shape, place, low=0, high=WORD_DICT_LEN - 1) ctx_p1 = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=WORD_DICT_LEN - 1) + recursive_seq_lens, base_shape, place, low=0, high=WORD_DICT_LEN - 1) ctx_p2 = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=WORD_DICT_LEN - 1) + recursive_seq_lens, base_shape, place, low=0, high=WORD_DICT_LEN - 1) pred = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=PRED_DICT_LEN - 1) + recursive_seq_lens, base_shape, place, low=0, high=PRED_DICT_LEN - 1) mark = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=MARK_DICT_LEN - 1) + recursive_seq_lens, base_shape, place, low=0, high=MARK_DICT_LEN - 1) results = inferencer.infer( { diff --git a/python/paddle/fluid/tests/book/high-level-api/machine_translation/test_machine_translation.py b/python/paddle/fluid/tests/book/high-level-api/machine_translation/test_machine_translation.py index c4b37df3a09..12c4134dc93 100644 --- a/python/paddle/fluid/tests/book/high-level-api/machine_translation/test_machine_translation.py +++ b/python/paddle/fluid/tests/book/high-level-api/machine_translation/test_machine_translation.py @@ -215,11 +215,13 @@ def decode_main(use_cuda, is_sparse): [1. for _ in range(batch_size)], dtype='float32') init_ids_data = init_ids_data.reshape((batch_size, 1)) init_scores_data = init_scores_data.reshape((batch_size, 1)) - init_lod = [1] * batch_size - init_lod = [init_lod, init_lod] + init_recursive_seq_lens = [1] * batch_size + init_recursive_seq_lens = [init_recursive_seq_lens, init_recursive_seq_lens] - init_ids = fluid.create_lod_tensor(init_ids_data, init_lod, place) - init_scores = fluid.create_lod_tensor(init_scores_data, init_lod, place) + init_ids = fluid.create_lod_tensor(init_ids_data, init_recursive_seq_lens, + place) + init_scores = fluid.create_lod_tensor(init_scores_data, + init_recursive_seq_lens, place) train_data = paddle.batch( paddle.reader.shuffle( @@ -243,7 +245,7 @@ def decode_main(use_cuda, is_sparse): feed=feed_dict, fetch_list=[translation_ids, translation_scores], return_numpy=False) - print result_ids.lod() + print result_ids.recursive_sequence_lengths() break diff --git a/python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py b/python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py index 090c11ce1e7..c860f164170 100644 --- a/python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py +++ b/python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py @@ -209,13 +209,15 @@ def infer(use_cuda, inference_program, params_dirname): inference_program, param_path=params_dirname, place=place) # Use the first data from paddle.dataset.movielens.test() as input. - # Use create_lod_tensor(data, lod, place) API to generate LoD Tensor, - # where `data` is a list of sequences of index numbers, `lod` is - # the level of detail (lod) info associated with `data`. + # Use create_lod_tensor(data, recursive_sequence_lengths, place) API + # to generate LoD Tensor where `data` is a list of sequences of index + # numbers, `recursive_sequence_lengths` is the length-based level of detail + # (lod) info associated with `data`. # For example, data = [[10, 2, 3], [2, 3]] means that it contains # two sequences of indexes, of length 3 and 2, respectively. - # Correspondingly, lod = [[3, 2]] contains one level of detail info, - # indicating that `data` consists of two sequences of length 3 and 2. + # Correspondingly, recursive_sequence_lengths = [[3, 2]] contains one + # level of detail info, indicating that `data` consists of two sequences + # of length 3 and 2, respectively. user_id = fluid.create_lod_tensor([[1]], [[1]], place) gender_id = fluid.create_lod_tensor([[1]], [[1]], place) age_id = fluid.create_lod_tensor([[0]], [[1]], place) diff --git a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py index 9b61f7a00ce..1668ae83d35 100644 --- a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py +++ b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py @@ -128,17 +128,17 @@ def infer(use_cuda, inference_program, params_dirname=None): # Here each word is the basic element of the LoDTensor and the shape of # each word (base_shape) should be [1] since it is simply an index to # look up for the corresponding word vector. - # Suppose the length_based level of detail (lod) info is set to [[3, 4, 2]], - # which has only one lod level. Then the created LoDTensor will have only + # Suppose the recursive_sequence_lengths info is set to [[3, 4, 2]], + # which has only one level of detail. Then the created LoDTensor will have only # one higher level structure (sequence of words, or sentence) than the basic # element (word). Hence the LoDTensor will hold data for three sentences of # length 3, 4 and 2, respectively. - # Note that lod info should be a list of lists. - lod = [[3, 4, 2]] + # Note that recursive_sequence_lengths should be a list of lists. + recursive_seq_lens = [[3, 4, 2]] base_shape = [1] # The range of random integers is [low, high] tensor_words = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=len(word_dict) - 1) + recursive_seq_lens, base_shape, place, low=0, high=len(word_dict) - 1) results = inferencer.infer({'words': tensor_words}) print("infer results: ", results) diff --git a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py index aa7c567b4d6..8da89d82cb8 100644 --- a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py +++ b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py @@ -143,17 +143,17 @@ def infer(use_cuda, inference_program, params_dirname=None): # Here each word is the basic element of the LoDTensor and the shape of # each word (base_shape) should be [1] since it is simply an index to # look up for the corresponding word vector. - # Suppose the length_based level of detail (lod) info is set to [[3, 4, 2]], - # which has only one lod level. Then the created LoDTensor will have only + # Suppose the recursive_sequence_lengths info is set to [[3, 4, 2]], + # which has only one level of detail. Then the created LoDTensor will have only # one higher level structure (sequence of words, or sentence) than the basic # element (word). Hence the LoDTensor will hold data for three sentences of # length 3, 4 and 2, respectively. - # Note that lod info should be a list of lists. - lod = [[3, 4, 2]] + # Note that recursive_sequence_lengths should be a list of lists. + recursive_seq_lens = [[3, 4, 2]] base_shape = [1] # The range of random integers is [low, high] tensor_words = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=len(word_dict) - 1) + recursive_seq_lens, base_shape, place, low=0, high=len(word_dict) - 1) results = inferencer.infer({'words': tensor_words}) print("infer results: ", results) diff --git a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_stacked_lstm.py b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_stacked_lstm.py index 8c74be0f088..74faa2e8aa7 100644 --- a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_stacked_lstm.py +++ b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_stacked_lstm.py @@ -138,17 +138,17 @@ def infer(use_cuda, inference_program, params_dirname=None): # Here each word is the basic element of the LoDTensor and the shape of # each word (base_shape) should be [1] since it is simply an index to # look up for the corresponding word vector. - # Suppose the length_based level of detail (lod) info is set to [[3, 4, 2]], - # which has only one lod level. Then the created LoDTensor will have only + # Suppose the recursive_sequence_lengths info is set to [[3, 4, 2]], + # which has only one level of detail. Then the created LoDTensor will have only # one higher level structure (sequence of words, or sentence) than the basic # element (word). Hence the LoDTensor will hold data for three sentences of # length 3, 4 and 2, respectively. - # Note that lod info should be a list of lists. - lod = [[3, 4, 2]] + # Note that recursive_sequence_lengths should be a list of lists. + recursive_seq_lens = [[3, 4, 2]] base_shape = [1] # The range of random integers is [low, high] tensor_words = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=len(word_dict) - 1) + recursive_seq_lens, base_shape, place, low=0, high=len(word_dict) - 1) results = inferencer.infer({'words': tensor_words}) print("infer results: ", results) diff --git a/python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py b/python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py index ba44f72d9b0..02e65cf56c4 100644 --- a/python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py +++ b/python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py @@ -124,21 +124,22 @@ def infer(use_cuda, inference_program, params_dirname=None): # Setup inputs by creating 4 LoDTensors representing 4 words. Here each word # is simply an index to look up for the corresponding word vector and hence - # the shape of word (base_shape) should be [1]. The length-based level of - # detail (lod) info of each LoDtensor should be [[1]] meaning there is only - # one lod_level and there is only one sequence of one word on this level. - # Note that lod info should be a list of lists. - lod = [[1]] + # the shape of word (base_shape) should be [1]. The recursive_sequence_lengths, + # which is length-based level of detail (lod) of each LoDTensor, should be [[1]] + # meaning there is only one level of detail and there is only one sequence of + # one word on this level. + # Note that recursive_sequence_lengths should be a list of lists. + recursive_seq_lens = [[1]] base_shape = [1] # The range of random integers is [low, high] first_word = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=dict_size - 1) + recursive_seq_lens, base_shape, place, low=0, high=dict_size - 1) second_word = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=dict_size - 1) + recursive_seq_lens, base_shape, place, low=0, high=dict_size - 1) third_word = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=dict_size - 1) + recursive_seq_lens, base_shape, place, low=0, high=dict_size - 1) fourth_word = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=dict_size - 1) + recursive_seq_lens, base_shape, place, low=0, high=dict_size - 1) result = inferencer.infer( { diff --git a/python/paddle/fluid/tests/book/notest_understand_sentiment.py b/python/paddle/fluid/tests/book/notest_understand_sentiment.py index 5d9a47c9ba3..1df7b99aad6 100644 --- a/python/paddle/fluid/tests/book/notest_understand_sentiment.py +++ b/python/paddle/fluid/tests/book/notest_understand_sentiment.py @@ -238,17 +238,21 @@ def infer(word_dict, use_cuda, save_dirname=None): # Here each word is the basic element of the LoDTensor and the shape of # each word (base_shape) should be [1] since it is simply an index to # look up for the corresponding word vector. - # Suppose the length_based level of detail (lod) info is set to [[3, 4, 2]], - # which has only one lod level. Then the created LoDTensor will have only + # Suppose the recursive_sequence_lengths info is set to [[3, 4, 2]], + # which has only one level of detail. Then the created LoDTensor will have only # one higher level structure (sequence of words, or sentence) than the basic # element (word). Hence the LoDTensor will hold data for three sentences of # length 3, 4 and 2, respectively. - # Note that lod info should be a list of lists. - lod = [[3, 4, 2]] + # Note that recursive_sequence_lengths should be a list of lists. + recursive_seq_lens = [[3, 4, 2]] base_shape = [1] # The range of random integers is [low, high] tensor_words = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=word_dict_len - 1) + recursive_seq_lens, + base_shape, + place, + low=0, + high=word_dict_len - 1) # Construct feed as a dictionary of {feed_target_name: feed_target_data} # and results will contain a list of data corresponding to fetch_targets. @@ -257,7 +261,7 @@ def infer(word_dict, use_cuda, save_dirname=None): feed={feed_target_names[0]: tensor_words}, fetch_list=fetch_targets, return_numpy=False) - print(results[0].lod()) + print(results[0].recursive_sequence_lengths()) np_data = np.array(results[0]) print("Inference Shape: ", np_data.shape) print("Inference results: ", np_data) diff --git a/python/paddle/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/fluid/tests/book/test_label_semantic_roles.py index e214ced0b55..d489feae9c5 100644 --- a/python/paddle/fluid/tests/book/test_label_semantic_roles.py +++ b/python/paddle/fluid/tests/book/test_label_semantic_roles.py @@ -247,35 +247,67 @@ def infer(use_cuda, save_dirname=None): [inference_program, feed_target_names, fetch_targets] = fluid.io.load_inference_model(save_dirname, exe) - # Setup inputs by creating LoDTensors to represent sequences of words. - # Here each word is the basic element of these LoDTensors and the shape of + # Setup input by creating LoDTensor to represent sequence of words. + # Here each word is the basic element of the LoDTensor and the shape of # each word (base_shape) should be [1] since it is simply an index to # look up for the corresponding word vector. - # Suppose the length_based level of detail (lod) info is set to [[3, 4, 2]], - # which has only one lod level. Then the created LoDTensors will have only + # Suppose the recursive_sequence_lengths info is set to [[3, 4, 2]], + # which has only one level of detail. Then the created LoDTensor will have only # one higher level structure (sequence of words, or sentence) than the basic # element (word). Hence the LoDTensor will hold data for three sentences of # length 3, 4 and 2, respectively. - # Note that lod info should be a list of lists. - lod = [[3, 4, 2]] + # Note that recursive_sequence_lengths should be a list of lists. + recursive_seq_lens = [[3, 4, 2]] base_shape = [1] # The range of random integers is [low, high] word = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=word_dict_len - 1) + recursive_seq_lens, + base_shape, + place, + low=0, + high=word_dict_len - 1) pred = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=pred_dict_len - 1) + recursive_seq_lens, + base_shape, + place, + low=0, + high=pred_dict_len - 1) ctx_n2 = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=word_dict_len - 1) + recursive_seq_lens, + base_shape, + place, + low=0, + high=word_dict_len - 1) ctx_n1 = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=word_dict_len - 1) + recursive_seq_lens, + base_shape, + place, + low=0, + high=word_dict_len - 1) ctx_0 = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=word_dict_len - 1) + recursive_seq_lens, + base_shape, + place, + low=0, + high=word_dict_len - 1) ctx_p1 = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=word_dict_len - 1) + recursive_seq_lens, + base_shape, + place, + low=0, + high=word_dict_len - 1) ctx_p2 = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=word_dict_len - 1) + recursive_seq_lens, + base_shape, + place, + low=0, + high=word_dict_len - 1) mark = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=mark_dict_len - 1) + recursive_seq_lens, + base_shape, + place, + low=0, + high=mark_dict_len - 1) # Construct feed as a dictionary of {feed_target_name: feed_target_data} # and results will contain a list of data corresponding to fetch_targets. @@ -301,7 +333,7 @@ def infer(use_cuda, save_dirname=None): }, fetch_list=fetch_targets, return_numpy=False) - print(results[0].lod()) + print(results[0].recursive_sequence_lengths()) np_data = np.array(results[0]) print("Inference Shape: ", np_data.shape) diff --git a/python/paddle/fluid/tests/book/test_machine_translation.py b/python/paddle/fluid/tests/book/test_machine_translation.py index 372d6ec8223..a68eae5bcb2 100644 --- a/python/paddle/fluid/tests/book/test_machine_translation.py +++ b/python/paddle/fluid/tests/book/test_machine_translation.py @@ -108,7 +108,7 @@ def decoder_decode(context, is_sparse): pre_state = pd.array_read(array=state_array, i=counter) pre_score = pd.array_read(array=scores_array, i=counter) - # expand the lod of pre_state to be the same with pre_score + # expand the recursive_sequence_lengths of pre_state to be the same with pre_score pre_state_expanded = pd.sequence_expand(pre_state, pre_score) pre_ids_emb = pd.embedding( @@ -238,11 +238,13 @@ def decode_main(use_cuda, is_sparse): [1. for _ in range(batch_size)], dtype='float32') init_ids_data = init_ids_data.reshape((batch_size, 1)) init_scores_data = init_scores_data.reshape((batch_size, 1)) - init_lod = [1] * batch_size - init_lod = [init_lod, init_lod] + init_recursive_seq_lens = [1] * batch_size + init_recursive_seq_lens = [init_recursive_seq_lens, init_recursive_seq_lens] - init_ids = fluid.create_lod_tensor(init_ids_data, init_lod, place) - init_scores = fluid.create_lod_tensor(init_scores_data, init_lod, place) + init_ids = fluid.create_lod_tensor(init_ids_data, init_recursive_seq_lens, + place) + init_scores = fluid.create_lod_tensor(init_scores_data, + init_recursive_seq_lens, place) train_data = paddle.batch( paddle.reader.shuffle( @@ -266,7 +268,7 @@ def decode_main(use_cuda, is_sparse): feed=feed_dict, fetch_list=[translation_ids, translation_scores], return_numpy=False) - print result_ids.lod() + print result_ids.recursive_sequence_lengths() break diff --git a/python/paddle/fluid/tests/book/test_recommender_system.py b/python/paddle/fluid/tests/book/test_recommender_system.py index 937d8dd5b06..6548766ef5d 100644 --- a/python/paddle/fluid/tests/book/test_recommender_system.py +++ b/python/paddle/fluid/tests/book/test_recommender_system.py @@ -260,13 +260,15 @@ def infer(use_cuda, save_dirname=None): # Use the first data from paddle.dataset.movielens.test() as input assert feed_target_names[0] == "user_id" - # Use create_lod_tensor(data, lod, place) API to generate LoD Tensor - # where `data` is a list of sequences of index numbers, `lod` is - # the level of detail (lod) info associated with `data`. + # Use create_lod_tensor(data, recursive_sequence_lengths, place) API + # to generate LoD Tensor where `data` is a list of sequences of index + # numbers, `recursive_sequence_lengths` is the length-based level of detail + # (lod) info associated with `data`. # For example, data = [[10, 2, 3], [2, 3]] means that it contains # two sequences of indexes, of length 3 and 2, respectively. - # Correspondingly, lod = [[3, 2]] contains one level of detail info, - # indicating that `data` consists of two sequences of length 3 and 2. + # Correspondingly, recursive_sequence_lengths = [[3, 2]] contains one + # level of detail info, indicating that `data` consists of two sequences + # of length 3 and 2, respectively. user_id = fluid.create_lod_tensor([[1]], [[1]], place) assert feed_target_names[1] == "gender_id" diff --git a/python/paddle/fluid/tests/book/test_rnn_encoder_decoder.py b/python/paddle/fluid/tests/book/test_rnn_encoder_decoder.py index 7ada57def6b..46728262415 100644 --- a/python/paddle/fluid/tests/book/test_rnn_encoder_decoder.py +++ b/python/paddle/fluid/tests/book/test_rnn_encoder_decoder.py @@ -216,19 +216,19 @@ def infer(use_cuda, save_dirname=None): # Here each word is the basic element of the LoDTensor and the shape of # each word (base_shape) should be [1] since it is simply an index to # look up for the corresponding word vector. - # Suppose the length_based level of detail (lod) info is set to [[4, 6]], - # which has only one lod level. Then the created LoDTensor will have only + # Suppose the recursive_sequence_lengths info is set to [[4, 6]], + # which has only one level of detail. Then the created LoDTensor will have only # one higher level structure (sequence of words, or sentence) than the basic # element (word). Hence the LoDTensor will hold data for two sentences of # length 4 and 6, respectively. - # Note that lod info should be a list of lists. - lod = [[4, 6]] + # Note that recursive_sequence_lengths should be a list of lists. + recursive_seq_lens = [[4, 6]] base_shape = [1] # The range of random integers is [low, high] word_data = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=1) + recursive_seq_lens, base_shape, place, low=0, high=1) trg_word = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=1) + recursive_seq_lens, base_shape, place, low=0, high=1) # Construct feed as a dictionary of {feed_target_name: feed_target_data} # and results will contain a list of data corresponding to fetch_targets. @@ -241,7 +241,7 @@ def infer(use_cuda, save_dirname=None): }, fetch_list=fetch_targets, return_numpy=False) - print(results[0].lod()) + print(results[0].recursive_sequence_lengths()) np_data = np.array(results[0]) print("Inference shape: ", np_data.shape) print("Inference results: ", np_data) diff --git a/python/paddle/fluid/tests/book/test_word2vec.py b/python/paddle/fluid/tests/book/test_word2vec.py index 75bed06bd7a..49bd72c7a53 100644 --- a/python/paddle/fluid/tests/book/test_word2vec.py +++ b/python/paddle/fluid/tests/book/test_word2vec.py @@ -168,21 +168,22 @@ def infer(use_cuda, save_dirname=None): # Setup inputs by creating 4 LoDTensors representing 4 words. Here each word # is simply an index to look up for the corresponding word vector and hence - # the shape of word (base_shape) should be [1]. The length-based level of - # detail (lod) info of each LoDtensor should be [[1]] meaning there is only - # one lod_level and there is only one sequence of one word on this level. - # Note that lod info should be a list of lists. - lod = [[1]] + # the shape of word (base_shape) should be [1]. The recursive_sequence_lengths, + # which is length-based level of detail (lod) of each LoDTensor, should be [[1]] + # meaning there is only one level of detail and there is only one sequence of + # one word on this level. + # Note that recursive_sequence_lengths should be a list of lists. + recursive_seq_lens = [[1]] base_shape = [1] # The range of random integers is [low, high] first_word = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=dict_size - 1) + recursive_seq_lens, base_shape, place, low=0, high=dict_size - 1) second_word = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=dict_size - 1) + recursive_seq_lens, base_shape, place, low=0, high=dict_size - 1) third_word = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=dict_size - 1) + recursive_seq_lens, base_shape, place, low=0, high=dict_size - 1) fourth_word = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=dict_size - 1) + recursive_seq_lens, base_shape, place, low=0, high=dict_size - 1) assert feed_target_names[0] == 'firstw' assert feed_target_names[1] == 'secondw' @@ -200,7 +201,7 @@ def infer(use_cuda, save_dirname=None): }, fetch_list=fetch_targets, return_numpy=False) - print(results[0].lod()) + print(results[0].recursive_sequence_lengths()) np_data = np.array(results[0]) print("Inference Shape: ", np_data.shape) diff --git a/python/paddle/fluid/tests/test_lod_tensor.py b/python/paddle/fluid/tests/test_lod_tensor.py index b7e7f5801fb..f7a9dd41290 100644 --- a/python/paddle/fluid/tests/test_lod_tensor.py +++ b/python/paddle/fluid/tests/test_lod_tensor.py @@ -19,18 +19,21 @@ import unittest class TestLoDTensor(unittest.TestCase): - def test_pybind_lod(self): + def test_pybind_recursive_seq_lens(self): tensor = fluid.LoDTensor() - lod = [] - tensor.set_recursive_sequence_lengths(lod) - lod = [[], [1], [3]] - self.assertRaises(Exception, tensor.set_recursive_sequence_lengths, lod) - lod = [[0], [2], [3]] - self.assertRaises(Exception, tensor.set_recursive_sequence_lengths, lod) + recursive_seq_lens = [] + tensor.set_recursive_sequence_lengths(recursive_seq_lens) + recursive_seq_lens = [[], [1], [3]] + self.assertRaises(Exception, tensor.set_recursive_sequence_lengths, + recursive_seq_lens) + recursive_seq_lens = [[0], [2], [3]] + self.assertRaises(Exception, tensor.set_recursive_sequence_lengths, + recursive_seq_lens) - lod = [[1, 2, 3]] - tensor.set_recursive_sequence_lengths(lod) - self.assertEqual(tensor.recursive_sequence_lengths(), lod) + recursive_seq_lens = [[1, 2, 3]] + tensor.set_recursive_sequence_lengths(recursive_seq_lens) + self.assertEqual(tensor.recursive_sequence_lengths(), + recursive_seq_lens) tensor.set(np.random.random([6, 1]), fluid.CPUPlace()) self.assertTrue(tensor.has_valid_recursive_sequence_lengths()) tensor.set(np.random.random([9, 1]), fluid.CPUPlace()) @@ -38,13 +41,14 @@ class TestLoDTensor(unittest.TestCase): # Each level's sum should be equal to the number of items in the next level # Moreover, last level's sum should be equal to the tensor height - lod = [[2, 3], [1, 3, 1, 2, 2]] - tensor.set_recursive_sequence_lengths(lod) - self.assertEqual(tensor.recursive_sequence_lengths(), lod) + recursive_seq_lens = [[2, 3], [1, 3, 1, 2, 2]] + tensor.set_recursive_sequence_lengths(recursive_seq_lens) + self.assertEqual(tensor.recursive_sequence_lengths(), + recursive_seq_lens) tensor.set(np.random.random([8, 1]), fluid.CPUPlace()) self.assertFalse(tensor.has_valid_recursive_sequence_lengths()) - lod = [[2, 3], [1, 3, 1, 2, 1]] - tensor.set_recursive_sequence_lengths(lod) + recursive_seq_lens = [[2, 3], [1, 3, 1, 2, 1]] + tensor.set_recursive_sequence_lengths(recursive_seq_lens) self.assertTrue(tensor.has_valid_recursive_sequence_lengths()) tensor.set(np.random.random([9, 1]), fluid.CPUPlace()) self.assertFalse(tensor.has_valid_recursive_sequence_lengths()) @@ -52,35 +56,42 @@ class TestLoDTensor(unittest.TestCase): def test_create_lod_tensor(self): # Create LoDTensor from a list data = [[1, 2, 3], [3, 4]] - wrong_lod = [[2, 2]] - correct_lod = [[3, 2]] - self.assertRaises(AssertionError, create_lod_tensor, data, wrong_lod, - fluid.CPUPlace()) - tensor = create_lod_tensor(data, correct_lod, fluid.CPUPlace()) - self.assertEqual(tensor.recursive_sequence_lengths(), correct_lod) + wrong_recursive_seq_lens = [[2, 2]] + correct_recursive_seq_lens = [[3, 2]] + self.assertRaises(AssertionError, create_lod_tensor, data, + wrong_recursive_seq_lens, fluid.CPUPlace()) + tensor = create_lod_tensor(data, correct_recursive_seq_lens, + fluid.CPUPlace()) + self.assertEqual(tensor.recursive_sequence_lengths(), + correct_recursive_seq_lens) # Create LoDTensor from numpy array data = np.random.random([10, 1]) - lod = [[2, 1], [3, 3, 4]] - tensor = create_lod_tensor(data, lod, fluid.CPUPlace()) - self.assertEqual(tensor.recursive_sequence_lengths(), lod) + recursive_seq_lens = [[2, 1], [3, 3, 4]] + tensor = create_lod_tensor(data, recursive_seq_lens, fluid.CPUPlace()) + self.assertEqual(tensor.recursive_sequence_lengths(), + recursive_seq_lens) # Create LoDTensor from another LoDTensor, they are differnt instances - new_lod = [[2, 2, 1], [1, 2, 2, 3, 2]] - new_tensor = create_lod_tensor(tensor, new_lod, fluid.CPUPlace()) - self.assertEqual(tensor.recursive_sequence_lengths(), lod) - self.assertEqual(new_tensor.recursive_sequence_lengths(), new_lod) + new_recursive_seq_lens = [[2, 2, 1], [1, 2, 2, 3, 2]] + new_tensor = create_lod_tensor(tensor, new_recursive_seq_lens, + fluid.CPUPlace()) + self.assertEqual(tensor.recursive_sequence_lengths(), + recursive_seq_lens) + self.assertEqual(new_tensor.recursive_sequence_lengths(), + new_recursive_seq_lens) def test_create_random_int_lodtensor(self): # The shape of a word, commonly used in speech and NLP problem, is [1] shape = [1] - lod = [[2, 3, 5]] + recursive_seq_lens = [[2, 3, 5]] dict_size = 10000 low = 0 high = dict_size - 1 - tensor = create_random_int_lodtensor(lod, shape, + tensor = create_random_int_lodtensor(recursive_seq_lens, shape, fluid.CPUPlace(), low, high) - self.assertEqual(tensor.recursive_sequence_lengths(), lod) + self.assertEqual(tensor.recursive_sequence_lengths(), + recursive_seq_lens) self.assertEqual(tensor.shape(), [10, 1]) -- GitLab