lod_tensor.py 6.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14
#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15 16
from __future__ import print_function

17
from . import core
18 19 20 21 22
import numpy as np

__all__ = ['create_lod_tensor', 'create_random_int_lodtensor']


K
Kexin Zhao 已提交
23
def create_lod_tensor(data, recursive_seq_lens, place):
    """
    Build a LoDTensor from a numpy array, a list of sequences, or another
    LoDTensor.

    The function dispatches on the type of :code:`data`:

    1. A LoDTensor is converted to a numpy array with :code:`np.array` and
       handled as a numpy array.

    2. A list is treated as a batch of sequences. The length of every
       sequence is collected and compared against
       :code:`recursive_seq_lens`; the sequences are then concatenated along
       axis 0 and reshaped to :code:`[n, 1]`, where :code:`n` is the total
       number of elements, before being handled as a numpy array. This path
       is meant for sequences of single indexes (e.g. int64 word ids); no
       dtype conversion is performed here.

    3. A numpy array is copied into a new LoDTensor on :code:`place`, the
       length-based level of detail (LoD) is attached to it, and the tensor
       is asked to confirm that the LoD is valid.

    Examples:

        Suppose we want a LoDTensor to hold two sentences, one of 2 words
        and one of 3 words, where each word is represented by an integer.

        Then :code:`data` can be a numpy array of integers with shape (5, 1)
        and :code:`recursive_seq_lens` is [[2, 3]], giving the number of
        words in each sentence. This length-based form corresponds to the
        offset-based LoD [[0, 2, 5]].

        .. code-block:: python

          import paddle.fluid as fluid
          import numpy as np

          t = fluid.create_lod_tensor(np.ndarray([5, 30]), [[2, 3]], fluid.CPUPlace())

    Please reference :ref:`api_guide_low_level_lod_tensor` for more details
    regarding LoD.

    Args:
        data(numpy.ndarray|list|LoDTensor): a numpy array, a LoDTensor or a
            list holding the data to be copied.
        recursive_seq_lens(list): a list of lists giving the length-based
            level of detail info specified by the user.
        place(Place): CPU or GPU place indicating where the data in the new
            LoDTensor will be stored.

    Returns:
        A fluid LoDTensor object with tensor data and recursive_seq_lens info.

    Raises:
        AssertionError: if :code:`data` is a list whose sequence lengths do
            not equal :code:`recursive_seq_lens`, or if the created tensor
            reports its recursive sequence lengths as invalid.
        TypeError: if :code:`data` is not a LoDTensor, a list or a numpy
            array.
    """
    if isinstance(data, core.LoDTensor):
        # Go through numpy so the ndarray path below does the actual work.
        return create_lod_tensor(np.array(data), recursive_seq_lens, place)

    if isinstance(data, list):
        # A list input describes exactly one LoD level: one length per
        # sequence. It must agree with what the caller passed in.
        seq_lens = [len(seq) for seq in data]
        assert [
            seq_lens
        ] == recursive_seq_lens, "data and recursive_seq_lens do not match"
        # Flatten all sequences into a single column of shape [n, 1].
        flat = np.concatenate(data, axis=0)
        column = flat.reshape([len(flat), 1])
        return create_lod_tensor(column, recursive_seq_lens, place)

    if not isinstance(data, np.ndarray):
        raise TypeError(
            "data should be either a LoDTensor, a Numpy array or a list")

    lod_tensor = core.LoDTensor()
    lod_tensor.set(data, place)
    lod_tensor.set_recursive_sequence_lengths(recursive_seq_lens)
    # Validate only after both data and LoD are set on the tensor.
    lod_is_valid = lod_tensor.has_valid_recursive_sequence_lengths()
    assert lod_is_valid, "the provided lod info is invalid"
    return lod_tensor
97 98


K
Kexin Zhao 已提交
99 100
def create_random_int_lodtensor(recursive_seq_lens, base_shape, place, low,
                                high):
    """
    Create a LoDTensor containing random integers.

    This function is frequently used in the book examples. It is built on
    top of the create_lod_tensor API and lives in the lod_tensor module to
    simplify that code.

    The function does the following:

    1. Calculate the overall shape of the LoDTensor: the sum of the last
       level of the length-based :code:`recursive_seq_lens` is prepended to
       the shape of the basic element given in :code:`base_shape`.

    2. Create an int64 numpy array of this shape, filled with random
       integers drawn from the closed interval [low, high].

    3. Create the LoDTensor using create_lod_tensor API.

    Suppose we want LoDTensor to hold data for sequences of word, where each
    word is represented by an integer. If we want to create a LoDTensor to
    represent two sentences, one of 2 words, and one of 3 words, then
    'base_shape' is [1] and the length-based 'recursive_seq_lens' is
    [[2, 3]]. The overall shape of the LoDTensor would be [5, 1], holding
    5 words for two sentences.

    Args:
        recursive_seq_lens(list): a list of lists indicating the length-based
            level of detail info specified by the user.
        base_shape(list): the shape of the basic element to be held by the
            LoDTensor. Must be a list (not a tuple).
        place(Place): CPU or GPU place indicating where the data in the new
            LoDTensor will be stored.
        low(int): the lower bound (inclusive) of the random integers.
        high(int): the upper bound (inclusive) of the random integers.

    Returns:
        A fluid LoDTensor object with tensor data and recursive_seq_lens info.

    Raises:
        AssertionError: if :code:`base_shape` is not a list.

    Examples:
        .. code-block:: python

          import paddle.fluid as fluid

          t = fluid.create_random_int_lodtensor(recursive_seq_lens=[[2, 3]],
                base_shape=[30], place=fluid.CPUPlace(), low=0, high=10)
    """
    assert isinstance(base_shape, list), "base_shape should be a list"
    # The last LoD level holds the lengths of the innermost sequences, so its
    # sum is the total number of basic elements; that count becomes the
    # leading dimension of the data.
    overall_shape = [sum(recursive_seq_lens[-1])] + base_shape
    # np.random.random_integers is deprecated (it warns on every call) and
    # simply forwards to randint with the upper bound shifted by one.
    # randint excludes its upper bound, so `int(high) + 1` keeps the sampled
    # range equal to the closed interval [low, high].
    data = np.random.randint(low, int(high) + 1, overall_shape).astype("int64")
    return create_lod_tensor(data, recursive_seq_lens, place)