lod_tensor.py 6.5 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14
#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15 16
from __future__ import print_function

17
from . import core
Z
Zeng Jinle 已提交
18
from .data_feeder import DataToLoDTensorConverter
19 20 21 22 23
import numpy as np

__all__ = ['create_lod_tensor', 'create_random_int_lodtensor']


K
Kexin Zhao 已提交
24
def create_lod_tensor(data, recursive_seq_lens, place):
    """
    Build a LoDTensor out of a numpy array, a list or another LoDTensor.

    The function proceeds in four steps:

    1. Verify that the length-based LoD given by :code:`recursive_seq_lens`
       is valid.

    2. Turn :code:`recursive_seq_lens` into an offset-based LoD.

    3. Copy :code:`data` (a numpy array, a list or an existing LoDTensor)
       to the CPU or GPU device selected by :code:`place` .

    4. Attach the offset-based LoD to the resulting LoDTensor.

    As an illustration, consider word sequences in which every word is
    encoded as one integer, and a batch made of two sentences: the first
    has 2 words and the second has 3 words.

    In that case :code:`data` is a numpy array of integers with shape (5, 1)
    and :code:`recursive_seq_lens` is [[2, 3]], i.e. the number of words of
    each sentence. Inside the call, the length-based [[2, 3]] is converted
    to the offset-based LoD [[0, 2, 5]].

    Please reference :ref:`user_guide_lod_tensor` for more details regarding LoD.

    Args:
        data (numpy.ndarray|list|LoDTensor): a numpy array, a list or a
                LoDTensor holding the data to be copied. When a list is
                given, it must be a list of sequences whose lengths equal
                :code:`recursive_seq_lens[0]` , and
                :code:`recursive_seq_lens` must have exactly one level.
        recursive_seq_lens (list[list[int]]): a list of lists indicating the
                length-based LoD info.
        place (CPUPlace|CUDAPlace): CPU or GPU place indicating where the data
                in the created LoDTensor will be stored.

    Returns:
         A LoDTensor with tensor data and recursive_seq_lens info.

    Raises:
        AssertionError: if a list :code:`data` does not match
                :code:`recursive_seq_lens` , or if the LoD info given with a
                numpy array is invalid.
        TypeError: if :code:`data` is not a LoDTensor, a numpy array or a list.

    Examples:

        .. code-block:: python

            import paddle.fluid as fluid
            import numpy as np

            t = fluid.create_lod_tensor(np.ndarray([5, 30]), [[2, 3]], fluid.CPUPlace())
    """
    if isinstance(data, core.LoDTensor):
        # Go through numpy so that the ndarray branch below does the work.
        as_array = np.array(data)
        return create_lod_tensor(as_array, recursive_seq_lens, place)

    if isinstance(data, list):
        # The converter is only borrowed for its feeding logic; the dtype
        # and shape given here do not matter.
        feeder = DataToLoDTensorConverter(
            place=place,
            lod_level=len(recursive_seq_lens),
            shape=[],
            dtype=core.VarDesc.VarType.FP32)

        observed_lens = []
        for sequence in data:
            observed_lens.append(len(sequence))
            feeder.feed(sequence)

        # Only a single LoD level can match: the measured lengths are
        # compared against the whole of recursive_seq_lens.
        assert [
            observed_lens
        ] == recursive_seq_lens, "data and recursive_seq_lens do not match"

        flat = np.array(feeder.data)

        # FIXME(zjl): the original logic of create_lod_tensor would append
        # 1 to the shape. Maybe it is not a right way? Currently, we only
        # follow the previous logic
        trailing_one_shape = flat.shape + (1, )
        flat = flat.reshape(trailing_one_shape)

        result = core.LoDTensor()
        result.set(flat, place)
        result.set_recursive_sequence_lengths(recursive_seq_lens)
        return result

    if isinstance(data, np.ndarray):
        result = core.LoDTensor()
        result.set(data, place)
        result.set_recursive_sequence_lengths(recursive_seq_lens)
        # Validity can only be checked once both data and LoD are set.
        assert result.has_valid_recursive_sequence_lengths(
        ), "the provided lod info is invalid"
        return result

    raise TypeError(
        "data should be either a LoDTensor, a Numpy array or a list")
112 113


K
Kexin Zhao 已提交
114 115
def create_random_int_lodtensor(recursive_seq_lens, base_shape, place, low,
                                high):
    """
    Create a LoDTensor containing random integers.

    The implementation is as follows:

    1. Obtain the shape of output LoDTensor based on :code:`recursive_seq_lens`
       and :code:`base_shape` . The first dimension of the shape is the total
       length of sequences, while the other dimensions are the same as
       :code:`base_shape` .

    2. Create a numpy array of random integers, and pass the created numpy
       array as parameter :code:`data` of :ref:`api_fluid_create_lod_tensor` to
       create the output LoDTensor.

    Suppose we want to create a LoDTensor to hold data for 2 sequences, where
    the dimension of the sequences are [2, 30] and [3, 30] respectively.
    The :code:`recursive_seq_lens` would be [[2, 3]], and :code:`base_shape`
    would be [30] (the other dimensions excluding the sequence length).
    Therefore, the shape of the output LoDTensor would be [5, 30], where
    the first dimension 5 is the total lengths of the sequences, and the
    other dimensions are :code:`base_shape`.

    Args:
        recursive_seq_lens (list[list[int]]): a list of lists indicating the
                length-based LoD info.
        base_shape (list[int]): the shape of the output LoDTensor excluding
                the first dimension.
        place (CPUPlace|CUDAPlace): CPU or GPU place indicating where
                the data in the created LoDTensor will be stored.
        low (int): the lower bound of the random integers.
        high (int): the upper bound of the random integers.

    Returns:
        A LoDTensor with tensor data and recursive_seq_lens info, whose data
        is inside [low, high].

    Raises:
        AssertionError: if :code:`base_shape` is not a list.

    Examples:
        .. code-block:: python

          import paddle.fluid as fluid

          t = fluid.create_random_int_lodtensor(recursive_seq_lens=[[2, 3]],
                    base_shape=[30], place=fluid.CPUPlace(), low=0, high=10)
          print(t.shape()) # [5, 30]
    """
    assert isinstance(base_shape, list), "base_shape should be a list"
    # The last LoD level holds the lengths of the basic sequences, so their
    # sum is the first dimension of the output; base_shape follows it.
    overall_shape = [sum(recursive_seq_lens[-1])] + base_shape
    # The integers are drawn from the closed interval [low, high].
    # np.random.random_integers is deprecated; np.random.randint excludes
    # its upper bound, hence the "+ 1" to keep `high` reachable.
    data = np.random.randint(
        low, int(high) + 1, size=overall_shape).astype("int64")
    return create_lod_tensor(data, recursive_seq_lens, place)