lod_tensor.py 5.3 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import core
import numpy as np

__all__ = ['create_lod_tensor', 'create_random_int_lodtensor']


def create_lod_tensor(data, lod, place):
    """
    Create a lod tensor from a numpy array, a list, or an existing lod tensor.

    Create a lod tensor by doing the following:

    1. Check that the length-based input lod is valid.

    2. Convert the length-based lod to an offset-based LoD.

    3. Copy the data from a numpy array, a list or an existing lod tensor to
       CPU or GPU device (based on input place).

    4. Set the level of detail (LoD) using the offset-based LoD.

    Examples:

        Suppose we want LoDTensor to hold data for sequences of word, where each
        word is represented by an integer. If we want to create a LoDTensor to
        represent two sentences, one of 2 words, and one of 3 words.

        Then :code:`data` can be a numpy array of integers with shape (5, 1).
        :code:`lod` will be [[2, 3]], indicating the length(# of words) in each
        sentence. This length-based input lod [[2, 3]] will be converted to
        offset-based lod [[0, 2, 5]] inside the function call.

    Please reference :ref:`api_guide_low_level_lod_tensor` for more details
    regarding LoD.

    Args:
        data(numpy.ndarray|list|LoDTensor): a numpy array or a LoDTensor or a
            list holding the data to be copied.
        lod(list): a list of lists indicating the length-based LoD info
            specified by the user.
        place(Place): CPU or GPU place indicating where the data in the new
            LoDTensor will be stored.

    Returns:
        A fluid LoDTensor object with tensor data and lod info.

    Raises:
        AssertionError: if :code:`data` is a list whose per-sequence lengths
            do not equal :code:`lod`, or if the tensor reports that the
            provided lod is invalid.
        TypeError: if :code:`data` is not a LoDTensor, a numpy array or a list.
    """
    if isinstance(data, core.LoDTensor):
        # Round-trip through numpy so the ndarray branch does the real work.
        return create_lod_tensor(np.array(data), lod, place)

    if isinstance(data, list):
        # When input data is a list, it only deals with the case where the
        # base element is an index of shape [1] and dtype int64 (e.g., word
        # id). Hence, the generated LoDTensor will be of shape [n, 1] and
        # dtype int64, where `n` is the total number of words or other
        # indexes in the sequence.
        seq_lens = [len(seq) for seq in data]
        # Raised explicitly (rather than via `assert`) so the check is not
        # stripped when Python runs with -O; the exception type is unchanged.
        if not ([seq_lens] == lod):
            raise AssertionError("data and lod do not match")
        flattened_data = np.concatenate(data, axis=0).astype("int64")
        flattened_data = flattened_data.reshape([len(flattened_data), 1])
        return create_lod_tensor(flattened_data, lod, place)

    if isinstance(data, np.ndarray):
        tensor = core.LoDTensor()
        tensor.set(data, place)
        tensor.set_recursive_sequence_lengths(lod)
        # Validate only after both data and lod are set on the tensor.
        if not tensor.has_valid_recursive_sequence_lengths():
            raise AssertionError("the provided lod info is invalid")
        return tensor

    raise TypeError(
        "data should be either a LoDTensor, a Numpy array or a list")
85 86 87


def create_random_int_lodtensor(lod, base_shape, place, low, high):
    """
    Create a LoDTensor containing random integers.

    This function is frequently used in the book examples. So we revised it
    based on the new create_lod_tensor API and put it here in the lod_tensor
    module to simplify the code.

    The function does the following:

    1. Calculate the overall shape of the LoDTensor based on the length-based
       :code:`lod` input and the shape of the basic element in
       :code:`base_shape`.

    2. Create a numpy array of this shape.

    3. Create the LoDTensor using create_lod_tensor API.

    Suppose we want LoDTensor to hold data for sequences of word, where each
    word is represented by an integer. If we want to create a LoDTensor to
    represent two sentences, one of 2 words, and one of 3 words. Then
    'base_shape' is [1], input length-based 'lod' is [[2, 3]]. Then the overall
    shape of the LoDTensor would be [5, 1], holding 5 words for two sentences.

    Args:
        lod(list): a list of lists indicating the length-based LoD info
            specified by the user.
        base_shape(list): the shape of the basic element to be held by the
            LoDTensor.
        place(Place): CPU or GPU place indicating where the data in the new
            LoDTensor will be stored.
        low(int): the lower bound of the random integers (inclusive).
        high(int): the upper bound of the random integers (inclusive).

    Returns:
        A fluid LoDTensor object with tensor data and lod info.

    Raises:
        AssertionError: if :code:`base_shape` is not a list.
    """
    # Raised explicitly (rather than via `assert`) so the check is not
    # stripped when Python runs with -O; the exception type is unchanged.
    if not isinstance(base_shape, list):
        raise AssertionError("base_shape should be a list")
    # Prepend the total number of basic elements (the sum of the last lod
    # level) to the shape of a single element.
    overall_shape = [sum(lod[-1])] + base_shape
    # The range of integer data elements is the closed interval [low, high].
    # np.random.random_integers is deprecated; randint excludes its upper
    # bound, so pass high + 1 to keep `high` reachable.
    data = np.random.randint(low, high + 1, overall_shape).astype("int64")
    return create_lod_tensor(data, lod, place)