From 5a7142ac4e9492eff588e995d05a843531deda02 Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Fri, 11 Oct 2019 08:45:31 +0800 Subject: [PATCH] Update en APIs of LoDTensor (#20115) * polish en APIs of LodTensor, test=develop, test=document_dix * polish en APIs of LoDTensor, test=develop, test=document_fix * follow comments, test=develop, test=document_dix --- paddle/fluid/API.spec | 2 +- paddle/fluid/pybind/pybind.cc | 258 ++++++++++++++++++++++++---------- 2 files changed, 181 insertions(+), 79 deletions(-) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 1a4c57a225e..63b15777cf2 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -1079,7 +1079,7 @@ paddle.fluid.regularizer.L1DecayRegularizer ('paddle.fluid.regularizer.L1DecayRe paddle.fluid.regularizer.L1DecayRegularizer.__init__ (ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.regularizer.L2DecayRegularizer ('paddle.fluid.regularizer.L2DecayRegularizer', ('document', 'b94371c3434d7f695bc5b2d6fb5531fd')) paddle.fluid.regularizer.L2DecayRegularizer.__init__ (ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.LoDTensor ('paddle.fluid.core_avx.LoDTensor', ('document', '25e8432ed1b9a375868bc8911359aa0d')) +paddle.fluid.LoDTensor ('paddle.fluid.core_avx.LoDTensor', ('document', '8ee00d246c952b92e5e8ca2d92a4fc00')) paddle.fluid.LoDTensor.__init__ 1. __init__(self: paddle.fluid.core_avx.LoDTensor, arg0: List[List[int]]) -> None 2. 
__init__(self: paddle.fluid.core_avx.LoDTensor) -> None paddle.fluid.LoDTensor.has_valid_recursive_sequence_lengths has_valid_recursive_sequence_lengths(self: paddle.fluid.core_avx.LoDTensor) -> bool paddle.fluid.LoDTensor.lod lod(self: paddle.fluid.core_avx.LoDTensor) -> List[List[int]] diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 0adbe59bfc1..7961cb6f05c 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -381,33 +381,92 @@ PYBIND11_MODULE(core_noavx, m) { return reinterpret_cast(self.mutable_data(place, type)); }) .def("_clear", &Tensor::clear) - .def("set", PyCPUTensorSetFromArray) - .def("set", PyCPUTensorSetFromArray) - .def("set", PyCPUTensorSetFromArray) - .def("set", PyCPUTensorSetFromArray) - .def("set", PyCPUTensorSetFromArray) - .def("set", PyCPUTensorSetFromArray) - .def("set", PyCPUTensorSetFromArray) - .def("set", PyCPUTensorSetFromArray) + .def("set", PyCPUTensorSetFromArray, py::arg("array"), + py::arg("place")) + .def("set", PyCPUTensorSetFromArray, py::arg("array"), + py::arg("place")) + .def("set", PyCPUTensorSetFromArray, py::arg("array"), + py::arg("place")) + .def("set", PyCPUTensorSetFromArray, py::arg("array"), + py::arg("place")) + .def("set", PyCPUTensorSetFromArray, py::arg("array"), + py::arg("place")) + .def("set", PyCPUTensorSetFromArray, py::arg("array"), + py::arg("place")) + .def("set", PyCPUTensorSetFromArray, py::arg("array"), + py::arg("place")) + .def("set", PyCPUTensorSetFromArray, py::arg("array"), + py::arg("place")) #ifdef PADDLE_WITH_CUDA - .def("set", PyCUDATensorSetFromArray) - .def("set", PyCUDATensorSetFromArray) - .def("set", PyCUDATensorSetFromArray) - .def("set", PyCUDATensorSetFromArray) - .def("set", PyCUDATensorSetFromArray) - .def("set", PyCUDATensorSetFromArray) - .def("set", PyCUDATensorSetFromArray) - .def("set", PyCUDATensorSetFromArray) - .def("set", PyCUDAPinnedTensorSetFromArray) - .def("set", PyCUDAPinnedTensorSetFromArray) - .def("set", 
PyCUDAPinnedTensorSetFromArray) - .def("set", PyCUDAPinnedTensorSetFromArray) - .def("set", PyCUDAPinnedTensorSetFromArray) - .def("set", PyCUDAPinnedTensorSetFromArray) - .def("set", PyCUDAPinnedTensorSetFromArray) - .def("set", PyCUDAPinnedTensorSetFromArray) + .def("set", PyCUDATensorSetFromArray, py::arg("array"), + py::arg("place")) + .def("set", PyCUDATensorSetFromArray, py::arg("array"), + py::arg("place")) + .def("set", PyCUDATensorSetFromArray, py::arg("array"), + py::arg("place")) + .def("set", PyCUDATensorSetFromArray, py::arg("array"), + py::arg("place")) + .def("set", PyCUDATensorSetFromArray, py::arg("array"), + py::arg("place")) + .def("set", PyCUDATensorSetFromArray, py::arg("array"), + py::arg("place")) + .def("set", PyCUDATensorSetFromArray, py::arg("array"), + py::arg("place")) + .def("set", PyCUDATensorSetFromArray, py::arg("array"), + py::arg("place")) + .def("set", PyCUDAPinnedTensorSetFromArray, py::arg("array"), + py::arg("place")) + .def("set", PyCUDAPinnedTensorSetFromArray, py::arg("array"), + py::arg("place")) + .def("set", PyCUDAPinnedTensorSetFromArray, py::arg("array"), + py::arg("place")) + .def("set", PyCUDAPinnedTensorSetFromArray, py::arg("array"), + py::arg("place")) + .def("set", PyCUDAPinnedTensorSetFromArray, py::arg("array"), + py::arg("place")) + .def("set", PyCUDAPinnedTensorSetFromArray, py::arg("array"), + py::arg("place")) + .def("set", PyCUDAPinnedTensorSetFromArray, py::arg("array"), + py::arg("place")) + .def("set", PyCUDAPinnedTensorSetFromArray, py::arg("array"), + py::arg("place"), R"DOC( + Set the data of LoDTensor on place with given numpy array. + + Args: + array (numpy.ndarray): The data to set. + place (CPUPlace|CUDAPlace|CUDAPinnedPlace): The place where the + LoDTensor is to be set. + + Returns: + None. + + Examples: + ..
code-block:: python + + import paddle.fluid as fluid + import numpy as np + + t = fluid.LoDTensor() + t.set(np.ndarray([5, 30]), fluid.CPUPlace()) + )DOC") #endif - .def("shape", [](Tensor &self) { return vectorize(self.dims()); }) + .def("shape", [](Tensor &self) { return vectorize(self.dims()); }, R"DOC( + Return the shape of LoDTensor. + + Returns: + list[int]: The shape of LoDTensor. + + + Examples: + .. code-block:: python + + import paddle.fluid as fluid + import numpy as np + + t = fluid.LoDTensor() + t.set(np.ndarray([5, 30]), fluid.CPUPlace()) + print(t.shape()) # [5, 30] + )DOC") .def("_set_float_element", TensorSetElement) .def("_get_float_element", TensorGetElement) .def("_set_double_element", TensorSetElement) @@ -421,39 +480,82 @@ PYBIND11_MODULE(core_noavx, m) { return ostr.str(); }); + // TODO(cql): add reference: en_user_guide_lod_tensor py::class_(m, "LoDTensor", R"DOC( - LoDTensor is a Tensor with optional LoD information. - - np.array(lod_tensor) can convert LoDTensor to numpy array. - lod_tensor.lod() can retrieve the LoD information. - - LoD is short for Level of Details and is usually used for varied sequence - length. You can skip the following comment if you don't need optional LoD. - - For example, a LoDTensor X can look like the example below. It contains - 2 sequences. The first has length 2 and the second has length 3, as - described by x.lod. - - The first tensor dimension 5=2+3 is calculated from LoD if it's available. - It means the total number of sequence element. In X, each element has 2 - columns, hence [5, 2]. - - x.lod = [[2, 3]] - - x.data = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]] - - x.shape = [5, 2] - - LoD can have multiple levels (for example, a paragraph can have multiple - sentences and a sentence can have multiple words). In the following - LodTensor Y, the lod_level is 2. It means there are 2 sequence, the - first sequence length is 2 (has 2 sub-sequences), the second one's - length is 1. 
The first sequence's 2 sub-sequences have length 2 and 2, - respectively. And the second sequence's 1 sub-sequence has length 3. - - y.lod = [[2 1], [2 2 3]] - - y.shape = [2+2+3, ...] + LoDTensor is a Tensor with optional LoD (Level of Details) information, + it can be used for variable-length sequences, + see :ref:`user_guide_lod_tensor` for details. + + LoDTensor can be converted to numpy array using :code:`numpy.array(lod_tensor)`. + + You can skip the following explanation if you don't need to know details + of LoDTensor. + + The following two examples show how to use LoDTensor to represent + variable-length sequences. + + Example 1: + + Suppose x is a LoDTensor representing a variable-length sequence. + It contains two logical subsequences, the length of first logical sequence + is 2 (e.g., number of samples is 2), the length of second logical sequence + is 3, and the total length is 5. The data of the first logical sequence is + [1, 2], [3, 4], and the data of the second logical sequence is [5, 6], + [7, 8], [9, 10]. The data dimension of each sample is 2. So, the final + shape of the LoDTensor is [5, 2], of which 5 is the total length and 2 is + the dimension of each sample. + + Logically, we can represent the variable-length sequence in two ways: one + is in the form of recursive sequence lengths, that is, + x.recursive_sequence_lengths=[[2, 3]]; the other is in the form of offsets, + that is, x.lod=[[0, 2, 2+3]]. These two representations are equivalent, and + you can set and retrieve recursive_sequence_lengths or LoD through the + corresponding interfaces of LoDTensor introduced later. + + Actually, in order to access sequence faster, Paddle uses offset to store + different lengths of sequences. + Therefore, the operations on recursive_sequence_lengths will be converted + to the operations on LoD eventually. + + ..
code-block:: python + + x.data = [[1, 2], [3, 4], + [5, 6], [7, 8], + [9, 10]] + + x.shape = [5, 2] + + x.recursive_sequence_lengths = [[2, 3]] + + x.lod = [[0, 2, 5]] + + Example 2: + + LoD may have more than one level (for example, a paragraph may have more + than one sentence and a sentence may have more than one word). Suppose y + is a LoDTensor and its lod_level is 2. + From level = 0, there are two logical sequences, the length of which is + 2 and 1, respectively, indicating that the first logical sequence contains + two sub-sequences and the second logical sequence contains one sub-sequence. + From level = 1, the lengths of two sub-sequences contained by the first + logical sequence are 2 and 2, and the length of sub-sequence contained by + the second logical sequence is 3. + + Therefore, the LoDTensor is represented in the form of recursive sequence + lengths as y.recursive_sequence_lengths=[[2,1], [2,2,3]]; and equally, in + the form of offset, it is represented as y.lod=[[0,2,3], [0,2,4,7]]. + + .. code-block:: python + + y.data = [[1, 2], [3, 4], + [5, 6], [7, 8], + [9, 10], [11, 12], [13, 14]] + + y.shape = [2+2+3, 2] + + y.recursive_sequence_lengths = [[2, 1], [2, 2, 3]] + + y.lod = [[0, 2, 3], [0, 2, 4, 7]] Examples: .. code-block:: python @@ -462,16 +564,6 @@ PYBIND11_MODULE(core_noavx, m) { t = fluid.LoDTensor() - Note: - In above description, LoD is length-based. In Paddle internal - implementation, lod is offset-based. Hence, internally, - y.lod is represented as [[0, 2, 3], [0, 2, 4, 7]] (length-based - equivlent would be [[2-0, 3-2], [2-0, 4-2, 7-4]]). - - Sometimes LoD is called recursive_sequence_length to be more - self-explanatory. In this case, it must be length-based. Due to history - reasons. when LoD is called lod in public API, it might be offset-based. - Users should be careful about it.
)DOC") .def("__array__", [](Tensor &self) { return TensorToPyArray(self); }) .def("__init__", @@ -510,7 +602,10 @@ PYBIND11_MODULE(core_noavx, m) { Set LoD of the LoDTensor. Args: - lod (List[List[int]]): the lod to be set. + lod (list[list[int]]): The lod to set. + + Returns: + None. Examples: .. code-block:: python @@ -521,6 +616,7 @@ PYBIND11_MODULE(core_noavx, m) { t = fluid.LoDTensor() t.set(np.ndarray([5, 30]), fluid.CPUPlace()) t.set_lod([[0, 2, 5]]) + print(t.lod()) # [[0, 2, 5]] )DOC") .def("set_recursive_sequence_lengths", [](LoDTensor &self, const std::vector> @@ -539,14 +635,17 @@ PYBIND11_MODULE(core_noavx, m) { self.set_lod(new_offset_lod); }, py::arg("recursive_sequence_lengths"), R"DOC( - Set LoD of the LoDTensor according to recursive sequence length. + Set LoD of the LoDTensor according to recursive sequence lengths. - For example, if recursive_sequence_lengths=[[2, 3]], meaning that + For example, if recursive_sequence_lengths=[[2, 3]], which means there are two sequences with length 2 and 3 respectively, the - corresponding lod would be [[0, 2, 2+3]], i.e, [[0, 2, 5]]. + corresponding lod would be [[0, 2, 2+3]], i.e., [[0, 2, 5]]. Args: - recursive_sequence_lengths (List[List[int]]): sequence lengths. + recursive_sequence_lengths (list[list[int]]): The recursive sequence lengths. + + Returns: + None. Examples: .. code-block:: python @@ -557,6 +656,8 @@ PYBIND11_MODULE(core_noavx, m) { t = fluid.LoDTensor() t.set(np.ndarray([5, 30]), fluid.CPUPlace()) t.set_recursive_sequence_lengths([[2, 3]]) + print(t.recursive_sequence_lengths()) # [[2, 3]] + print(t.lod()) # [[0, 2, 5]] )DOC") .def("lod", [](LoDTensor &self) -> std::vector> { @@ -571,8 +672,8 @@ PYBIND11_MODULE(core_noavx, m) { Return the LoD of the LoDTensor. Returns: - out (List[List[int]]): the lod of the LoDTensor. - + list[list[int]]: The lod of the LoDTensor. + Examples: ..
code-block:: python @@ -595,10 +696,11 @@ PYBIND11_MODULE(core_noavx, m) { return new_lod; }, R"DOC( - Return the sequence length of the LoDTensor corresponding to LoD. + Return the recursive sequence lengths corresponding to the LoD + of the LoDTensor. Returns: - out (List[List[int]): the sequence lengths. + list[list[int]]: The recursive sequence lengths. Examples: .. code-block:: python @@ -618,10 +720,10 @@ PYBIND11_MODULE(core_noavx, m) { return CheckLoD(self.lod(), vectorize(self.dims()).front()); }, R"DOC( - Check whether the lod of the LoDTensor is valid. + Check whether the LoD of the LoDTensor is valid. Returns: - out (bool): whether the lod is valid. + bool: Whether the LoD is valid. Examples: .. code-block:: python -- GitLab