提交 5a7142ac 编写于 作者: L Leo Chen 提交者: Zeng Jinle

Update en APIs of LoDTensor (#20115)

* polish en APIs of LodTensor, test=develop, test=document_dix

* polish en APIs of LoDTensor, test=develop, test=document_fix

* follow comments, test=develop, test=document_dix
上级 7f68b0b5
...@@ -1079,7 +1079,7 @@ paddle.fluid.regularizer.L1DecayRegularizer ('paddle.fluid.regularizer.L1DecayRe ...@@ -1079,7 +1079,7 @@ paddle.fluid.regularizer.L1DecayRegularizer ('paddle.fluid.regularizer.L1DecayRe
paddle.fluid.regularizer.L1DecayRegularizer.__init__ (ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.regularizer.L1DecayRegularizer.__init__ (ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.regularizer.L2DecayRegularizer ('paddle.fluid.regularizer.L2DecayRegularizer', ('document', 'b94371c3434d7f695bc5b2d6fb5531fd')) paddle.fluid.regularizer.L2DecayRegularizer ('paddle.fluid.regularizer.L2DecayRegularizer', ('document', 'b94371c3434d7f695bc5b2d6fb5531fd'))
paddle.fluid.regularizer.L2DecayRegularizer.__init__ (ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.regularizer.L2DecayRegularizer.__init__ (ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.LoDTensor ('paddle.fluid.core_avx.LoDTensor', ('document', '25e8432ed1b9a375868bc8911359aa0d')) paddle.fluid.LoDTensor ('paddle.fluid.core_avx.LoDTensor', ('document', '8ee00d246c952b92e5e8ca2d92a4fc00'))
paddle.fluid.LoDTensor.__init__ 1. __init__(self: paddle.fluid.core_avx.LoDTensor, arg0: List[List[int]]) -> None 2. __init__(self: paddle.fluid.core_avx.LoDTensor) -> None paddle.fluid.LoDTensor.__init__ 1. __init__(self: paddle.fluid.core_avx.LoDTensor, arg0: List[List[int]]) -> None 2. __init__(self: paddle.fluid.core_avx.LoDTensor) -> None
paddle.fluid.LoDTensor.has_valid_recursive_sequence_lengths has_valid_recursive_sequence_lengths(self: paddle.fluid.core_avx.LoDTensor) -> bool paddle.fluid.LoDTensor.has_valid_recursive_sequence_lengths has_valid_recursive_sequence_lengths(self: paddle.fluid.core_avx.LoDTensor) -> bool
paddle.fluid.LoDTensor.lod lod(self: paddle.fluid.core_avx.LoDTensor) -> List[List[int]] paddle.fluid.LoDTensor.lod lod(self: paddle.fluid.core_avx.LoDTensor) -> List[List[int]]
......
...@@ -381,33 +381,92 @@ PYBIND11_MODULE(core_noavx, m) { ...@@ -381,33 +381,92 @@ PYBIND11_MODULE(core_noavx, m) {
return reinterpret_cast<uintptr_t>(self.mutable_data(place, type)); return reinterpret_cast<uintptr_t>(self.mutable_data(place, type));
}) })
.def("_clear", &Tensor::clear) .def("_clear", &Tensor::clear)
.def("set", PyCPUTensorSetFromArray<float>) .def("set", PyCPUTensorSetFromArray<float>, py::arg("array"),
.def("set", PyCPUTensorSetFromArray<int>) py::arg("place"))
.def("set", PyCPUTensorSetFromArray<double>) .def("set", PyCPUTensorSetFromArray<int>, py::arg("array"),
.def("set", PyCPUTensorSetFromArray<int64_t>) py::arg("place"))
.def("set", PyCPUTensorSetFromArray<bool>) .def("set", PyCPUTensorSetFromArray<double>, py::arg("array"),
.def("set", PyCPUTensorSetFromArray<uint16_t>) py::arg("place"))
.def("set", PyCPUTensorSetFromArray<uint8_t>) .def("set", PyCPUTensorSetFromArray<int64_t>, py::arg("array"),
.def("set", PyCPUTensorSetFromArray<int8_t>) py::arg("place"))
.def("set", PyCPUTensorSetFromArray<bool>, py::arg("array"),
py::arg("place"))
.def("set", PyCPUTensorSetFromArray<uint16_t>, py::arg("array"),
py::arg("place"))
.def("set", PyCPUTensorSetFromArray<uint8_t>, py::arg("array"),
py::arg("place"))
.def("set", PyCPUTensorSetFromArray<int8_t>, py::arg("array"),
py::arg("place"))
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
.def("set", PyCUDATensorSetFromArray<float>) .def("set", PyCUDATensorSetFromArray<float>, py::arg("array"),
.def("set", PyCUDATensorSetFromArray<int>) py::arg("place"))
.def("set", PyCUDATensorSetFromArray<double>) .def("set", PyCUDATensorSetFromArray<int>, py::arg("array"),
.def("set", PyCUDATensorSetFromArray<int64_t>) py::arg("place"))
.def("set", PyCUDATensorSetFromArray<bool>) .def("set", PyCUDATensorSetFromArray<double>, py::arg("array"),
.def("set", PyCUDATensorSetFromArray<uint16_t>) py::arg("place"))
.def("set", PyCUDATensorSetFromArray<uint8_t>) .def("set", PyCUDATensorSetFromArray<int64_t>, py::arg("array"),
.def("set", PyCUDATensorSetFromArray<int8_t>) py::arg("place"))
.def("set", PyCUDAPinnedTensorSetFromArray<float>) .def("set", PyCUDATensorSetFromArray<bool>, py::arg("array"),
.def("set", PyCUDAPinnedTensorSetFromArray<int>) py::arg("place"))
.def("set", PyCUDAPinnedTensorSetFromArray<double>) .def("set", PyCUDATensorSetFromArray<uint16_t>, py::arg("array"),
.def("set", PyCUDAPinnedTensorSetFromArray<int64_t>) py::arg("place"))
.def("set", PyCUDAPinnedTensorSetFromArray<bool>) .def("set", PyCUDATensorSetFromArray<uint8_t>, py::arg("array"),
.def("set", PyCUDAPinnedTensorSetFromArray<uint16_t>) py::arg("place"))
.def("set", PyCUDAPinnedTensorSetFromArray<uint8_t>) .def("set", PyCUDATensorSetFromArray<int8_t>, py::arg("array"),
.def("set", PyCUDAPinnedTensorSetFromArray<int8_t>) py::arg("place"))
.def("set", PyCUDAPinnedTensorSetFromArray<float>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDAPinnedTensorSetFromArray<int>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDAPinnedTensorSetFromArray<double>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDAPinnedTensorSetFromArray<int64_t>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDAPinnedTensorSetFromArray<bool>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDAPinnedTensorSetFromArray<uint16_t>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDAPinnedTensorSetFromArray<uint8_t>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDAPinnedTensorSetFromArray<int8_t>, py::arg("array"),
py::arg("place"), R"DOC(
Set the data of LoDTensor on place with given numpy array.
Args:
array (numpy.ndarray): The data to set.
place (CPUPlace|CUDAPlace|CUDAPinnedPlace): The place where the
LoDTensor is to be set.
Returns:
None.
Examples:
.. code-block:: python
import paddle.fluid as fluid
import numpy as np
t = fluid.LoDTensor()
t.set(np.ndarray([5, 30]), fluid.CPUPlace())
)DOC")
#endif #endif
.def("shape", [](Tensor &self) { return vectorize(self.dims()); }) .def("shape", [](Tensor &self) { return vectorize(self.dims()); }, R"DOC(
Return the shape of LoDTensor.
Returns:
list[int]: The shape of LoDTensor.
Examples:
.. code-block:: python
import paddle.fluid as fluid
import numpy as np
t = fluid.LoDTensor()
t.set(np.ndarray([5, 30]), fluid.CPUPlace())
print(t.shape()) # [5, 30]
)DOC")
.def("_set_float_element", TensorSetElement<float>) .def("_set_float_element", TensorSetElement<float>)
.def("_get_float_element", TensorGetElement<float>) .def("_get_float_element", TensorGetElement<float>)
.def("_set_double_element", TensorSetElement<double>) .def("_set_double_element", TensorSetElement<double>)
...@@ -421,39 +480,82 @@ PYBIND11_MODULE(core_noavx, m) { ...@@ -421,39 +480,82 @@ PYBIND11_MODULE(core_noavx, m) {
return ostr.str(); return ostr.str();
}); });
// TODO(cql): add reference: en_user_guide_lod_tensor
py::class_<LoDTensor, Tensor>(m, "LoDTensor", R"DOC( py::class_<LoDTensor, Tensor>(m, "LoDTensor", R"DOC(
LoDTensor is a Tensor with optional LoD information. LoDTensor is a Tensor with optional LoD (Level of Details) information,
it can be used for variable-length sequences,
np.array(lod_tensor) can convert LoDTensor to numpy array. see :ref:`user_guide_lod_tensor` for details.
lod_tensor.lod() can retrieve the LoD information.
LoDTensor can be converted to numpy array using :code:`numpy.array(lod_tensor)`.
LoD is short for Level of Details and is usually used for varied sequence
length. You can skip the following comment if you don't need optional LoD. You can skip the following explanation if you don't need to know details
of LoDTensor.
For example, a LoDTensor X can look like the example below. It contains
2 sequences. The first has length 2 and the second has length 3, as The following two examples show how to use LoDTensor to represent
described by x.lod. variable-length sequences.
The first tensor dimension 5=2+3 is calculated from LoD if it's available. Example 1:
It means the total number of sequence element. In X, each element has 2
columns, hence [5, 2]. Suppose x is a LoDTensor representing a variable-length sequence.
It contains two logical subsequences, the length of first logical sequence
x.lod = [[2, 3]] is 2 (e.g., number of samples is 2), the length of second logical sequence
is 3, and the total length is 5. The data of the first logical sequence is
x.data = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]] [1, 2], [3, 4], and the data of the second logical sequence is [5, 6],
[7, 8], [9, 10]. The data dimension of each sample is 2. So, the final
x.shape = [5, 2] shape of the LoDTensor is [5, 2], of which 5 is the total length and 2 is
the dimension of each sample.
LoD can have multiple levels (for example, a paragraph can have multiple
sentences and a sentence can have multiple words). In the following Logically, we can represent the variable-length sequence in two ways: one
LodTensor Y, the lod_level is 2. It means there are 2 sequence, the is in the form of recursive sequence lengths, that is,
first sequence length is 2 (has 2 sub-sequences), the second one's x.recursive_sequence_lengths=[[2, 3]]; the other is in the form of offsets,
length is 1. The first sequence's 2 sub-sequences have length 2 and 2, that is, x.lod=[[0, 2, 2+3]]. These two representations are equivalent, and
respectively. And the second sequence's 1 sub-sequence has length 3. you can set and retrieve recursive_sequence_lengths or LoD through the
corresponding interfaces of LoDTensor introduced later.
y.lod = [[2 1], [2 2 3]]
Actually, in order to access sequence faster, Paddle uses offset to store
y.shape = [2+2+3, ...] different lengths of sequences.
Therefore, the operations on recursive_sequence_lengths will be converted
to the operations on LoD eventually.
.. code-block:: python
x.data = [[1, 2], [3, 4],
[5, 6], [7, 8], [9, 10]]
x.shape = [2+3, 2]
x.recursive_sequence_lengths = [[2, 3]]
x.lod = [[0, 2, 5]]
Example 2:
LoD may have more than one level (for example, a paragraph may have more
than one sentence and a sentence may have more than one word). Suppose y
is a LoDTensor and its lod_level is 2.
From level = 0, there are two logical sequences, the length of which is
2 and 1, respectively, indicating that the first logical sequence contains
two sub-sequences and the second logical sequence contains one sub-sequence.
From level = 1, the lengths of the two sub-sequences contained in the first
logical sequence are 2 and 2, and the length of the sub-sequence contained in
the second logical sequence is 3.
Therefore, the LoDTensor is represented in the form of recursive sequence
lengths as y.recursive_sequence_lengths=[[2,1], [2,2,3]]; and equally, in
the form of offset, it is represented as y.lod=[[0,2,3], [0,2,4,7]].
.. code-block:: python
y.data = [[1, 2], [3, 4],
[5, 6], [7, 8],
[9, 10], [11, 12], [13, 14]]
y.shape = [2+2+3, 2]
y.recursive_sequence_lengths = [[2, 1], [2, 2, 3]]
y.lod = [[0, 2, 3], [0, 2, 4, 7]]
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -462,16 +564,6 @@ PYBIND11_MODULE(core_noavx, m) { ...@@ -462,16 +564,6 @@ PYBIND11_MODULE(core_noavx, m) {
t = fluid.LoDTensor() t = fluid.LoDTensor()
Note:
In above description, LoD is length-based. In Paddle internal
implementation, lod is offset-based. Hence, internally,
y.lod is represented as [[0, 2, 3], [0, 2, 4, 7]] (length-based
equivlent would be [[2-0, 3-2], [2-0, 4-2, 7-4]]).
Sometimes LoD is called recursive_sequence_length to be more
self-explanatory. In this case, it must be length-based. Due to history
reasons. when LoD is called lod in public API, it might be offset-based.
Users should be careful about it.
)DOC") )DOC")
.def("__array__", [](Tensor &self) { return TensorToPyArray(self); }) .def("__array__", [](Tensor &self) { return TensorToPyArray(self); })
.def("__init__", .def("__init__",
...@@ -510,7 +602,10 @@ PYBIND11_MODULE(core_noavx, m) { ...@@ -510,7 +602,10 @@ PYBIND11_MODULE(core_noavx, m) {
Set LoD of the LoDTensor. Set LoD of the LoDTensor.
Args: Args:
lod (List[List[int]]): the lod to be set. lod (list[list[int]]): The lod to set.
Returns:
None.
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -521,6 +616,7 @@ PYBIND11_MODULE(core_noavx, m) { ...@@ -521,6 +616,7 @@ PYBIND11_MODULE(core_noavx, m) {
t = fluid.LoDTensor() t = fluid.LoDTensor()
t.set(np.ndarray([5, 30]), fluid.CPUPlace()) t.set(np.ndarray([5, 30]), fluid.CPUPlace())
t.set_lod([[0, 2, 5]]) t.set_lod([[0, 2, 5]])
print(t.lod()) # [[0, 2, 5]]
)DOC") )DOC")
.def("set_recursive_sequence_lengths", .def("set_recursive_sequence_lengths",
[](LoDTensor &self, const std::vector<std::vector<size_t>> [](LoDTensor &self, const std::vector<std::vector<size_t>>
...@@ -539,14 +635,17 @@ PYBIND11_MODULE(core_noavx, m) { ...@@ -539,14 +635,17 @@ PYBIND11_MODULE(core_noavx, m) {
self.set_lod(new_offset_lod); self.set_lod(new_offset_lod);
}, },
py::arg("recursive_sequence_lengths"), R"DOC( py::arg("recursive_sequence_lengths"), R"DOC(
Set LoD of the LoDTensor according to recursive sequence length. Set LoD of the LoDTensor according to recursive sequence lengths.
For example, if recursive_sequence_lengths=[[2, 3]], meaning that For example, if recursive_sequence_lengths=[[2, 3]], which means
there are two sequences with length 2 and 3 respectively, the there are two sequences with length 2 and 3 respectively, the
corresponding lod would be [[0, 2, 2+3]], i.e, [[0, 2, 5]]. corresponding lod would be [[0, 2, 2+3]], i.e., [[0, 2, 5]].
Args: Args:
recursive_sequence_lengths (List[List[int]]): sequence lengths. recursive_sequence_lengths (list[list[int]]): The recursive sequence lengths.
Returns:
None.
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -557,6 +656,8 @@ PYBIND11_MODULE(core_noavx, m) { ...@@ -557,6 +656,8 @@ PYBIND11_MODULE(core_noavx, m) {
t = fluid.LoDTensor() t = fluid.LoDTensor()
t.set(np.ndarray([5, 30]), fluid.CPUPlace()) t.set(np.ndarray([5, 30]), fluid.CPUPlace())
t.set_recursive_sequence_lengths([[2, 3]]) t.set_recursive_sequence_lengths([[2, 3]])
print(t.recursive_sequence_lengths()) # [[2, 3]]
print(t.lod()) # [[0, 2, 5]]
)DOC") )DOC")
.def("lod", .def("lod",
[](LoDTensor &self) -> std::vector<std::vector<size_t>> { [](LoDTensor &self) -> std::vector<std::vector<size_t>> {
...@@ -571,8 +672,8 @@ PYBIND11_MODULE(core_noavx, m) { ...@@ -571,8 +672,8 @@ PYBIND11_MODULE(core_noavx, m) {
Return the LoD of the LoDTensor. Return the LoD of the LoDTensor.
Returns: Returns:
out (List[List[int]]): the lod of the LoDTensor. list[list[int]]: The lod of the LoDTensor.
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -595,10 +696,11 @@ PYBIND11_MODULE(core_noavx, m) { ...@@ -595,10 +696,11 @@ PYBIND11_MODULE(core_noavx, m) {
return new_lod; return new_lod;
}, },
R"DOC( R"DOC(
Return the sequence length of the LoDTensor corresponding to LoD. Return the recursive sequence lengths corresponding to the LoD
of the LoDTensor.
Returns: Returns:
out (List[List[int]): the sequence lengths. list[list[int]]: The recursive sequence lengths.
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -618,10 +720,10 @@ PYBIND11_MODULE(core_noavx, m) { ...@@ -618,10 +720,10 @@ PYBIND11_MODULE(core_noavx, m) {
return CheckLoD(self.lod(), vectorize(self.dims()).front()); return CheckLoD(self.lod(), vectorize(self.dims()).front());
}, },
R"DOC( R"DOC(
Check whether the lod of the LoDTensor is valid. Check whether the LoD of the LoDTensor is valid.
Returns: Returns:
out (bool): whether the lod is valid. bool: Whether the LoD is valid.
Examples: Examples:
.. code-block:: python .. code-block:: python
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册