提交 5a7142ac 编写于 作者: L Leo Chen 提交者: Zeng Jinle

Update en APIs of LoDTensor (#20115)

* polish en APIs of LodTensor, test=develop, test=document_dix

* polish en APIs of LoDTensor, test=develop, test=document_fix

* follow comments, test=develop, test=document_dix
上级 7f68b0b5
......@@ -1079,7 +1079,7 @@ paddle.fluid.regularizer.L1DecayRegularizer ('paddle.fluid.regularizer.L1DecayRe
paddle.fluid.regularizer.L1DecayRegularizer.__init__ (ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.regularizer.L2DecayRegularizer ('paddle.fluid.regularizer.L2DecayRegularizer', ('document', 'b94371c3434d7f695bc5b2d6fb5531fd'))
paddle.fluid.regularizer.L2DecayRegularizer.__init__ (ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.LoDTensor ('paddle.fluid.core_avx.LoDTensor', ('document', '25e8432ed1b9a375868bc8911359aa0d'))
paddle.fluid.LoDTensor ('paddle.fluid.core_avx.LoDTensor', ('document', '8ee00d246c952b92e5e8ca2d92a4fc00'))
paddle.fluid.LoDTensor.__init__ 1. __init__(self: paddle.fluid.core_avx.LoDTensor, arg0: List[List[int]]) -> None 2. __init__(self: paddle.fluid.core_avx.LoDTensor) -> None
paddle.fluid.LoDTensor.has_valid_recursive_sequence_lengths has_valid_recursive_sequence_lengths(self: paddle.fluid.core_avx.LoDTensor) -> bool
paddle.fluid.LoDTensor.lod lod(self: paddle.fluid.core_avx.LoDTensor) -> List[List[int]]
......
......@@ -381,33 +381,92 @@ PYBIND11_MODULE(core_noavx, m) {
return reinterpret_cast<uintptr_t>(self.mutable_data(place, type));
})
.def("_clear", &Tensor::clear)
.def("set", PyCPUTensorSetFromArray<float>)
.def("set", PyCPUTensorSetFromArray<int>)
.def("set", PyCPUTensorSetFromArray<double>)
.def("set", PyCPUTensorSetFromArray<int64_t>)
.def("set", PyCPUTensorSetFromArray<bool>)
.def("set", PyCPUTensorSetFromArray<uint16_t>)
.def("set", PyCPUTensorSetFromArray<uint8_t>)
.def("set", PyCPUTensorSetFromArray<int8_t>)
.def("set", PyCPUTensorSetFromArray<float>, py::arg("array"),
py::arg("place"))
.def("set", PyCPUTensorSetFromArray<int>, py::arg("array"),
py::arg("place"))
.def("set", PyCPUTensorSetFromArray<double>, py::arg("array"),
py::arg("place"))
.def("set", PyCPUTensorSetFromArray<int64_t>, py::arg("array"),
py::arg("place"))
.def("set", PyCPUTensorSetFromArray<bool>, py::arg("array"),
py::arg("place"))
.def("set", PyCPUTensorSetFromArray<uint16_t>, py::arg("array"),
py::arg("place"))
.def("set", PyCPUTensorSetFromArray<uint8_t>, py::arg("array"),
py::arg("place"))
.def("set", PyCPUTensorSetFromArray<int8_t>, py::arg("array"),
py::arg("place"))
#ifdef PADDLE_WITH_CUDA
.def("set", PyCUDATensorSetFromArray<float>)
.def("set", PyCUDATensorSetFromArray<int>)
.def("set", PyCUDATensorSetFromArray<double>)
.def("set", PyCUDATensorSetFromArray<int64_t>)
.def("set", PyCUDATensorSetFromArray<bool>)
.def("set", PyCUDATensorSetFromArray<uint16_t>)
.def("set", PyCUDATensorSetFromArray<uint8_t>)
.def("set", PyCUDATensorSetFromArray<int8_t>)
.def("set", PyCUDAPinnedTensorSetFromArray<float>)
.def("set", PyCUDAPinnedTensorSetFromArray<int>)
.def("set", PyCUDAPinnedTensorSetFromArray<double>)
.def("set", PyCUDAPinnedTensorSetFromArray<int64_t>)
.def("set", PyCUDAPinnedTensorSetFromArray<bool>)
.def("set", PyCUDAPinnedTensorSetFromArray<uint16_t>)
.def("set", PyCUDAPinnedTensorSetFromArray<uint8_t>)
.def("set", PyCUDAPinnedTensorSetFromArray<int8_t>)
.def("set", PyCUDATensorSetFromArray<float>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDATensorSetFromArray<int>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDATensorSetFromArray<double>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDATensorSetFromArray<int64_t>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDATensorSetFromArray<bool>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDATensorSetFromArray<uint16_t>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDATensorSetFromArray<uint8_t>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDATensorSetFromArray<int8_t>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDAPinnedTensorSetFromArray<float>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDAPinnedTensorSetFromArray<int>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDAPinnedTensorSetFromArray<double>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDAPinnedTensorSetFromArray<int64_t>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDAPinnedTensorSetFromArray<bool>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDAPinnedTensorSetFromArray<uint16_t>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDAPinnedTensorSetFromArray<uint8_t>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDAPinnedTensorSetFromArray<int8_t>, py::arg("array"),
py::arg("place"), R"DOC(
Set the data of LoDTensor on place with given numpy array.
Args:
array (numpy.ndarray): The data to set.
place (CPUPlace|CUDAPlace|CUDAPinnedPlace): The place where the
LoDTensor is to be set.
Returns:
None.
Examples:
.. code-block:: python
import paddle.fluid as fluid
import numpy as np
t = fluid.LoDTensor()
t.set(np.ndarray([5, 30]), fluid.CPUPlace())
)DOC")
#endif
.def("shape", [](Tensor &self) { return vectorize(self.dims()); })
.def("shape", [](Tensor &self) { return vectorize(self.dims()); }, R"DOC(
Return the shape of LoDTensor.
Returns:
list[int]: The shape of LoDTensor.
Examples:
.. code-block:: python
import paddle.fluid as fluid
import numpy as np
t = fluid.LoDTensor()
t.set(np.ndarray([5, 30]), fluid.CPUPlace())
print(t.shape()) # [5, 30]
)DOC")
.def("_set_float_element", TensorSetElement<float>)
.def("_get_float_element", TensorGetElement<float>)
.def("_set_double_element", TensorSetElement<double>)
......@@ -421,39 +480,82 @@ PYBIND11_MODULE(core_noavx, m) {
return ostr.str();
});
// TODO(cql): add reference: en_user_guide_lod_tensor
py::class_<LoDTensor, Tensor>(m, "LoDTensor", R"DOC(
LoDTensor is a Tensor with optional LoD information.
LoDTensor is a Tensor with optional LoD (Level of Details) information,
it can be used for variable-length sequences,
see :ref:`user_guide_lod_tensor` for details.
LoDTensor can be converted to numpy array using :code:`numpy.array(lod_tensor)`.
You can skip the following explanation if you don't need to know details
of LoDTensor.
The following two examples show how to use LoDTensor to represent
variable-length sequences.
Example 1:
Suppose x is a LoDTensor representing a variable-length sequence.
It contains two logical subsequences: the length of the first logical sequence
is 2 (e.g., the number of samples is 2), the length of the second logical sequence
is 3, and the total length is 5. The data of the first logical sequence is
[1, 2], [3, 4], and the data of the second logical sequence is [5, 6],
[7, 8], [9, 10]. The data dimension of each sample is 2. So, the final
shape of the LoDTensor is [5, 2], of which 5 is the total length and 2 is
the dimension of each sample.
Logically, we can represent the variable-length sequence in two ways: one
is in the form of recursive sequence lengths, that is,
x.recursive_sequence_lengths=[[2, 3]]; the other is in the form of offsets,
that is, x.lod=[[0, 2, 2+3]]. These two representations are equivalent, and
you can set and retrieve recursive_sequence_lengths or LoD through the
corresponding interfaces of LoDTensor introduced later.
Actually, in order to access sequence faster, Paddle uses offset to store
different lengths of sequences.
Therefore, the operations on recursive_sequence_lengths will be converted
to the operations on LoD eventually.
.. code-block:: python
np.array(lod_tensor) can convert LoDTensor to numpy array.
lod_tensor.lod() can retrieve the LoD information.
y.data = [[1, 2], [3, 4],
[5, 6], [7, 8],
[9, 10], [11, 12], [13, 14]]
LoD is short for Level of Details and is usually used for varied sequence
length. You can skip the following comment if you don't need optional LoD.
y.shape = [2+2+3, 2]
For example, a LoDTensor X can look like the example below. It contains
2 sequences. The first has length 2 and the second has length 3, as
described by x.lod.
y.recursive_sequence_lengths = [[2, 1], [2, 2, 3]]
The first tensor dimension 5=2+3 is calculated from LoD if it's available.
It means the total number of sequence element. In X, each element has 2
columns, hence [5, 2].
y.lod = [[0, 2, 3], [0, 2, 4, 7]]
x.lod = [[2, 3]]
Example 2:
x.data = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]
LoD may have more than one level (for example, a paragraph may have more
than one sentence and a sentence may have more than one word). Suppose y
is a LoDTensor and its lod_level is 2.
From level = 0, there are two logical sequences, the length of which is
2 and 1, respectively, indicating that the first logical sequence contains
two sub-sequences and the second logical sequence contains one sub-sequence.
From level = 1, the lengths of the two sub-sequences contained in the first
logical sequence are 2 and 2, and the length of the sub-sequence contained in
the second logical sequence is 3.
x.shape = [5, 2]
Therefore, the LoDTensor is represented in the form of recursive sequence
lengths as y.recursive_sequence_lengths=[[2,1], [2,2,3]]; and equally, in
the form of offset, it is represented as y.lod=[[0,2,3], [0,2,4,7]].
LoD can have multiple levels (for example, a paragraph can have multiple
sentences and a sentence can have multiple words). In the following
LodTensor Y, the lod_level is 2. It means there are 2 sequence, the
first sequence length is 2 (has 2 sub-sequences), the second one's
length is 1. The first sequence's 2 sub-sequences have length 2 and 2,
respectively. And the second sequence's 1 sub-sequence has length 3.
.. code-block:: python
y.data = [[1, 2], [3, 4],
[5, 6], [7, 8],
[9, 10], [11, 12], [13, 14]]
y.shape = [2+2+3, 2]
y.lod = [[2 1], [2 2 3]]
y.recursive_sequence_lengths = [[2, 1], [2, 2, 3]]
y.shape = [2+2+3, ...]
y.lod = [[0, 2, 3], [0, 2, 4, 7]]
Examples:
.. code-block:: python
......@@ -462,16 +564,6 @@ PYBIND11_MODULE(core_noavx, m) {
t = fluid.LoDTensor()
Note:
In above description, LoD is length-based. In Paddle internal
implementation, lod is offset-based. Hence, internally,
y.lod is represented as [[0, 2, 3], [0, 2, 4, 7]] (length-based
equivalent would be [[2-0, 3-2], [2-0, 4-2, 7-4]]).
Sometimes LoD is called recursive_sequence_length to be more
self-explanatory. In this case, it must be length-based. For historical
reasons, when LoD is called lod in public API, it might be offset-based.
Users should be careful about it.
)DOC")
.def("__array__", [](Tensor &self) { return TensorToPyArray(self); })
.def("__init__",
......@@ -510,7 +602,10 @@ PYBIND11_MODULE(core_noavx, m) {
Set LoD of the LoDTensor.
Args:
lod (List[List[int]]): the lod to be set.
lod (list[list[int]]): The lod to set.
Returns:
None.
Examples:
.. code-block:: python
......@@ -521,6 +616,7 @@ PYBIND11_MODULE(core_noavx, m) {
t = fluid.LoDTensor()
t.set(np.ndarray([5, 30]), fluid.CPUPlace())
t.set_lod([[0, 2, 5]])
print(t.lod()) # [[0, 2, 5]]
)DOC")
.def("set_recursive_sequence_lengths",
[](LoDTensor &self, const std::vector<std::vector<size_t>>
......@@ -539,14 +635,17 @@ PYBIND11_MODULE(core_noavx, m) {
self.set_lod(new_offset_lod);
},
py::arg("recursive_sequence_lengths"), R"DOC(
Set LoD of the LoDTensor according to recursive sequence length.
Set LoD of the LoDTensor according to recursive sequence lengths.
For example, if recursive_sequence_lengths=[[2, 3]], meaning that
For example, if recursive_sequence_lengths=[[2, 3]], which means
there are two sequences with length 2 and 3 respectively, the
corresponding lod would be [[0, 2, 2+3]], i.e, [[0, 2, 5]].
corresponding lod would be [[0, 2, 2+3]], i.e., [[0, 2, 5]].
Args:
recursive_sequence_lengths (List[List[int]]): sequence lengths.
recursive_sequence_lengths (list[list[int]]): The recursive sequence lengths.
Returns:
None.
Examples:
.. code-block:: python
......@@ -557,6 +656,8 @@ PYBIND11_MODULE(core_noavx, m) {
t = fluid.LoDTensor()
t.set(np.ndarray([5, 30]), fluid.CPUPlace())
t.set_recursive_sequence_lengths([[2, 3]])
print(t.recursive_sequence_lengths()) # [[2, 3]]
print(t.lod()) # [[0, 2, 5]]
)DOC")
.def("lod",
[](LoDTensor &self) -> std::vector<std::vector<size_t>> {
......@@ -571,7 +672,7 @@ PYBIND11_MODULE(core_noavx, m) {
Return the LoD of the LoDTensor.
Returns:
out (List[List[int]]): the lod of the LoDTensor.
list[list[int]]: The lod of the LoDTensor.
Examples:
.. code-block:: python
......@@ -595,10 +696,11 @@ PYBIND11_MODULE(core_noavx, m) {
return new_lod;
},
R"DOC(
Return the sequence length of the LoDTensor corresponding to LoD.
Return the recursive sequence lengths corresponding to the LoD
of the LoDTensor.
Returns:
out (List[List[int]): the sequence lengths.
list[list[int]]: The recursive sequence lengths.
Examples:
.. code-block:: python
......@@ -618,10 +720,10 @@ PYBIND11_MODULE(core_noavx, m) {
return CheckLoD(self.lod(), vectorize(self.dims()).front());
},
R"DOC(
Check whether the lod of the LoDTensor is valid.
Check whether the LoD of the LoDTensor is valid.
Returns:
out (bool): whether the lod is valid.
bool: Whether the LoD is valid.
Examples:
.. code-block:: python
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册