Unverified commit c300b1ba, authored by wopeizl, committed by GitHub

Tensor index (#16223)

* extend the slice function for python
test=develop
Parent 0d9d25d4
@@ -347,7 +347,8 @@ PYBIND11_MODULE(core, m) {
.def("_set_double_element", TensorSetElement<double>)
.def("_get_double_element", TensorGetElement<double>)
.def("_place", [](Tensor &self) { return self.place(); })
.def("_dtype", [](Tensor &self) { return self.type(); });
.def("_dtype", [](Tensor &self) { return self.type(); })
.def("__getitem__", PySliceTensor, py::return_value_policy::reference);
py::class_<LoDTensor, Tensor>(m, "LoDTensor", R"DOC(
LoDTensor is a Tensor with optional LoD information.
@@ -499,6 +500,13 @@ PYBIND11_MODULE(core, m) {
Returns:
out (bool): whether the lod is valid.
)DOC")
.def("__getitem__", PySliceTensor, py::return_value_policy::reference,
R"DOC(
Slice the original Tensor and drop its LoD information.
Returns:
out (Tensor): a new Tensor (not a LoDTensor).
)DOC");
py::class_<SelectedRows>(m, "SelectedRows")
......
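A minimal usage sketch for the `__getitem__` bindings added above (illustrative only, mirroring the unit test further below; assumes a CPU place and the usual numpy round-trip via `numpy.array(tensor)`):

import numpy as np
import paddle.fluid as fluid

t = fluid.LoDTensor()
t.set(np.arange(27).reshape(3, 3, 3).astype('float32'), fluid.CPUPlace())
row = t[1]          # a plain Tensor of shape [1, 3, 3]; any LoD on t is dropped
strided = t[2::-2]  # negative step goes through the slice-and-concat path: shape [2, 3, 3]
print(np.array(row).shape, np.array(strided).shape)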
@@ -14,16 +14,22 @@ limitations under the License. */
#pragma once
#include <Python.h>
#include <algorithm>
#include <memory>
#include <string>
#include <tuple>
#include <vector>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/operators/math/concat_and_split.h"
#include "paddle/fluid/operators/strided_memcpy.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/float16.h"
#include "pybind11/numpy.h"
#include "pybind11/pybind11.h"
namespace py = pybind11;
namespace paddle {
namespace pybind {
namespace details {
@@ -191,6 +197,253 @@ inline void PyCPUTensorSetFromArray(
std::memcpy(dst, array.data(), sizeof(uint16_t) * array.size());
}
template <typename T, size_t D>
void _sliceCompute(const framework::Tensor *in, framework::Tensor *out,
const platform::CPUDeviceContext &ctx,
const std::vector<int> &axes,
const std::vector<int> &starts) {
auto &eigen_place = *ctx.eigen_device();
auto place = in->place();
auto out_dims = out->dims();
auto in_dims = in->dims();
auto offsets = Eigen::array<int, D>();
auto extents = Eigen::array<int, D>();
for (size_t i = 0; i < D; ++i) {
offsets[i] = 0;
extents[i] = out_dims[i];
}
int start;
for (size_t i = 0; i < axes.size(); ++i) {
start = starts[i];
if (start < 0) {
start = (start + in_dims[axes[i]]);
}
start = std::max(start, 0);
offsets[axes[i]] = start;
}
auto in_t =
framework::EigenTensor<T, D, Eigen::RowMajor, Eigen::DenseIndex>::From(
*in);
auto out_t =
framework::EigenTensor<T, D, Eigen::RowMajor, Eigen::DenseIndex>::From(
*out);
out_t.device(eigen_place) = in_t.slice(offsets, extents);
}
template <typename T>
void _concatCompute(const std::vector<paddle::framework::Tensor> &ins,
paddle::framework::Tensor *out,
const platform::CPUDeviceContext &ctx, int64_t axis) {
if (axis == 0 && ins.size() < 10) {
size_t output_offset = 0;
for (auto &in : ins) {
auto in_stride = framework::stride_numel(in.dims());
auto out_stride = framework::stride_numel(out->dims());
paddle::operators::StridedNumelCopyWithAxis<T>(
ctx, axis, out->data<T>() + output_offset, out_stride, in.data<T>(),
in_stride, in_stride[axis]);
output_offset += in_stride[axis];
}
} else {
paddle::operators::math::ConcatFunctor<platform::CPUDeviceContext, T>
concat_functor;
concat_functor(ctx, ins, static_cast<int>(axis), out);
}
}
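When concatenating along axis 0 with few inputs, the branch above performs one contiguous strided copy per input instead of calling the generic ConcatFunctor. A numpy sketch of the same idea (illustrative, not Paddle API):

import numpy as np

ins = [np.full((2, 3), v, np.float32) for v in (1.0, 2.0, 3.0)]
out = np.empty((6, 3), np.float32)
offset = 0
for a in ins:  # one contiguous block copy per input along axis 0
    out[offset:offset + a.shape[0]] = a
    offset += a.shape[0]
assert (out == np.concatenate(ins, axis=0)).all()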
inline void _getSliceinfo(const framework::Tensor &self, py::object obj,
const int64_t dim, int64_t *pstart, int64_t *pstop,
int64_t *pstep, int64_t *pslicelength) {
auto &start = *pstart;
auto &stop = *pstop;
auto &step = *pstep;
auto &slicelength = *pslicelength;
const framework::DDim &srcDDim = self.dims();
if (dim < 0 || dim >= srcDDim.size()) {
throw py::index_error();
}
if (py::isinstance<py::slice>(obj)) {
size_t lstart, lstop, lstep, lslicelength;
py::slice s = static_cast<py::slice>(obj);
if (!s.compute(srcDDim[dim], &lstart, &lstop, &lstep, &lslicelength)) {
throw py::index_error();
}
start = static_cast<int64_t>(lstart);
stop = static_cast<int64_t>(lstop);
step = static_cast<int64_t>(lstep);
slicelength = static_cast<int64_t>(lslicelength);
} else if (py::isinstance<py::int_>(obj)) {
start = static_cast<int64_t>(static_cast<py::int_>(obj));
if (std::abs(start) >= srcDDim[dim]) {
throw py::index_error();
}
start = (start >= 0) ? start : srcDDim[dim] - start;
stop = start + 1;
step = 1;
slicelength = 1;
} else {
throw py::index_error();
}
}
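In Python terms, `_getSliceinfo` normalizes one index object per dimension roughly as the sketch below does (`get_slice_info` is a hypothetical mirror for illustration; `py::slice::compute` follows CPython's `slice.indices` clamping rules):

def get_slice_info(dim_size, obj):
    # Hypothetical Python mirror of _getSliceinfo for a single dimension.
    if isinstance(obj, slice):
        start, stop, step = obj.indices(dim_size)  # CPython clamping rules
        return start, stop, step, len(range(start, stop, step))
    if isinstance(obj, int):
        if abs(obj) >= dim_size:
            raise IndexError("index out of range")
        start = obj if obj >= 0 else dim_size + obj
        return start, start + 1, 1, 1
    raise IndexError("only int or slice indices are supported")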
inline framework::Tensor *_getTensor(const framework::Tensor &self,
const framework::DDim &ddim) {
framework::Tensor *output = new framework::Tensor();
output->Resize(ddim);
auto place = self.place();
if (platform::is_cpu_place(place)) {
output->mutable_data(boost::get<platform::CPUPlace>(place), self.type());
#ifdef PADDLE_WITH_CUDA
} else {
if (platform::is_cuda_pinned_place(place)) {
output->mutable_data(boost::get<platform::CUDAPinnedPlace>(place),
self.type());
} else if ((platform::is_gpu_place(place))) {
output->mutable_data(boost::get<platform::CUDAPlace>(place), self.type());
}
#endif
}
return output;
}
template <typename T>
void _sliceDapper(const framework::Tensor *in, framework::Tensor *out,
const platform::CPUDeviceContext &ctx,
const std::vector<int> &axes, const std::vector<int> &starts,
int size) {
switch (size) {
case 1:
_sliceCompute<T, 1>(in, out, ctx, axes, starts);
break;
case 2:
_sliceCompute<T, 2>(in, out, ctx, axes, starts);
break;
case 3:
_sliceCompute<T, 3>(in, out, ctx, axes, starts);
break;
case 4:
_sliceCompute<T, 4>(in, out, ctx, axes, starts);
break;
case 5:
_sliceCompute<T, 5>(in, out, ctx, axes, starts);
break;
case 6:
_sliceCompute<T, 6>(in, out, ctx, axes, starts);
break;
case 7:
_sliceCompute<T, 7>(in, out, ctx, axes, starts);
break;
case 8:
_sliceCompute<T, 8>(in, out, ctx, axes, starts);
break;
case 9:
_sliceCompute<T, 9>(in, out, ctx, axes, starts);
break;
default:
PADDLE_THROW("dim size not exepected, current is %d", size);
break;
}
}
template <typename T>
inline framework::Tensor *_sliceWrapper(const framework::Tensor &self,
const platform::CPUDeviceContext &ctx,
py::object obj, int dim, int64_t start,
int64_t slicelength) {
framework::DDim dstDDim = self.dims();
dstDDim[dim] = static_cast<int64_t>(slicelength);
std::vector<int> axes({dim});
std::vector<int> starts({static_cast<int>(start)});
framework::Tensor *output = _getTensor(self, dstDDim);
_sliceDapper<T>(&self, output, ctx, axes, starts, dstDDim.size());
return output;
}
template <typename T>
inline framework::Tensor *_sliceAndConcat(const framework::Tensor &self,
py::object obj, int dim) {
platform::CPUDeviceContext ctx;
int64_t start, stop, step, slicelength;
_getSliceinfo(self, obj, dim, &start, &stop, &step, &slicelength);
if (step == 1 || slicelength == 1) {
return _sliceWrapper<T>(self, ctx, obj, dim, start, slicelength);
} else {
std::vector<framework::Tensor> ins;
for (auto i = 0; i < slicelength; ++i, start += step) {
ins.emplace_back(*_sliceWrapper<T>(self, ctx, obj, dim, start, 1));
}
// do the concat operation
framework::DDim dstDDim = self.dims();
dstDDim[dim] = static_cast<int64_t>(slicelength);
framework::Tensor *output1 = _getTensor(self, dstDDim);
_concatCompute<T>(ins, output1, ctx, dim);
return output1;
}
}
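The `else` branch above relies on a standard identity: a strided slice equals the concatenation of its unit slices along the same axis. A quick numpy check (illustrative only):

import numpy as np

x = np.arange(27).reshape(3, 3, 3)
start, stop, step = slice(None, None, -2).indices(x.shape[0])  # (2, -1, -2)
pieces = [x[i:i + 1] for i in range(start, stop, step)]        # x[2:3], x[0:1]
assert (np.concatenate(pieces, axis=0) == x[::-2]).all()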
inline framework::Tensor *_sliceTensor(const framework::Tensor &self,
py::object obj, int dim) {
auto src_type = self.type();
switch (src_type) {
case framework::proto::VarType::FP16:
return _sliceAndConcat<paddle::platform::float16>(self, obj, dim);
case framework::proto::VarType::FP32:
return _sliceAndConcat<float>(self, obj, dim);
case framework::proto::VarType::FP64:
return _sliceAndConcat<double>(self, obj, dim);
case framework::proto::VarType::INT32:
return _sliceAndConcat<int>(self, obj, dim);
case framework::proto::VarType::INT64:
return _sliceAndConcat<int64_t>(self, obj, dim);
case framework::proto::VarType::BOOL:
return _sliceAndConcat<bool>(self, obj, dim);
case framework::proto::VarType::INT16:
return _sliceAndConcat<int16_t>(self, obj, dim);
case framework::proto::VarType::UINT8:
return _sliceAndConcat<uint8_t>(self, obj, dim);
default:
PADDLE_THROW("Not support type %d", src_type);
}
}
inline framework::Tensor *_pySliceTensor(const framework::Tensor &self,
py::object obj) {
if (py::isinstance<py::tuple>(obj)) {
py::list l = static_cast<py::list>(obj);
std::unique_ptr<framework::Tensor> target;
framework::Tensor *src = const_cast<framework::Tensor *>(&self);
for (auto i = 0; i < static_cast<int>(l.size()); ++i) {
src = _sliceTensor(*src, l[i], i);
if (i + 1 == static_cast<int>(l.size())) {
return src;
} else {
target.reset(src);
}
}
return nullptr;
} else {
return _sliceTensor(self, obj, 0);
}
}
inline framework::Tensor *PySliceTensor(const framework::Tensor &self,
py::object obj) {
if (platform::is_gpu_place(self.place())) {
std::unique_ptr<framework::Tensor> holder;
framework::Tensor src;
framework::TensorCopySync(self, platform::CPUPlace(), &src);
framework::Tensor *output = _pySliceTensor(src, obj);
holder.reset(output);
framework::Tensor *dst = _getTensor(*output, output->dims());
framework::TensorCopySync(*output, self.place(), dst);
return dst;
} else {
return _pySliceTensor(self, obj);
}
}
#ifdef PADDLE_WITH_CUDA
template <typename T>
void PyCUDATensorSetFromArray(
......
@@ -627,6 +627,183 @@ class Variable(object):
"""
self.error_clip = error_clip
def _slice_indices(self, slice, length):
"""
Reference implementation for the slice.indices method.
"""
# Compute step and length as integers.
step = 1 if slice.step is None else slice.step
# Raise ValueError for negative length or zero step.
if length < 0:
raise ValueError("length should not be negative")
if step == 0:
raise ValueError("slice step cannot be zero")
# Find lower and upper bounds for start and stop.
lower = -1 if step < 0 else 0
upper = length - 1 if step < 0 else length
# Compute start.
if slice.start is None:
start = upper if step < 0 else lower
else:
start = slice.start
start = max(start + length, lower) if start < 0 else min(start,
upper)
# Compute stop.
if slice.stop is None:
stop = lower if step < 0 else upper
else:
stop = slice.stop
stop = max(stop + length, lower) if stop < 0 else min(stop, upper)
return start, stop, step
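As a sanity check, this method should agree with CPython's built-in `slice.indices`; two illustrative assertions:

# slice(None, None, -2) over length 5 clamps to start=4, stop=-1, step=-2,
# i.e. the elements at indices 4, 2, 0.
assert slice(None, None, -2).indices(5) == (4, -1, -2)
assert slice(-100, 100, 1).indices(5) == (0, 5, 1)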
def _detectEllipsis(self, item):
has_ellipsis = False
start = 0
end = len(self.shape)
for index, o in enumerate(item):
if o is Ellipsis:
if has_ellipsis:
raise ValueError("Index can have one ellipsis only.")
has_ellipsis = True
start = index
else:
if has_ellipsis:
end = index
return has_ellipsis, start, end
def _reconstructSliceinfo(self, item):
has_ellipsis, start, end = self._detectEllipsis(item)
if has_ellipsis:
newitem = []
for i in range(start):
newitem.append(item[i])
for i in range(start, end):
newitem.append(slice(None, None, None))
for i in range(end, len(item)):
newitem.append(item[i])
return newitem
else:
return None
def _detectContinuesSlice(self, item):
starts = []
ends = []
for index, o in enumerate(item):
if isinstance(o, int):
start = int(o)
if (index > 0 and index >= self.shape[index]) \
or (index < 0 and (index + self.shape[index]) < 0):
raise IndexError("invalid index")
start = max(start + self.shape[index], 0) if start < 0 else min(
start, self.shape[index])
starts.append(start)
ends.append(start + 1)
elif isinstance(o, slice):
start, stop, step = self._slice_indices(o, self.shape[index])
if step == 1 or step == -1:
starts.append(start)
ends.append(stop)
else:
return False, None
else:
raise IndexError("Valid index accept int or slice or ellipsis")
return True, [starts, ends]
def _cloneVar(self, copy=False):
if not copy:
return self.block.create_var(
name=unique_name.generate(".".join(self.name)),
dtype=self.dtype,
persistable=self.persistable,
stop_gradient=self._stop_gradient, )
else:
return self
def _sliceVar(self, axes, starts, ends):
new_var = self._cloneVar()
self.block.append_op(
type="slice",
inputs={'Input': [self]},
outputs={'Out': [new_var]},
attrs={'axes': axes,
'starts': starts,
'ends': ends})
return new_var
def _concatVar(self, inputs, axis):
new_var = self._cloneVar()
self.block.append_op(
type="concat",
inputs={'X': inputs},
outputs={'Out': [new_var]},
attrs={'axis': axis, })
return new_var
def _sliceAndConcatVar(self, item, axis):
if isinstance(item, slice):
if self.shape[axis] < 0:
return self._cloneVar(True)
start, stop, step = self._slice_indices(item, self.shape[axis])
if step == 1:
return self._sliceVar([axis], [start], [stop])
else:
vars = []
if step > 0:
while start < stop:
vars.append(
self._sliceVar([axis], [start], [start + 1]))
start += step
else:
while start > stop:
vars.append(
self._sliceVar([axis], [start], [start + 1]))
start += step
return self._concatVar(vars, axis)
elif isinstance(item, int):
if self.shape[axis] < 0:
return self._cloneVar(True)
index = int(item)
if (index > 0 and index >= self.shape[axis])\
or (index < 0 and (index + self.shape[axis]) < 0):
raise IndexError("invalid index")
return self._sliceVar([axis], [index], [index + 1])
else:
raise IndexError("Valid index accept int or slice or tuple")
def __getitem__(self, item):
"""
Slice the variable.
Args:
item(int/slice/tuple) : the index.
Returns:
Sliced variable
"""
new_var = None
if isinstance(item, tuple):
if len(item) > len(self.shape):
raise IndexError("Too many indexes")
newitem = self._reconstructSliceinfo(item) or item
check, info = self._detectContinuesSlice(newitem)
if check:
starts = info[0]
ends = info[1]
axes = [i for i in range(len(starts))]
return self._sliceVar(axes, starts, ends)
else:
new_var = self
for index, o in enumerate(newitem):
new_var = new_var._sliceAndConcatVar(o, index)
else:
new_var = self._sliceAndConcatVar(item, 0)
return new_var
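A short usage sketch of the slicing API this method provides (mirroring the unit tests below; the stepped slice exercises the slice-and-concat fallback):

import numpy as np
import paddle.fluid as fluid

main = fluid.Program()
with fluid.program_guard(main):
    data = fluid.layers.assign(
        np.arange(27).reshape(3, 3, 3).astype('float32'))
    sub = data[1:, ..., ::2]  # lowered to slice/concat ops at graph-build time
    exe = fluid.Executor(fluid.CPUPlace())
    out = exe.run(main, fetch_list=[sub])[0]
assert out.shape == (2, 3, 2)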
def get_all_op_protos():
"""
......
@@ -14,6 +14,7 @@
from __future__ import print_function
import paddle.fluid as fluid
import paddle.fluid.core as core
import unittest
import numpy
@@ -183,6 +184,58 @@ class TestTensor(unittest.TestCase):
tensor_array = numpy.array(tensor)
self.assertEqual((0, 1), tensor_array.shape)
def run_slice_tensor(self, place):
tensor = fluid.Tensor()
shape = [3, 3, 3]
tensor._set_dims(shape)
tensor_array = numpy.array([[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
[[10, 11, 12], [13, 14, 15], [16, 17, 18]],
[[19, 20, 21], [22, 23, 24], [25, 26, 27]]])
tensor.set(tensor_array, place)
n1 = tensor[1]
t1 = tensor_array[1]
self.assertTrue((numpy.array(n1) == numpy.array(t1)).all())
n2 = tensor[1:]
t2 = tensor_array[1:]
self.assertTrue((numpy.array(n2) == numpy.array(t2)).all())
n3 = tensor[0:2:]
t3 = tensor_array[0:2:]
self.assertTrue((numpy.array(n3) == numpy.array(t3)).all())
n4 = tensor[2::-2]
t4 = tensor_array[2::-2]
self.assertTrue((numpy.array(n4) == numpy.array(t4)).all())
n5 = tensor[2::-2][0]
t5 = tensor_array[2::-2][0]
self.assertTrue((numpy.array(n5) == numpy.array(t5)).all())
n6 = tensor[2:-1:-1]
t6 = tensor_array[2:-1:-1]
self.assertTrue((numpy.array(n6) == numpy.array(t6)).all())
n7 = tensor[0:, 0:]
t7 = tensor_array[0:, 0:]
self.assertTrue((numpy.array(n7) == numpy.array(t7)).all())
n8 = tensor[0::1, 0::-1, 2:]
t8 = tensor_array[0::1, 0::-1, 2:]
self.assertTrue((numpy.array(n8) == numpy.array(t8)).all())
def test_slice_tensor(self):
# run cpu first
place = core.CPUPlace()
self.run_slice_tensor(place)
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
self.run_slice_tensor(place)
if __name__ == '__main__':
unittest.main()
@@ -16,8 +16,10 @@ from __future__ import print_function
import unittest
from paddle.fluid.framework import default_main_program, Program, convert_np_dtype_to_dtype_
import paddle.fluid as fluid
import paddle.fluid.core as core
import numpy as np
from test_imperative_base import new_program_scope
class TestVariable(unittest.TestCase):
@@ -60,6 +62,100 @@ class TestVariable(unittest.TestCase):
name='step_scopes', type=core.VarDesc.VarType.STEP_SCOPES)
self.assertEqual(core.VarDesc.VarType.STEP_SCOPES, var.type)
def _test_slice(self):
b = default_main_program().current_block()
w = b.create_var(dtype="float64", shape=[784, 100, 100], lod_level=0)
for i in range(3):
nw = w[i]
self.assertEqual((1, 100, 100), nw.shape)
nw = w[:]
self.assertEqual((784, 100, 100), nw.shape)
nw = w[:, :, ...]
self.assertEqual((784, 100, 100), nw.shape)
nw = w[::2, ::2, :]
self.assertEqual((392, 50, 100), nw.shape)
nw = w[::-2, ::-2, :]
self.assertEqual((392, 50, 100), nw.shape)
self.assertEqual(0, nw.lod_level)
place = fluid.CPUPlace()
main = fluid.Program()
with fluid.program_guard(main):
exe = fluid.Executor(place)
tensor_array = np.array(
[[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
[[10, 11, 12], [13, 14, 15], [16, 17, 18]],
[[19, 20, 21], [22, 23, 24], [25, 26, 27]]]).astype('float32')
var = fluid.layers.assign(tensor_array)
var1 = var[0, 1, 1]
var2 = var[1:]
var3 = var[0:1]
var4 = var[..., ]
var5 = var[2::-2]
var6 = var[1, 1:, 1:]
var7 = var[1, ..., 1:]
var8 = var[1, ...]
local_out = exe.run(main,
fetch_list=[
var, var1, var2, var3, var4, var5, var6,
var7, var8
])
self.assertTrue((np.array(local_out[1]) == np.array(tensor_array[
0, 1, 1])).all())
self.assertTrue((np.array(local_out[2]) == np.array(tensor_array[
1:])).all())
self.assertTrue((np.array(local_out[3]) == np.array(tensor_array[
0:1])).all())
self.assertTrue((np.array(local_out[4]) == np.array(
tensor_array[..., ])).all())
self.assertTrue((np.array(local_out[5]) == np.array(tensor_array[
2::-2])).all())
self.assertTrue((np.array(local_out[6]) == np.array(tensor_array[
1, 1:, 1:])).all())
self.assertTrue((np.array(local_out[7]) == np.array(tensor_array[
1, ..., 1:])).all())
self.assertTrue((np.array(local_out[8]) == np.array(tensor_array[
1, ...])).all())
def test_slice(self):
self._test_slice()
class TestVariableImperative(unittest.TestCase):
def _test_slice(self):
b = default_main_program().current_block()
w = b.create_var(dtype="float64", shape=[784, 100, 100], lod_level=0)
for i in range(3):
nw = w[i]
self.assertEqual([1, 100, 100], nw.shape)
nw = w[:]
self.assertEqual([784, 100, 100], nw.shape)
nw = w[:, :, :]
self.assertEqual([784, 100, 100], nw.shape)
nw = w[::2, ::2, :]
self.assertEqual([392, 50, 100], nw.shape)
nw = w[::-2, ::-2, :]
self.assertEqual([392, 50, 100], nw.shape)
nw = w[0::-2, 0::-2, :]
self.assertEqual([1, 1, 100], nw.shape)
def test_slice(self):
with fluid.imperative.guard():
self._test_slice()
if __name__ == '__main__':
unittest.main()