From c300b1ba699952b501207957f951f93ff481e6ae Mon Sep 17 00:00:00 2001 From: wopeizl Date: Wed, 27 Mar 2019 15:57:37 +0800 Subject: [PATCH] Tensor index (#16223) * extend the slice function for python test=develop --- paddle/fluid/pybind/pybind.cc | 10 +- paddle/fluid/pybind/tensor_py.h | 253 ++++++++++++++++++ python/paddle/fluid/framework.py | 177 ++++++++++++ .../fluid/tests/unittests/test_tensor.py | 53 ++++ .../fluid/tests/unittests/test_variable.py | 96 +++++++ 5 files changed, 588 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index b703cc1a34..d15eadcf30 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -347,7 +347,8 @@ PYBIND11_MODULE(core, m) { .def("_set_double_element", TensorSetElement) .def("_get_double_element", TensorGetElement) .def("_place", [](Tensor &self) { return self.place(); }) - .def("_dtype", [](Tensor &self) { return self.type(); }); + .def("_dtype", [](Tensor &self) { return self.type(); }) + .def("__getitem__", PySliceTensor, py::return_value_policy::reference); py::class_(m, "LoDTensor", R"DOC( LoDTensor is a Tensor with optional LoD information. @@ -499,6 +500,13 @@ PYBIND11_MODULE(core, m) { Returns: out (bool): whether the lod is valid. + )DOC") + .def("__getitem__", PySliceTensor, py::return_value_policy::reference, + R"DOC( + Slice the original Tensor, and remove the LoD information. + + Returns: + out (Tensor): new Tensor(NOT LoDTensor). )DOC"); py::class_(m, "SelectedRows") diff --git a/paddle/fluid/pybind/tensor_py.h b/paddle/fluid/pybind/tensor_py.h index ecdc8f3dc7..4a780f1cb5 100644 --- a/paddle/fluid/pybind/tensor_py.h +++ b/paddle/fluid/pybind/tensor_py.h @@ -14,16 +14,22 @@ limitations under the License. */ #pragma once #include +#include +#include #include #include #include #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/memory/memcpy.h" +#include "paddle/fluid/operators/math/concat_and_split.h" +#include "paddle/fluid/operators/strided_memcpy.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/float16.h" #include "pybind11/numpy.h" #include "pybind11/pybind11.h" +namespace py = pybind11; + namespace paddle { namespace pybind { namespace details { @@ -191,6 +197,253 @@ inline void PyCPUTensorSetFromArray( std::memcpy(dst, array.data(), sizeof(uint16_t) * array.size()); } +template +void _sliceCompute(const framework::Tensor *in, framework::Tensor *out, + const platform::CPUDeviceContext &ctx, + const std::vector &axes, + const std::vector &starts) { + auto &eigen_place = *ctx.eigen_device(); + auto place = in->place(); + auto out_dims = out->dims(); + auto in_dims = in->dims(); + + auto offsets = Eigen::array(); + auto extents = Eigen::array(); + for (size_t i = 0; i < D; ++i) { + offsets[i] = 0; + extents[i] = out_dims[i]; + } + int start; + for (size_t i = 0; i < axes.size(); ++i) { + start = starts[i]; + if (start < 0) { + start = (start + in_dims[axes[i]]); + } + start = std::max(start, 0); + offsets[axes[i]] = start; + } + auto in_t = + framework::EigenTensor::From( + *in); + auto out_t = + framework::EigenTensor::From( + *out); + out_t.device(eigen_place) = in_t.slice(offsets, extents); +} + +template +void _concatCompute(const std::vector &ins, + paddle::framework::Tensor *out, + const platform::CPUDeviceContext &ctx, int64_t axis) { + if (axis == 0 && ins.size() < 10) { + size_t output_offset = 0; + for (auto &in : ins) { + auto in_stride = framework::stride_numel(in.dims()); + auto out_stride = framework::stride_numel(out->dims()); + paddle::operators::StridedNumelCopyWithAxis( + ctx, axis, out->data() + output_offset, out_stride, in.data(), + in_stride, in_stride[axis]); + output_offset += in_stride[axis]; + } + } else { + paddle::operators::math::ConcatFunctor + concat_functor; + concat_functor(ctx, ins, static_cast(axis), out); + } +} + +void _getSliceinfo(const framework::Tensor &self, py::object obj, + const int64_t dim, int64_t *pstart, int64_t *pstop, + int64_t *pstep, int64_t *pslicelength) { + auto &start = *pstart; + auto &stop = *pstop; + auto &step = *pstep; + auto &slicelength = *pslicelength; + const framework::DDim &srcDDim = self.dims(); + if (dim < 0 || dim >= srcDDim.size()) { + throw py::index_error(); + } + if (py::isinstance(obj)) { + size_t lstart, lstop, lstep, lslicelength; + py::slice s = static_cast(obj); + if (!s.compute(srcDDim[dim], &lstart, &lstop, &lstep, &lslicelength)) { + throw py::index_error(); + } + start = static_cast(lstart); + stop = static_cast(lstop); + step = static_cast(lstep); + slicelength = static_cast(lslicelength); + } else if (py::isinstance(obj)) { + start = static_cast(static_cast(obj)); + if (std::abs(start) >= srcDDim[dim]) { + throw py::index_error(); + } + start = (start >= 0) ? start : srcDDim[dim] - start; + stop = start + 1; + step = 1; + slicelength = 1; + } else { + throw py::index_error(); + } +} + +inline framework::Tensor *_getTensor(const framework::Tensor &self, + const framework::DDim &ddim) { + framework::Tensor *output = new framework::Tensor(); + output->Resize(ddim); + auto place = self.place(); + if (platform::is_cpu_place(place)) { + output->mutable_data(boost::get(place), self.type()); +#ifdef PADDLE_WITH_CUDA + } else { + if (platform::is_cuda_pinned_place(place)) { + output->mutable_data(boost::get(place), + self.type()); + } else if ((platform::is_gpu_place(place))) { + output->mutable_data(boost::get(place), self.type()); + } +#endif + } + return output; +} + +template +void _sliceDapper(const framework::Tensor *in, framework::Tensor *out, + const platform::CPUDeviceContext &ctx, + const std::vector &axes, const std::vector &starts, + int size) { + switch (size) { + case 1: + _sliceCompute(in, out, ctx, axes, starts); + break; + case 2: + _sliceCompute(in, out, ctx, axes, starts); + break; + case 3: + _sliceCompute(in, out, ctx, axes, starts); + break; + case 4: + _sliceCompute(in, out, ctx, axes, starts); + break; + case 5: + _sliceCompute(in, out, ctx, axes, starts); + break; + case 6: + _sliceCompute(in, out, ctx, axes, starts); + break; + case 7: + _sliceCompute(in, out, ctx, axes, starts); + break; + case 8: + _sliceCompute(in, out, ctx, axes, starts); + break; + case 9: + _sliceCompute(in, out, ctx, axes, starts); + break; + default: + PADDLE_THROW("dim size not exepected, current is %d", size); + break; + } +} + +template +inline framework::Tensor *_sliceWrapper(const framework::Tensor &self, + const platform::CPUDeviceContext &ctx, + py::object obj, int dim, int64_t start, + int64_t slicelength) { + framework::DDim dstDDim = self.dims(); + dstDDim[dim] = static_cast(slicelength); + std::vector axes({dim}); + std::vector starts({static_cast(start)}); + framework::Tensor *output = _getTensor(self, dstDDim); + _sliceDapper(&self, output, ctx, axes, starts, dstDDim.size()); + return output; +} + +template +inline framework::Tensor *_sliceAndConcat(const framework::Tensor &self, + py::object obj, int dim) { + platform::CPUDeviceContext ctx; + int64_t start, stop, step, slicelength; + _getSliceinfo(self, obj, dim, &start, &stop, &step, &slicelength); + if (step == 1 || slicelength == 1) { + return _sliceWrapper(self, ctx, obj, dim, start, slicelength); + } else { + std::vector ins; + for (auto i = 0; i < slicelength; ++i, start += step) { + ins.emplace_back(*_sliceWrapper(self, ctx, obj, dim, start, 1)); + } + + // do the concat operation + framework::DDim dstDDim = self.dims(); + dstDDim[dim] = static_cast(slicelength); + framework::Tensor *output1 = _getTensor(self, dstDDim); + _concatCompute(ins, output1, ctx, dim); + return output1; + } +} + +inline framework::Tensor *_sliceTensor(const framework::Tensor &self, + py::object obj, int dim) { + auto src_type = self.type(); + switch (src_type) { + case framework::proto::VarType::FP16: + return _sliceAndConcat(self, obj, dim); + case framework::proto::VarType::FP32: + return _sliceAndConcat(self, obj, dim); + case framework::proto::VarType::FP64: + return _sliceAndConcat(self, obj, dim); + case framework::proto::VarType::INT32: + return _sliceAndConcat(self, obj, dim); + case framework::proto::VarType::INT64: + return _sliceAndConcat(self, obj, dim); + case framework::proto::VarType::BOOL: + return _sliceAndConcat(self, obj, dim); + case framework::proto::VarType::INT16: + return _sliceAndConcat(self, obj, dim); + case framework::proto::VarType::UINT8: + return _sliceAndConcat(self, obj, dim); + default: + PADDLE_THROW("Not support type %d", src_type); + } +} + +inline framework::Tensor *_pySliceTensor(const framework::Tensor &self, + py::object obj) { + if (py::isinstance(obj)) { + py::list l = static_cast(obj); + std::unique_ptr target; + framework::Tensor *src = const_cast(&self); + for (auto i = 0; i < static_cast(l.size()); ++i) { + src = _sliceTensor(*src, l[i], i); + if (i + 1 == static_cast(l.size())) { + return src; + } else { + target.reset(src); + } + } + return nullptr; + } else { + return _sliceTensor(self, obj, 0); + } +} + +inline framework::Tensor *PySliceTensor(const framework::Tensor &self, + py::object obj) { + if (platform::is_gpu_place(self.place())) { + std::unique_ptr holder; + framework::Tensor src; + framework::TensorCopySync(self, platform::CPUPlace(), &src); + framework::Tensor *output = _pySliceTensor(src, obj); + holder.reset(output); + framework::Tensor *dst = _getTensor(*output, output->dims()); + framework::TensorCopySync(*output, self.place(), dst); + return dst; + } else { + return _pySliceTensor(self, obj); + } +} + #ifdef PADDLE_WITH_CUDA template void PyCUDATensorSetFromArray( diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 3f71247630..85e1916a3a 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -627,6 +627,183 @@ class Variable(object): """ self.error_clip = error_clip + def _slice_indices(self, slice, length): + """ + Reference implementation for the slice.indices method. + """ + # Compute step and length as integers. + step = 1 if slice.step is None else slice.step + + # Raise ValueError for negative length or zero step. + if length < 0: + raise ValueError("length should not be negative") + if step == 0: + raise ValueError("slice step cannot be zero") + + # Find lower and upper bounds for start and stop. + lower = -1 if step < 0 else 0 + upper = length - 1 if step < 0 else length + + # Compute start. + if slice.start is None: + start = upper if step < 0 else lower + else: + start = slice.start + start = max(start + length, lower) if start < 0 else min(start, + upper) + + # Compute stop. + if slice.stop is None: + stop = lower if step < 0 else upper + else: + stop = slice.stop + stop = max(stop + length, lower) if stop < 0 else min(stop, upper) + + return start, stop, step + + def _detectEllipsis(self, item): + has_ellipsis = False + start = 0 + end = len(self.shape) + for index, o in enumerate(item): + if o is Ellipsis: + if has_ellipsis: + raise ValueError("Index can have one ellipsis only.") + has_ellipsis = True + start = index + else: + if has_ellipsis: + end = index + return has_ellipsis, start, end + + def _reconstructSliceinfo(self, item): + has_ellipsis, start, end = self._detectEllipsis(item) + if has_ellipsis: + newitem = [] + for i in range(start): + newitem.append(item[i]) + for i in range(start, end): + newitem.append(slice(None, None, None)) + for i in range(end, len(item)): + newitem.append(item[i]) + return newitem + else: + return None + + def _detectContinuesSlice(self, item): + starts = [] + ends = [] + for index, o in enumerate(item): + if isinstance(o, int): + start = int(o) + if (index > 0 and index >= self.shape[index]) \ + or (index < 0 and (index + self.shape[index]) < 0): + raise IndexError("invalid index") + start = max(start + self.shape[index], 0) if start < 0 else min( + start, self.shape[index]) + starts.append(start) + ends.append(start + 1) + elif isinstance(o, slice): + start, stop, step = self._slice_indices(o, self.shape[index]) + if step == 1 or step == -1: + starts.append(start) + ends.append(stop) + else: + return False, None + else: + raise IndexError("Valid index accept int or slice or ellipsis") + return True, [starts, ends] + + def _cloneVar(self, copy=False): + if not copy: + return self.block.create_var( + name=unique_name.generate(".".join(self.name)), + dtype=self.dtype, + persistable=self.persistable, + stop_gradient=self._stop_gradient, ) + else: + return self + + def _sliceVar(self, axes, starts, ends): + new_var = self._cloneVar() + self.block.append_op( + type="slice", + inputs={'Input': [self]}, + outputs={'Out': [new_var]}, + attrs={'axes': axes, + 'starts': starts, + 'ends': ends}) + return new_var + + def _concatVar(self, inputs, axis): + new_var = self._cloneVar() + self.block.append_op( + type="concat", + inputs={'X': inputs}, + outputs={'Out': [new_var]}, + attrs={'axis': axis, }) + return new_var + + def _sliceAndConcatVar(self, item, axis): + if isinstance(item, slice): + if self.shape[axis] < 0: + return self._cloneVar(True) + start, stop, step = self._slice_indices(item, self.shape[axis]) + if step == 1: + return self._sliceVar([axis], [start], [stop]) + else: + vars = [] + if step > 0: + while start < stop: + vars.append( + self._sliceVar([axis], [start], [start + 1])) + start += step + else: + while start > stop: + vars.append( + self._sliceVar([axis], [start], [start + 1])) + start += step + return self._concatVar(vars, axis) + elif isinstance(item, int): + if self.shape[axis] < 0: + return self._cloneVar(True) + index = int(item) + if (index > 0 and index >= self.shape[axis])\ + or (index < 0 and (index + self.shape[axis]) < 0): + raise IndexError("invalid index") + return self._sliceVar([axis], [index], [index + 1]) + else: + raise IndexError("Valid index accept int or slice or tuple") + + def __getitem__(self, item): + """ + Slice the variable. + + Args: + item(int/slice/tuple) : the index. + + Returns: + Sliced variable + """ + new_var = None + if isinstance(item, tuple): + if len(item) > len(self.shape): + raise IndexError("Too many indexes") + newitem = self._reconstructSliceinfo(item) or item + check, info = self._detectContinuesSlice(newitem) + if check: + starts = info[0] + ends = info[1] + axes = [i for i in range(len(starts))] + return self._sliceVar(axes, starts, ends) + else: + new_var = self + for index, o in enumerate(newitem): + new_var = new_var._sliceAndConcatVar(o, index) + else: + new_var = self._sliceAndConcatVar(item, 0) + return new_var + def get_all_op_protos(): """ diff --git a/python/paddle/fluid/tests/unittests/test_tensor.py b/python/paddle/fluid/tests/unittests/test_tensor.py index 1822957c23..3c974ea460 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_tensor.py @@ -14,6 +14,7 @@ from __future__ import print_function +import paddle.fluid as fluid import paddle.fluid.core as core import unittest import numpy @@ -183,6 +184,58 @@ class TestTensor(unittest.TestCase): tensor_array = numpy.array(tensor) self.assertEqual((0, 1), tensor_array.shape) + def run_sliece_tensor(self, place): + + tensor = fluid.Tensor() + shape = [3, 3, 3] + tensor._set_dims(shape) + + tensor_array = numpy.array([[[1, 2, 3], [4, 5, 6], [7, 8, 9]], + [[10, 11, 12], [13, 14, 15], [16, 17, 18]], + [[19, 20, 21], [22, 23, 24], [25, 26, 27]]]) + + tensor.set(tensor_array, place) + n1 = tensor[1] + t1 = tensor_array[1] + self.assertTrue((numpy.array(n1) == numpy.array(t1)).all()) + + n2 = tensor[1:] + t2 = tensor_array[1:] + self.assertTrue((numpy.array(n2) == numpy.array(t2)).all()) + + n3 = tensor[0:2:] + t3 = tensor_array[0:2:] + self.assertTrue((numpy.array(n3) == numpy.array(t3)).all()) + + n4 = tensor[2::-2] + t4 = tensor_array[2::-2] + self.assertTrue((numpy.array(n4) == numpy.array(t4)).all()) + + n5 = tensor[2::-2][0] + t5 = tensor_array[2::-2][0] + self.assertTrue((numpy.array(n5) == numpy.array(t5)).all()) + + n6 = tensor[2:-1:-1] + t6 = tensor_array[2:-1:-1] + self.assertTrue((numpy.array(n6) == numpy.array(t6)).all()) + + n7 = tensor[0:, 0:] + t7 = tensor_array[0:, 0:] + self.assertTrue((numpy.array(n7) == numpy.array(t7)).all()) + + n8 = tensor[0::1, 0::-1, 2:] + t8 = tensor_array[0::1, 0::-1, 2:] + self.assertTrue((numpy.array(n8) == numpy.array(t8)).all()) + + def test_sliece_tensor(self): + # run cpu first + place = core.CPUPlace() + self.run_sliece_tensor(place) + + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + self.run_sliece_tensor(place) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_variable.py b/python/paddle/fluid/tests/unittests/test_variable.py index 4f3c26ca7b..076ee3baf9 100644 --- a/python/paddle/fluid/tests/unittests/test_variable.py +++ b/python/paddle/fluid/tests/unittests/test_variable.py @@ -16,8 +16,10 @@ from __future__ import print_function import unittest from paddle.fluid.framework import default_main_program, Program, convert_np_dtype_to_dtype_ +import paddle.fluid as fluid import paddle.fluid.core as core import numpy as np +from test_imperative_base import new_program_scope class TestVariable(unittest.TestCase): @@ -60,6 +62,100 @@ class TestVariable(unittest.TestCase): name='step_scopes', type=core.VarDesc.VarType.STEP_SCOPES) self.assertEqual(core.VarDesc.VarType.STEP_SCOPES, var.type) + def _test_slice(self): + b = default_main_program().current_block() + w = b.create_var(dtype="float64", shape=[784, 100, 100], lod_level=0) + + for i in range(3): + nw = w[i] + self.assertEqual((1, 100, 100), nw.shape) + + nw = w[:] + self.assertEqual((784, 100, 100), nw.shape) + + nw = w[:, :, ...] + self.assertEqual((784, 100, 100), nw.shape) + + nw = w[::2, ::2, :] + self.assertEqual((392, 50, 100), nw.shape) + + nw = w[::-2, ::-2, :] + self.assertEqual((392, 50, 100), nw.shape) + + self.assertEqual(0, nw.lod_level) + + place = fluid.CPUPlace() + main = fluid.Program() + with fluid.program_guard(main): + exe = fluid.Executor(place) + tensor_array = np.array( + [[[1, 2, 3], [4, 5, 6], [7, 8, 9]], + [[10, 11, 12], [13, 14, 15], [16, 17, 18]], + [[19, 20, 21], [22, 23, 24], [25, 26, 27]]]).astype('float32') + var = fluid.layers.assign(tensor_array) + var1 = var[0, 1, 1] + var2 = var[1:] + var3 = var[0:1] + var4 = var[..., ] + var5 = var[2::-2] + var6 = var[1, 1:, 1:] + var7 = var[1, ..., 1:] + var8 = var[1, ...] + local_out = exe.run(main, + fetch_list=[ + var, var1, var2, var3, var4, var5, var6, + var7, var8 + ]) + + self.assertTrue((np.array(local_out[1]) == np.array(tensor_array[ + 0, 1, 1])).all()) + self.assertTrue((np.array(local_out[2]) == np.array(tensor_array[ + 1:])).all()) + self.assertTrue((np.array(local_out[3]) == np.array(tensor_array[ + 0:1])).all()) + self.assertTrue((np.array(local_out[4]) == np.array( + tensor_array[..., ])).all()) + self.assertTrue((np.array(local_out[5]) == np.array(tensor_array[ + 2::-2])).all()) + self.assertTrue((np.array(local_out[6]) == np.array(tensor_array[ + 1, 1:, 1:])).all()) + self.assertTrue((np.array(local_out[7]) == np.array(tensor_array[ + 1, ..., 1:])).all()) + self.assertTrue((np.array(local_out[8]) == np.array(tensor_array[ + 1, ...])).all()) + + def test_slice(self): + self._test_slice() + + +class TestVariableImperative(unittest.TestCase): + def _test_slice(self): + b = default_main_program().current_block() + w = b.create_var(dtype="float64", shape=[784, 100, 100], lod_level=0) + + for i in range(3): + nw = w[i] + self.assertEqual([1, 100, 100], nw.shape) + + nw = w[:] + self.assertEqual([784, 100, 100], nw.shape) + + nw = w[:, :, :] + self.assertEqual([784, 100, 100], nw.shape) + + nw = w[::2, ::2, :] + self.assertEqual([392, 50, 100], nw.shape) + + nw = w[::-2, ::-2, :] + self.assertEqual([392, 50, 100], nw.shape) + + nw = w[0::-2, 0::-2, :] + self.assertEqual([1, 1, 100], nw.shape) + + def test_slice(self): + with fluid.imperative.guard(): + self._test_slice() + if __name__ == '__main__': unittest.main() -- GitLab