From 3304e3454d7492ca87fae235a62ce3ae68441455 Mon Sep 17 00:00:00 2001 From: Asthestarsfalll <72954905+Asthestarsfalll@users.noreply.github.com> Date: Fri, 9 Sep 2022 16:50:50 +0800 Subject: [PATCH] =?UTF-8?q?=E3=80=90PaddlePaddle=20Hackathon=203=20No.20?= =?UTF-8?q?=E3=80=91=E4=B8=BA=20Paddle=20=E6=96=B0=E5=A2=9E=20vsplit=20API?= =?UTF-8?q?=20(#44853)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add paddle vsplit api * update unittest and fix a typo * update * add vsplit to __all__ * update unit test and description of x * fix typo --- python/paddle/__init__.py | 2 + .../fluid/tests/unittests/test_splits_api.py | 168 ++++++++++++++++++ python/paddle/tensor/__init__.py | 2 + python/paddle/tensor/manipulation.py | 42 +++++ 4 files changed, 214 insertions(+) create mode 100644 python/paddle/fluid/tests/unittests/test_splits_api.py diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index dc55260f2ce..4e983f43723 100755 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -165,6 +165,7 @@ from .tensor.manipulation import shard_index # noqa: F401 from .tensor.manipulation import slice # noqa: F401 from .tensor.manipulation import crop # noqa: F401 from .tensor.manipulation import split # noqa: F401 +from .tensor.manipulation import vsplit # noqa: F401 from .tensor.manipulation import squeeze # noqa: F401 from .tensor.manipulation import squeeze_ # noqa: F401 from .tensor.manipulation import stack # noqa: F401 @@ -457,6 +458,7 @@ __all__ = [ # noqa 'searchsorted', 'bucketize', 'split', + 'vsplit', 'logical_and', 'full_like', 'less_than', diff --git a/python/paddle/fluid/tests/unittests/test_splits_api.py b/python/paddle/fluid/tests/unittests/test_splits_api.py new file mode 100644 index 00000000000..4b6254e266b --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_splits_api.py @@ -0,0 +1,168 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core


def _default_place():
    """Return ``CUDAPlace(0)`` when Paddle is compiled with CUDA, else ``CPUPlace``."""
    if core.is_compiled_with_cuda():
        return paddle.CUDAPlace(0)
    return paddle.CPUPlace()


def func_ref(func, x, num_or_sections):
    """Run the NumPy reference split ``func`` using Paddle-style arguments.

    Args:
        func (callable): NumPy split function taking
            ``(array, indices_or_sections)``, e.g. ``np.vsplit``.
        x (numpy.ndarray): Array to split.
        num_or_sections (int|list|tuple): An int is forwarded unchanged.
            A list/tuple of section sizes is converted into the cut indices
            NumPy expects (cumulative sum without the final total).
            A ``-1`` placeholder is not supported here.

    Returns:
        Whatever ``func`` returns for the converted arguments.
    """
    if isinstance(num_or_sections, int):
        indices_or_sections = num_or_sections
    else:
        indices_or_sections = np.cumsum(num_or_sections)[:-1]
    return func(x, indices_or_sections)


# TODO: add other split API, such as dsplit, hsplit
test_list = [
    (paddle.vsplit, np.vsplit),
]


class TestSplitsAPI(unittest.TestCase):
    """
    Compare every (paddle, numpy) pair in ``test_list`` in static and dygraph
    mode. Subclasses override ``set_input`` to vary shape, sections, dtype
    and place.
    """

    def setUp(self):
        self.rtol = 1e-5
        self.atol = 1e-8
        self.set_input()

    def set_input(self):
        self.shape = [4, 5, 2]
        self.num_or_sections = 2
        self.x_np = np.random.uniform(-1, 1, self.shape).astype('float64')
        self.place = _default_place()

    def test_static_api(self):
        paddle.enable_static()
        for func, func_type in test_list:
            with paddle.static.program_guard(paddle.static.Program()):
                x = paddle.fluid.data('X', self.x_np.shape, self.x_np.dtype)
                out = func(x, self.num_or_sections)
                exe = paddle.static.Executor(self.place)
                res = exe.run(feed={'X': self.x_np}, fetch_list=[out])
            out_ref = func_ref(func_type, self.x_np, self.num_or_sections)
            # zip() stops at the shorter sequence, so check the count first;
            # otherwise a missing or extra output would go unnoticed.
            self.assertEqual(len(out_ref), len(res))
            for n, p in zip(out_ref, res):
                np.testing.assert_allclose(n, p, rtol=self.rtol, atol=self.atol)

    def test_dygraph_api(self):
        paddle.disable_static(self.place)
        try:
            x = paddle.to_tensor(self.x_np)
            for func, func_type in test_list:
                out = func(x, self.num_or_sections)
                out_ref = func_ref(func_type, self.x_np, self.num_or_sections)
                self.assertEqual(len(out_ref), len(out))
                for n, p in zip(out_ref, out):
                    np.testing.assert_allclose(n,
                                               p.numpy(),
                                               rtol=self.rtol,
                                               atol=self.atol)
        finally:
            # Restore static mode even when an assertion above fails.
            paddle.enable_static()


class TestSplitsSections(TestSplitsAPI):
    """
    Test num_or_sections which is a list and data type is float64.
    """

    def set_input(self):
        self.shape = [6, 2, 4]
        self.num_or_sections = [2, 1, 3]
        self.x_np = np.random.uniform(-1, 1, self.shape).astype('float64')
        self.place = _default_place()


class TestSplitsFloat32(TestSplitsAPI):
    """
    Test num_or_sections which is an integer and data type is float32.
    """

    def set_input(self):
        self.shape = [2, 3, 4]
        self.num_or_sections = 2
        self.x_np = np.random.uniform(-1, 1, self.shape).astype('float32')
        self.place = _default_place()


class TestSplitsInt32(TestSplitsAPI):
    """
    Test data type int32.
    """

    def set_input(self):
        self.shape = [5, 1, 2]
        self.num_or_sections = 5
        # Draw real integers: casting uniform(-1, 1) to an integer dtype
        # truncates every value to 0, which makes the comparison vacuous.
        self.x_np = np.random.randint(-10, 10, self.shape).astype('int32')
        self.place = _default_place()


class TestSplitsInt64(TestSplitsAPI):
    """
    Test data type int64.
    """

    def set_input(self):
        self.shape = [4, 3, 2]
        self.num_or_sections = 2
        # See TestSplitsInt32.set_input for why randint is used here.
        self.x_np = np.random.randint(-10, 10, self.shape).astype('int64')
        self.place = _default_place()


class TestSplitsCPU(TestSplitsAPI):
    """
    Test cpu place and num_or_sections which is a tuple.
    """

    def set_input(self):
        self.shape = [8, 2, 3, 5]
        self.num_or_sections = (2, 3, 3)
        self.x_np = np.random.uniform(-1, 1, self.shape).astype('float64')
        self.place = paddle.CPUPlace()


class TestSplitsError(unittest.TestCase):
    """
    Test that an input with fewer than 2 dimensions raises ValueError.
    """

    def setUp(self):
        self.num_or_sections = 1
        self.place = _default_place()

    def test_static_error(self):
        paddle.enable_static()
        for func, _ in test_list:
            with paddle.static.program_guard(paddle.static.Program()):
                x = paddle.fluid.data('X', [5], 'float32')
                self.assertRaises(ValueError, func, x, self.num_or_sections)

    def test_dygraph_error(self):
        paddle.disable_static(self.place)
        try:
            for func, _ in test_list:
                x_np = np.random.randn(2)
                x = paddle.to_tensor(x_np, dtype='float64')
                self.assertRaises(ValueError, func, x, self.num_or_sections)
        finally:
            # Match TestSplitsAPI.test_dygraph_api: leave static mode enabled.
            paddle.enable_static()


if __name__ == '__main__':
    unittest.main()
def vsplit(x, num_or_sections, name=None):
    """
    Split the input tensor into multiple sub-Tensors along the vertical axis, which is equivalent to ``paddle.split`` with ``axis=0``.

    Args:
        x (Tensor): A Tensor whose dimension must be greater than 1. The data type is bool, float16, float32, float64, uint8, int8, int32 or int64.
        num_or_sections (int|list|tuple): If ``num_or_sections`` is an int, then ``num_or_sections``
            indicates the number of equal sized sub-Tensors that the ``x`` will be divided into.
            If ``num_or_sections`` is a list or tuple, the length of it indicates the number of
            sub-Tensors and the elements in it indicate the sizes of sub-Tensors' dimension orderly.
            The length of the list must not be larger than the ``x`` 's size of axis 0.
            An element may be given as -1 (see the last example below), in which case that
            sub-Tensor takes the size left over along axis 0.
        name (str, optional): The default value is None. Normally there is no need for user to set this property.
            For more information, please refer to :ref:`api_guide_Name` .
    Returns:
        list[Tensor], The list of segmented Tensors.

    Raises:
        ValueError: If ``x`` has fewer than 2 dimensions.

    Example:
        .. code-block:: python

            import paddle

            # x is a Tensor of shape [8, 6, 7]
            x = paddle.rand([8, 6, 7])
            out0, out1 = paddle.vsplit(x, num_or_sections=2)
            print(out0.shape)  # [4, 6, 7]
            print(out1.shape)  # [4, 6, 7]
            out0, out1, out2 = paddle.vsplit(x, num_or_sections=[1, 3, 4])
            print(out0.shape)  # [1, 6, 7]
            print(out1.shape)  # [3, 6, 7]
            print(out2.shape)  # [4, 6, 7]
            out0, out1, out2 = paddle.vsplit(x, num_or_sections=[2, 3, -1])
            print(out0.shape)  # [2, 6, 7]
            print(out1.shape)  # [3, 6, 7]
            print(out2.shape)  # [3, 6, 7]
    """
    # Reject inputs with fewer than 2 dimensions before delegating to split.
    if x.ndim < 2:
        raise ValueError(
            "The input tensor's dimension must be greater than 1, but got {}".
            format(x.ndim))
    return split(x, num_or_sections, axis=0, name=name)