From 4b683887cd6b6d04cb90e83fa3f7557bac5950d2 Mon Sep 17 00:00:00 2001 From: Zhong Hui Date: Thu, 16 Sep 2021 20:36:22 +0800 Subject: [PATCH] Add segment apis to paddle.incubate (#35759) --- .../fluid/tests/unittests/test_segment_ops.py | 62 ++++- python/paddle/incubate/__init__.py | 15 +- python/paddle/incubate/tensor/__init__.py | 25 ++ python/paddle/incubate/tensor/math.py | 225 ++++++++++++++++++ python/setup.py.in | 1 + 5 files changed, 325 insertions(+), 3 deletions(-) create mode 100644 python/paddle/incubate/tensor/__init__.py create mode 100644 python/paddle/incubate/tensor/math.py diff --git a/python/paddle/fluid/tests/unittests/test_segment_ops.py b/python/paddle/fluid/tests/unittests/test_segment_ops.py index b58d66676b..e2aadbedbd 100644 --- a/python/paddle/fluid/tests/unittests/test_segment_ops.py +++ b/python/paddle/fluid/tests/unittests/test_segment_ops.py @@ -15,8 +15,11 @@ from __future__ import print_function import unittest -import numpy as np import sys + +import numpy as np +import paddle + from op_test import OpTest @@ -198,5 +201,62 @@ class TestSegmentMean2(TestSegmentMean): self.attrs = {'pooltype': "MEAN"} +class API_SegmentOpsTest(unittest.TestCase): + def test_static(self): + with paddle.static.program_guard(paddle.static.Program()): + x = paddle.static.data(name="x", shape=[3, 3], dtype="float32") + y = paddle.static.data(name='y', shape=[3], dtype='int32') + + res_sum = paddle.incubate.segment_sum(x, y) + res_mean = paddle.incubate.segment_mean(x, y) + res_max = paddle.incubate.segment_max(x, y) + res_min = paddle.incubate.segment_min(x, y) + + exe = paddle.static.Executor(paddle.CPUPlace()) + data1 = np.array([[1, 2, 3], [3, 2, 1], [4, 5, 6]], dtype='float32') + data2 = np.array([0, 0, 1], dtype="int32") + + np_sum = np.array([[4, 4, 4], [4, 5, 6]], dtype="float32") + np_mean = np.array([[2, 2, 2], [4, 5, 6]], dtype="float32") + np_max = np.array([[3, 2, 3], [4, 5, 6]], dtype="float32") + np_min = np.array([[1, 2, 1], [4, 5, 6]], dtype="float32") + + ret = exe.run(feed={'x': data1, + 'y': data2}, + fetch_list=[res_sum, res_mean, res_max, res_min]) + + for np_res, ret_res in zip([np_sum, np_mean, np_max, np_min], ret): + self.assertTrue( + np.allclose( + np_res, ret_res, atol=1e-6), + "two value is\ + {}\n{}, check diff!".format(np_res, ret_res)) + + def test_dygraph(self): + device = paddle.CPUPlace() + with paddle.fluid.dygraph.guard(device): + x = paddle.to_tensor( + [[1, 2, 3], [3, 2, 1], [4, 5, 6]], dtype='float32') + y = paddle.to_tensor([0, 0, 1], dtype="int32") + res_sum = paddle.incubate.segment_sum(x, y) + res_mean = paddle.incubate.segment_mean(x, y) + res_max = paddle.incubate.segment_max(x, y) + res_min = paddle.incubate.segment_min(x, y) + + np_sum = np.array([[4, 4, 4], [4, 5, 6]], dtype="float32") + np_mean = np.array([[2, 2, 2], [4, 5, 6]], dtype="float32") + np_max = np.array([[3, 2, 3], [4, 5, 6]], dtype="float32") + np_min = np.array([[1, 2, 1], [4, 5, 6]], dtype="float32") + + ret = [res_sum, res_mean, res_max, res_min] + + for np_res, ret_res in zip([np_sum, np_mean, np_max, np_min], ret): + self.assertTrue( + np.allclose( + np_res, ret_res.numpy(), atol=1e-6), + "two value is\ + {}\n{}, check diff!".format(np_res, ret_res)) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/incubate/__init__.py b/python/paddle/incubate/__init__.py index efaeda2720..644b934814 100644 --- a/python/paddle/incubate/__init__.py +++ b/python/paddle/incubate/__init__.py @@ -18,7 +18,18 @@ from .checkpoint import auto_checkpoint # noqa: F401 from ..fluid.layer_helper import LayerHelper # noqa: F401 from .operators import softmax_mask_fuse_upper_triangle # noqa: F401 from .operators import softmax_mask_fuse # noqa: F401 +from .tensor import segment_sum +from .tensor import segment_mean +from .tensor import segment_max +from .tensor import segment_min -__all__ = [ # noqa - 'LookAhead', 'ModelAverage', 'softmax_mask_fuse_upper_triangle', 'softmax_mask_fuse' +__all__ = [ + 'LookAhead', + 'ModelAverage', + 'softmax_mask_fuse_upper_triangle', + 'softmax_mask_fuse', + 'segment_sum', + 'segment_mean', + 'segment_max', + 'segment_min', ] diff --git a/python/paddle/incubate/tensor/__init__.py b/python/paddle/incubate/tensor/__init__.py new file mode 100644 index 0000000000..ea1018409a --- /dev/null +++ b/python/paddle/incubate/tensor/__init__.py @@ -0,0 +1,25 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .math import segment_sum +from .math import segment_mean +from .math import segment_max +from .math import segment_min + +__all__ = [ + 'segment_sum', + 'segment_mean', + 'segment_max', + 'segment_min', +] diff --git a/python/paddle/incubate/tensor/math.py b/python/paddle/incubate/tensor/math.py new file mode 100644 index 0000000000..f3cb8d5051 --- /dev/null +++ b/python/paddle/incubate/tensor/math.py @@ -0,0 +1,225 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__all__ = [ + 'segment_sum', + 'segment_mean', + 'segment_max', + 'segment_min', +] + +import paddle + +from paddle.fluid.layer_helper import LayerHelper, in_dygraph_mode +from paddle.fluid.data_feeder import check_variable_and_dtype +from paddle import _C_ops + + +def segment_sum(data, segment_ids, name=None): + """ + Segment Sum Operator. + + This operator sums the elements of input `data` which with + the same index in `segment_ids`. + It computes a tensor such that $out_i = \\sum_{j} data_{j}$ + where sum is over j such that `segment_ids[j] == i`. + + Args: + data (Tensor): A tensor, available data type float32, float64. + segment_ids (Tensor): A 1-D tensor, which have the same size + with the first dimension of input data. + Available data type is int32, int64. + Returns: + output (Tensor): the reduced result. + + Examples: + + .. code-block:: python + + import paddle + data = paddle.to_tensor([[1, 2, 3], [3, 2, 1], [4, 5, 6]], dtype='float32') + segment_ids = paddle.to_tensor([0, 0, 1], dtype='int32') + out = paddle.incubate.segment_sum(data, segment_ids) + #Outputs: [[4., 4., 4.], [4., 5., 6.]] + + """ + if in_dygraph_mode(): + out, tmp = _C_ops.segment_pool(data, segment_ids, 'pooltype', "SUM") + return out + + check_variable_and_dtype(data, "X", ("float32", "float64"), "segment_pool") + check_variable_and_dtype(segment_ids, "SegmentIds", ("int32", "int64"), + "segment_pool") + + helper = LayerHelper("segment_sum", **locals()) + out = helper.create_variable_for_type_inference(dtype=data.dtype) + summed_ids = helper.create_variable_for_type_inference(dtype=data.dtype) + helper.append_op( + type="segment_pool", + inputs={"X": data, + "SegmentIds": segment_ids}, + outputs={"Out": out, + "SummedIds": summed_ids}, + attrs={"pooltype": "SUM"}) + return out + + +def segment_mean(data, segment_ids, name=None): + """ + Segment mean Operator. + + Ihis operator calculate the mean value of input `data` which + with the same index in `segment_ids`. + It computes a tensor such that $out_i = \\frac{1}{n_i} \\sum_{j} data[j]$ + where sum is over j such that 'segment_ids[j] == i' and $n_i$ is the number + of all index 'segment_ids[j] == i'. + + Args: + data (tensor): a tensor, available data type float32, float64. + segment_ids (tensor): a 1-d tensor, which have the same size + with the first dimension of input data. + available data type is int32, int64. + + Returns: + output (Tensor): the reduced result. + + Examples: + + .. code-block:: python + + import paddle + data = paddle.to_tensor([[1, 2, 3], [3, 2, 1], [4, 5, 6]], dtype='float32') + segment_ids = paddle.to_tensor([0, 0, 1], dtype='int32') + out = paddle.incubate.segment_mean(data, segment_ids) + #Outputs: [[2., 2., 2.], [4., 5., 6.]] + + """ + if in_dygraph_mode(): + out, tmp = _C_ops.segment_pool(data, segment_ids, 'pooltype', "MEAN") + return out + + check_variable_and_dtype(data, "X", ("float32", "float64"), "segment_pool") + check_variable_and_dtype(segment_ids, "SegmentIds", ("int32", "int64"), + "segment_pool") + + helper = LayerHelper("segment_mean", **locals()) + out = helper.create_variable_for_type_inference(dtype=data.dtype) + summed_ids = helper.create_variable_for_type_inference(dtype=data.dtype) + helper.append_op( + type="segment_pool", + inputs={"X": data, + "SegmentIds": segment_ids}, + outputs={"Out": out, + "SummedIds": summed_ids}, + attrs={"pooltype": "MEAN"}) + return out + + +def segment_min(data, segment_ids, name=None): + """ + Segment min operator. + + This operator calculate the minimum elements of input `data` which with + the same index in `segment_ids`. + It computes a tensor such that $out_i = \\min_{j} data_{j}$ + where min is over j such that `segment_ids[j] == i`. + + Args: + data (tensor): a tensor, available data type float32, float64. + segment_ids (tensor): a 1-d tensor, which have the same size + with the first dimension of input data. + available data type is int32, int64. + Returns: + output (Tensor): the reduced result. + + Examples: + + .. code-block:: python + + import paddle + data = paddle.to_tensor([[1, 2, 3], [3, 2, 1], [4, 5, 6]], dtype='float32') + segment_ids = paddle.to_tensor([0, 0, 1], dtype='int32') + out = paddle.incubate.segment_min(data, segment_ids) + #Outputs: [[1., 2., 1.], [4., 5., 6.]] + + """ + if in_dygraph_mode(): + out, tmp = _C_ops.segment_pool(data, segment_ids, 'pooltype', "MIN") + return out + + check_variable_and_dtype(data, "X", ("float32", "float64"), "segment_pool") + check_variable_and_dtype(segment_ids, "SegmentIds", ("int32", "int64"), + "segment_pool") + + helper = LayerHelper("segment_min", **locals()) + out = helper.create_variable_for_type_inference(dtype=data.dtype) + summed_ids = helper.create_variable_for_type_inference(dtype=data.dtype) + helper.append_op( + type="segment_pool", + inputs={"X": data, + "SegmentIds": segment_ids}, + outputs={"Out": out, + "SummedIds": summed_ids}, + attrs={"pooltype": "MIN"}) + return out + + +def segment_max(data, segment_ids, name=None): + """ + Segment max operator. + + This operator calculate the maximum elements of input `data` which with + the same index in `segment_ids`. + It computes a tensor such that $out_i = \\min_{j} data_{j}$ + where max is over j such that `segment_ids[j] == i`. + + Args: + data (tensor): a tensor, available data type float32, float64. + segment_ids (tensor): a 1-d tensor, which have the same size + with the first dimension of input data. + available data type is int32, int64. + + Returns: + output (Tensor): the reduced result. + + Examples: + + .. code-block:: python + + import paddle + data = paddle.to_tensor([[1, 2, 3], [3, 2, 1], [4, 5, 6]], dtype='float32') + segment_ids = paddle.to_tensor([0, 0, 1], dtype='int32') + out = paddle.incubate.segment_max(data, segment_ids) + #Outputs: [[3., 2., 3.], [4., 5., 6.]] + + """ + if in_dygraph_mode(): + out, tmp = _C_ops.segment_pool(data, segment_ids, 'pooltype', "MAX") + return out + + check_variable_and_dtype(data, "X", ("float32", "float64"), "segment_pool") + check_variable_and_dtype(segment_ids, "SegmentIds", ("int32", "int64"), + "segment_pool") + + helper = LayerHelper("segment_max", **locals()) + out = helper.create_variable_for_type_inference(dtype=data.dtype) + summed_ids = helper.create_variable_for_type_inference(dtype=data.dtype) + helper.append_op( + type="segment_pool", + inputs={"X": data, + "SegmentIds": segment_ids}, + outputs={"Out": out, + "SummedIds": summed_ids}, + attrs={"pooltype": "MAX"}) + return out diff --git a/python/setup.py.in b/python/setup.py.in index 6d3e6201dc..1b2897f230 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -162,6 +162,7 @@ packages=['paddle', 'paddle.incubate.optimizer', 'paddle.incubate.checkpoint', 'paddle.incubate.operators', + 'paddle.incubate.tensor', 'paddle.distributed.fleet', 'paddle.distributed.fleet.base', 'paddle.distributed.fleet.elastic', -- GitLab