From 26ede6e07e051061981498851f50b9c59b8133f3 Mon Sep 17 00:00:00 2001
From: zhulei <563755780@qq.com>
Date: Fri, 30 Oct 2020 17:39:37 +0800
Subject: [PATCH] Add median api. (#28310)

* Add median api.

* Add median api.

* Add median api.

* Add median api.

* Add median api.
---
 python/paddle/__init__.py                     |  1 +
 .../fluid/tests/unittests/test_median.py      | 88 +++++++++++++++++++
 python/paddle/tensor/__init__.py              |  1 +
 python/paddle/tensor/stat.py                  | 88 ++++++++++++++++++-
 4 files changed, 177 insertions(+), 1 deletion(-)
 create mode 100644 python/paddle/fluid/tests/unittests/test_median.py

diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py
index ae4dda166c..c8e0d830f4 100755
--- a/python/paddle/__init__.py
+++ b/python/paddle/__init__.py
@@ -248,6 +248,7 @@ from .tensor.stat import std  #DEFINE_ALIAS
 from .tensor.stat import var  #DEFINE_ALIAS
 # from .fluid.data import data
 from .tensor.stat import numel  #DEFINE_ALIAS
+from .tensor.stat import median  #DEFINE_ALIAS
 from .device import get_cudnn_version
 from .device import set_device
 from .device import get_device
diff --git a/python/paddle/fluid/tests/unittests/test_median.py b/python/paddle/fluid/tests/unittests/test_median.py
new file mode 100644
index 0000000000..be2206d026
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_median.py
@@ -0,0 +1,88 @@
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+import paddle
+from paddle.static import Program, program_guard
+
+DELTA = 1e-6
+
+
+class TestMedian(unittest.TestCase):
+    def check_numpy_res(self, np1, np2):
+        self.assertEqual(np1.shape, np2.shape)
+        mismatch = np.sum((np1 - np2) * (np1 - np2))
+        self.assertAlmostEqual(mismatch, 0, DELTA)
+
+    def static_single_test_median(self, lis_test):
+        paddle.enable_static()
+        x, axis, keepdims = lis_test
+        res_np = np.median(x, axis=axis, keepdims=keepdims)
+        if not isinstance(res_np, np.ndarray):
+            res_np = np.array([res_np])
+        main_program = Program()
+        startup_program = Program()
+        exe = paddle.static.Executor()
+        with program_guard(main_program, startup_program):
+            x_in = paddle.fluid.data(shape=x.shape, dtype=x.dtype, name='x')
+            y = paddle.median(x_in, axis, keepdims)
+            [res_pd] = exe.run(feed={'x': x}, fetch_list=[y])
+            self.check_numpy_res(res_pd, res_np)
+        paddle.disable_static()
+
+    def dygraph_single_test_median(self, lis_test):
+        x, axis, keepdims = lis_test
+        res_np = np.median(x, axis=axis, keepdims=keepdims)
+        if not isinstance(res_np, np.ndarray):
+            res_np = np.array([res_np])
+        res_pd = paddle.median(paddle.to_tensor(x), axis, keepdims)
+        self.check_numpy_res(res_pd.numpy(), res_np)
+
+    def test_median_static(self):
+        h = 3
+        w = 4
+        l = 2
+        x = np.arange(h * w * l).reshape([h, w, l])
+        lis_tests = [[x, axis, keepdims]
+                     for axis in [-1, 0, 1, 2, None]
+                     for keepdims in [False, True]]
+        for lis_test in lis_tests:
+            self.static_single_test_median(lis_test)
+
+    def test_median_dygraph(self):
+        paddle.disable_static()
+        h = 3
+        w = 4
+        l = 2
+        x = np.arange(h * w * l).reshape([h, w, l])
+        lis_tests = [[x, axis, keepdims]
+                     for axis in [-1, 0, 1, 2, None]
+                     for keepdims in [False, True]]
+        for lis_test in lis_tests:
+            self.dygraph_single_test_median(lis_test)
+
+    def test_median_exception(self):
+        paddle.disable_static()
+        x = [1, 2, 3, 4]
+        self.assertRaises(TypeError, paddle.median, x)
+        x = paddle.arange(12).reshape([3, 4])
+        self.assertRaises(ValueError, paddle.median, x, 1.0)
+        self.assertRaises(ValueError, paddle.median, x, 2)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/tensor/__init__.py b/python/paddle/tensor/__init__.py
index eaade22238..43e6c9654c 100755
--- a/python/paddle/tensor/__init__.py
+++ b/python/paddle/tensor/__init__.py
@@ -190,6 +190,7 @@ from .stat import mean  #DEFINE_ALIAS
 from .stat import std  #DEFINE_ALIAS
 from .stat import var  #DEFINE_ALIAS
 from .stat import numel  #DEFINE_ALIAS
+from .stat import median  #DEFINE_ALIAS
 # from .tensor import Tensor        #DEFINE_ALIAS
 # from .tensor import LoDTensor        #DEFINE_ALIAS
 # from .tensor import LoDTensorArray        #DEFINE_ALIAS
diff --git a/python/paddle/tensor/stat.py b/python/paddle/tensor/stat.py
index 24f62bfcd8..5647896066 100644
--- a/python/paddle/tensor/stat.py
+++ b/python/paddle/tensor/stat.py
@@ -14,7 +14,7 @@
 
 # TODO: define statistical functions of a tensor  
 
-__all__ = ['mean', 'std', 'var', 'numel']
+__all__ = ['mean', 'std', 'var', 'numel', 'median']
 
 import numpy as np
 from ..fluid.framework import Variable
@@ -258,3 +258,89 @@ def numel(x, name=None):
         dtype=core.VarDesc.VarType.INT64)
     helper.append_op(type='size', inputs={'Input': x}, outputs={'Out': out})
     return out
+
+
+def median(x, axis=None, keepdim=False, name=None):
+    """
+    Compute the median along the specified axis.
+
+    Args:
+        x (Tensor): The input Tensor, it's data type can be bool, float16, float32, float64, int32, int64.
+        axis (int, optional): The axis along which to perform median calculations ``axis`` should be int.
+            ``axis`` should be in range [-D, D), where D is the dimensions of ``x`` .
+            If ``axis`` is less than 0, it works the same way as :math:`axis + D`.
+            If ``axis`` is None, median is calculated over all elements of ``x``. Default is None.
+        keepdim (bool, optional): Whether to reserve the reduced dimension(s)
+            in the output Tensor. If ``keepdim`` is True, the dimensions of
+            the output Tensor is the same as ``x`` except in the reduced
+            dimensions(it is of size 1 in this case). Otherwise, the shape of
+            the output Tensor is squeezed in ``axis`` . Default is False.
+        name (str, optional): Name for the operation (optional, default is None).
+            For more information, please refer to :ref:`api_guide_Name`.
+
+    Returns:
+        Tensor, results of median along ``axis`` of ``x``. If data type of ``x`` is float64, data type of results will be float64, otherwise data type will be float32.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+
+            x = paddle.arange(12).reshape([3, 4])
+            # x is [[0 , 1 , 2 , 3 ],
+            #       [4 , 5 , 6 , 7 ],
+            #       [8 , 9 , 10, 11]]
+
+            y1 = paddle.median(x)
+            # y1 is [5.5]
+
+            y2 = paddle.median(x, axis=0)
+            # y2 is [4., 5., 6., 7.]
+
+            y3 = paddle.median(x, axis=1)
+            # y3 is [1.5, 5.5, 9.5]
+
+            y4 = paddle.median(x, axis=0, keepdim=True)
+            # y4 is [[4., 5., 6., 7.]]
+
+    """
+    if not isinstance(x, Variable):
+        raise TypeError("In median, the input x should be a Tensor.")
+    is_flatten = axis is None
+    dims = len(x.shape)
+    if is_flatten:
+        x = paddle.flatten(x)
+        axis = 0
+    else:
+        if not isinstance(axis, int) or not (axis < dims and axis >= -dims):
+            raise ValueError(
+                "In median, axis should be none or an integer in range [-rank(x), rank(x))."
+            )
+        if axis < 0:
+            axis += dims
+    sz = x.shape[axis]
+    kth = sz >> 1
+    tensor_topk, idx = paddle.topk(x, kth + 1, axis=axis, largest=False)
+    dtype = 'float64' if x.dtype == core.VarDesc.VarType.FP64 else 'float32'
+    if sz & 1 == 0:
+        out_tensor = paddle.slice(
+            tensor_topk, axes=[axis], starts=[kth - 1],
+            ends=[kth]) + paddle.slice(
+                tensor_topk, axes=[axis], starts=[kth], ends=[kth + 1])
+        out_tensor = paddle.cast(out_tensor, dtype=dtype) / 2
+    else:
+        out_tensor = paddle.cast(
+            paddle.slice(
+                tensor_topk, axes=[axis], starts=[kth], ends=[kth + 1]),
+            dtype=dtype)
+    if not keepdim or is_flatten:
+        if not is_flatten:
+            newshape = x.shape[:axis] + x.shape[axis + 1:]
+        elif not keepdim:
+            newshape = [1]
+        else:
+            newshape = [1] * dims
+    else:
+        newshape = out_tensor.shape
+    out_tensor = out_tensor.reshape(newshape, name=name)
+    return out_tensor
-- 
GitLab