Add variance computing layer for tensor (#23770)

* Add variance computing layer for tensor
* Format input data types
* Remove unnecessary expand operation
* Some fixes in doc

Add variance computing layer for tensor (#23770)
* Add variance computing layer for tensor
* Format input data types
* Remove unnecessary expand operation
* Some fixes in doc
1b5122ba · Yibing Liu · GitHub · 222a5137 · 1b5122ba · 1b5122ba
3 changed files
--- a/python/paddle/__init__.py
+++ b/python/paddle/__init__.py
@@ -63,7 +63,7 @@ from .tensor.creation import full_like  #DEFINE_ALIAS
 # from .tensor.stat import mean   #DEFINE_ALIAS
 # from .tensor.stat import reduce_mean   #DEFINE_ALIAS
 # from .tensor.stat import std   #DEFINE_ALIAS
-# from .tensor.stat import var   #DEFINE_ALIAS
+from .tensor.stat import var  #DEFINE_ALIAS
 from .tensor.logic import equal  #DEFINE_ALIAS
 # from .tensor.logic import greater_equal   #DEFINE_ALIAS
 # from .tensor.logic import greater_than   #DEFINE_ALIAS

--- a/python/paddle/fluid/tests/unittests/test_variance_layer.py
+++ b/python/paddle/fluid/tests/unittests/test_variance_layer.py
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import numpy as np
+import paddle
+import paddle.fluid as fluid
+
+
+class TestVarianceLayer(unittest.TestCase):
+    """Checks paddle.var against numpy.var in static and dygraph modes."""
+
+    def setUp(self):
+        # One random 4-D float64 array is shared by the paddle and numpy paths.
+        self._dtype = "float64"
+        self._input = np.random.random([2, 3, 4, 5]).astype(self._dtype)
+
+    def static(self, axis=None, keepdim=False, unbiased=True):
+        """Runs paddle.var inside a static program on ``self._place``.
+
+        The result is requested through the ``out`` argument and fetched from
+        that variable, so this path also exercises the ``out`` handling.
+
+        Returns:
+            The fetched value of ``out`` after executing the program.
+        """
+        prog = fluid.Program()
+        with fluid.program_guard(prog):
+            data = fluid.data(
+                name="data", dtype=self._dtype, shape=[None, 3, 4, 5])
+            out = prog.current_block().create_var(
+                dtype=self._dtype, shape=[2, 3, 4, 5])
+            paddle.var(input=data,
+                       axis=axis,
+                       keepdim=keepdim,
+                       unbiased=unbiased,
+                       out=out)
+
+        exe = fluid.Executor(self._place)
+        return exe.run(feed={"data": self._input},
+                       program=prog,
+                       fetch_list=[out])[0]
+
+    def dynamic(self, axis=None, keepdim=False, unbiased=True):
+        """Runs paddle.var in dygraph mode on ``self._place``.
+
+        Returns:
+            The result converted with ``.numpy()``.
+        """
+        with fluid.dygraph.guard(self._place):
+            data = fluid.dygraph.to_variable(self._input)
+            out = paddle.var(input=data,
+                             axis=axis,
+                             keepdim=keepdim,
+                             unbiased=unbiased)
+            return out.numpy()
+
+    def numpy(self, axis=None, keepdim=False, unbiased=True):
+        """Computes the reference variance of ``self._input`` with np.var."""
+        # unbiased=True corresponds to ddof=1 (divide by N - 1).
+        ddof = 1 if unbiased else 0
+        # np.var is given a tuple when several axes are requested as a list.
+        axis = tuple(axis) if isinstance(axis, list) else axis
+        return np.var(self._input, axis=axis, keepdims=keepdim, ddof=ddof)
+
+    def test_equal(self):
+        """Compares paddle.var with the numpy reference on every place."""
+        # Always include the CPU place so the assertions also run on builds
+        # without CUDA; with an empty list the loop body would never execute.
+        places = [fluid.CPUPlace()]
+        if fluid.core.is_compiled_with_cuda():
+            places.append(fluid.CUDAPlace(0))
+        for place in places:
+            self._place = place
+            self.assertTrue(np.allclose(self.numpy(), self.static()))
+            self.assertTrue(
+                np.allclose(
+                    self.numpy(axis=[0, 2]), self.dynamic(axis=[0, 2])))
+            self.assertTrue(
+                np.allclose(
+                    self.numpy(
+                        axis=[1, 3], keepdim=True),
+                    self.dynamic(
+                        axis=[1, 3], keepdim=True)))
+            self.assertTrue(
+                np.allclose(
+                    self.numpy(unbiased=False), self.dynamic(unbiased=False)))
+
+
+if __name__ == '__main__':  # run the test cases when executed as a script
+    unittest.main()
--- a/python/paddle/tensor/stat.py
+++ b/python/paddle/tensor/stat.py
@@ -13,4 +13,91 @@
 # limitations under the License.

 # TODO: define statistical functions of a tensor  
-# __all__ = ['mean', 'reduce_mean', 'std', 'var']
+__all__ = [  # 'mean',  (TODO: export once implemented)
+    # 'reduce_mean',  (TODO: export once implemented)
+    # 'std',  (TODO: export once implemented)
+    'var'
+]
+
+import numpy as np
+from ..fluid.layer_helper import LayerHelper
+from ..fluid.framework import in_dygraph_mode
+from ..fluid import layers
+from .search import where
+from ..fluid.data_feeder import convert_dtype
+
+
+def var(input, axis=None, keepdim=False, unbiased=True, out=None, name=None):
+    """
+    Computes the variance of the elements of :attr:`input` along :attr:`axis`.
+
+    Args:
+        input (Variable): The input Variable, with data type float32 or float64.
+        axis (list|int, optional): The axis along which the variance is computed.
+            If `None`, compute the variance over all elements of :attr:`input`
+            and return a Variable with a single element, otherwise it must be in
+            the range :math:`[-rank(input), rank(input))`. If :math:`axis[i] < 0`,
+            the axis to compute is :math:`rank(input) + axis[i]`.
+        keepdim (bool, optional): Whether to reserve the reduced dimensions in
+            the output Variable. The dimensions in :attr:`axis` will be squeezed
+            and the result Variable will have :attr:`len(axis)` fewer dimensions
+            than the :attr:`input` unless :attr:`keepdim` is true, default False.
+        unbiased (bool, optional): Whether to compute variance via the unbiased
+            estimator, in which the divisor used in the computation is
+            :math:`N - 1`, where :math:`N` represents the number of elements
+            along :attr:`axis`, otherwise the divisor is :math:`N`. Default True.
+        out (Variable, optional): Alternate output Variable to store the result
+            variance. Default None.
+        name (str, optional): The name for this layer. Normally there is no need
+            to set it; see :ref:`api_guide_Name`. Default None.
+
+    Returns:
+        Variable: The variance, with the same dtype as :attr:`input`. This is
+            :attr:`out` when it is given, otherwise a new Variable.
+
+    Raises:
+        ValueError: If the dtype of :attr:`input` is not float32 or float64.
+
+    Examples:
+        .. code-block:: python
+
+            import numpy as np
+            import paddle
+            import paddle.fluid.dygraph as dg
+            a = np.array([[1.0, 2.0], [3.0, 4.0]]).astype("float32")
+            with dg.guard():
+                data = dg.to_variable(a)
+                variance = paddle.var(data, axis=[1])
+                print(variance.numpy())  # [0.5 0.5]
+    """
+    dtype = convert_dtype(input.dtype)
+    if dtype not in ["float32", "float64"]:
+        raise ValueError("Layer tensor.var() only supports floating-point "
+                         "dtypes, but received {}.".format(dtype))
+    if isinstance(axis, int):
+        axis = [axis]  # int is documented; the code below iterates over axes
+    rank = len(input.shape)
+    axes = range(rank) if axis is None or axis == [] else axis
+    axes = [e if e >= 0 else e + rank for e in axes]
+    inp_shape = input.shape if in_dygraph_mode() else layers.shape(input)
+    mean = layers.reduce_mean(input, dim=axis, keep_dim=True, name=name)
+    tmp = layers.reduce_mean(
+        (input - mean)**2, dim=axis, keep_dim=keepdim, name=name)
+
+    if unbiased:
+        # Rescale the biased estimate by N / (N - 1), where N is the product of
+        # the reduced dimension sizes; the factor is 0 when N <= 1.
+        n = 1
+        for i in axes:
+            n *= inp_shape[i]
+        if not in_dygraph_mode():
+            n = layers.cast(n, dtype)
+            zero_const = layers.fill_constant(shape=[1], dtype=dtype, value=0.0)
+            factor = where(n > 1.0, n / (n - 1.0), zero_const)
+        else:
+            factor = n / (n - 1.0) if n > 1.0 else 0.0
+        tmp *= factor
+    if out is not None:
+        layers.assign(input=tmp, output=out)
+        return out
+    return tmp