From 85f5d2640970bd74b8f8e74816f0b13641fddb18 Mon Sep 17 00:00:00 2001 From: zhiboniu <31800336+zhiboniu@users.noreply.github.com> Date: Tue, 28 Dec 2021 21:57:46 +0800 Subject: [PATCH] add new API: paddle.cov (#38392) --- .../paddle/fluid/tests/unittests/test_cov.py | 286 ++++++++++++++++++ python/paddle/linalg.py | 2 + python/paddle/tensor/__init__.py | 2 + python/paddle/tensor/linalg.py | 113 +++++++ 4 files changed, 403 insertions(+) create mode 100644 python/paddle/fluid/tests/unittests/test_cov.py diff --git a/python/paddle/fluid/tests/unittests/test_cov.py b/python/paddle/fluid/tests/unittests/test_cov.py new file mode 100644 index 0000000000..93ecf13bdc --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_cov.py @@ -0,0 +1,286 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle.fluid as fluid +import unittest +import numpy as np +import six +import paddle + + +def numpy_cov(np_arr, rowvar=True, ddof=1, fweights=None, aweights=None): + return np.cov(np_arr, + rowvar=rowvar, + ddof=int(ddof), + fweights=fweights, + aweights=aweights) + + +class Cov_Test(unittest.TestCase): + def setUp(self): + self.shape = [20, 10] + self.weightshape = [10] + + def test_tensor_cov_default(self): + typelist = ['float64'] + places = [fluid.CPUPlace()] + if fluid.core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + + for idx, p in enumerate(places): + if idx == 0: + paddle.set_device('cpu') + else: + paddle.set_device('gpu') + + for dtype in typelist: + np_arr = np.random.rand(*self.shape).astype(dtype) + tensor = paddle.to_tensor(np_arr, place=p) + cov = paddle.linalg.cov(tensor, + rowvar=True, + ddof=True, + fweights=None, + aweights=None) + np_cov = numpy_cov( + np_arr, rowvar=True, ddof=1, fweights=None, aweights=None) + self.assertTrue(np.allclose(np_cov, cov.numpy())) + + def test_tensor_cov_rowvar(self): + typelist = ['float64'] + places = [fluid.CPUPlace()] + if fluid.core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + + for idx, p in enumerate(places): + if idx == 0: + paddle.set_device('cpu') + else: + paddle.set_device('gpu') + + for dtype in typelist: + np_arr = np.random.rand(*self.shape).astype(dtype) + tensor = paddle.to_tensor(np_arr, place=p) + cov = paddle.linalg.cov(tensor, + rowvar=False, + ddof=True, + fweights=None, + aweights=None) + np_cov = numpy_cov( + np_arr, rowvar=False, ddof=1, fweights=None, aweights=None) + self.assertTrue(np.allclose(np_cov, cov.numpy())) + + def test_tensor_cov_ddof(self): + typelist = ['float64'] + places = [fluid.CPUPlace()] + if fluid.core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + + for idx, p in enumerate(places): + if idx == 0: + paddle.set_device('cpu') + else: + paddle.set_device('gpu') + + for dtype in typelist: + np_arr = np.random.rand(*self.shape).astype(dtype) + tensor = paddle.to_tensor(np_arr, place=p) + cov = paddle.linalg.cov(tensor, + rowvar=True, + ddof=False, + fweights=None, + aweights=None) + np_cov = numpy_cov( + np_arr, rowvar=True, ddof=0, fweights=None, aweights=None) + self.assertTrue(np.allclose(np_cov, cov.numpy())) + + def test_tensor_cov_fweights(self): + typelist = ['float64'] + places = [fluid.CPUPlace()] + if fluid.core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + + for idx, p in enumerate(places): + if idx == 0: + paddle.set_device('cpu') + else: + paddle.set_device('gpu') + + for dtype in typelist: + np_arr = np.random.rand(*self.shape).astype(dtype) + np_fw = np.random.randint( + 10, size=self.weightshape).astype('int32') + tensor = paddle.to_tensor(np_arr, place=p) + fweights = paddle.to_tensor(np_fw, place=p) + cov = paddle.linalg.cov(tensor, + rowvar=True, + ddof=True, + fweights=fweights, + aweights=None) + np_cov = numpy_cov( + np_arr, rowvar=True, ddof=1, fweights=np_fw, aweights=None) + self.assertTrue(np.allclose(np_cov, cov.numpy())) + + def test_tensor_cov_aweights(self): + typelist = ['float64'] + places = [fluid.CPUPlace()] + if fluid.core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + + for idx, p in enumerate(places): + if idx == 0: + paddle.set_device('cpu') + else: + paddle.set_device('gpu') + + for dtype in typelist: + np_arr = np.random.rand(*self.shape).astype(dtype) + np_aw = np.random.randint( + 10, size=self.weightshape).astype('int32') + tensor = paddle.to_tensor(np_arr, place=p) + aweights = paddle.to_tensor(np_aw, place=p) + cov = paddle.linalg.cov(tensor, + rowvar=True, + ddof=True, + fweights=None, + aweights=aweights) + np_cov = numpy_cov( + np_arr, rowvar=True, ddof=1, fweights=None, aweights=np_aw) + self.assertTrue(np.allclose(np_cov, cov.numpy())) + + def test_tensor_cov_weights(self): + typelist = ['float64'] + places = [fluid.CPUPlace()] + if fluid.core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + + for idx, p in enumerate(places): + if idx == 0: + paddle.set_device('cpu') + else: + paddle.set_device('gpu') + + for dtype in typelist: + np_arr = np.random.rand(*self.shape).astype(dtype) + np_fw = np.random.randint( + 10, size=self.weightshape).astype('int64') + np_aw = np.random.rand(*self.weightshape).astype('float64') + tensor = paddle.to_tensor(np_arr, place=p) + fweights = paddle.to_tensor(np_fw, place=p) + aweights = paddle.to_tensor(np_aw, place=p) + cov = paddle.linalg.cov(tensor, + rowvar=True, + ddof=True, + fweights=fweights, + aweights=aweights) + np_cov = numpy_cov( + np_arr, rowvar=True, ddof=1, fweights=np_fw, aweights=np_aw) + self.assertTrue(np.allclose(np_cov, cov.numpy())) + + +class Cov_Test2(Cov_Test): + def setUp(self): + self.shape = [10] + self.weightshape = [10] + + +# Input(x) only support N-D (1<=N<=2) tensor +class Cov_Test3(unittest.TestCase): + def setUp(self): + self.shape = [2, 5, 10] + self.fweightshape = [10] + self.aweightshape = [10] + self.fw_s = 1. + self.aw_s = 1. + + def test_errors(self): + def test_err(): + np_arr = np.random.rand(*self.shape).astype('float64') + np_fw = self.fw_s * np.random.rand( + *self.fweightshape).astype('int32') + np_aw = self.aw_s * np.random.rand( + *self.aweightshape).astype('float64') + tensor = paddle.to_tensor(np_arr) + fweights = paddle.to_tensor(np_fw) + aweights = paddle.to_tensor(np_aw) + cov = paddle.linalg.cov(tensor, + rowvar=True, + ddof=True, + fweights=fweights, + aweights=aweights) + + self.assertRaises(ValueError, test_err) + + +#Input(fweights) only support N-D (N<=1) tensor +class Cov_Test4(Cov_Test3): + def setUp(self): + self.shape = [5, 10] + self.fweightshape = [2, 10] + self.aweightshape = [10] + self.fw_s = 1. + self.aw_s = 1. + + +#The number of Input(fweights) should equal to x's dim[1] +class Cov_Test5(Cov_Test3): + def setUp(self): + self.shape = [5, 10] + self.fweightshape = [5] + self.aweightshape = [10] + self.fw_s = 1. + self.aw_s = 1. + + +#The value of Input(fweights) cannot be negtive +class Cov_Test6(Cov_Test3): + def setUp(self): + self.shape = [5, 10] + self.fweightshape = [10] + self.aweightshape = [10] + self.fw_s = -1. + self.aw_s = 1. + + +#Input(aweights) only support N-D (N<=1) tensor +class Cov_Test7(Cov_Test3): + def setUp(self): + self.shape = [5, 10] + self.fweightshape = [10] + self.aweightshape = [2, 10] + self.fw_s = 1. + self.aw_s = 1. + + +#The number of Input(aweights) should equal to x's dim[1] +class Cov_Test8(Cov_Test3): + def setUp(self): + self.shape = [5, 10] + self.fweightshape = [10] + self.aweightshape = [5] + self.fw_s = 1. + self.aw_s = 1. + + +#The value of Input(aweights) cannot be negtive +class Cov_Test9(Cov_Test3): + def setUp(self): + self.shape = [5, 10] + self.fweightshape = [10] + self.aweightshape = [10] + self.fw_s = 1. + self.aw_s = -1. + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/linalg.py b/python/paddle/linalg.py index 6b83448d0b..78d82973e0 100644 --- a/python/paddle/linalg.py +++ b/python/paddle/linalg.py @@ -15,6 +15,7 @@ from .tensor.linalg import cholesky # noqa: F401 from .tensor.linalg import norm # noqa: F401 from .tensor.linalg import eig # noqa: F401 +from .tensor.linalg import cov # noqa: F401 from .tensor.linalg import cond # noqa: F401 from .tensor.linalg import matrix_power # noqa: F401 from .tensor.linalg import solve # noqa: F401 @@ -36,6 +37,7 @@ __all__ = [ 'cholesky', #noqa 'norm', 'cond', + 'cov', 'inv', 'eig', 'eigvals', diff --git a/python/paddle/tensor/__init__.py b/python/paddle/tensor/__init__.py index 4780d71a8d..f920f92ae7 100755 --- a/python/paddle/tensor/__init__.py +++ b/python/paddle/tensor/__init__.py @@ -39,6 +39,7 @@ from .creation import empty_like # noqa: F401 from .creation import complex # noqa: F401 from .linalg import matmul # noqa: F401 from .linalg import dot # noqa: F401 +from .linalg import cov # noqa: F401 from .linalg import norm # noqa: F401 from .linalg import cond # noqa: F401 from .linalg import transpose # noqa: F401 @@ -263,6 +264,7 @@ from .einsum import einsum # noqa: F401 tensor_method_func = [ #noqa 'matmul', 'dot', + 'cov', 'norm', 'cond', 'transpose', diff --git a/python/paddle/tensor/linalg.py b/python/paddle/tensor/linalg.py index a8c565f336..37c6043ce5 100644 --- a/python/paddle/tensor/linalg.py +++ b/python/paddle/tensor/linalg.py @@ -920,6 +920,119 @@ def dot(x, y, name=None): return out +def cov(x, rowvar=True, ddof=True, fweights=None, aweights=None, name=None): + """ + Estimate the covariance matrix of the input variables, given data and weights. + + A covariance matrix is a square matrix, indicate the covariance of each pair variables in the input matrix. + For example, for an N-dimensional samples X=[x1,x2,…xN]T, then the covariance matrix + element Cij is the covariance of xi and xj. The element Cii is the variance of xi itself. + + Parameters: + x(Tensor): A N-D(N<=2) Tensor containing multiple variables and observations. By default, each row of x represents a variable. Also see rowvar below. + rowvar(Bool, optional): If rowvar is True (default), then each row represents a variable, with observations in the columns. Default: True + ddof(Bool, optional): If ddof=True will return the unbiased estimate, and ddof=False will return the simple average. Default: True + fweights(Tensor, optional): 1-D Tensor of integer frequency weights; The number of times each observation vector should be repeated. Default: None + aweights(Tensor, optional): 1-D Tensor of observation vector weights. How important of the observation vector, larger data means this element is more important. Default: None + name(str, optional): Name of the output. Default is None. It's used to print debug info for developers. Details: :ref:`api_guide_Name` + + Returns: + Tensor: The covariance matrix Tensor of the variables. + + Examples: + + .. code-block:: python + + import paddle + + xt = paddle.rand((3,4)) + paddle.linalg.cov(xt) + + ''' + Tensor(shape=[3, 3], dtype=float64, place=CUDAPlace(0), stop_gradient=True, + [[0.07918842, 0.06127326, 0.01493049], + [0.06127326, 0.06166256, 0.00302668], + [0.01493049, 0.00302668, 0.01632146]]) + ''' + """ + op_type = 'cov' + if len(x.shape) > 2 or len(x.shape) < 1: + raise ValueError( + "Input(x) only support N-D (1<=N<=2) tensor in cov, but received " + "length of Input(input) is %s." % len(x.shape)) + check_variable_and_dtype(x, 'dtype', ['float32', 'float64'], 'cov') + nx = x + if len(x.shape) == 1: + nx = x.reshape((1, -1)) + if not rowvar and nx.shape[0] != 1: + nx = nx.t() + w = None + observation_num = nx.shape[1] + if fweights is not None: + w = fweights.astype(nx.dtype) + if len(w.shape) > 1: + raise ValueError( + "Input(fweights) only support N-D (N<=1) tensor in cov, but received " + "shape of Input(input) is %s." % len(fweights.shape)) + if fweights.shape[0] != observation_num: + raise ValueError( + "The number of Input(fweights) should equal to x's dim[1]: {}, but received " + "size of Input(fweights) is {}.".format(observation_num, + fweights.shape[0])) + if fweights.min() < 0: + raise ValueError( + "The value of Input(fweights) cannot be negtive, but received " + "min of Input(fweights) is {}.".format(fweights.min())) + if not paddle.all(fweights == paddle.round(fweights.astype('float64'))): + raise ValueError("Input(fweights) must be integer ") + + if aweights is not None: + aw = aweights.astype(nx.dtype) + if len(aw.shape) > 1: + raise ValueError( + "Input(aweights) only support N-D (N<=1) tensor in cov, but received " + "length of Input(input) is %s." % len(aweights.shape)) + check_variable_and_dtype(aweights, 'dtype', ['float32', 'float64'], + 'cov') + if aweights.shape[0] != observation_num: + raise ValueError( + "The number of Input(aweights) should equal to x's dim[1]: {}, but received " + "size of Input(aweights) is {}.".format(observation_num, + aweights.shape[0])) + if aweights.min() < 0: + raise ValueError( + "The value of Input(aweights) cannot be negtive, but received " + "min of Input(aweights) is {}.".format(aweights.min())) + if w is not None: + w = w * aw + else: + w = aw + + w_sum = paddle.to_tensor(observation_num, dtype=nx.dtype) + if fweights is not None or aweights is not None: + w_sum = w.sum() + if w_sum.item() == 0: + raise ValueError("The sum of weights is zero, can't be normalized.") + + if w is not None: + nx_w = nx * w + avg = (nx_w).sum(axis=1) / w_sum + else: + avg = nx.sum(axis=1) / w_sum + nx_w = nx + + if w is not None and aweights is not None and ddof == True: + norm_factor = w_sum - (w * aweights).sum() / w_sum + else: + norm_factor = w_sum - ddof + if norm_factor <= 0: + norm_factor = paddle.to_tensor(0, dtype=nx.dtype) + nx = nx - avg.unsqueeze(1) + xxt = paddle.mm(nx, nx_w.t().conj()) + cov = paddle.divide(xxt, norm_factor).squeeze() + return cov + + def t(input, name=None): """ Transpose <=2-D tensor. -- GitLab