未验证 提交 5b4d786b 编写于 作者: W Wang Xin 提交者: GitHub

【Hackathon No.2】为 Paddle 新增 cdist API (#53836)

上级 49bedfd3
......@@ -126,6 +126,7 @@ from .tensor.linalg import norm # noqa: F401
from .tensor.linalg import transpose # noqa: F401
from .tensor.linalg import dist # noqa: F401
from .tensor.linalg import t # noqa: F401
from .tensor.linalg import cdist # noqa: F401
from .tensor.linalg import cross # noqa: F401
from .tensor.linalg import cholesky # noqa: F401
from .tensor.linalg import bmm # noqa: F401
......@@ -536,6 +537,7 @@ __all__ = [ # noqa
'triu',
'sin',
'dist',
'cdist',
'unbind',
'meshgrid',
'arange',
......
......@@ -70,6 +70,7 @@ from .linalg import solve # noqa: F401
from .linalg import cholesky_solve # noqa: F401
from .linalg import lu # noqa: F401
from .linalg import lu_unpack # noqa: F401
from .linalg import cdist # noqa: F401
from .logic import equal # noqa: F401
from .logic import greater_equal # noqa: F401
from .logic import greater_than # noqa: F401
......@@ -518,6 +519,7 @@ tensor_method_func = [ # noqa
'acosh',
'lu',
'lu_unpack',
'cdist',
'as_complex',
'as_real',
'rad2deg',
......
......@@ -3326,3 +3326,119 @@ def corrcoef(x, rowvar=True, name=None):
c = paddle.clip(c, -1, 1)
return c
def cdist(
x, y, p=2.0, compute_mode="use_mm_for_euclid_dist_if_necessary", name=None
):
r"""
Compute the p-norm distance between each pair of the two collections of inputs.
This function is equivalent to `scipy.spatial.distance.cdist(input,'minkowski', p=p)`
if :math:`p \in (0, \infty)`. When :math:`p = 0` it is equivalent to `scipy.spatial.distance.cdist(input, 'hamming') * M`.
When :math:`p = \infty`, the closest scipy function is `scipy.spatial.distance.cdist(xn, lambda x, y: np.abs(x - y).max())`.
Args:
x (Tensor): A tensor with shape :math:`B \times P \times M`.
y (Tensor): A tensor with shape :math:`B \times R \times M`.
p (float, optional): The value for the p-norm distance to calculate between each vector pair. Default: :math:`2.0`.
compute_mode (str, optional): The mode for compute distance.
- ``use_mm_for_euclid_dist_if_necessary`` , for p = 2.0 and (P > 25 or R > 25), it will use matrix multiplication to calculate euclid distance if possible.
- ``use_mm_for_euclid_dist`` , for p = 2.0, it will use matrix multiplication to calculate euclid distance.
- ``donot_use_mm_for_euclid_dist`` , it will not use matrix multiplication to calculate euclid distance.
Default: ``use_mm_for_euclid_dist_if_necessary``.
name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
Returns:
Tensor, the dtype is same as input tensor.
If x has shape :math:`B \times P \times M` and y has shape :math:`B \times R \times M` then
the output will have shape :math:`B \times P \times R`.
Examples:
.. code-block:: python
import paddle
x = paddle.to_tensor([[0.9041, 0.0196], [-0.3108, -2.4423], [-0.4821, 1.059]], dtype=paddle.float32)
y = paddle.to_tensor([[-2.1763, -0.4713], [-0.6986, 1.3702]], dtype=paddle.float32)
distance = paddle.cdist(x, y)
print(distance)
# Tensor(shape=[3, 2], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [[3.1193, 2.0959], [2.7138, 3.8322], [2.2830, 0.3791]])
"""
check_variable_and_dtype(x, 'x', ('float32', 'float64'), 'cdist')
check_variable_and_dtype(y, 'y', ('float32', 'float64'), 'cdist')
check_type(p, 'p', (float, int), 'cdist')
if compute_mode not in [
'use_mm_for_euclid_dist_if_necessary',
'use_mm_for_euclid_dist',
'donot_use_mm_for_euclid_dist',
]:
raise ValueError(
"The compute_mode should be 'use_mm_for_euclid_dist_if_necessary', "
"'use_mm_for_euclid_dist' or 'donot_use_mm_for_euclid_dist', "
"but received compute_mode is %s." % compute_mode
)
mode = 0
if compute_mode == 'use_mm_for_euclid_dist_if_necessary':
mode = 0
elif compute_mode == 'use_mm_for_euclid_dist':
mode = 1
elif compute_mode == 'donot_use_mm_for_euclid_dist':
mode = 2
x_shape = list(x.shape)
assert len(x_shape) >= 2, (
"The x must be at least 2-dimensional, "
"But received Input x's dimensional is %s.\n" % len(x_shape)
)
y_shape = list(y.shape)
assert len(y_shape) >= 2, (
"The y must be at least 2-dimensional, "
"But received Input y's dimensional is %s.\n" % len(y_shape)
)
assert x_shape[-1] == y_shape[-1], (
"The x and y must have same last dimension, "
"But received Input x's last dimension is {}, "
"Input y's last dimension is {}.\n".format(x_shape[-1], y_shape[-1])
)
assert p >= 0, (
"The p must be greater than or equal to 0, "
"But received p is %s.\n" % p
)
r1 = x.shape[-2]
r2 = y.shape[-2]
c1 = x.shape[-1]
p = float(p)
if r1 == 0 or r2 == 0:
return paddle.empty((r1, r2), dtype=x.dtype)
if c1 == 0:
return paddle.zeros((r1, r2), dtype=x.dtype)
if p == 2.0 and (mode == 1 or (mode == 0 and (r1 > 25 or r2 > 25))):
x_norm = paddle.sum(x.pow(2), axis=-1, keepdim=True)
y_norm = paddle.sum(y.pow(2), axis=-1, keepdim=True)
y_transposed = paddle.transpose(
y, perm=[*range(y.ndim - 2), y.ndim - 1, y.ndim - 2]
)
y_norm_transposed = paddle.transpose(
y_norm,
perm=[*range(y_norm.ndim - 2), y_norm.ndim - 1, y_norm.ndim - 2],
)
res = paddle.matmul(x, y_transposed) * -2 + y_norm_transposed + x_norm
res = paddle.clip(res, min=0.0).sqrt()
return res
return paddle.linalg.norm(
x[..., None, :] - y[..., None, :, :], p=p, axis=-1
)
......@@ -940,9 +940,11 @@ set_tests_properties(test_paddle_save_load_binary PROPERTIES TIMEOUT 120)
if(WIN32)
set_tests_properties(test_static_save_load_large PROPERTIES TIMEOUT 900)
set_tests_properties(test_paddle_save_load PROPERTIES TIMEOUT 250)
set_tests_properties(test_cdist PROPERTIES TIMEOUT 250)
else()
set_tests_properties(test_static_save_load_large PROPERTIES TIMEOUT 600)
set_tests_properties(test_paddle_save_load PROPERTIES TIMEOUT 250)
set_tests_properties(test_cdist PROPERTIES TIMEOUT 120)
endif()
if(WITH_NV_JETSON)
set_tests_properties(test_concat_op PROPERTIES TIMEOUT 1200)
......
# # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
# #
# # Licensed under the Apache License, Version 2.0 (the "License");
# # you may not use this file except in compliance with the License.
# # You may obtain a copy of the License at
# #
# # http://www.apache.org/licenses/LICENSE-2.0
# #
# # Unless required by applicable law or agreed to in writing, software
# # distributed under the License is distributed on an "AS IS" BASIS,
# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# # See the License for the specific language governing permissions and
# # limitations under the License.
import unittest
import numpy as np
import paddle
def ref_cdist(x, y, p=2.0):
r1 = x.shape[-2]
r2 = y.shape[-2]
if r1 == 0 or r2 == 0:
return np.empty((r1, r2), x.dtype)
return np.linalg.norm(x[..., None, :] - y[..., None, :, :], ord=p, axis=-1)
class TestCdistAPI(unittest.TestCase):
def setUp(self):
np.random.seed(1024)
self.x = np.random.rand(10, 20).astype('float32')
self.y = np.random.rand(11, 20).astype('float32')
self.p = 2.0
self.compute_mode = "use_mm_for_euclid_dist_if_necessary"
self.init_input()
self.place = (
paddle.CUDAPlace(0)
if paddle.is_compiled_with_cuda()
else paddle.CPUPlace()
)
def init_input(self):
pass
def test_static_api(self):
paddle.enable_static()
with paddle.static.program_guard(paddle.static.Program()):
x = paddle.static.data('x', self.x.shape, dtype=self.x.dtype)
y = paddle.static.data('y', self.y.shape, dtype=self.y.dtype)
out = paddle.cdist(x, y, self.p, self.compute_mode)
exe = paddle.static.Executor(self.place)
res = exe.run(feed={'x': self.x, 'y': self.y}, fetch_list=[out])
out_ref = ref_cdist(self.x, self.y, self.p)
np.testing.assert_allclose(out_ref, res[0], rtol=1e-5, atol=1e-5)
def test_dygraph_api(self):
paddle.disable_static(self.place)
x = paddle.to_tensor(self.x)
y = paddle.to_tensor(self.y)
out = paddle.cdist(x, y, self.p, self.compute_mode)
out_ref = ref_cdist(self.x, self.y, self.p)
np.testing.assert_allclose(out_ref, out.numpy(), rtol=1e-5, atol=1e-5)
paddle.enable_static()
class TestCdistAPICase1(TestCdistAPI):
def init_input(self):
self.p = 0
class TestCdistAPICase2(TestCdistAPI):
def init_input(self):
self.p = 1.0
class TestCdistAPICase3(TestCdistAPI):
def init_input(self):
self.p = 3.0
class TestCdistAPICase4(TestCdistAPI):
def init_input(self):
self.p = 1.5
class TestCdistAPICase5(TestCdistAPI):
def init_input(self):
self.p = 2.5
class TestCdistAPICase6(TestCdistAPI):
def init_input(self):
self.p = float('inf')
class TestCdistAPICase7(TestCdistAPI):
def init_input(self):
self.x = np.random.rand(50, 20).astype('float64')
self.y = np.random.rand(40, 20).astype('float64')
self.compute_mode = "use_mm_for_euclid_dist"
class TestCdistAPICase8(TestCdistAPI):
def init_input(self):
self.x = np.random.rand(50, 20).astype('float64')
self.y = np.random.rand(40, 20).astype('float64')
self.compute_mode = "donot_use_mm_for_euclid_dist"
class TestCdistAPICase9(TestCdistAPI):
def init_input(self):
self.x = np.random.rand(500, 100).astype('float64')
self.y = np.random.rand(400, 100).astype('float64')
class TestCdistAPICase10(TestCdistAPI):
def init_input(self):
self.x = np.random.rand(3, 500, 100).astype('float64')
self.y = np.random.rand(3, 400, 100).astype('float64')
class TestCdistAPICase11(TestCdistAPI):
def init_input(self):
self.x = np.random.rand(3, 4, 500, 100).astype('float64')
self.y = np.random.rand(3, 4, 400, 100).astype('float64')
class TestCdistAPICase12(TestCdistAPI):
def init_input(self):
self.x = np.random.rand(3, 4, 500, 100).astype('float64')
self.y = np.random.rand(3, 4, 400, 100).astype('float64')
self.p = 3.0
# test for different compute mode output same result
class TestCdistAPICase13(TestCdistAPI):
def init_input(self):
self.x = np.random.rand(3, 4, 500, 100).astype('float64')
self.y = np.random.rand(3, 4, 400, 100).astype('float64')
def test_static_api(self):
paddle.enable_static()
with paddle.static.program_guard(paddle.static.Program()):
x = paddle.static.data('x', self.x.shape, dtype=self.x.dtype)
y = paddle.static.data('y', self.y.shape, dtype=self.y.dtype)
out0 = paddle.cdist(x, y, self.p, self.compute_mode)
out1 = paddle.cdist(x, y, self.p, "donot_use_mm_for_euclid_dist")
out2 = paddle.cdist(x, y, self.p, "use_mm_for_euclid_dist")
exe = paddle.static.Executor(self.place)
res = exe.run(
feed={'x': self.x, 'y': self.y}, fetch_list=[out0, out1, out2]
)
out_ref = ref_cdist(self.x, self.y, self.p)
np.testing.assert_allclose(out_ref, res[0])
np.testing.assert_allclose(out_ref, res[2])
np.testing.assert_allclose(out_ref, res[2])
def test_dygraph_api(self):
paddle.disable_static(self.place)
x = paddle.to_tensor(self.x)
y = paddle.to_tensor(self.y)
out0 = paddle.cdist(x, y, self.p, self.compute_mode)
out1 = paddle.cdist(x, y, self.p, "donot_use_mm_for_euclid_dist")
out2 = paddle.cdist(x, y, self.p, "use_mm_for_euclid_dist")
out_ref = ref_cdist(self.x, self.y, self.p)
np.testing.assert_allclose(out_ref, out0.numpy())
np.testing.assert_allclose(out_ref, out1.numpy())
np.testing.assert_allclose(out_ref, out2.numpy())
paddle.enable_static()
# test for broadcast
class TestCdistAPICase14(TestCdistAPI):
def init_input(self):
self.x = np.random.rand(3, 4, 500, 100).astype('float64')
self.y = np.random.rand(1, 4, 400, 100).astype('float64')
# test for broadcast
class TestCdistAPICase15(TestCdistAPI):
def init_input(self):
self.x = np.random.rand(3, 4, 500, 100).astype('float64')
self.y = np.random.rand(4, 400, 100).astype('float64')
if __name__ == '__main__':
paddle.enable_static()
unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册