diff --git a/paddle/fluid/operators/dist_op.cc b/paddle/fluid/operators/dist_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..0e9bddf01e82f82d15d2d4bbe481009898f7c414
--- /dev/null
+++ b/paddle/fluid/operators/dist_op.cc
@@ -0,0 +1,119 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/operators/dist_op.h"
+#include <string>
+#include <vector>
+#include "paddle/fluid/framework/op_registry.h"
+namespace paddle {
+namespace operators {
+
+class DistOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext *ctx) const override {
+    OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "Dist");
+    OP_INOUT_CHECK(ctx->HasInput("Y"), "Input", "Y", "Dist");
+    OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "Dist");
+    ctx->SetOutputDim("Out", {1});
+  }
+};
+
+class DistOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override {
+    AddInput("X", "The input Tensor of Dist Op.");
+    AddInput("Y", "The Right-hand-side input Tensor of Dist Op.");
+    AddOutput("Out",
+              "The output of Dist Op, "
+              "which is the p-norm of (X - Y)");
+    AddAttr<float>("p", "the norm to be computed.").SetDefault(2.0f);
+    AddComment(R"DOC(
+Dist Operator.
+Given two tensors X and Y whose shapes are broadcastable, this operator computes
+the p-norm of Z = X - Y. The result is not a norm in the strict mathematical
+sense; it is used only as a measure of distance.
+
+When p = 0, defining $0^0 = 0$, the zero-norm of Z is simply the number of
+non-zero elements of Z.
+$$
+||Z||_{0} = \lim_{p \rightarrow 0} \sum_{i=1}^{m} |z_i|^p
+$$
+
+When p = inf, the inf-norm of Z is the maximum absolute value of its elements.
+$$
+||Z||_\infty = \max_i |z_i|
+$$
+
+When p = -inf, the negative-inf-norm of Z is the minimum absolute value of its elements.
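+
+// The gradient op below infers dX and dY with exactly the shapes of X and Y;
+// when an input was broadcast in the forward pass, the gradient kernel in
+// dist_op.h reduces the incoming gradient back to that shape.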
+$$
+||Z||_{-\infty} = \min_i |z_i|
+$$
+
+Otherwise, the p-norm of Z follows the formula
+$$
+||Z||_{p} = (\sum_{i=1}^{m} |z_i|^p)^{1/p}
+$$
+    )DOC");
+  }
+};
+
+class DistOpGrad : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext *ctx) const override {
+    auto x_dims = ctx->GetInputDim("X");
+    auto y_dims = ctx->GetInputDim("Y");
+    if (ctx->HasOutput(framework::GradVarName("X"))) {
+      ctx->SetOutputDim(framework::GradVarName("X"), x_dims);
+    }
+    if (ctx->HasOutput(framework::GradVarName("Y"))) {
+      ctx->SetOutputDim(framework::GradVarName("Y"), y_dims);
+    }
+  }
+};
+
+template <typename T>
+class DistGradOpMaker : public framework::SingleGradOpMaker<T> {
+ public:
+  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
+
+ protected:
+  void Apply(GradOpPtr<T> op) const override {
+    op->SetType(this->ForwardOpType() + "_grad");
+    op->SetInput("X", this->Input("X"));
+    op->SetInput("Y", this->Input("Y"));
+    op->SetInput("Out", this->Output("Out"));
+    op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
+
+    op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
+    op->SetOutput(framework::GradVarName("Y"), this->InputGrad("Y"));
+    op->SetAttrMap(this->Attrs());
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OPERATOR(dist, ops::DistOp, ops::DistOpMaker,
+                  ops::DistGradOpMaker<paddle::framework::OpDesc>,
+                  ops::DistGradOpMaker<paddle::imperative::OpBase>);
+REGISTER_OPERATOR(dist_grad, ops::DistOpGrad);
+REGISTER_OP_CPU_KERNEL(
+    dist, ops::DistKernel<paddle::platform::CPUDeviceContext, float>,
+    ops::DistKernel<paddle::platform::CPUDeviceContext, double>);
+REGISTER_OP_CPU_KERNEL(
+    dist_grad, ops::DistGradKernel<paddle::platform::CPUDeviceContext, float>,
+    ops::DistGradKernel<paddle::platform::CPUDeviceContext, double>)
diff --git a/paddle/fluid/operators/dist_op.cu b/paddle/fluid/operators/dist_op.cu
new file mode 100644
index 0000000000000000000000000000000000000000..499f5572910dd7666973bf077bf919a0378cfe52
--- /dev/null
+++ b/paddle/fluid/operators/dist_op.cu
@@ -0,0 +1,23 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/operators/dist_op.h"
+
+namespace ops = paddle::operators;
+REGISTER_OP_CUDA_KERNEL(
+    dist, ops::DistKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::DistKernel<paddle::platform::CUDADeviceContext, double>);
+REGISTER_OP_CUDA_KERNEL(
+    dist_grad, ops::DistGradKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::DistGradKernel<paddle::platform::CUDADeviceContext, double>);
diff --git a/paddle/fluid/operators/dist_op.h b/paddle/fluid/operators/dist_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..b35b4ce1d2787c801c8f0af46d1f7125d6ddb5ad
--- /dev/null
+++ b/paddle/fluid/operators/dist_op.h
@@ -0,0 +1,286 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <cmath>
+#include <vector>
+#include "paddle/fluid/framework/eigen.h"
+#include "paddle/fluid/framework/op_registry.h"
+
+namespace paddle {
+namespace operators {
+
+template <typename T, size_t D, int MajorType = Eigen::RowMajor,
+          typename IndexType = Eigen::DenseIndex>
+using EigenTensor = framework::EigenTensor<T, D, MajorType, IndexType>;
+using framework::Tensor;
+
+template <int Rank>
+static void GetBroadcastDims(const framework::DDim& x_dims,
+                             const framework::DDim& y_dims,
+                             Eigen::DSizes<int, Rank>* x_bcast_dims,
+                             Eigen::DSizes<int, Rank>* y_bcast_dims) {
+  int bcast_dims_remainder = 0;
+  for (int i = 0; i < x_dims.size(); ++i) {
+    if (x_dims[i] >= y_dims[i]) {
+      (*x_bcast_dims)[i] = 1;
+      (*y_bcast_dims)[i] = x_dims[i] / y_dims[i];
+      bcast_dims_remainder += x_dims[i] % y_dims[i];
+    } else {
+      (*y_bcast_dims)[i] = 1;
+      (*x_bcast_dims)[i] = y_dims[i] / x_dims[i];
+      bcast_dims_remainder += y_dims[i] % x_dims[i];
+    }
+  }
+  PADDLE_ENFORCE_EQ(bcast_dims_remainder, 0,
+                    platform::errors::PreconditionNotMet(
+                        "The input tensors of Op(dist) could not be broadcast, "
+                        "X's shape is [%s], Y's shape is [%s].",
+                        x_dims, y_dims));
+}
+
+static framework::DDim GetNewDims(const framework::DDim& in_dims, int rank) {
+  std::vector<int64_t> new_dims_vec(rank);
+  if (in_dims.size() < rank) {
+    for (int i = 0; i < rank - in_dims.size(); ++i) {
+      new_dims_vec[i] = 1;
+    }
+    for (int i = 0; i < in_dims.size(); ++i) {
+      new_dims_vec[i + rank - in_dims.size()] = in_dims[i];
+    }
+  } else {
+    new_dims_vec = vectorize(in_dims);
+  }
+  return framework::make_ddim(new_dims_vec);
+}
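+
+// For example, with Rank = 3, X of shape (2, 4, 3) and Y of shape (4, 1) are
+// first padded by GetNewDims to (2, 4, 3) and (1, 4, 1); GetBroadcastDims then
+// yields x_bcast_dims = (1, 1, 1) and y_bcast_dims = (2, 1, 3), i.e. the factor
+// by which each input has to be tiled along every axis to reach the common
+// shape (2, 4, 3).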
+
+template <typename DeviceContext, typename T, int Rank>
+static void DistFunction(const framework::ExecutionContext& context) {
+  auto* x = context.Input<Tensor>("X");
+  auto* y = context.Input<Tensor>("Y");
+  auto* out = context.Output<Tensor>("Out");
+  auto p = context.Attr<float>("p");
+  out->mutable_data<T>(context.GetPlace());
+
+  auto x_dims = context.Input<Tensor>("X")->dims();
+  auto y_dims = context.Input<Tensor>("Y")->dims();
+
+  // new dims with same size as rank, e.g. (rank=3, (4, 3) => (1, 4, 3))
+  framework::DDim x_new_dims = GetNewDims(x_dims, Rank);
+  framework::DDim y_new_dims = GetNewDims(y_dims, Rank);
+
+  auto x_t = EigenTensor<T, Rank>::From(*x, x_new_dims);
+  auto y_t = EigenTensor<T, Rank>::From(*y, y_new_dims);
+  auto out_t = EigenTensor<T, 1>::From(*out);
+  auto& place =
+      *context.template device_context<DeviceContext>().eigen_device();
+
+  Eigen::DSizes<int, Rank> x_bcast_dims;
+  Eigen::DSizes<int, Rank> y_bcast_dims;
+  GetBroadcastDims(x_new_dims, y_new_dims, &x_bcast_dims, &y_bcast_dims);
+  // p=0 means number of non-zero elements of (x-y)
+  // p=inf means the maximum of |x-y|
+  // p=-inf means the minimum of |x-y|
+  // otherwise, Lp-norm = pow(sum(pow(|x-y|, p)), 1/p)
+  if (p == 0) {
+    out_t.device(place) =
+        (x_t.broadcast(x_bcast_dims) != y_t.broadcast(y_bcast_dims))
+            .template cast<T>()
+            .sum();
+  } else if (p == INFINITY) {
+    out_t.device(place) =
+        (x_t.broadcast(x_bcast_dims) - y_t.broadcast(y_bcast_dims))
+            .abs()
+            .maximum();
+  } else if (p == -INFINITY) {
+    out_t.device(place) =
+        (x_t.broadcast(x_bcast_dims) - y_t.broadcast(y_bcast_dims))
+            .abs()
+            .minimum();
+  } else {
+    out_t.device(place) =
+        (x_t.broadcast(x_bcast_dims) - y_t.broadcast(y_bcast_dims))
+            .abs()
+            .pow(p)
+            .sum()
+            .pow(1.0 / p);
+  }
+}
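+
+// Gradient of the p-norm: for out = (sum_i |z_i|^p)^(1/p) with z = x - y,
+//   d out / d z_i = (|z_i| / out)^(p - 1) * sign(z_i),
+// which is the general branch below. For p = +/-inf only the elements that
+// attain the extremum receive gradient, and for p = 0 the output is piecewise
+// constant, so the gradient is zero everywhere.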
+
+template <typename DeviceContext, typename T, int Rank>
+static void DistGradFunction(const framework::ExecutionContext& context) {
+  auto* x = context.Input<Tensor>("X");
+  auto* y = context.Input<Tensor>("Y");
+  auto* out = context.Input<Tensor>("Out");
+  auto p = context.Attr<float>("p");
+
+  auto x_grad = context.Output<Tensor>(framework::GradVarName("X"));
+  auto y_grad = context.Output<Tensor>(framework::GradVarName("Y"));
+  auto out_grad = context.Input<Tensor>(framework::GradVarName("Out"));
+
+  auto x_dims = context.Input<Tensor>("X")->dims();
+  auto y_dims = context.Input<Tensor>("Y")->dims();
+  auto out_dims = context.Input<Tensor>("Out")->dims();
+
+  framework::DDim x_new_dims = GetNewDims(x_dims, Rank);
+  framework::DDim y_new_dims = GetNewDims(y_dims, Rank);
+  framework::DDim out_new_dims = GetNewDims(out_dims, Rank);
+  auto x_t = EigenTensor<T, Rank>::From(*x, x_new_dims);
+  auto y_t = EigenTensor<T, Rank>::From(*y, y_new_dims);
+  auto out_t = EigenTensor<T, Rank>::From(*out, out_new_dims);
+
+  Eigen::DSizes<int, Rank> x_bcast_dims;
+  Eigen::DSizes<int, Rank> y_bcast_dims;
+  Eigen::DSizes<int, Rank> out_bcast_dims;
+
+  GetBroadcastDims(x_new_dims, y_new_dims, &x_bcast_dims, &y_bcast_dims);
+  std::vector<int64_t> new_dims_vec(Rank);
+  for (int i = 0; i < Rank; ++i) {
+    new_dims_vec[i] = std::max(x_new_dims[i], y_new_dims[i]);
+    out_bcast_dims[i] = new_dims_vec[i];
+  }
+  framework::DDim new_dims = framework::make_ddim(new_dims_vec);
+
+  auto& place =
+      *context.template device_context<DeviceContext>().eigen_device();
+  auto out_grad_t = EigenTensor<T, Rank>::From(*out_grad, out_new_dims);
+  framework::Tensor grad;
+  grad.mutable_data<T>(new_dims, context.GetPlace());
+  auto grad_t = EigenTensor<T, Rank>::From(grad);
+
+  auto x_minus_y = x_t.broadcast(x_bcast_dims) - y_t.broadcast(y_bcast_dims);
+  auto x_minus_y_abs = x_minus_y.abs();
+  auto sign =
+      (x_minus_y > static_cast<T>(0)).template cast<T>() * static_cast<T>(1.0) +
+      (x_minus_y < static_cast<T>(0)).template cast<T>() * static_cast<T>(-1.0);
+
+  // 1: Lp-norm(z), z = x-y, compute dz
+  if (p == 0) {
+    grad_t.device(place) = grad_t * static_cast<T>(0);
+  } else if (p == INFINITY || p == -INFINITY) {
+    // For p = inf or -inf, out = |z_i| of the extremal element i; dz_j is 0
+    // for j != i and sign(z_i) * dout for j = i.
+    grad_t.device(place) =
+        (x_minus_y_abs == out_t.broadcast(out_bcast_dims)).template cast<T>() *
+        sign * out_grad_t.broadcast(out_bcast_dims);
+  } else {
+    // dz = pow(abs(x-y)/out, p-1) * sign(x-y) * dout
+    grad_t.device(place) =
+        (x_minus_y_abs / out_t.broadcast(out_bcast_dims)).pow(p - 1) * sign *
+        out_grad_t.broadcast(out_bcast_dims);
+  }
+
+  Eigen::DSizes<int, Rank * 2> x_reshape_dims;
+  Eigen::DSizes<int, Rank * 2> y_reshape_dims;
+  Eigen::DSizes<int, Rank> reduce_dims;
+  for (int i = 0; i < x_new_dims.size(); ++i) {
+    x_reshape_dims[2 * i] = x_bcast_dims[i];
+    x_reshape_dims[2 * i + 1] = x_new_dims[i];
+    y_reshape_dims[2 * i] = y_bcast_dims[i];
+    y_reshape_dims[2 * i + 1] = y_new_dims[i];
+    reduce_dims[i] = 2 * i;
+  }
+
+  // 2: if x or y was broadcast in the forward pass, its gradient has to be
+  // summed along the broadcast dimensions
+  if (x_grad) {
+    x_grad->mutable_data<T>(context.GetPlace());
+    auto x_grad_t = EigenTensor<T, Rank>::From(*x_grad, x_new_dims);
+    x_grad_t.device(place) = grad_t.reshape(x_reshape_dims)
+                                 .sum(reduce_dims)
+                                 .reshape(x_grad_t.dimensions());
+  }
+  if (y_grad) {
+    y_grad->mutable_data<T>(context.GetPlace());
+    auto y_grad_t = EigenTensor<T, Rank>::From(*y_grad, y_new_dims);
+    y_grad_t.device(place) = -grad_t.reshape(y_reshape_dims)
+                                  .sum(reduce_dims)
+                                  .reshape(y_grad_t.dimensions());
+  }
+}
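+
+// Eigen broadcast/reshape expressions need the tensor rank at compile time, so
+// the kernels below look at the (maximum) runtime rank and dispatch to
+// DistFunction / DistGradFunction instantiated for ranks 1 through 6.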
+
+template <typename DeviceContext, typename T>
+class DistKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto x_rank = context.Input<Tensor>("X")->dims().size();
+    auto y_rank = context.Input<Tensor>("Y")->dims().size();
+    auto rank = std::max(x_rank, y_rank);
+    PADDLE_ENFORCE_LE(rank, 6,
+                      platform::errors::Unimplemented(
+                          "Op(dist) only supports tensors with no more than 6 "
+                          "dimensions, but X's rank is %d, Y's rank is %d.",
+                          x_rank, y_rank));
+    switch (rank) {
+      case 1:
+        DistFunction<DeviceContext, T, 1>(context);
+        break;
+      case 2:
+        DistFunction<DeviceContext, T, 2>(context);
+        break;
+      case 3:
+        DistFunction<DeviceContext, T, 3>(context);
+        break;
+      case 4:
+        DistFunction<DeviceContext, T, 4>(context);
+        break;
+      case 5:
+        DistFunction<DeviceContext, T, 5>(context);
+        break;
+      case 6:
+        DistFunction<DeviceContext, T, 6>(context);
+        break;
+    }
+  }
+};
+
+template <typename DeviceContext, typename T>
+class DistGradKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto x_rank = context.Input<Tensor>("X")->dims().size();
+    auto y_rank = context.Input<Tensor>("Y")->dims().size();
+    auto rank = std::max(x_rank, y_rank);
+    PADDLE_ENFORCE_LE(rank, 6,
+                      platform::errors::Unimplemented(
+                          "Op(dist) only supports tensors with no more than 6 "
+                          "dimensions, but X's rank is %d, Y's rank is %d.",
+                          x_rank, y_rank));
+    switch (rank) {
+      case 1:
+        DistGradFunction<DeviceContext, T, 1>(context);
+        break;
+      case 2:
+        DistGradFunction<DeviceContext, T, 2>(context);
+        break;
+      case 3:
+        DistGradFunction<DeviceContext, T, 3>(context);
+        break;
+      case 4:
+        DistGradFunction<DeviceContext, T, 4>(context);
+        break;
+      case 5:
+        DistGradFunction<DeviceContext, T, 5>(context);
+        break;
+      case 6:
+        DistGradFunction<DeviceContext, T, 6>(context);
+        break;
+    }
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py
index 72609882d713414d910b42a15c3706383dc7dd4d..32c4c68168da1a893706331e082f68d22f3a7674 100644
--- a/python/paddle/__init__.py
+++ b/python/paddle/__init__.py
@@ -152,7 +152,7 @@ from .tensor.linalg import matmul #DEFINE_ALIAS
 # from .tensor.linalg import einsum #DEFINE_ALIAS
 # from .tensor.linalg import morm #DEFINE_ALIAS
 # from .tensor.linalg import transpose #DEFINE_ALIAS
-# from .tensor.linalg import dist #DEFINE_ALIAS
+from .tensor.linalg import dist #DEFINE_ALIAS
 # from .tensor.linalg import t #DEFINE_ALIAS
 # from .tensor.linalg import cross #DEFINE_ALIAS
 # from .tensor.linalg import cholesky #DEFINE_ALIAS
diff --git a/python/paddle/fluid/tests/unittests/test_dist_op.py b/python/paddle/fluid/tests/unittests/test_dist_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..1f46e0e7f9ca97409a7c6ea634ed96421e593f5f
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_dist_op.py
@@ -0,0 +1,165 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import numpy as np
+from op_test import OpTest
+import paddle
+import paddle.fluid as fluid
+import paddle.fluid.core as core
+
+
+def dist(x, y, p):
+    if p == 0.:
+        out = np.count_nonzero(x - y)
+    elif p == float("inf"):
+        out = np.max(np.abs(x - y))
+    elif p == float("-inf"):
+        out = np.min(np.abs(x - y))
+    else:
+        out = np.power(np.sum(np.power(np.abs(x - y), p)), 1.0 / p)
+    return np.array(out).astype(x.dtype)
+
+
+class TestDistOp(OpTest):
+    def setUp(self):
+        self.op_type = 'dist'
+        self.attrs = {}
+        self.init_case()
+        self.inputs = {
+            "X": np.random.random(self.x_shape).astype("float64"),
+            "Y": np.random.random(self.y_shape).astype("float64")
+        }
+
+        self.attrs["p"] = self.p
+        self.outputs = {
+            "Out": dist(self.inputs["X"], self.inputs["Y"], self.attrs["p"])
+        }
+        self.gradient = self.calc_gradient()
+
+    def init_case(self):
+        self.x_shape = (120)
+        self.y_shape = (120)
+        self.p = 0.
+
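+    # Reference gradient of the p-norm w.r.t. x (the gradient w.r.t. y is its
+    # negative): d||z||_p/dz_i = (|z_i| / ||z||_p)^(p - 1) * sign(z_i). For
+    # p = +/-inf only the extremal elements receive gradient; for p = 0 the
+    # gradient is zero. Gradients of broadcast inputs are reduced over the
+    # broadcast axes.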
+    def calc_gradient(self):
+        x = self.inputs["X"]
+        y = self.inputs["Y"]
+        p = self.attrs["p"]
+        if p == 0:
+            grad = np.zeros(x.shape)
+        elif p in [float("inf"), float("-inf")]:
+            norm = dist(x, y, p)
+            x_minus_y_abs = np.abs(x - y)
+            grad = np.sign(x - y)
+            grad[x_minus_y_abs != norm] = 0
+        else:
+            norm = dist(x, y, p)
+            grad = np.power(norm, 1 - p) * np.power(np.abs(x - y),
+                                                    p - 1) * np.sign(x - y)
+
+        def get_reduce_dims(x, y):
+            x_reduce_dims = []
+            y_reduce_dims = []
+
+            if x.ndim >= y.ndim:
+                y_reshape = tuple([1] * (x.ndim - y.ndim) + list(y.shape))
+                y = y.reshape(y_reshape)
+            else:
+                x_reshape = tuple([1] * (y.ndim - x.ndim) + list(x.shape))
+                x = x.reshape(x_reshape)
+            for i in range(x.ndim):
+                if x.shape[i] > y.shape[i]:
+                    y_reduce_dims.append(i)
+                elif x.shape[i] < y.shape[i]:
+                    x_reduce_dims.append(i)
+            return x_reduce_dims, y_reduce_dims
+
+        x_reduce_dims, y_reduce_dims = get_reduce_dims(x, y)
+        if len(x_reduce_dims) != 0:
+            x_grad = np.sum(grad, tuple(x_reduce_dims)).reshape(x.shape)
+        else:
+            x_grad = grad
+        if len(y_reduce_dims) != 0:
+            y_grad = -np.sum(grad, tuple(y_reduce_dims)).reshape(y.shape)
+        else:
+            y_grad = -grad
+
+        return x_grad, y_grad
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad(self):
+        self.check_grad(["X", "Y"], "Out", user_defined_grads=self.gradient)
+
+
+class TestDistOpCase1(TestDistOp):
+    def init_case(self):
+        self.x_shape = (3, 5, 5, 6)
+        self.y_shape = (5, 5, 6)
+        self.p = 1.
+
+
+class TestDistOpCase2(TestDistOp):
+    def init_case(self):
+        self.x_shape = (10, 10)
+        self.y_shape = (4, 10, 10)
+        self.p = 2.
+
+
+class TestDistOpCase3(TestDistOp):
+    def init_case(self):
+        self.x_shape = (15, 10)
+        self.y_shape = (15, 10)
+        self.p = float("inf")
+
+
+class TestDistOpCase4(TestDistOp):
+    def init_case(self):
+        self.x_shape = (2, 3, 4, 5, 8)
+        self.y_shape = (3, 1, 5, 8)
+        self.p = float("-inf")
+
+
+class TestDistOpCase5(TestDistOp):
+    def init_case(self):
+        self.x_shape = (4, 1, 4, 8)
+        self.y_shape = (2, 2, 1, 4, 4, 8)
+        self.p = 1.5
+
+
+class TestDistAPI(unittest.TestCase):
+    def test_api(self):
+        main_program = fluid.Program()
+        startup_program = fluid.Program()
+        with fluid.program_guard(main_program, startup_program):
+            x = fluid.data(name='x', shape=[2, 3, 4, 5], dtype='float64')
+            y = fluid.data(name='y', shape=[3, 1, 5], dtype='float64')
+            p = 2
+            x_i = np.random.random((2, 3, 4, 5)).astype("float64")
+            y_i = np.random.random((3, 1, 5)).astype("float64")
+            result = paddle.dist(x, y, p)
+            place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
+            ) else fluid.CPUPlace()
+            exe = fluid.Executor(place)
+            out = exe.run(fluid.default_main_program(),
+                          feed={'x': x_i,
+                                'y': y_i},
+                          fetch_list=[result])
+            self.assertTrue(np.allclose(dist(x_i, y_i, p), out[0]))
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/tensor/__init__.py b/python/paddle/tensor/__init__.py
index fb4296f0013390b8cf03b636c1d032bf07ec8f91..40476b49a371a5881ae627d443c8678af196babb 100644
--- a/python/paddle/tensor/__init__.py
+++ b/python/paddle/tensor/__init__.py
@@ -127,7 +127,7 @@ from .linalg import matmul #DEFINE_ALIAS
 # from .linalg import einsum #DEFINE_ALIAS
 # from .linalg import morm #DEFINE_ALIAS
 # from .linalg import transpose #DEFINE_ALIAS
-# from .linalg import dist #DEFINE_ALIAS
+from .linalg import dist #DEFINE_ALIAS
 # from .linalg import t #DEFINE_ALIAS
 # from .linalg import cross #DEFINE_ALIAS
 # from .linalg import cholesky #DEFINE_ALIAS
diff --git a/python/paddle/tensor/linalg.py b/python/paddle/tensor/linalg.py
index 974bc0d37a0bad3b42e550c2ceb42203152ba05f..7baba355180ccecd3119999f6b2cfd91b7e350c4 100644
--- a/python/paddle/tensor/linalg.py
+++ b/python/paddle/tensor/linalg.py
@@ -12,6 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from paddle.common_ops_import import *
+from ..fluid.layer_helper import LayerHelper
+from ..fluid.data_feeder import check_variable_and_dtype, check_type
+from ..fluid.framework import in_dygraph_mode
 
 # TODO: define functions of linear algebra
 __all__ = [
@@ -20,7 +23,7 @@ __all__ = [
     # 'einsum',
     # 'morm',
     # 'transpose',
-    # 'dist',
+    'dist',
     # 't',
     # 'cross',
     # 'cholesky',
@@ -156,3 +159,78 @@ def matmul(x, y, transpose_x=False, transpose_y=False, alpha=1.0, name=None):
         outputs={'Out': out},
         attrs=attrs)
     return out
+
+
+def dist(x, y, p=2):
+    """
+    This OP returns the p-norm of z = x - y. The result is not a norm in the
+    strict mathematical sense; it is used only as a measure of distance. The
+    shapes of x and y must be broadcastable.
+
+    When p = 0, defining $0^0 = 0$, the zero-norm of z is simply the number of
+    non-zero elements of z.
+
+    .. math::
+
+        ||z||_{0} = \lim_{p \\rightarrow 0} \sum_{i=1}^{m} |z_i|^{p}
+
+    When p = inf, the inf-norm of z is the maximum absolute value of its elements.
+
+    .. math::
+
+        ||z||_\infty = \max_i |z_i|
+
+    When p = -inf, the negative-inf-norm of z is the minimum absolute value of its elements.
+
+    .. math::
+
+        ||z||_{-\infty} = \min_i |z_i|
+
+    Otherwise, the p-norm of z follows the formula
+
+    .. math::
+
+        ||z||_{p} = (\sum_{i=1}^{m} |z_i|^p)^{\\frac{1}{p}}
+
+    Args:
+        x (Variable): 1-D to 6-D Tensor, its data type is float32 or float64.
+        y (Variable): 1-D to 6-D Tensor, its data type is float32 or float64.
+        p (float, optional): The order of the norm to be computed. Default: 2.
+
+    Returns:
+        Variable: Tensor that is the p-norm of (x - y).
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            import paddle.fluid as fluid
+            import numpy as np
+
+            with fluid.dygraph.guard():
+                x = fluid.dygraph.to_variable(np.array([[3, 3], [3, 3]]).astype(np.float32))
+                y = fluid.dygraph.to_variable(np.array([[3, 3], [3, 1]]).astype(np.float32))
+                out = paddle.dist(x, y, 0)
+                print(out.numpy())  # out = [1.]
+
+                out = paddle.dist(x, y, 2)
+                print(out.numpy())  # out = [2.]
+
+                out = paddle.dist(x, y, float("inf"))
+                print(out.numpy())  # out = [2.]
+
+                out = paddle.dist(x, y, float("-inf"))
+                print(out.numpy())  # out = [0.]
+    """
+    check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'dist')
+    check_variable_and_dtype(y, 'y', ['float32', 'float64'], 'dist')
+    check_type(p, 'p', (float, int), 'dist')
+    helper = LayerHelper("dist", **locals())
+    out = helper.create_variable_for_type_inference(x.dtype)
+
+    inputs = {"X": [x], "Y": [y]}
+    outputs = {'Out': [out]}
+    attrs = {"p": float(p)}
+    helper.append_op(
+        type='dist', inputs=inputs, outputs=outputs, attrs=attrs)
+    return out
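
For reviewers who want to exercise the new Python API outside of the unit test, a minimal dygraph check against the same NumPy reference used in test_dist_op.py could look like the sketch below. It is only an illustration (the helper np_dist and the chosen shapes are ad hoc, not part of this patch) and assumes a build of this branch with the dist op registered:

    import numpy as np
    import paddle
    import paddle.fluid as fluid

    def np_dist(x, y, p):
        # NumPy reference, mirroring dist() in test_dist_op.py
        z = np.abs(x - y)
        if p == 0.:
            return float(np.count_nonzero(x - y))
        if p == float("inf"):
            return float(np.max(z))
        if p == float("-inf"):
            return float(np.min(z))
        return float(np.power(np.sum(np.power(z, p)), 1.0 / p))

    with fluid.dygraph.guard():
        x_np = np.random.random((3, 4, 5)).astype("float32")
        y_np = np.random.random((4, 1)).astype("float32")  # broadcastable to x
        x = fluid.dygraph.to_variable(x_np)
        y = fluid.dygraph.to_variable(y_np)
        for p in (0., 1., 2., float("inf"), float("-inf")):
            out = paddle.dist(x, y, p)
            assert np.allclose(out.numpy(), np_dist(x_np, y_np, p))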