diff --git a/paddle/fluid/operators/svd_helper.h b/paddle/fluid/operators/svd_helper.h
index 055c0bc57c51d7c091601a4f98eb58677148a18f..bdf402397dd38f66484abc8497185c8b8a762035 100644
--- a/paddle/fluid/operators/svd_helper.h
+++ b/paddle/fluid/operators/svd_helper.h
@@ -20,6 +20,9 @@
 #include "paddle/fluid/framework/ddim.h"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/tensor.h"
+#include "paddle/fluid/operators/diag_op.h"
+#include "paddle/fluid/operators/eigen/eigen_function.h"
+#include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
 #include "paddle/fluid/operators/math/blas.h"
 #include "paddle/fluid/operators/math/functors.h"
 #include "paddle/fluid/operators/math/math_function.h"
@@ -89,7 +92,6 @@ struct PowFunctor {
 };
 
 static std::vector<int> GetBroadcastShape(InTensors ins) {
-  // TODO(xiongkun03) check the operators and output
   PADDLE_ENFORCE_EQ(ins.size(), 2, platform::errors::InvalidArgument(
                                        "GetBroadcastShape Receive 2 tensors"
                                        "but got [%d]", ins.size()));
@@ -125,6 +127,19 @@ static std::vector<int> GetBroadcastShape(InTensors ins) {
   return broadcast_shape;
 }
 
+#define DITO_TRANSPOSE_RANK_CASE(N)             \
+  case N: {                                     \
+    math::Transpose<DeviceContext, T, N> trans; \
+    trans(dev_ctx, x, &ret, axis);              \
+    break;                                      \
+  }
+
+#define DITO_SLICE_RANK_CASE(N)                      \
+  case N: {                                          \
+    EigenSliceWrapper<N>(&x, offset, extends, &ret); \
+    break;                                           \
+  }
+
 template <typename DeviceContext, typename T>
 struct DeviceIndependenceTensorOperations {
   // 1. Device indenpendence, for kernel reuse.
@@ -153,20 +168,25 @@ struct DeviceIndependenceTensorOperations {
   framework::Tensor Matmul(const framework::Tensor& mat_a,
                            const framework::Tensor& mat_b, bool trans_a = false,
                            bool trans_b = false) {
-    framework::AttributeMap attrs;
-    attrs["trans_x"] = trans_a;
-    attrs["trans_y"] = trans_b;
-    NameInTensorMap inputs({{"X", {&mat_a}}, {"Y", {&mat_b}}});
+    framework::Tensor ret;
     auto a_dim = mat_a.dims();
     auto b_dim = mat_b.dims();
     std::vector<int> x_vec = framework::vectorize<int>(a_dim);
     x_vec[x_vec.size() - 2] = a_dim[a_dim.size() - (trans_a ? 1 : 2)];
     x_vec[x_vec.size() - 1] = b_dim[b_dim.size() - (trans_b ? 2 : 1)];
-    return CreateOpRunAndReturnTensor("matmul_v2", inputs, attrs, x_vec);
+    ret.Resize(framework::make_ddim(x_vec));
+    ret.mutable_data<T>(context.GetPlace());
+    auto blas = GetBlas();
+    auto mat_a_discrib = math::CreateMatrixDescriptor(a_dim, 0, trans_a);
+    auto mat_b_discrib = math::CreateMatrixDescriptor(b_dim, 0, trans_b);
+    blas.MatMul(mat_a, mat_a_discrib, mat_b, mat_b_discrib, T(1.0), &ret,
+                T(0.0));
+    return ret;
   }
-  // transpose the last two dimision
+
   framework::Tensor Transpose(const framework::Tensor& x) {
-    framework::Tensor out;
+    // transpose the last two dimensions
+    framework::Tensor ret;
     auto x_dim = x.dims();
     auto x_vec = framework::vectorize<int>(x_dim);
     int rank = x_vec.size();
@@ -177,26 +197,42 @@ struct DeviceIndependenceTensorOperations {
       axis[i] = i;
     }
     std::swap(axis[rank - 1], axis[rank - 2]);
-    framework::AttributeMap attrs;
-    attrs["axis"] = axis;
-    NameInTensorMap inputs({{"X", {&x}}});
-    return CreateOpRunAndReturnTensor("transpose2", inputs, attrs, out_shape,
-                                      {"Out", "XShape"});
+    auto& dev_ctx = context.template device_context<DeviceContext>();
+    ret.Resize(framework::make_ddim(x_vec));
+    ret.mutable_data<T>(context.GetPlace());
+    switch (rank) {
+      DITO_TRANSPOSE_RANK_CASE(2);
+      DITO_TRANSPOSE_RANK_CASE(3);
+      DITO_TRANSPOSE_RANK_CASE(4);
+      DITO_TRANSPOSE_RANK_CASE(5);
+      DITO_TRANSPOSE_RANK_CASE(6);
+      default: {
+        PADDLE_THROW(platform::errors::InvalidArgument(
+            "Invalid Rank number, "
+            "currently only support rank between 2~6"));
+      }
+    }
+    return ret;
   }
-
   framework::Tensor Diag(const framework::Tensor& x, int offset = 0,
+                         // FIXME  link error
                          int padding_value = 0) {
-    framework::AttributeMap attrs;
-    attrs["offset"] = offset;
-    attrs["padding_value"] = padding_value;
-    NameInTensorMap inputs({{"X", {&x}}});
+    PADDLE_ENFORCE_EQ(padding_value, 0,
+                      platform::errors::InvalidArgument(
+                          "Current diag only support padding_value = 0"));
+    PADDLE_ENFORCE_EQ(offset, 0,
+                      platform::errors::InvalidArgument(
+                          "Current diag only support offset = 0,"
+                          "you can use DiagOp instead(not recommend)"));
+
+    framework::Tensor ret;
     int x_rank = x.dims().size();
     std::vector<int> out_shape;
     if (x_rank == 2) {
-      PADDLE_ENFORCE_EQ(x.dims()[0], x.dims()[1],
-                        platform::errors::InvalidArgument(
-                            "if X is a Matrix, then X must be square"));
-      out_shape.push_back(x.dims()[0]);
+      PADDLE_THROW(platform::errors::InvalidArgument(
+          "Current diag only support vector"
+          "-> diagonalized matrix, not support matrix -> vector,"
+          " Use DiagOp instead."));
     } else if (x_rank == 1) {
       out_shape.push_back(x.dims()[0]);
       out_shape.push_back(x.dims()[0]);
@@ -204,42 +240,73 @@ struct DeviceIndependenceTensorOperations {
       PADDLE_THROW(
           platform::errors::InvalidArgument("Rank must less or equal than 2"));
     }
-    return CreateOpRunAndReturnTensor("diag_v2", inputs, attrs, out_shape);
+    ret = Fill({out_shape[0], out_shape[0]}, 0.0);
+    T* output = ret.mutable_data<T>(context.GetPlace());
+    auto for_range = GetForRange(x.numel());
+    for_range(DiagFunctor<T>(x.data<T>(), x.numel(), output));
+    return ret;
+  }
+
+  framework::Tensor Div(const framework::Tensor& x,
+                        const framework::Tensor& y) {
+    framework::Tensor ret;
+    std::vector<int> out_shape = GetBroadcastShape({&x, &y});
+    ret.Resize(framework::make_ddim(out_shape));
+    ElementwiseComputeEx<DivFunctor<T>, DeviceContext, T>(
+        context, &x, &y, -1, DivFunctor<T>(), &ret);
+    return ret;
   }
-
   framework::Tensor Add(const framework::Tensor& x,
                         const framework::Tensor& y) {
-    InTensors ins({&x, &y});
-    framework::AttributeMap attrs;
-    attrs["axis"] = -1;
+    // element wise add, support numpy broadcast.
+    framework::Tensor ret;
     std::vector<int> out_shape = GetBroadcastShape({&x, &y});
-    NameInTensorMap inputs({{"X", {&x}}, {"Y", {&y}}});
-    return CreateOpRunAndReturnTensor("elementwise_add", inputs, attrs,
-                                      out_shape);
+    ret.Resize(framework::make_ddim(out_shape));
+    ElementwiseComputeEx<AddFunctor<T>, DeviceContext, T>(
+        context, &x, &y, -1, AddFunctor<T>(), &ret);
+    return ret;
   }
-
   framework::Tensor Mul(const framework::Tensor& x,
                         const framework::Tensor& y) {
-    InTensors ins({&x, &y});
-    framework::AttributeMap attrs;
-    attrs["axis"] = -1;
+    framework::Tensor ret;
     std::vector<int> out_shape = GetBroadcastShape({&x, &y});
-    NameInTensorMap inputs({{"X", {&x}}, {"Y", {&y}}});
-    return CreateOpRunAndReturnTensor("elementwise_mul", inputs, attrs,
-                                      out_shape);
+    ret.Resize(framework::make_ddim(out_shape));
+    ElementwiseComputeEx<MulFunctor<T>, DeviceContext, T>(
+        context, &x, &y, -1, MulFunctor<T>(), &ret);
+    return ret;
+  }
+
+  framework::Tensor ReduceSum(const framework::Tensor& x,
+                              std::vector<int> out_dim) {
+    framework::AttributeMap attrs;
+    attrs["dim"] = std::vector<int>{-1};
+    NameInTensorMap inputs({{"X", {&x}}});
+    return CreateOpRunAndReturnTensor("reduce_sum", inputs, attrs, out_dim);
+  }
+
+  framework::Tensor ReduceMax(const framework::Tensor& x,
+                              std::vector<int> out_dim) {
+    framework::AttributeMap attrs;
+    attrs["dim"] = std::vector<int>{-1};
+    NameInTensorMap inputs({{"X", {&x}}});
+    return CreateOpRunAndReturnTensor("reduce_max", inputs, attrs, out_dim);
   }
 
   framework::Tensor Sub(const framework::Tensor& x,
                         const framework::Tensor& y) {
-    InTensors ins({&x, &y});
-    framework::AttributeMap attrs;
-    attrs["axis"] = -1;
+    framework::Tensor ret;
     std::vector<int> out_shape = GetBroadcastShape({&x, &y});
-    NameInTensorMap inputs({{"X", {&x}}, {"Y", {&y}}});
-    return CreateOpRunAndReturnTensor("elementwise_sub", inputs, attrs,
-                                      out_shape);
+    ret.Resize(framework::make_ddim(out_shape));
+    if (x.dims().size() >= y.dims().size()) {
+      ElementwiseComputeEx<SubFunctor<T>, DeviceContext, T>(
+          context, &x, &y, -1, SubFunctor<T>(), &ret);
+    } else {
+      ElementwiseComputeEx<InverseSubFunctor<T>, DeviceContext, T>(
+          // This is copied from elementwise_sub; the operands must be
+          // reversed when x_rank < y_rank.
+          context, &x, &y, -1, InverseSubFunctor<T>(), &ret);
+    }
+    return ret;
   }
-
   const framework::Tensor Unsqueeze(const framework::Tensor& x, int axis = 0) {
     // don't copy data, only change the dims
     framework::Tensor out;
@@ -255,40 +322,29 @@ struct DeviceIndependenceTensorOperations {
     out.Resize(framework::make_ddim(out_shape));
     return out;
   }
-
-  framework::Tensor Zeros(std::vector<int> shape,
-                          framework::proto::VarType::Type dtype,
-                          float fill_value) {
-    framework::AttributeMap attrs;
-    attrs["dtype"] = dtype;
-    attrs["shape"] = shape;
-    attrs["value"] = fill_value;
-    NameInTensorMap inputs({});
-    return CreateOpRunAndReturnTensor("fill_constant", inputs, attrs, shape);
+  framework::Tensor Fill(std::vector<int> shape, float fill_value) {
+    framework::Tensor ret;
+    ret.Resize(framework::make_ddim(shape));
+    ret.mutable_data<T>(context.GetPlace());
+    auto& dev_ctx = context.template device_context<DeviceContext>();
+    SetConstant<DeviceContext, T>()(dev_ctx, &ret, T(fill_value));
+    return ret;
   }
-
-  framework::Tensor Infinits(std::vector<int> shape,
-                             framework::proto::VarType::Type dtype) {
-    framework::AttributeMap attrs;
-    attrs["dtype"] = dtype;
-    attrs["shape"] = shape;
-    attrs["str_value"] = std::string("inf");
-    NameInTensorMap inputs({});
-    return CreateOpRunAndReturnTensor("fill_constant", inputs, attrs, shape);
+  framework::Tensor Infinits(std::vector<int> shape) {
+    auto value = static_cast<T>(std::numeric_limits<double>::infinity());
+    return Fill(shape, value);
   }
-
-  framework::Tensor Eye(int n, framework::proto::VarType::Type dtype) {
-    auto output = Zeros({n}, dtype, 1);
+  framework::Tensor Eye(int n) {
+    auto output = Fill({n}, 1);
     auto ret = Diag(output);
     return ret;
   }
-
   framework::Tensor Slice(const framework::Tensor& x, std::vector<int> axes,
                           std::vector<int> starts, std::vector<int> ends) {
+    framework::Tensor ret;
     std::vector<int> new_axes = axes;
-    NameInTensorMap inputs({{"Input", {&x}}});
     std::vector<int> out_shape = framework::vectorize<int>(x.dims());
-    int rank = out_shape.size();
+    size_t rank = out_shape.size();
     PADDLE_ENFORCE_EQ(
         axes.size(), starts.size(),
         platform::errors::InvalidArgument("Slice Operator Argument Invalided"));
@@ -306,27 +362,31 @@ struct DeviceIndependenceTensorOperations {
                             "C++ Slice Operation Not Support End < Start"));
       out_shape[axis] = ed - st;
     }
-    framework::AttributeMap attrs;
-    attrs["axes"] = new_axes;
-    attrs["starts"] = starts;
-    attrs["ends"] = ends;
-    return CreateOpRunAndReturnTensor("slice", inputs, attrs, out_shape);
-  }
-
-  framework::Tensor ReduceSum(const framework::Tensor& x,
-                              std::vector<int> out_dim) {
-    framework::AttributeMap attrs;
-    attrs["dim"] = std::vector<int>{-1};
-    NameInTensorMap inputs({{"X", {&x}}});
-    return CreateOpRunAndReturnTensor("reduce_sum", inputs, attrs, out_dim);
-  }
-
-  framework::Tensor ReduceMax(const framework::Tensor& x,
-                              std::vector<int> out_dim) {
-    framework::AttributeMap attrs;
-    attrs["dim"] = std::vector<int>{-1};
-    NameInTensorMap inputs({{"X", {&x}}});
-    return CreateOpRunAndReturnTensor("reduce_max", inputs, attrs, out_dim);
+    std::vector<int> offset(rank), extends(rank);
+    for (size_t i = 0; i < rank; ++i) {
+      offset[i] = 0;
+      extends[i] = x.dims()[i];
+    }
+    for (size_t i = 0; i < new_axes.size(); ++i) {
+      offset[new_axes[i]] = starts[i];
+      extends[new_axes[i]] = ends[i] - starts[i];
+    }
+    ret.Resize(framework::make_ddim(out_shape));
+    ret.mutable_data<T>(context.GetPlace());
+    switch (rank) {
+      DITO_SLICE_RANK_CASE(1);
+      DITO_SLICE_RANK_CASE(2);
+      DITO_SLICE_RANK_CASE(3);
+      DITO_SLICE_RANK_CASE(4);
+      DITO_SLICE_RANK_CASE(5);
+      DITO_SLICE_RANK_CASE(6);
+      default: {
+        PADDLE_THROW(platform::errors::InvalidArgument(
+            "Invalid Rank number, "
+            "currently only support rank between 2~6"));
+      }
+    }
+    return ret;
   }
 
  private:
@@ -338,14 +398,40 @@ struct DeviceIndependenceTensorOperations {
     auto& dev_ctx = context.template device_context<DeviceContext>();
     return platform::ForRange<DeviceContext>(dev_ctx, numel);
   }
-
+  template <size_t D>
+  void EigenSliceWrapper(const framework::Tensor* in,
+                         const std::vector<int>& start,
+                         const std::vector<int>& end, framework::Tensor* out) {
+    // Slice by calling the Eigen Tensor function `.slice()`.
+    size_t rank = in->dims().size();
+    PADDLE_ENFORCE_EQ(start.size(), rank,
+                      platform::errors::InvalidArgument(
+                          "EigenSliceWrapper function start "
+                          "argument must have the same length as input rank."));
+    PADDLE_ENFORCE_EQ(end.size(), rank,
+                      platform::errors::InvalidArgument(
+                          "EigenSliceWrapper function end "
+                          "argument must have the same length as input rank."));
+    auto eigen_place_ptr =
+        context.template device_context<DeviceContext>().eigen_device();
+    auto eigen_place = *eigen_place_ptr;
+    auto out_t = framework::EigenTensor<T, D>::From(*out, out->dims());
+    auto in_t = framework::EigenTensor<T, D>::From(*in, in->dims());
+    Eigen::DSizes<int, D> offsets_32bit, extents_32bit;
+    for (size_t i = 0; i < D; i++) {
+      offsets_32bit[i] = start[i];
+      extents_32bit[i] = end[i];
+    }
+    EigenSlice<std::decay_t<decltype(eigen_place)>, T, D>::Eval(
+        eigen_place, framework::To32BitIndex(out_t),
+        framework::To32BitIndex(in_t), offsets_32bit, extents_32bit);
+  }
   framework::Tensor CreateOpRunAndReturnTensor(
       const std::string& type, const NameInTensorMap& inputs,
       const framework::AttributeMap& attrs, std::vector<int> out_shape,
       NameOutTensor out_str = {"Out"}) {
     // varialble set dims must be LoDTensor / SelectedRowTensor
     framework::Scope& local_scope = context.scope().NewScope();
-
     framework::VariableNameMap op_outputs;
     for (auto out_name : out_str) {
       local_scope.Var("tmp_" + out_name)->GetMutable<framework::LoDTensor>();
@@ -373,6 +459,7 @@ struct DeviceIndependenceTensorOperations {
       }
       op_inputs[item.first] = name_vector;
     }
+
     auto op =
         framework::OpRegistry::CreateOp(type, op_inputs, op_outputs, attrs);
     op->Run(local_scope, context.GetPlace());
diff --git a/paddle/fluid/operators/svd_op.h b/paddle/fluid/operators/svd_op.h
index 1910effbeaa54d6b718fc39f4957a56d83902d77..f387dca7b7f9b2c4e741d8f495a58b05a46c6c6f 100644
--- a/paddle/fluid/operators/svd_op.h
+++ b/paddle/fluid/operators/svd_op.h
@@ -54,7 +54,6 @@ class SvdCPUKernel : public framework::OpKernel<T> {
         size_t(batches * col_v * cols * sizeof(math::Real<T>)));
     auto* S_out = S->mutable_data<math::Real<T>>(
         context.GetPlace(), size_t(batches * k * sizeof(math::Real<T>)));
-    /*SVD Use the Eigen Library*/
     math::BatchSvd<T>(x_data, U_out, VH_out, S_out, rows, cols, batches, full);
   }
 
@@ -96,7 +95,7 @@ class SvdGradKernel : public framework::OpKernel<T> {
     auto s_square = dito.Pow(S, 2);
     auto F = dito.Sub(dito.Unsqueeze(s_square, -2),
                       dito.Unsqueeze(s_square, -1));
-    F = dito.Add(F, dito.Diag(dito.Infinits({k}, U.type())));
+    F = dito.Add(F, dito.Diag(dito.Infinits({k})));
    F = dito.Pow(F, -1);
     Tensor sigma_term;
     Tensor u_term;
@@ -115,8 +114,7 @@ class SvdGradKernel : public framework::OpKernel<T> {
       u_term = dito.Mul(dito.Mul(dito.Sub(UTG, GTU), F), dito.Unsqueeze(S, -2));
       u_term = dito.Matmul(U, u_term);
       if (m > k) {
-        auto project =
-            dito.Sub(dito.Eye(m, U.type()), dito.Matmul(U, U, false, true));
+        auto project = dito.Sub(dito.Eye(m), dito.Matmul(U, U, false, true));
         u_term = dito.Add(u_term, dito.Mul(dito.Matmul(project, dU),
                                            dito.Unsqueeze(s_inverse, -2)));
       }
@@ -129,8 +127,7 @@ class SvdGradKernel : public framework::OpKernel<T> {
       v_term = dito.Mul(dito.Matmul(dito.Mul(dito.Sub(UTG, GTU), F), VH),
                         dito.Unsqueeze(S, -1));
       if (n > k) {
-        auto project =
-            dito.Sub(dito.Eye(n, U.type()), dito.Matmul(VH, VH, true, false));
+        auto project = dito.Sub(dito.Eye(n), dito.Matmul(VH, VH, true, false));
         v_term = dito.Add(v_term, dito.Mul(dito.Matmul(dVH, project),
                                            dito.Unsqueeze(s_inverse, -1)));
       }
diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py
index d0b705cde6aa1cb4937ea207d0ab90340a665799..0ee9f4eed82df44d3bc02e2414fb782e78ae1b6f 100755
--- a/python/paddle/__init__.py
+++ b/python/paddle/__init__.py
@@ -100,7 +100,6 @@ from .tensor.linalg import bmm  # noqa: F401
 from .tensor.linalg import histogram  # noqa: F401
 from .tensor.linalg import mv  # noqa: F401
 from .tensor.linalg import matrix_power  # noqa: F401
-from .tensor.linalg import svd  # noqa: F401
 from .tensor.logic import equal  # noqa: F401
 from .tensor.logic import greater_equal  # noqa: F401
 from .tensor.logic import greater_than  # noqa: F401
@@ -498,7 +497,6 @@ __all__ = [  # noqa
     'sqrt',
     'cholesky',
     'matrix_power',
-    'svd',
     'randperm',
     'linspace',
     'reshape',
diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt
index 24448905b955c61b4e237518c7e55654a59095a8..4a70cd3c7e10b094040b2e62b91e5996e0d8e317 100644
--- a/python/paddle/fluid/tests/unittests/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt
@@ -889,7 +889,7 @@ set_tests_properties(test_multiprocess_dataloader_iterable_dataset_static PROPER
 set_tests_properties(test_lstm_cudnn_op PROPERTIES TIMEOUT 120)
 set_tests_properties(test_stack_op PROPERTIES TIMEOUT 120)
 set_tests_properties(test_bilinear_interp_v2_op PROPERTIES TIMEOUT 120)
-set_tests_properties(test_svd_op PROPERTIES TIMEOUT 120)
+set_tests_properties(test_svd_op PROPERTIES TIMEOUT 40)
 set_tests_properties(test_deformable_psroi_pooling PROPERTIES TIMEOUT 120)
 set_tests_properties(test_trilinear_interp_v2_op PROPERTIES TIMEOUT 120)
 set_tests_properties(test_imperative_static_runner_mnist PROPERTIES TIMEOUT 120)
diff --git a/python/paddle/tensor/__init__.py b/python/paddle/tensor/__init__.py
index 73369a6e8ea14464eb46d26b664c73f25ef48e02..19624cf6b8fda908784768638dabc9edfd7598e4 100755
--- a/python/paddle/tensor/__init__.py
+++ b/python/paddle/tensor/__init__.py
@@ -45,7 +45,6 @@ from .linalg import bmm  # noqa: F401
 from .linalg import histogram  # noqa: F401
 from .linalg import mv  # noqa: F401
 from .linalg import matrix_power  # noqa: F401
-from .linalg import svd  # noqa: F401
 from .logic import equal  # noqa: F401
 from .logic import greater_equal  # noqa: F401
 from .logic import greater_than  # noqa: F401
@@ -226,7 +225,6 @@ tensor_method_func = [  #noqa
     'histogram',
     'mv',
     'matrix_power',
-    'svd',
     'abs',
     'acos',
     'all',
diff --git a/python/paddle/tensor/linalg.py b/python/paddle/tensor/linalg.py
index 39c67b072b60ad18206a930705b75aab6c196cb9..b50643471eed7d36e491bee38b89e6122f70df87 100644
--- a/python/paddle/tensor/linalg.py
+++ b/python/paddle/tensor/linalg.py
@@ -1036,46 +1036,51 @@ def mv(x, vec, name=None):
 
 def svd(x, full_matrices=False, name=None):
     r"""
-    Computes the singular value decomposition of one
-    matrix or batches of regular matrice.
+    Computes the singular value decomposition of one matrix or a batch of regular matrices.
+
+    Let :math:`X` be the input matrix or a batch of input matrices, the output should satisfy:
+
+    .. math::
+        X = U * diag(S) * VT
+
     Args:
         x (Tensor): The input tensor. Its shape should be `[..., N, M]`,
-            where ... is zero or more batch dimensions. N and M can be arbitraty
+            where `...` is zero or more batch dimensions. N and M can be arbitrary
             positive number. Note that if x is sigular matrices, the grad is numerical
-            instability. The data type of x should be float32 or float64.
-
-        full_matrices(bool): A flag to control the behavor of svd.
+            unstable. The data type of x should be float32 or float64.
+        full_matrices (bool): A flag to control the behavior of svd.
             If full_matrices = True, svd op will compute full U and V matrics,
-            which means shape of U is `[..., N, N]`, shape of V is `[..., M, M]`.
+            which means shape of U is `[..., N, N]`, shape of V is `[..., M, M]`. K = min(M, N).
            If full_matrices = False, svd op will use a economic method to store U and V.
-            which means shape of U is `[..., N, K]`, shape of V is `[..., M, K]`
+            which means shape of U is `[..., N, K]`, shape of V is `[..., M, K]`. K = min(M, N).
+        name (str, optional): Name for the operation (optional, default is None).
+            For more information, please refer to :ref:`api_guide_Name`.
 
     Returns:
-        Tensor: Tensor U, the shape of U is controlled by full_matrices flag.
-        Tensor: Tensor S, the singular value of X. the shape of S is [..., K]
-        Tensor: Tensor VH, the conjugate transpose of V. the shape of V is controlled by full_matrices flag.
+        Tuple of 3 tensors: (U, S, VH). VH is the conjugate transpose of V. S is the singular value vector of the matrices, with shape `[..., K]`.
 
-            import numpy as np
+    Examples:
+        .. code-block:: python
+
+            import paddle
 
             x = paddle.to_tensor([[1.0, 2.0], [1.0, 3.0], [4.0, 6.0]]).astype('float64')
             x = x.reshape([3, 2])
-            u, s, vt = paddle.linalg.svd(x)
+            u, s, vh = paddle.linalg.svd(x)
             print (u)
-            print (s)
-            print (vt)
-
             #U = [[ 0.27364809, -0.21695147 ],
             #     [ 0.37892198, -0.87112408 ],
             #     [ 0.8840446 ,  0.44053933 ]]
 
+            print (s)
             #S = [8.14753743, 0.78589688]
-
+            print (vh)
             #VT= [[ 0.51411221,  0.85772294],
             #     [ 0.85772294, -0.51411221]]
 
-            # one can verify : U * S * VT = X ;
-            #                  U * UH = I ;
-            #                  V * VH = I
+            # one can verify : U * S * VT == X
+            #                  U * UH == I
+            #                  V * VH == I
     """
     if in_dygraph_mode():
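
For reference (not part of the patch): a minimal usage sketch of the API after this change. Since `svd` is no longer re-exported from the top-level `paddle` namespace, callers go through `paddle.linalg.svd`, as the updated docstring shows. The input values are copied from that docstring; the reconstruction check and variable names below are illustrative only.

    import paddle

    # 3 x 2 input taken from the docstring example above.
    x = paddle.to_tensor([[1.0, 2.0], [1.0, 3.0], [4.0, 6.0]], dtype='float64')
    u, s, vh = paddle.linalg.svd(x, full_matrices=False)
    # With full_matrices=False the shapes are U: [3, 2], S: [2], VH: [2, 2], where K = min(M, N) = 2.
    x_rec = u @ paddle.diag(s) @ vh   # rebuild X = U * diag(S) * VH
    print(paddle.allclose(x, x_rec))  # expected: True, up to floating point error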