Unverified commit f521a30d authored by xiongkun, committed by GitHub

refine svd; unexpose tensor.svd; fix english document; set timeout=40 (#35635)

Parent 86a6be1a
......@@ -20,6 +20,9 @@
#include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/diag_op.h"
#include "paddle/fluid/operators/eigen/eigen_function.h"
#include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/functors.h"
#include "paddle/fluid/operators/math/math_function.h"
......@@ -89,7 +92,6 @@ struct PowFunctor {
};
static std::vector<int> GetBroadcastShape(InTensors ins) {
// TODO(xiongkun03) check the operators and output
PADDLE_ENFORCE_EQ(ins.size(), 2, platform::errors::InvalidArgument(
"GetBroadcastShape receives 2 tensors, "
"but got [%d]",
......@@ -125,6 +127,19 @@ static std::vector<int> GetBroadcastShape(InTensors ins) {
return broadcast_shape;
}
#define DITO_TRANSPOSE_RANK_CASE(N) \
case N: { \
math::Transpose<DeviceContext, T, N> trans; \
trans(dev_ctx, x, &ret, axis); \
break; \
}
#define DITO_SLICE_RANK_CASE(N) \
case N: { \
EigenSliceWrapper<N>(&x, offset, extents, &ret); \
break; \
}
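These macros exist because Eigen kernels take the tensor rank as a compile-time template parameter, while the operator only learns the rank at run time; each switch below maps one to the other. A minimal sketch of the same dispatch pattern (illustrative names, not Paddle code):

```cpp
#include <cstdio>
#include <stdexcept>

template <int Rank>
void DoWork() {  // stand-in for a rank-templated Eigen kernel
  std::printf("working on rank %d\n", Rank);
}

#define RANK_CASE(N) \
  case N:            \
    DoWork<N>();     \
    break;

void Dispatch(int rank) {  // runtime rank -> compile-time instantiation
  switch (rank) {
    RANK_CASE(2)
    RANK_CASE(3)
    RANK_CASE(4)
    default:
      throw std::invalid_argument("unsupported rank");
  }
}
```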
template <typename DeviceContext, typename T>
struct DeviceIndependenceTensorOperations {
// 1. Device independence, for kernel reuse.
......@@ -153,20 +168,25 @@ struct DeviceIndependenceTensorOperations {
framework::Tensor Matmul(const framework::Tensor& mat_a,
const framework::Tensor& mat_b, bool trans_a = false,
bool trans_b = false) {
framework::AttributeMap attrs;
attrs["trans_x"] = trans_a;
attrs["trans_y"] = trans_b;
NameInTensorMap inputs({{"X", {&mat_a}}, {"Y", {&mat_b}}});
framework::Tensor ret;
auto a_dim = mat_a.dims();
auto b_dim = mat_b.dims();
std::vector<int> x_vec = framework::vectorize<int>(a_dim);
x_vec[x_vec.size() - 2] = a_dim[a_dim.size() - (trans_a ? 1 : 2)];
x_vec[x_vec.size() - 1] = b_dim[b_dim.size() - (trans_b ? 2 : 1)];
return CreateOpRunAndReturnTensor("matmul_v2", inputs, attrs, x_vec);
ret.Resize(framework::make_ddim(x_vec));
ret.mutable_data<T>(context.GetPlace());
auto blas = GetBlas();
auto mat_a_desc = math::CreateMatrixDescriptor(a_dim, 0, trans_a);
auto mat_b_desc = math::CreateMatrixDescriptor(b_dim, 0, trans_b);
blas.MatMul(mat_a, mat_a_desc, mat_b, mat_b_desc, T(1.0), &ret,
T(0.0));
return ret;
}
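The x_vec arithmetic above is terse; this standalone sketch (hypothetical helper, same rule) spells it out: batch dimensions come from A, the second-to-last output dim is the row count of op(A), and the last is the column count of op(B).

```cpp
#include <vector>

// Output shape of op(A) * op(B), where op() optionally transposes
// the last two dimensions. Assumes both shapes have rank >= 2.
std::vector<int> MatmulOutShape(const std::vector<int>& a,
                                const std::vector<int>& b,
                                bool trans_a, bool trans_b) {
  std::vector<int> out = a;  // batch dims taken from A
  out[out.size() - 2] = a[a.size() - (trans_a ? 1 : 2)];  // rows of op(A)
  out[out.size() - 1] = b[b.size() - (trans_b ? 2 : 1)];  // cols of op(B)
  return out;
}
// e.g. MatmulOutShape({4, 3, 5}, {4, 5, 2}, false, false) -> {4, 3, 2}
```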
// transpose the last two dimensions
framework::Tensor Transpose(const framework::Tensor& x) {
framework::Tensor out;
// transpose the last two dimensions
framework::Tensor ret;
auto x_dim = x.dims();
auto x_vec = framework::vectorize<int>(x_dim);
int rank = x_vec.size();
......@@ -177,26 +197,42 @@ struct DeviceIndependenceTensorOperations {
axis[i] = i;
}
std::swap(axis[rank - 1], axis[rank - 2]);
framework::AttributeMap attrs;
attrs["axis"] = axis;
NameInTensorMap inputs({{"X", {&x}}});
return CreateOpRunAndReturnTensor("transpose2", inputs, attrs, out_shape,
{"Out", "XShape"});
auto& dev_ctx = context.template device_context<DeviceContext>();
ret.Resize(framework::make_ddim(x_vec));
ret.mutable_data<T>(context.GetPlace());
switch (rank) {
DITO_TRANSPOSE_RANK_CASE(2);
DITO_TRANSPOSE_RANK_CASE(3);
DITO_TRANSPOSE_RANK_CASE(4);
DITO_TRANSPOSE_RANK_CASE(5);
DITO_TRANSPOSE_RANK_CASE(6);
default: {
PADDLE_THROW(platform::errors::InvalidArgument(
"Invalid Rank number, "
"currently only support rank between 2~6"));
}
}
return ret;
}
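As a quick illustration of the permutation built above (a sketch, not part of the commit): the axis vector starts as the identity order and only the last two entries are exchanged.

```cpp
#include <numeric>
#include <utility>
#include <vector>

// Permutation used by Transpose: identity with the last two axes swapped.
std::vector<int> LastTwoSwapped(int rank) {
  std::vector<int> axis(rank);
  std::iota(axis.begin(), axis.end(), 0);     // {0, 1, ..., rank-1}
  std::swap(axis[rank - 1], axis[rank - 2]);  // e.g. rank 4 -> {0, 1, 3, 2}
  return axis;
}
```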
framework::Tensor Diag(const framework::Tensor& x, int offset = 0,
// FIXME link error
int padding_value = 0) {
framework::AttributeMap attrs;
attrs["offset"] = offset;
attrs["padding_value"] = padding_value;
NameInTensorMap inputs({{"X", {&x}}});
PADDLE_ENFORCE_EQ(padding_value, 0,
platform::errors::InvalidArgument(
"Current diag only support padding_value = 0"));
PADDLE_ENFORCE_EQ(offset, 0,
platform::errors::InvalidArgument(
"Current diag only support offset = 0,"
"you can use DiagOp instead(not recommend)"));
framework::Tensor ret;
int x_rank = x.dims().size();
std::vector<int> out_shape;
if (x_rank == 2) {
PADDLE_ENFORCE_EQ(x.dims()[0], x.dims()[1],
platform::errors::InvalidArgument(
"if X is a Matrix, then X must be square"));
out_shape.push_back(x.dims()[0]);
PADDLE_THROW(platform::errors::InvalidArgument(
"Current diag only supports vector "
"-> diagonalized matrix; matrix -> vector is not supported. "
"Use DiagOp instead."));
} else if (x_rank == 1) {
out_shape.push_back(x.dims()[0]);
out_shape.push_back(x.dims()[0]);
......@@ -204,42 +240,73 @@ struct DeviceIndependenceTensorOperations {
PADDLE_THROW(
platform::errors::InvalidArgument("Rank must less or equal than 2"));
}
return CreateOpRunAndReturnTensor("diag_v2", inputs, attrs, out_shape);
ret = Fill({out_shape[0], out_shape[0]}, 0.0);
T* output = ret.mutable_data<T>(context.GetPlace());
auto for_range = GetForRange(x.numel());
for_range(DiagFunctor<T>(x.data<T>(), x.numel(), output));
return ret;
}
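The new Diag body zero-fills a square matrix and lets DiagFunctor scatter the input vector onto the main diagonal. A standalone sketch of the intended result (assuming DiagFunctor writes x[i] to output[i * (n + 1)], as diag_op.h suggests):

```cpp
#include <vector>

// Row-major n x n matrix with v on the main diagonal, zeros elsewhere.
std::vector<float> DiagFromVector(const std::vector<float>& v) {
  const size_t n = v.size();
  std::vector<float> out(n * n, 0.0f);  // the Fill(..., 0.0) step
  for (size_t i = 0; i < n; ++i) {
    out[i * n + i] = v[i];  // note i * (n + 1) == i * n + i
  }
  return out;
}
```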
framework::Tensor Div(const framework::Tensor& x,
const framework::Tensor& y) {
framework::Tensor ret;
std::vector<int> out_shape = GetBroadcastShape({&x, &y});
ret.Resize(framework::make_ddim(out_shape));
ElementwiseComputeEx<DivFunctor<T>, DeviceContext, T>(
context, &x, &y, -1, DivFunctor<T>(), &ret);
return ret;
}
framework::Tensor Add(const framework::Tensor& x,
const framework::Tensor& y) {
InTensors ins({&x, &y});
framework::AttributeMap attrs;
attrs["axis"] = -1;
// element-wise add, supports numpy broadcast.
framework::Tensor ret;
std::vector<int> out_shape = GetBroadcastShape({&x, &y});
NameInTensorMap inputs({{"X", {&x}}, {"Y", {&y}}});
return CreateOpRunAndReturnTensor("elementwise_add", inputs, attrs,
out_shape);
ret.Resize(framework::make_ddim(out_shape));
ElementwiseComputeEx<AddFunctor<T>, DeviceContext, T>(
context, &x, &y, -1, AddFunctor<T>(), &ret);
return ret;
}
framework::Tensor Mul(const framework::Tensor& x,
const framework::Tensor& y) {
InTensors ins({&x, &y});
framework::AttributeMap attrs;
attrs["axis"] = -1;
framework::Tensor ret;
std::vector<int> out_shape = GetBroadcastShape({&x, &y});
NameInTensorMap inputs({{"X", {&x}}, {"Y", {&y}}});
return CreateOpRunAndReturnTensor("elementwise_mul", inputs, attrs,
out_shape);
ret.Resize(framework::make_ddim(out_shape));
ElementwiseComputeEx<MulFunctor<T>, DeviceContext, T>(
context, &x, &y, -1, MulFunctor<T>(), &ret);
return ret;
}
framework::Tensor ReduceSum(const framework::Tensor& x,
std::vector<int> out_dim) {
framework::AttributeMap attrs;
attrs["dim"] = std::vector<int>{-1};
NameInTensorMap inputs({{"X", {&x}}});
return CreateOpRunAndReturnTensor("reduce_sum", inputs, attrs, out_dim);
}
framework::Tensor ReduceMax(const framework::Tensor& x,
std::vector<int> out_dim) {
framework::AttributeMap attrs;
attrs["dim"] = std::vector<int>{-1};
NameInTensorMap inputs({{"X", {&x}}});
return CreateOpRunAndReturnTensor("reduce_max", inputs, attrs, out_dim);
}
framework::Tensor Sub(const framework::Tensor& x,
const framework::Tensor& y) {
InTensors ins({&x, &y});
framework::AttributeMap attrs;
attrs["axis"] = -1;
framework::Tensor ret;
std::vector<int> out_shape = GetBroadcastShape({&x, &y});
NameInTensorMap inputs({{"X", {&x}}, {"Y", {&y}}});
return CreateOpRunAndReturnTensor("elementwise_sub", inputs, attrs,
out_shape);
ret.Resize(framework::make_ddim(out_shape));
if (x.dims().size() >= y.dims().size()) {
ElementwiseComputeEx<SubFunctor<T>, DeviceContext, T>(
context, &x, &y, -1, SubFunctor<T>(), &ret);
} else {
// This is copied from elementwise_sub: the inverse functor is
// needed when x rank < y rank, so the result is still x - y.
ElementwiseComputeEx<InverseSubFunctor<T>, DeviceContext, T>(
context, &x, &y, -1, InverseSubFunctor<T>(), &ret);
}
return ret;
}
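The branch above exists because the elementwise broadcast kernel expects the higher-rank operand first; when y outranks x, the inverse functor keeps the result equal to x - y even though the operands are processed in swapped roles. A sketch of the two functors involved (simplified and renamed; the real definitions live in the elementwise headers):

```cpp
// Plain subtraction, applied when x has rank >= y.
template <typename T>
struct SubFunctorSketch {
  T operator()(T a, T b) const { return a - b; }
};

// "Inverse" subtraction: the kernel feeds operands in swapped order
// when broadcasting the other way, so b - a restores x - y.
template <typename T>
struct InverseSubFunctorSketch {
  T operator()(T a, T b) const { return b - a; }
};
```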
const framework::Tensor Unsqueeze(const framework::Tensor& x, int axis = 0) {
// don't copy data, only change the dims
framework::Tensor out;
......@@ -255,40 +322,29 @@ struct DeviceIndependenceTensorOperations {
out.Resize(framework::make_ddim(out_shape));
return out;
}
framework::Tensor Zeros(std::vector<int> shape,
framework::proto::VarType::Type dtype,
float fill_value) {
framework::AttributeMap attrs;
attrs["dtype"] = dtype;
attrs["shape"] = shape;
attrs["value"] = fill_value;
NameInTensorMap inputs({});
return CreateOpRunAndReturnTensor("fill_constant", inputs, attrs, shape);
framework::Tensor Fill(std::vector<int> shape, float fill_value) {
framework::Tensor ret;
ret.Resize(framework::make_ddim(shape));
ret.mutable_data<T>(context.GetPlace());
auto& dev_ctx = context.template device_context<DeviceContext>();
SetConstant<DeviceContext, T>()(dev_ctx, &ret, T(fill_value));
return ret;
}
framework::Tensor Infinits(std::vector<int> shape,
framework::proto::VarType::Type dtype) {
framework::AttributeMap attrs;
attrs["dtype"] = dtype;
attrs["shape"] = shape;
attrs["str_value"] = std::string("inf");
NameInTensorMap inputs({});
return CreateOpRunAndReturnTensor("fill_constant", inputs, attrs, shape);
framework::Tensor Infinits(std::vector<int> shape) {
auto value = static_cast<T>(std::numeric_limits<double>::infinity());
return Fill(shape, value);
}
framework::Tensor Eye(int n, framework::proto::VarType::Type dtype) {
auto output = Zeros({n}, dtype, 1);
framework::Tensor Eye(int n) {
auto output = Fill({n}, 1);
auto ret = Diag(output);
return ret;
}
framework::Tensor Slice(const framework::Tensor& x, std::vector<int> axes,
std::vector<int> starts, std::vector<int> ends) {
framework::Tensor ret;
std::vector<int> new_axes = axes;
NameInTensorMap inputs({{"Input", {&x}}});
std::vector<int> out_shape = framework::vectorize<int>(x.dims());
int rank = out_shape.size();
size_t rank = out_shape.size();
PADDLE_ENFORCE_EQ(
axes.size(), starts.size(),
platform::errors::InvalidArgument("Slice Operator Argument Invalided"));
......@@ -306,27 +362,31 @@ struct DeviceIndependenceTensorOperations {
"C++ Slice Operation Not Support End < Start"));
out_shape[axis] = ed - st;
}
framework::AttributeMap attrs;
attrs["axes"] = new_axes;
attrs["starts"] = starts;
attrs["ends"] = ends;
return CreateOpRunAndReturnTensor("slice", inputs, attrs, out_shape);
std::vector<int> offset(rank), extents(rank);
for (size_t i = 0; i < rank; ++i) {
offset[i] = 0;
extents[i] = x.dims()[i];
}
framework::Tensor ReduceSum(const framework::Tensor& x,
std::vector<int> out_dim) {
framework::AttributeMap attrs;
attrs["dim"] = std::vector<int>{-1};
NameInTensorMap inputs({{"X", {&x}}});
return CreateOpRunAndReturnTensor("reduce_sum", inputs, attrs, out_dim);
for (size_t i = 0; i < new_axes.size(); ++i) {
offset[new_axes[i]] = starts[i];
extents[new_axes[i]] = ends[i] - starts[i];
}
framework::Tensor ReduceMax(const framework::Tensor& x,
std::vector<int> out_dim) {
framework::AttributeMap attrs;
attrs["dim"] = std::vector<int>{-1};
NameInTensorMap inputs({{"X", {&x}}});
return CreateOpRunAndReturnTensor("reduce_max", inputs, attrs, out_dim);
ret.Resize(framework::make_ddim(out_shape));
ret.mutable_data<T>(context.GetPlace());
switch (rank) {
DITO_SLICE_RANK_CASE(1);
DITO_SLICE_RANK_CASE(2);
DITO_SLICE_RANK_CASE(3);
DITO_SLICE_RANK_CASE(4);
DITO_SLICE_RANK_CASE(5);
DITO_SLICE_RANK_CASE(6);
default: {
PADDLE_THROW(platform::errors::InvalidArgument(
"Invalid Rank number, "
"currently only support rank between 2~6"));
}
}
return ret;
}
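A standalone restatement of the offset/extent computation feeding EigenSliceWrapper (hypothetical helper): untouched dimensions keep their full size, while each sliced axis records its start index and length.

```cpp
#include <utility>
#include <vector>

std::pair<std::vector<int>, std::vector<int>> SliceOffsetsExtents(
    const std::vector<int>& dims, const std::vector<int>& axes,
    const std::vector<int>& starts, const std::vector<int>& ends) {
  std::vector<int> offset(dims.size(), 0);  // default: start at 0
  std::vector<int> extents = dims;          // default: keep the whole dim
  for (size_t i = 0; i < axes.size(); ++i) {
    offset[axes[i]] = starts[i];
    extents[axes[i]] = ends[i] - starts[i];
  }
  return {offset, extents};
}
// e.g. dims {4, 6}, axes {1}, starts {2}, ends {5}
//      -> offset {0, 2}, extents {4, 3}, i.e. ret(i, j) = x(i, 2 + j)
```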
private:
......@@ -338,14 +398,40 @@ struct DeviceIndependenceTensorOperations {
auto& dev_ctx = context.template device_context<DeviceContext>();
return platform::ForRange<DeviceContext>(dev_ctx, numel);
}
template <size_t D>
void EigenSliceWrapper(const framework::Tensor* in,
const std::vector<int>& start,
const std::vector<int>& end, framework::Tensor* out) {
// Slice by calling the Eigen Tensor function `.slice()`
size_t rank = in->dims().size();
PADDLE_ENFORCE_EQ(start.size(), rank,
platform::errors::InvalidArgument(
"EigenSliceWrapper function start "
"argument must have the same length as input rank."));
PADDLE_ENFORCE_EQ(end.size(), rank,
platform::errors::InvalidArgument(
"EigenSliceWrapper function end "
"argument must have the same length as input rank."));
auto eigen_place_ptr =
context.template device_context<DeviceContext>().eigen_device();
auto eigen_place = *eigen_place_ptr;
auto out_t = framework::EigenTensor<T, D>::From(*out, out->dims());
auto in_t = framework::EigenTensor<T, D>::From(*in, in->dims());
Eigen::DSizes<int, D> offsets_32bit, extents_32bit;
for (size_t i = 0; i < D; i++) {
offsets_32bit[i] = start[i];
extents_32bit[i] = end[i];
}
EigenSlice<std::decay_t<decltype(eigen_place)>, T, D>::Eval(
eigen_place, framework::To32BitIndex(out_t),
framework::To32BitIndex(in_t), offsets_32bit, extents_32bit);
}
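For context, this is the Eigen `.slice()` primitive the wrapper evaluates, shown in a minimal host-side example (assumes Eigen's unsupported Tensor module is available; not part of the commit):

```cpp
#include <unsupported/Eigen/CXX11/Tensor>

void EigenSliceDemo() {
  Eigen::Tensor<float, 2> in(4, 6);
  in.setRandom();
  Eigen::DSizes<int, 2> offsets(0, 2);  // start index per dimension
  Eigen::DSizes<int, 2> extents(4, 3);  // slice length per dimension
  Eigen::Tensor<float, 2> out = in.slice(offsets, extents);
  // out(i, j) == in(i, 2 + j)
}
```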
framework::Tensor CreateOpRunAndReturnTensor(
const std::string& type, const NameInTensorMap& inputs,
const framework::AttributeMap& attrs, std::vector<int> out_shape,
NameOutTensor out_str = {"Out"}) {
// variables whose dims are set must be LoDTensor / SelectedRows
framework::Scope& local_scope = context.scope().NewScope();
framework::VariableNameMap op_outputs;
for (auto out_name : out_str) {
local_scope.Var("tmp_" + out_name)->GetMutable<framework::LoDTensor>();
......@@ -373,6 +459,7 @@ struct DeviceIndependenceTensorOperations {
}
op_inputs[item.first] = name_vector;
}
auto op =
framework::OpRegistry::CreateOp(type, op_inputs, op_outputs, attrs);
op->Run(local_scope, context.GetPlace());
......
......@@ -54,7 +54,6 @@ class SvdCPUKernel : public framework::OpKernel<T> {
size_t(batches * col_v * cols * sizeof(math::Real<T>)));
auto* S_out = S->mutable_data<math::Real<T>>(
context.GetPlace(), size_t(batches * k * sizeof(math::Real<T>)));
/*SVD Use the Eigen Library*/
math::BatchSvd<T>(x_data, U_out, VH_out, S_out, rows, cols, batches, full);
}
......@@ -96,7 +95,7 @@ class SvdGradKernel : public framework::OpKernel<T> {
auto s_square = dito.Pow(S, 2);
auto F =
dito.Sub(dito.Unsqueeze(s_square, -2), dito.Unsqueeze(s_square, -1));
F = dito.Add(F, dito.Diag(dito.Infinits({k}, U.type())));
F = dito.Add(F, dito.Diag(dito.Infinits({k})));
F = dito.Pow(F, -1);
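The three lines above build the F matrix used throughout the backward pass: the broadcasted subtraction gives s_j^2 - s_i^2, and adding an infinite diagonal before the elementwise reciprocal makes the diagonal entries 1/inf = 0. In math form (a restatement of the code, not new behavior):

```latex
F_{ij} =
\begin{cases}
  \dfrac{1}{s_j^2 - s_i^2}, & i \neq j \\
  0, & i = j
\end{cases}
```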
Tensor sigma_term;
Tensor u_term;
......@@ -115,8 +114,7 @@ class SvdGradKernel : public framework::OpKernel<T> {
u_term = dito.Mul(dito.Mul(dito.Sub(UTG, GTU), F), dito.Unsqueeze(S, -2));
u_term = dito.Matmul(U, u_term);
if (m > k) {
auto project =
dito.Sub(dito.Eye(m, U.type()), dito.Matmul(U, U, false, true));
auto project = dito.Sub(dito.Eye(m), dito.Matmul(U, U, false, true));
u_term = dito.Add(u_term, dito.Mul(dito.Matmul(project, dU),
dito.Unsqueeze(s_inverse, -2)));
}
......@@ -129,8 +127,7 @@ class SvdGradKernel : public framework::OpKernel<T> {
v_term = dito.Mul(dito.Matmul(dito.Mul(dito.Sub(UTG, GTU), F), VH),
dito.Unsqueeze(S, -1));
if (n > k) {
auto project =
dito.Sub(dito.Eye(n, U.type()), dito.Matmul(VH, VH, true, false));
auto project = dito.Sub(dito.Eye(n), dito.Matmul(VH, VH, true, false));
v_term = dito.Add(v_term, dito.Mul(dito.Matmul(dVH, project),
dito.Unsqueeze(s_inverse, -1)));
}
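Taken together, sigma_term, u_term, and v_term assemble what appears to be the standard SVD backward formula (a sketch for orientation, not asserted from this diff alone; bars denote incoming gradients, \Sigma = diag(S), F as above, and the projection terms correspond to the m > k and n > k branches):

```latex
\bar{X} = U\,\bar{\Sigma}\,V^{T}
        + U\big[F \circ (U^{T}\bar{U} - \bar{U}^{T}U)\big]\Sigma V^{T}
        + (I_m - UU^{T})\,\bar{U}\,\Sigma^{-1}V^{T}
        + U\Sigma\big[F \circ (V^{T}\bar{V} - \bar{V}^{T}V)\big]V^{T}
        + U\Sigma^{-1}\bar{V}^{T}(I_n - VV^{T})
```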
......
......@@ -100,7 +100,6 @@ from .tensor.linalg import bmm # noqa: F401
from .tensor.linalg import histogram # noqa: F401
from .tensor.linalg import mv # noqa: F401
from .tensor.linalg import matrix_power # noqa: F401
from .tensor.linalg import svd # noqa: F401
from .tensor.logic import equal # noqa: F401
from .tensor.logic import greater_equal # noqa: F401
from .tensor.logic import greater_than # noqa: F401
......@@ -498,7 +497,6 @@ __all__ = [ # noqa
'sqrt',
'cholesky',
'matrix_power',
'svd',
'randperm',
'linspace',
'reshape',
......
......@@ -889,7 +889,7 @@ set_tests_properties(test_multiprocess_dataloader_iterable_dataset_static PROPER
set_tests_properties(test_lstm_cudnn_op PROPERTIES TIMEOUT 120)
set_tests_properties(test_stack_op PROPERTIES TIMEOUT 120)
set_tests_properties(test_bilinear_interp_v2_op PROPERTIES TIMEOUT 120)
set_tests_properties(test_svd_op PROPERTIES TIMEOUT 120)
set_tests_properties(test_svd_op PROPERTIES TIMEOUT 40)
set_tests_properties(test_deformable_psroi_pooling PROPERTIES TIMEOUT 120)
set_tests_properties(test_trilinear_interp_v2_op PROPERTIES TIMEOUT 120)
set_tests_properties(test_imperative_static_runner_mnist PROPERTIES TIMEOUT 120)
......
......@@ -45,7 +45,6 @@ from .linalg import bmm # noqa: F401
from .linalg import histogram # noqa: F401
from .linalg import mv # noqa: F401
from .linalg import matrix_power # noqa: F401
from .linalg import svd # noqa: F401
from .logic import equal # noqa: F401
from .logic import greater_equal # noqa: F401
from .logic import greater_than # noqa: F401
......@@ -226,7 +225,6 @@ tensor_method_func = [ #noqa
'histogram',
'mv',
'matrix_power',
'svd',
'abs',
'acos',
'all',
......
......@@ -1036,46 +1036,51 @@ def mv(x, vec, name=None):
def svd(x, full_matrices=False, name=None):
r"""
Computes the singular value decomposition of one
matrix or batches of regular matrice.
Computes the singular value decomposition of one matrix or a batch of regular matrices.
Let :math:`X` be the input matrix or a batch of input matrices; the output satisfies:
.. math::
X = U * diag(S) * VT
Args:
x (Tensor): The input tensor. Its shape should be `[..., N, M]`,
where ... is zero or more batch dimensions. N and M can be arbitraty
where `...` is zero or more batch dimensions. N and M can be arbitrary
positive numbers. Note that if x is a singular matrix, the gradient is numerically
instability. The data type of x should be float32 or float64.
full_matrices(bool): A flag to control the behavor of svd.
unstable. The data type of x should be float32 or float64.
full_matrices (bool): A flag to control the behavior of svd.
If full_matrices = True, svd op will compute full U and V matrices,
which means shape of U is `[..., N, N]`, shape of V is `[..., M, M]`.
which means shape of U is `[..., N, N]`, shape of V is `[..., M, M]`. K = min(M, N).
If full_matrices = False, svd op will use an economic method to store U and V,
which means shape of U is `[..., N, K]`, shape of V is `[..., M, K]`
which means shape of U is `[..., N, K]`, shape of V is `[..., M, K]`. K = min(M, N).
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
Returns:
Tensor: Tensor U, the shape of U is controlled by full_matrices flag.
Tensor: Tensor S, the singular value of X. the shape of S is [..., K]
Tensor: Tensor VH, the conjugate transpose of V. the shape of V is controlled by full_matrices flag.
Tuple of 3 tensors: (U, S, VH). VH is the conjugate transpose of V. S is the vector of singular values, with shape `[..., K]`.
import numpy as np
Examples:
.. code-block:: python
import paddle
x = paddle.to_tensor([[1.0, 2.0], [1.0, 3.0], [4.0, 6.0]]).astype('float64')
x = x.reshape([3, 2])
u, s, vt = paddle.linalg.svd(x)
u, s, vh = paddle.linalg.svd(x)
print (u)
print (s)
print (vt)
#U = [[ 0.27364809, -0.21695147 ],
# [ 0.37892198, -0.87112408 ],
# [ 0.8840446 , 0.44053933 ]]
print (s)
#S = [8.14753743, 0.78589688]
print (vh)
#VT= [[ 0.51411221, 0.85772294],
# [ 0.85772294, -0.51411221]]
# one can verify : U * S * VT = X ;
# U * UH = I ;
# V * VH = I
# one can verify : U * S * VT == X
# U * UH == I
# V * VH == I
"""
if in_dygraph_mode():
......