未验证 提交 7e5fb462 编写于 作者: X xiongkun 提交者: GitHub

Add SVD Op and it's GPU and CPU kernel (#34953)

* Add SVD Op and it's GPU and CPU kernel

* Remove CUDAPlace in test_svd_op, make the test available in CPU package

* modfity the file

* fix windows bug/ fix ROCM / fix test timeout

* for pass the CIs

* improve error report

* for code review

* some modification to test_svd_op

* change python code style

* expose the svd interface for document
上级 e57a88b3
...@@ -183,6 +183,7 @@ function(op_library TARGET) ...@@ -183,6 +183,7 @@ function(op_library TARGET)
list(REMOVE_ITEM miopen_cu_cc_srcs "affine_grid_cudnn_op.cu.cc") list(REMOVE_ITEM miopen_cu_cc_srcs "affine_grid_cudnn_op.cu.cc")
list(REMOVE_ITEM miopen_cu_cc_srcs "grid_sampler_cudnn_op.cu.cc") list(REMOVE_ITEM miopen_cu_cc_srcs "grid_sampler_cudnn_op.cu.cc")
list(REMOVE_ITEM hip_srcs "cholesky_op.cu") list(REMOVE_ITEM hip_srcs "cholesky_op.cu")
list(REMOVE_ITEM hip_srcs "svd_op.cu")
list(REMOVE_ITEM hip_srcs "multinomial_op.cu") list(REMOVE_ITEM hip_srcs "multinomial_op.cu")
list(REMOVE_ITEM hip_srcs "decode_jpeg_op.cu") list(REMOVE_ITEM hip_srcs "decode_jpeg_op.cu")
hip_library(${TARGET} SRCS ${cc_srcs} ${hip_cc_srcs} ${miopen_cu_cc_srcs} ${miopen_cu_srcs} ${mkldnn_cc_srcs} ${hip_srcs} DEPS ${op_library_DEPS} hip_library(${TARGET} SRCS ${cc_srcs} ${hip_cc_srcs} ${miopen_cu_cc_srcs} ${miopen_cu_srcs} ${mkldnn_cc_srcs} ${hip_srcs} DEPS ${op_library_DEPS}
......
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <Eigen/src/Core/util/Constants.h>
#include <Eigen/Dense>
#include <Eigen/SVD>
#include <iostream>
#include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/functors.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/for_range.h"
namespace paddle {
namespace operators {
namespace math {
using Tensor = framework::Tensor;
using InTensors = std::vector<const Tensor*>;
using OutTensors = std::vector<Tensor*>;
using OpName = std::string;
template <typename T>
void EigenSvd(const T* X, T* U, T* VH, T* S, int rows, int cols,
int full = false) {
auto flag = Eigen::DecompositionOptions::ComputeThinU |
Eigen::DecompositionOptions::ComputeThinV;
if (full) {
flag = Eigen::DecompositionOptions::ComputeFullU |
Eigen::DecompositionOptions::ComputeFullV;
}
Eigen::BDCSVD<
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
svd(2, 2, flag);
/*NOTE(xiongkun03) Eigen::Matrix API need non-const pointer.*/
T* input = const_cast<T*>(X);
auto m = Eigen::Map<
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>(
input, rows, cols);
svd.compute(m);
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> V_trans =
svd.matrixV().transpose();
memcpy(U, svd.matrixU().data(), svd.matrixU().size() * sizeof(T));
memcpy(VH, V_trans.data(), V_trans.size() * sizeof(T));
memcpy(S, svd.singularValues().data(),
svd.singularValues().size() * sizeof(T));
}
template <typename T>
void BatchSvd(const T* X, T* U, T* VH, T* S, int rows, int cols, int batches,
int full = false) {
int stride = rows * cols;
int k = std::min(rows, cols);
int stride_u = full ? rows * rows : k * rows;
int stride_v = full ? cols * cols : k * cols;
for (int i = 0; i < batches; ++i) {
EigenSvd<T>(X + i * stride, U + i * stride_u, VH + i * stride_v, S + i * k,
rows, cols, full);
}
return;
}
template <typename T>
struct PowFunctor {
PowFunctor(const T* input, T* output, int64_t numel, float exp)
: input_(input), output_(output), numel_(numel), exp_(exp) {}
HOSTDEVICE void operator()(int64_t idx) const {
output_[idx] = pow(input_[idx], exp_);
}
const T* input_;
T* output_;
int64_t numel_;
float exp_;
};
static std::vector<int> GetBroadcastShape(InTensors ins) {
// TODO(xiongkun03) check the operators and output
PADDLE_ENFORCE_EQ(ins.size(), 2, platform::errors::InvalidArgument(
"GetBroadcastShape Receive 2 tensors"
"but got [%d]",
ins.size()));
auto x_dim = ins[0]->dims();
auto y_dim = ins[1]->dims();
std::vector<int> broadcast_shape =
(x_dim.size() > y_dim.size() ? framework::vectorize<int>(x_dim)
: framework::vectorize<int>(y_dim));
int rank_min = std::min(x_dim.size(), y_dim.size());
int rank_x = x_dim.size();
int rank_y = y_dim.size();
int final_rank = broadcast_shape.size();
for (int i = 1; i <= rank_min; ++i) {
if (x_dim[rank_x - i] == y_dim[rank_y - i]) {
broadcast_shape[final_rank - i] = x_dim[rank_x - i];
continue;
}
if (x_dim[rank_x - i] == 1) {
broadcast_shape[final_rank - i] = y_dim[rank_y - i];
continue;
}
if (y_dim[rank_y - i] == 1) {
broadcast_shape[final_rank - i] = x_dim[rank_x - i];
continue;
}
PADDLE_THROW(platform::errors::InvalidArgument(
"Wrong Input Shape in broadcast operator: "
"Input(X)'s shape must follow the broadcast rule with Input(Y)'s "
"shape, but received [%s] (X) vs [%s] (Y).",
x_dim, y_dim));
}
return broadcast_shape;
}
template <typename DeviceContext, typename T>
struct DeviceIndependenceTensorOperations {
// 1. Device indenpendence, for kernel reuse.
// 2. Input and output is always tensor type.
// 3. output Tensor is alway allocated
// 4. Basic Tensor operator is supported
// 5. The Reused Operator Kernel should only be considered as
// a wrap function
using NameInTensorMap =
std::map<std::string, std::vector<const framework::Tensor*>>;
using NameOutTensor = std::vector<std::string>;
explicit DeviceIndependenceTensorOperations(
const framework::ExecutionContext& context)
: context(context) {}
framework::Tensor Pow(const framework::Tensor& x, float exp) {
framework::Tensor out;
auto for_range = GetForRange(x.numel());
int numel = x.numel();
PowFunctor<T> functor(x.data<T>(), out.mutable_data<T>(x.dims(), x.place()),
numel, exp);
for_range(functor);
return out;
}
framework::Tensor Matmul(const framework::Tensor& mat_a,
const framework::Tensor& mat_b, bool trans_a = false,
bool trans_b = false) {
framework::AttributeMap attrs;
attrs["trans_x"] = trans_a;
attrs["trans_y"] = trans_b;
NameInTensorMap inputs({{"X", {&mat_a}}, {"Y", {&mat_b}}});
auto a_dim = mat_a.dims();
auto b_dim = mat_b.dims();
std::vector<int> x_vec = framework::vectorize<int>(a_dim);
x_vec[x_vec.size() - 2] = a_dim[a_dim.size() - (trans_a ? 1 : 2)];
x_vec[x_vec.size() - 1] = b_dim[b_dim.size() - (trans_b ? 2 : 1)];
return CreateOpRunAndReturnTensor("matmul_v2", inputs, attrs, x_vec);
}
// transpose the last two dimision
framework::Tensor Transpose(const framework::Tensor& x) {
framework::Tensor out;
auto x_dim = x.dims();
auto x_vec = framework::vectorize<int>(x_dim);
int rank = x_vec.size();
std::swap(x_vec[rank - 1], x_vec[rank - 2]);
std::vector<int> out_shape = x_vec;
std::vector<int> axis(rank);
for (int i = 0; i < rank; ++i) {
axis[i] = i;
}
std::swap(axis[rank - 1], axis[rank - 2]);
framework::AttributeMap attrs;
attrs["axis"] = axis;
NameInTensorMap inputs({{"X", {&x}}});
return CreateOpRunAndReturnTensor("transpose2", inputs, attrs, out_shape,
{"Out", "XShape"});
}
framework::Tensor Diag(const framework::Tensor& x, int offset = 0,
int padding_value = 0) {
framework::AttributeMap attrs;
attrs["offset"] = offset;
attrs["padding_value"] = padding_value;
NameInTensorMap inputs({{"X", {&x}}});
int x_rank = x.dims().size();
std::vector<int> out_shape;
if (x_rank == 2) {
PADDLE_ENFORCE_EQ(x.dims()[0], x.dims()[1],
platform::errors::InvalidArgument(
"if X is a Matrix, then X must be square"));
out_shape.push_back(x.dims()[0]);
} else if (x_rank == 1) {
out_shape.push_back(x.dims()[0]);
out_shape.push_back(x.dims()[0]);
} else {
PADDLE_THROW(
platform::errors::InvalidArgument("Rank must less or equal than 2"));
}
return CreateOpRunAndReturnTensor("diag_v2", inputs, attrs, out_shape);
}
framework::Tensor Add(const framework::Tensor& x,
const framework::Tensor& y) {
InTensors ins({&x, &y});
framework::AttributeMap attrs;
attrs["axis"] = -1;
std::vector<int> out_shape = GetBroadcastShape({&x, &y});
NameInTensorMap inputs({{"X", {&x}}, {"Y", {&y}}});
return CreateOpRunAndReturnTensor("elementwise_add", inputs, attrs,
out_shape);
}
framework::Tensor Mul(const framework::Tensor& x,
const framework::Tensor& y) {
InTensors ins({&x, &y});
framework::AttributeMap attrs;
attrs["axis"] = -1;
std::vector<int> out_shape = GetBroadcastShape({&x, &y});
NameInTensorMap inputs({{"X", {&x}}, {"Y", {&y}}});
return CreateOpRunAndReturnTensor("elementwise_mul", inputs, attrs,
out_shape);
}
framework::Tensor Sub(const framework::Tensor& x,
const framework::Tensor& y) {
InTensors ins({&x, &y});
framework::AttributeMap attrs;
attrs["axis"] = -1;
std::vector<int> out_shape = GetBroadcastShape({&x, &y});
NameInTensorMap inputs({{"X", {&x}}, {"Y", {&y}}});
return CreateOpRunAndReturnTensor("elementwise_sub", inputs, attrs,
out_shape);
}
const framework::Tensor Unsqueeze(const framework::Tensor& x, int axis = 0) {
// don't copy data, only change the dims
framework::Tensor out;
out.ShareDataWith(x);
std::vector<int> out_shape = framework::vectorize<int>(x.dims());
if (axis >= 0) {
auto index = (out_shape.begin() + axis);
out_shape.insert(index, 1);
} else if (axis < 0) {
auto index = (out_shape.end() + axis + 1);
out_shape.insert(index, 1);
}
out.Resize(framework::make_ddim(out_shape));
return out;
}
framework::Tensor Zeros(std::vector<int> shape,
framework::proto::VarType::Type dtype,
float fill_value) {
framework::AttributeMap attrs;
attrs["dtype"] = dtype;
attrs["shape"] = shape;
attrs["value"] = fill_value;
NameInTensorMap inputs({});
return CreateOpRunAndReturnTensor("fill_constant", inputs, attrs, shape);
}
framework::Tensor Infinits(std::vector<int> shape,
framework::proto::VarType::Type dtype) {
framework::AttributeMap attrs;
attrs["dtype"] = dtype;
attrs["shape"] = shape;
attrs["str_value"] = std::string("inf");
NameInTensorMap inputs({});
return CreateOpRunAndReturnTensor("fill_constant", inputs, attrs, shape);
}
framework::Tensor Eye(int n, framework::proto::VarType::Type dtype) {
auto output = Zeros({n}, dtype, 1);
auto ret = Diag(output);
return ret;
}
framework::Tensor Slice(const framework::Tensor& x, std::vector<int> axes,
std::vector<int> starts, std::vector<int> ends) {
std::vector<int> new_axes = axes;
NameInTensorMap inputs({{"Input", {&x}}});
std::vector<int> out_shape = framework::vectorize<int>(x.dims());
int rank = out_shape.size();
PADDLE_ENFORCE_EQ(
axes.size(), starts.size(),
platform::errors::InvalidArgument("Slice Operator Argument Invalided"));
PADDLE_ENFORCE_EQ(
ends.size(), starts.size(),
platform::errors::InvalidArgument("Slice Operator Argument Invalided"));
for (unsigned int i = 0; i < axes.size(); ++i) {
int axis = axes[i];
if (axis < 0) axis = rank + axis;
new_axes[i] = axis; // change negative to positive
int st = starts[i];
int ed = ends[i];
PADDLE_ENFORCE_GT(ed, st,
platform::errors::InvalidArgument(
"C++ Slice Operation Not Support End < Start"));
out_shape[axis] = ed - st;
}
framework::AttributeMap attrs;
attrs["axes"] = new_axes;
attrs["starts"] = starts;
attrs["ends"] = ends;
return CreateOpRunAndReturnTensor("slice", inputs, attrs, out_shape);
}
private:
const framework::ExecutionContext& context;
BlasT<DeviceContext, T> GetBlas() {
return math::GetBlas<DeviceContext, T>(context);
}
platform::ForRange<DeviceContext> GetForRange(int numel) {
auto& dev_ctx = context.template device_context<DeviceContext>();
return platform::ForRange<DeviceContext>(dev_ctx, numel);
}
framework::Tensor CreateOpRunAndReturnTensor(
const std::string& type, const NameInTensorMap& inputs,
const framework::AttributeMap& attrs, std::vector<int> out_shape,
NameOutTensor out_str = {"Out"}) {
// varialble set dims must be LoDTensor / SelectedRowTensor
framework::Scope& local_scope = context.scope().NewScope();
framework::VariableNameMap op_outputs;
for (auto out_name : out_str) {
local_scope.Var("tmp_" + out_name)->GetMutable<framework::LoDTensor>();
op_outputs[out_name].emplace_back("tmp_" + out_name);
}
auto out_var = local_scope.Var("tmp_Out"); // return the Out
// create Out Tensor and allocat memory
out_var->GetMutable<framework::LoDTensor>()->mutable_data<T>(
framework::make_ddim(out_shape), context.GetPlace());
// framework::make_ddim(out_shape)
framework::VariableNameMap op_inputs;
int counter = 0;
for (auto item : inputs) {
auto& tensors = item.second;
std::vector<std::string> name_vector;
for (auto each_tensor : tensors) {
// create score variable and reset the tensor.
std::string _name = "tmp" + std::to_string(counter++);
auto in_var = local_scope.Var(_name); // create
framework::LoDTensor tmp_tns;
tmp_tns.ShareDataWith(*each_tensor); // tensor -> lodtensor
(*in_var->GetMutable<framework::LoDTensor>()) =
tmp_tns; // initialize and set value
name_vector.emplace_back(_name);
}
op_inputs[item.first] = name_vector;
}
auto op =
framework::OpRegistry::CreateOp(type, op_inputs, op_outputs, attrs);
op->Run(local_scope, context.GetPlace());
framework::Tensor out;
out.ShareDataWith(*(out_var->GetMutable<framework::LoDTensor>()));
out.Resize(framework::make_ddim(out_shape));
context.scope().DeleteScope(&local_scope);
return out;
}
};
} // namespace math
} // namespace operators
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/svd_op.h"
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/ddim.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
namespace paddle {
namespace operators {
using DDim = framework::DDim;
static DDim UDDim(const DDim& x_dim, int k) {
// get x_dim and return the ddim of U
auto x_vec = vectorize(x_dim);
x_vec[x_vec.size() - 1] = k;
return framework::make_ddim(x_vec);
}
static DDim VHDDim(const DDim& x_dim, int k) {
// get x_dim and return the ddim of U
auto x_vec = vectorize(x_dim);
x_vec[x_vec.size() - 2] = k;
return framework::make_ddim(x_vec);
}
static DDim SDDim(const DDim& x_dim, int k) {
// get x_dim and return the ddim of U
auto x_vec = vectorize(x_dim);
x_vec[x_vec.size() - 2] = k;
x_vec.erase(x_vec.end() - 1); // rank - 1
return framework::make_ddim(x_vec);
}
class SvdOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "svd");
OP_INOUT_CHECK(ctx->HasOutput("U"), "Output", "U", "svd");
OP_INOUT_CHECK(ctx->HasOutput("VH"), "Output", "VH", "svd");
OP_INOUT_CHECK(ctx->HasOutput("S"), "Output", "S", "svd");
auto in_dims = ctx->GetInputDim("X");
int x_rank = in_dims.size();
PADDLE_ENFORCE_GE(in_dims.size(), 2,
platform::errors::InvalidArgument(
"the rank of input must greater than 2"));
int m = in_dims[x_rank - 2];
int n = in_dims[x_rank - 1];
int k = std::min(m, n);
const bool full_uv = ctx->Attrs().Get<bool>("full_matrices");
ctx->SetOutputDim("U", !full_uv ? UDDim(in_dims, k) : UDDim(in_dims, m));
ctx->SetOutputDim("VH", !full_uv ? VHDDim(in_dims, k) : VHDDim(in_dims, n));
ctx->SetOutputDim("S", SDDim(in_dims, k));
ctx->ShareLoD("X", /*->*/ "U");
ctx->ShareLoD("X", /*->*/ "VH");
ctx->ShareLoD("X", /*->*/ "S");
}
};
class SvdOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X", "(Tensor), The input tensor of svd op.");
AddOutput("U", "(Tensor), The output U tensor of svd op.");
AddOutput("S", "(Tensor), The output S tensor of svd op.");
AddOutput("VH", "(Tensor), The output VH tensor of svd op.");
AddAttr<bool>("full_matrices",
"(bool, default false) Only Compute the thin U and V"
"when set as True, the gradient have some random "
"attribute.")
.SetDefault(false);
AddComment(R"DOC(
Svd Operator.
This operator is used to perform SVD operation for batched matrics $X$.
$$U, S, VH = svd(X)$$
)DOC");
}
};
class SvdGradOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("U")), "Input",
"U@Grad", "SvdGrad");
OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("VH")), "Input",
"VH@Grad", "SvdGrad");
OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("S")), "Input",
"S@Grad", "SvdGrad");
OP_INOUT_CHECK(ctx->HasInput("U"), "Input", "U", "SvdGrad");
OP_INOUT_CHECK(ctx->HasInput("S"), "Input", "S", "SvdGrad");
OP_INOUT_CHECK(ctx->HasInput("VH"), "Input", "VH", "SvdGrad");
OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("X")), "Output",
"X@Grad", "SvdGrad");
auto d_x = ctx->GetInputDim(("X"));
ctx->SetOutputDim(framework::GradVarName("X"), d_x);
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
auto dtype = OperatorWithKernel::IndicateVarDataType(ctx, "X");
return framework::OpKernelType(dtype, ctx.GetPlace());
}
};
template <typename T>
class SvdGradMaker : public framework::SingleGradOpMaker<T> {
public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
void Apply(GradOpPtr<T> retv) const override {
retv->SetType("svd_grad");
retv->SetInput(framework::GradVarName("U"), this->OutputGrad("U"));
retv->SetInput(framework::GradVarName("VH"), this->OutputGrad("VH"));
retv->SetInput(framework::GradVarName("S"), this->OutputGrad("S"));
retv->SetInput("U", this->Output("U"));
retv->SetInput("VH", this->Output("VH"));
retv->SetInput("S", this->Output("S"));
retv->SetInput("X", this->Input("X"));
retv->SetAttrMap(this->Attrs());
retv->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(svd, ops::SvdOp, ops::SvdOpMaker,
ops::SvdGradMaker<paddle::framework::OpDesc>,
ops::SvdGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(svd_grad, ops::SvdGradOp);
REGISTER_OP_CPU_KERNEL(svd, ops::SvdCPUKernel<float>,
ops::SvdCPUKernel<double>);
REGISTER_OP_CPU_KERNEL(
svd_grad, ops::SvdGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::SvdGradKernel<paddle::platform::CPUDeviceContext, double>);
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef PADDLE_WITH_HIP
// HIP not support cusolver
#include <thrust/device_vector.h>
#include <algorithm>
#include <vector>
#include "paddle/fluid/memory/memory.h"
#include "paddle/fluid/operators/svd_op.h"
#include "paddle/fluid/platform/dynload/cusolver.h"
namespace paddle {
namespace operators {
template <typename T>
class SvdGPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto& dev_ctx =
context.template device_context<platform::CUDADeviceContext>();
const Tensor* x = context.Input<Tensor>("X");
Tensor* U = context.Output<Tensor>("U");
Tensor* VH = context.Output<Tensor>("VH");
Tensor* S = context.Output<Tensor>("S");
const bool full_matrices = context.Attr<bool>("full_matrices");
auto& dims = x->dims();
int batch_count = 1;
for (int i = 0; i < dims.size() - 2; i++) {
batch_count *= dims[i];
}
int rank = dims.size();
int m = dims[rank - 2];
int n = dims[rank - 1];
auto* vh_data = VH->mutable_data<T>(context.GetPlace());
auto* s_data = S->mutable_data<T>(context.GetPlace());
auto* u_data = U->mutable_data<T>(context.GetPlace());
// NOTE:(@xiongkun03)
// matrices are assumed to be stored in column-major order in cusolver
// then view A as n x m and do A^T SVD, we can avoid transpose
// Must Copy X once, because the gesvdj will change the origin input matrix
Tensor x_tmp;
TensorCopy(*x, context.GetPlace(), &x_tmp);
auto info = memory::Alloc(dev_ctx, sizeof(int) * batch_count);
int* info_ptr = reinterpret_cast<int*>(info->ptr());
GesvdjBatched(dev_ctx, batch_count, n, m, std::min(m, n),
x_tmp.mutable_data<T>(context.GetPlace()), vh_data, u_data,
s_data, info_ptr, !full_matrices);
framework::DDim UT_dim = U->dims();
std::swap(UT_dim[rank - 1], UT_dim[rank - 2]); // Get the dim of UT_dim
U->Resize(UT_dim); // U is entirely UT
auto dito =
math::DeviceIndependenceTensorOperations<platform::CUDADeviceContext,
T>(context);
auto tmp_U = dito.Transpose(*U);
U->ShareDataWith(tmp_U); // U becomse UT, aka VT
}
void GesvdjBatched(const platform::CUDADeviceContext& dev_ctx, int batchSize,
int m, int n, int k, T* A, T* U, T* V, T* S, int* info,
int thin_UV = 1) const;
};
template <>
void SvdGPUKernel<float>::GesvdjBatched(
const platform::CUDADeviceContext& dev_ctx, int batchSize, int m, int n,
int k, float* A, float* U, float* V, float* S, int* info,
int thin_UV) const {
/* compute singular vectors */
const cusolverEigMode_t jobz =
CUSOLVER_EIG_MODE_VECTOR; /* compute singular vectors */
gesvdjInfo_t gesvdj_params = NULL;
int lda = m;
int ldu = m;
int ldt = n;
int lwork = 0;
auto handle = dev_ctx.cusolver_dn_handle();
PADDLE_ENFORCE_CUDA_SUCCESS(
platform::dynload::cusolverDnCreateGesvdjInfo(&gesvdj_params));
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cusolverDnSgesvdj_bufferSize(
handle, jobz, thin_UV, m, n, A, lda, S, U, ldu, V, ldt, &lwork,
gesvdj_params));
auto workspace = memory::Alloc(dev_ctx, lwork * sizeof(float));
float* workspace_ptr = reinterpret_cast<float*>(workspace->ptr());
int stride_A = lda * n;
int stride_U = ldu * (thin_UV ? k : m);
int stride_V = ldt * (thin_UV ? k : n);
for (int i = 0; i < batchSize; ++i) {
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cusolverDnSgesvdj(
handle, jobz, thin_UV, m, n, A + stride_A * i, lda, S + k * i,
U + stride_U * i, ldu, V + stride_V * i, ldt, workspace_ptr, lwork,
info, gesvdj_params));
// check the error info
int error_info;
memory::Copy(platform::CPUPlace(), &error_info,
BOOST_GET_CONST(platform::CUDAPlace, dev_ctx.GetPlace()), info,
sizeof(int), dev_ctx.stream());
PADDLE_ENFORCE_EQ(
error_info, 0,
platform::errors::PreconditionNotMet(
"For batch [%d]: CUSolver SVD is not zero. [%d]", i, error_info));
}
PADDLE_ENFORCE_CUDA_SUCCESS(
platform::dynload::cusolverDnDestroyGesvdjInfo(gesvdj_params));
}
template <>
void SvdGPUKernel<double>::GesvdjBatched(
const platform::CUDADeviceContext& dev_ctx, int batchSize, int m, int n,
int k, double* A, double* U, double* V, double* S, int* info,
int thin_UV) const {
/* compute singular vectors */
const cusolverEigMode_t jobz =
CUSOLVER_EIG_MODE_VECTOR; /* compute singular vectors */
gesvdjInfo_t gesvdj_params = NULL;
int lda = m;
int ldu = m;
int ldt = n;
int lwork = 0;
auto handle = dev_ctx.cusolver_dn_handle();
PADDLE_ENFORCE_CUDA_SUCCESS(
platform::dynload::cusolverDnCreateGesvdjInfo(&gesvdj_params));
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cusolverDnDgesvdj_bufferSize(
handle, jobz, thin_UV, m, n, A, lda, S, U, ldu, V, ldt, &lwork,
gesvdj_params));
auto workspace = memory::Alloc(dev_ctx, lwork * sizeof(double));
double* workspace_ptr = reinterpret_cast<double*>(workspace->ptr());
int stride_A = lda * n;
int stride_U = ldu * (thin_UV ? k : m);
int stride_V = ldt * (thin_UV ? k : n);
for (int i = 0; i < batchSize; ++i) {
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cusolverDnDgesvdj(
handle, jobz, thin_UV, m, n, A + stride_A * i, lda, S + k * i,
U + stride_U * i, ldu, V + stride_V * i, ldt, workspace_ptr, lwork,
info, gesvdj_params));
// check the error info
int error_info;
memory::Copy(platform::CPUPlace(), &error_info,
BOOST_GET_CONST(platform::CUDAPlace, dev_ctx.GetPlace()), info,
sizeof(int), dev_ctx.stream());
PADDLE_ENFORCE_EQ(
error_info, 0,
platform::errors::PreconditionNotMet(
"For batch [%d]: CUSolver SVD is not zero. [%d]", i, error_info));
}
PADDLE_ENFORCE_CUDA_SUCCESS(
platform::dynload::cusolverDnDestroyGesvdjInfo(gesvdj_params));
}
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(svd, ops::SvdGPUKernel<float>,
ops::SvdGPUKernel<double>);
REGISTER_OP_CUDA_KERNEL(
svd_grad, ops::SvdGradKernel<paddle::platform::CUDADeviceContext, float>,
ops::SvdGradKernel<paddle::platform::CUDADeviceContext, double>);
#endif // not PADDLE_WITH_HIP
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cstdarg>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/math/complex_functors.h"
#include "paddle/fluid/operators/svd_helper.h"
#include "paddle/fluid/platform/for_range.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
using DDim = framework::DDim;
template <typename T>
class SvdCPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
const Tensor* x = context.Input<Tensor>("X");
Tensor* U = context.Output<Tensor>("U");
Tensor* VH = context.Output<Tensor>("VH");
Tensor* S = context.Output<Tensor>("S");
int full = context.Attr<bool>("full_matrices");
/*Create Tensors and output, set the dim ...*/
auto numel = x->numel();
auto* x_data = x->data<T>();
auto x_dims = x->dims();
int rows = x_dims[x_dims.size() - 2];
int cols = x_dims[x_dims.size() - 1];
int k = std::min(rows, cols);
int col_u = full ? rows : k;
int col_v = full ? cols : k;
int batches = numel / (rows * cols);
auto* U_out = U->mutable_data<math::Real<T>>(
context.GetPlace(),
size_t(batches * rows * col_u * sizeof(math::Real<T>)));
auto* VH_out = VH->mutable_data<math::Real<T>>(
context.GetPlace(),
size_t(batches * col_v * cols * sizeof(math::Real<T>)));
auto* S_out = S->mutable_data<math::Real<T>>(
context.GetPlace(), size_t(batches * k * sizeof(math::Real<T>)));
/*SVD Use the Eigen Library*/
math::BatchSvd<T>(x_data, U_out, VH_out, S_out, rows, cols, batches, full);
}
};
template <typename DeviceContext, typename T>
class SvdGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const {
const framework::Tensor& U_const = *ctx.Input<framework::Tensor>("U");
const framework::Tensor& VH_const = *ctx.Input<framework::Tensor>("VH");
const framework::Tensor& S = *ctx.Input<framework::Tensor>("S");
framework::Tensor& dX =
*ctx.Output<framework::Tensor>(framework::GradVarName("X"));
const framework::Tensor& dU_const =
*ctx.Input<framework::Tensor>(framework::GradVarName("U"));
const framework::Tensor& dVH_const =
*ctx.Input<framework::Tensor>(framework::GradVarName("VH"));
const bool full = ctx.Attr<bool>("full_matrices");
int m = dX.dims()[dX.dims().size() - 2];
int n = dX.dims()[dX.dims().size() - 1];
int k = S.dims()[S.dims().size() - 1];
auto dito = math::DeviceIndependenceTensorOperations<DeviceContext, T>(ctx);
framework::Tensor U, VH, dU, dV, dVH;
if (full) {
// if full_matrices is set, slice the U and VT to k columns
U = dito.Slice(U_const, {-1}, {0}, {k});
VH = dito.Slice(VH_const, {-2}, {0}, {k});
dU = dito.Slice(dU_const, {-1}, {0}, {k});
dVH = dito.Slice(dVH_const, {-2}, {0}, {k});
} else {
U = U_const;
VH = VH_const;
dU = dU_const;
dVH = dVH_const;
}
auto s_inverse = dito.Pow(S, -1);
auto s_square = dito.Pow(S, 2);
auto F =
dito.Sub(dito.Unsqueeze(s_square, -2), dito.Unsqueeze(s_square, -1));
F = dito.Add(F, dito.Diag(dito.Infinits({k}, U.type())));
F = dito.Pow(F, -1);
Tensor sigma_term;
Tensor u_term;
Tensor v_term;
if (ctx.HasInput(framework::GradVarName("S"))) {
const framework::Tensor& gS =
*ctx.Input<framework::Tensor>(framework::GradVarName("S"));
sigma_term = dito.Mul(dito.Unsqueeze(gS, -2), U);
sigma_term = dito.Matmul(sigma_term, VH);
}
if (ctx.HasInput(framework::GradVarName("U"))) {
auto UTG = dito.Matmul(U, dU, true, false);
auto GTU = dito.Matmul(dU, U, true, false);
u_term = dito.Mul(dito.Mul(dito.Sub(UTG, GTU), F), dito.Unsqueeze(S, -2));
u_term = dito.Matmul(U, u_term);
if (m > k) {
auto project =
dito.Sub(dito.Eye(m, U.type()), dito.Matmul(U, U, false, true));
u_term = dito.Add(u_term, dito.Mul(dito.Matmul(project, dU),
dito.Unsqueeze(s_inverse, -2)));
}
u_term = dito.Matmul(u_term, VH);
}
if (ctx.HasInput(framework::GradVarName("VH"))) {
auto UTG = dito.Matmul(VH, dVH, false, true);
auto GTU = dito.Matmul(dVH, VH, false, true);
v_term = dito.Mul(dito.Matmul(dito.Mul(dito.Sub(UTG, GTU), F), VH),
dito.Unsqueeze(S, -1));
if (n > k) {
auto project =
dito.Sub(dito.Eye(n, U.type()), dito.Matmul(VH, VH, true, false));
v_term = dito.Add(v_term, dito.Mul(dito.Matmul(dVH, project),
dito.Unsqueeze(s_inverse, -1)));
}
v_term = dito.Matmul(U, v_term);
}
dX.ShareDataWith(dito.Add(dito.Add(u_term, sigma_term), v_term));
}
};
} // namespace operators
} // namespace paddle
...@@ -55,7 +55,13 @@ CUSOLVER_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP); ...@@ -55,7 +55,13 @@ CUSOLVER_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP);
#if CUDA_VERSION >= 9020 #if CUDA_VERSION >= 9020
#define CUSOLVER_ROUTINE_EACH_R1(__macro) \ #define CUSOLVER_ROUTINE_EACH_R1(__macro) \
__macro(cusolverDnSpotrfBatched); \ __macro(cusolverDnSpotrfBatched); \
__macro(cusolverDnDpotrfBatched); __macro(cusolverDnDpotrfBatched); \
__macro(cusolverDnSgesvdj_bufferSize); \
__macro(cusolverDnDestroyGesvdjInfo); \
__macro(cusolverDnCreateGesvdjInfo); \
__macro(cusolverDnDgesvdj_bufferSize); \
__macro(cusolverDnSgesvdj); \
__macro(cusolverDnDgesvdj);
CUSOLVER_ROUTINE_EACH_R1(DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP) CUSOLVER_ROUTINE_EACH_R1(DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP)
#endif #endif
......
...@@ -100,6 +100,7 @@ from .tensor.linalg import bmm # noqa: F401 ...@@ -100,6 +100,7 @@ from .tensor.linalg import bmm # noqa: F401
from .tensor.linalg import histogram # noqa: F401 from .tensor.linalg import histogram # noqa: F401
from .tensor.linalg import mv # noqa: F401 from .tensor.linalg import mv # noqa: F401
from .tensor.linalg import matrix_power # noqa: F401 from .tensor.linalg import matrix_power # noqa: F401
from .tensor.linalg import svd # noqa: F401
from .tensor.logic import equal # noqa: F401 from .tensor.logic import equal # noqa: F401
from .tensor.logic import greater_equal # noqa: F401 from .tensor.logic import greater_equal # noqa: F401
from .tensor.logic import greater_than # noqa: F401 from .tensor.logic import greater_than # noqa: F401
...@@ -493,6 +494,7 @@ __all__ = [ # noqa ...@@ -493,6 +494,7 @@ __all__ = [ # noqa
'sqrt', 'sqrt',
'cholesky', 'cholesky',
'matrix_power', 'matrix_power',
'svd',
'randperm', 'randperm',
'linspace', 'linspace',
'reshape', 'reshape',
......
...@@ -858,6 +858,7 @@ set_tests_properties(test_multiprocess_dataloader_iterable_dataset_static PROPER ...@@ -858,6 +858,7 @@ set_tests_properties(test_multiprocess_dataloader_iterable_dataset_static PROPER
set_tests_properties(test_lstm_cudnn_op PROPERTIES TIMEOUT 120) set_tests_properties(test_lstm_cudnn_op PROPERTIES TIMEOUT 120)
set_tests_properties(test_stack_op PROPERTIES TIMEOUT 120) set_tests_properties(test_stack_op PROPERTIES TIMEOUT 120)
set_tests_properties(test_bilinear_interp_v2_op PROPERTIES TIMEOUT 120) set_tests_properties(test_bilinear_interp_v2_op PROPERTIES TIMEOUT 120)
set_tests_properties(test_svd_op PROPERTIES TIMEOUT 120)
set_tests_properties(test_deformable_psroi_pooling PROPERTIES TIMEOUT 120) set_tests_properties(test_deformable_psroi_pooling PROPERTIES TIMEOUT 120)
set_tests_properties(test_trilinear_interp_v2_op PROPERTIES TIMEOUT 120) set_tests_properties(test_trilinear_interp_v2_op PROPERTIES TIMEOUT 120)
set_tests_properties(test_imperative_static_runner_mnist PROPERTIES TIMEOUT 120) set_tests_properties(test_imperative_static_runner_mnist PROPERTIES TIMEOUT 120)
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
import paddle.fluid.core as core
from op_test import OpTest, skip_check_grad_ci
from gradient_checker import grad_check
from decorator_helper import prog_scope
class TestSvdOp(OpTest):
def setUp(self):
paddle.enable_static()
self.generate_input()
self.generate_output()
self.op_type = "svd"
assert (hasattr(self, "_output_data"))
self.inputs = {"X": self._input_data}
self.attrs = {'full_matrices': self.get_full_matrices_option()}
self.outputs = {
"U": self._output_data[0],
"S": self._output_data[1],
"VH": self._output_data[2]
}
def generate_input(self):
""" return a input_data and input_shape
"""
self._input_shape = (100, 1)
self._input_data = np.random.random(self._input_shape).astype("float64")
def get_full_matrices_option(self):
return False
def generate_output(self):
assert (hasattr(self, "_input_data"))
self._output_data = np.linalg.svd(self._input_data)
def test_check_output(self):
self.check_output(no_check_set=['U', 'VH'])
def test_svd_forward(self):
""" u matmul diag(s) matmul vt must become X
"""
single_input = self._input_data.reshape(
[-1, self._input_shape[-2], self._input_shape[-1]])[0]
paddle.disable_static()
dy_x = paddle.to_tensor(single_input)
dy_u, dy_s, dy_vt = paddle.linalg.svd(dy_x)
dy_out_x = dy_u.matmul(paddle.diag(dy_s)).matmul(dy_vt)
if (paddle.abs(dy_out_x - dy_x) < 1e-7).all():
...
else:
print("EXPECTED:\n", dy_x)
print("GOT :\n", dy_out_x)
raise RuntimeError("Check SVD Failed")
paddle.enable_static()
def check_S_grad(self):
self.check_grad(['X'], ['S'], numeric_grad_delta=0.001)
def check_U_grad(self):
self.check_grad(['X'], ['U'], numeric_grad_delta=0.001)
def check_V_grad(self):
self.check_grad(['X'], ['VH'], numeric_grad_delta=0.001)
def test_check_grad(self):
"""
remember the input matrix must be the full rank matrix, otherwise the gradient will stochatic because the u / v 's (n-k) freedom vectors
"""
self.check_S_grad()
self.check_U_grad()
self.check_V_grad()
class TestSvdCheckGrad2(TestSvdOp):
# NOTE(xiongkun03): because we want to construct some full rank matrics,
# so we can't specifize matrices which numel() > 100
no_need_check_grad = True
def generate_input(self):
""" return a deterministic matrix, the range matrix;
vander matrix must be a full rank matrix.
"""
self._input_shape = (5, 5)
self._input_data = np.vander(
[2, 3, 4, 5, 6]).astype("float64").reshape(self._input_shape)
class TestSvdNormalMatrixSmall(TestSvdCheckGrad2):
def generate_input(self):
""" small matrix SVD.
"""
self._input_shape = (1, 1)
self._input_data = np.random.random(self._input_shape).astype("float64")
class TestSvdNormalMatrix6x3(TestSvdCheckGrad2):
def generate_input(self):
""" return a deterministic matrix, the range matrix;
vander matrix must be a full rank matrix.
"""
self._input_shape = (6, 3)
self._input_data = np.array(
[[1.0, 2.0, 3.0], [0.0, 1.0, 5.0], [0.0, 0.0, 6.0],
[2.0, 4.0, 9.0], [3.0, 6.0, 8.0],
[3.0, 1.0, 0.0]]).astype("float64")
class TestSvdNormalMatrix3x6(TestSvdCheckGrad2):
def generate_input(self):
""" return a deterministic matrix, the range matrix;
vander matrix must be a full rank matrix.
"""
self._input_shape = (3, 6)
self._input_data = np.array(
[[1.0, 2.0, 3.0], [0.0, 1.0, 5.0], [0.0, 0.0, 6.0],
[2.0, 4.0, 9.0], [3.0, 6.0, 8.0],
[3.0, 1.0, 0.0]]).astype("float64")
self._input_data = self._input_data.transpose((-1, -2))
class TestSvdNormalMatrix6x3Batched(TestSvdOp):
def generate_input(self):
self._input_shape = (10, 6, 3)
self._input_data = np.array(
[[1.0, 2.0, 3.0], [0.0, 1.0, 5.0], [0.0, 0.0, 6.0],
[2.0, 4.0, 9.0], [3.0, 6.0, 8.0],
[3.0, 1.0, 0.0]]).astype("float64")
self._input_data = np.stack([self._input_data] * 10, axis=0)
def test_svd_forward(self):
""" test_svd_forward not support batched input, so disable this test.
"""
pass
class TestSvdNormalMatrix3x6Batched(TestSvdOp):
def generate_input(self):
""" return a deterministic matrix, the range matrix;
vander matrix must be a full rank matrix.
"""
self._input_shape = (10, 3, 6)
self._input_data = np.array(
[[1.0, 2.0, 3.0], [0.0, 1.0, 5.0], [0.0, 0.0, 6.0],
[2.0, 4.0, 9.0], [3.0, 6.0, 8.0],
[3.0, 1.0, 0.0]]).astype("float64")
self._input_data = self._input_data.transpose((-1, -2))
self._input_data = np.stack([self._input_data] * 10, axis=0)
def test_svd_forward(self):
""" test_svd_forward not support batched input, so disable this test.
"""
pass
class TestSvdNormalMatrix3x3x3x6Batched(TestSvdOp):
def generate_input(self):
""" return a deterministic matrix, the range matrix;
vander matrix must be a full rank matrix.
"""
self._input_shape = (3, 3, 3, 6)
self._input_data = np.array(
[[1.0, 2.0, 3.0], [0.0, 1.0, 5.0], [0.0, 0.0, 6.0],
[2.0, 4.0, 9.0], [3.0, 6.0, 8.0],
[3.0, 1.0, 0.0]]).astype("float64")
self._input_data = self._input_data.transpose((-1, -2))
self._input_data = np.stack(
[self._input_data, self._input_data, self._input_data], axis=0)
self._input_data = np.stack(
[self._input_data, self._input_data, self._input_data], axis=0)
def test_svd_forward(self):
""" test_svd_forward not support batched input, so disable this test.
"""
pass
@skip_check_grad_ci(reason="'check_grad' on large inputs is too slow, " +
"however it is desirable to cover the forward pass")
class TestSvdNormalMatrixBig(TestSvdOp):
def generate_input(self):
""" big matrix SVD.
"""
self._input_shape = (2, 200, 300)
self._input_data = np.random.random(self._input_shape).astype("float64")
def test_svd_forward(self):
""" test_svd_forward not support batched input, so disable this test.
"""
pass
def test_check_grad(self):
pass
class TestSvdNormalMatrixBig2(TestSvdOp):
def generate_input(self):
""" big matrix SVD.
"""
self._input_shape = (1, 100)
self._input_data = np.random.random(self._input_shape).astype("float64")
class TestSvdNormalMatrixFullMatrices(unittest.TestCase):
def setUp(self):
paddle.disable_static()
def tearDown(self):
paddle.enable_static()
def test_full_matrices(self):
mat_shape = (2, 3)
mat = np.random.random(mat_shape).astype("float64")
x = paddle.to_tensor(mat)
u, s, vh = paddle.linalg.svd(x, full_matrices=True)
assert (u.shape == [2, 2])
assert (vh.shape == [3, 3])
x_recover = u.matmul(paddle.diag(s)).matmul(vh[0:2])
if ((paddle.abs(x_recover - x) > 1e-4).any()):
raise RuntimeError("mat can't be recovered\n")
class TestSvdFullMatriceGrad(TestSvdNormalMatrix6x3):
def get_full_matrices_option(self):
return True
def test_svd_forward(self):
""" test_svd_forward not support full matrices, so disable this test.
"""
pass
def test_check_grad(self):
"""
remember the input matrix must be the full rank matrix, otherwise the gradient will stochatic because the u / v 's (n-k) freedom vectors
"""
self.check_S_grad()
#self.check_U_grad() // don't check U grad, because U have freedom vector
self.check_V_grad()
class TestSvdAPI(unittest.TestCase):
def test_dygraph(self):
paddle.disable_static()
a = np.random.rand(5, 5)
x = paddle.to_tensor(a)
u, s, vh = paddle.linalg.svd(x)
gt_u, gt_s, gt_vh = np.linalg.svd(a, full_matrices=False)
self.assertTrue(np.allclose(s, gt_s))
def test_static(self):
paddle.enable_static()
places = [fluid.CPUPlace()]
if core.is_compiled_with_cuda():
places.append(fluid.CUDAPlace(0))
for place in places:
with fluid.program_guard(fluid.Program(), fluid.Program()):
a = np.random.rand(5, 5)
x = paddle.fluid.data(
name="input", shape=[5, 5], dtype='float64')
u, s, vh = paddle.linalg.svd(x)
exe = fluid.Executor(place)
gt_u, gt_s, gt_vh = np.linalg.svd(a, full_matrices=False)
fetches = exe.run(fluid.default_main_program(),
feed={"input": a},
fetch_list=[s])
self.assertTrue(np.allclose(fetches[0], gt_s))
if __name__ == "__main__":
paddle.enable_static()
unittest.main()
...@@ -31,5 +31,6 @@ no_check_set_white_list = [ ...@@ -31,5 +31,6 @@ no_check_set_white_list = [
'rnn', 'rnn',
'fusion_lstm', 'fusion_lstm',
'softmax_with_cross_entropy', 'softmax_with_cross_entropy',
'svd',
'class_center_sample', 'class_center_sample',
] ]
...@@ -46,6 +46,7 @@ NEED_FIX_FP64_CHECK_GRAD_THRESHOLD_OP_LIST = [ ...@@ -46,6 +46,7 @@ NEED_FIX_FP64_CHECK_GRAD_THRESHOLD_OP_LIST = [
'cudnn_lstm', \ 'cudnn_lstm', \
'rnn', \ 'rnn', \
'lgamma', \ 'lgamma', \
'svd', \
'matrix_power', \ 'matrix_power', \
] ]
......
...@@ -16,10 +16,12 @@ from .tensor.linalg import cholesky # noqa: F401 ...@@ -16,10 +16,12 @@ from .tensor.linalg import cholesky # noqa: F401
from .tensor.linalg import norm # noqa: F401 from .tensor.linalg import norm # noqa: F401
from .tensor.linalg import matrix_power # noqa: F401 from .tensor.linalg import matrix_power # noqa: F401
from .tensor import inverse as inv # noqa: F401 from .tensor import inverse as inv # noqa: F401
from .tensor.linalg import svd
__all__ = [ __all__ = [
'cholesky', #noqa 'cholesky', #noqa
'norm', 'norm',
'inv', 'inv',
'svd',
'matrix_power' 'matrix_power'
] ]
...@@ -45,6 +45,7 @@ from .linalg import bmm # noqa: F401 ...@@ -45,6 +45,7 @@ from .linalg import bmm # noqa: F401
from .linalg import histogram # noqa: F401 from .linalg import histogram # noqa: F401
from .linalg import mv # noqa: F401 from .linalg import mv # noqa: F401
from .linalg import matrix_power # noqa: F401 from .linalg import matrix_power # noqa: F401
from .linalg import svd # noqa: F401
from .logic import equal # noqa: F401 from .logic import equal # noqa: F401
from .logic import greater_equal # noqa: F401 from .logic import greater_equal # noqa: F401
from .logic import greater_than # noqa: F401 from .logic import greater_than # noqa: F401
...@@ -223,6 +224,7 @@ tensor_method_func = [ #noqa ...@@ -223,6 +224,7 @@ tensor_method_func = [ #noqa
'histogram', 'histogram',
'mv', 'mv',
'matrix_power', 'matrix_power',
'svd',
'abs', 'abs',
'acos', 'acos',
'all', 'all',
......
...@@ -943,6 +943,70 @@ def mv(x, vec, name=None): ...@@ -943,6 +943,70 @@ def mv(x, vec, name=None):
return out return out
def svd(x, full_matrices=False, name=None):
r"""
Computes the singular value decomposition of one
matrix or batches of regular matrice.
Args:
x (Tensor): The input tensor. Its shape should be `[..., N, M]`,
where ... is zero or more batch dimensions. N and M can be arbitraty
positive number. Note that if x is sigular matrices, the grad is numerical
instability. The data type of x should be float32 or float64.
full_matrices(bool): A flag to control the behavor of svd.
If full_matrices = True, svd op will compute full U and V matrics,
which means shape of U is `[..., N, N]`, shape of V is `[..., M, M]`.
If full_matrices = False, svd op will use a economic method to store U and V.
which means shape of U is `[..., N, K]`, shape of V is `[..., M, K]`
Returns:
Tensor: Tensor U, the shape of U is controlled by full_matrices flag.
Tensor: Tensor S, the singular value of X. the shape of S is [..., K]
Tensor: Tensor VH, the conjugate transpose of V. the shape of V is controlled by full_matrices flag.
import numpy as np
x = paddle.to_tensor([[1.0, 2.0], [1.0, 3.0], [4.0, 6.0]]).astype('float64')
x = x.reshape([3, 2])
u, s, vt = paddle.linalg.svd(x)
print (u)
print (s)
print (vt)
#U = [[ 0.27364809, -0.21695147 ],
# [ 0.37892198, -0.87112408 ],
# [ 0.8840446 , 0.44053933 ]]
#S = [8.14753743, 0.78589688]
#VT= [[ 0.51411221, 0.85772294],
# [ 0.85772294, -0.51411221]]
# one can verify : U * S * VT = X ;
# U * UH = I ;
# V * VH = I
"""
if in_dygraph_mode():
return _C_ops.svd(x, 'full_matrices', full_matrices)
check_variable_and_dtype(x, 'dtype', ['float32', 'float64'], 'svd')
check_type(full_matrices, 'full_matrices', bool, 'svd')
helper = LayerHelper('svd', **locals())
u = helper.create_variable_for_type_inference(dtype=x.dtype)
vh = helper.create_variable_for_type_inference(dtype=x.dtype)
s = helper.create_variable_for_type_inference(dtype=x.dtype)
attrs = dict()
attrs['full_matrices'] = full_matrices
helper.append_op(
type='svd',
inputs={'X': [x]},
outputs={'U': u,
'VH': vh,
'S': s},
attr=attrs, )
return u, s, vh
def matrix_power(x, n, name=None): def matrix_power(x, n, name=None):
r""" r"""
Computes the n-th power of a square matrix or a batch of square matrices. Computes the n-th power of a square matrix or a batch of square matrices.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册