From effb70f46a5a93c086e5838e4f93564237f8fa45 Mon Sep 17 00:00:00 2001 From: crystal <62974595+Zjq9409@users.noreply.github.com> Date: Sun, 26 Sep 2021 18:46:56 +0800 Subject: [PATCH] [cherry-pick]CPU forward calculation replaces Eigen with Lapack (#35916) (#36091) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cherry-pick #35916: the CPU forward computation replaces Eigen with LAPACK, and the linalg exposure rules are changed --- paddle/fluid/operators/eigh_op.cc | 17 +- paddle/fluid/operators/eigh_op.cu | 17 +- paddle/fluid/operators/eigh_op.h | 7 +- .../operators/math/eigen_values_vectors.h | 273 ++++++++---------- .../fluid/operators/math/lapack_function.cc | 43 +++ paddle/fluid/operators/math/lapack_function.h | 13 +- paddle/fluid/platform/dynload/lapack.h | 21 ++ python/paddle/__init__.py | 1 - python/paddle/tensor/linalg.py | 4 +- 9 files changed, 217 insertions(+), 179 deletions(-) diff --git a/paddle/fluid/operators/eigh_op.cc b/paddle/fluid/operators/eigh_op.cc index 5577dfb8f8..6835951a23 100644 --- a/paddle/fluid/operators/eigh_op.cc +++ b/paddle/fluid/operators/eigh_op.cc @@ -147,18 +147,17 @@ REGISTER_OPERATOR(eigh, ops::EighOp, ops::EignOpMaker, REGISTER_OPERATOR(eigh_grad, ops::EighGradOp); REGISTER_OP_CPU_KERNEL( - eigh, ops::EighKernel, - ops::EighKernel, - ops::EighKernel, + ops::EighKernel, + ops::EighKernel>, - ops::EighKernel>); REGISTER_OP_CPU_KERNEL( - eigh_grad, - ops::EighGradKernel, - ops::EighGradKernel, - ops::EighGradKernel, + ops::EighGradKernel, + ops::EighGradKernel>, - ops::EighGradKernel>); diff --git a/paddle/fluid/operators/eigh_op.cu b/paddle/fluid/operators/eigh_op.cu index 61d2b66ea5..827c551637 100644 --- a/paddle/fluid/operators/eigh_op.cu +++ b/paddle/fluid/operators/eigh_op.cu @@ -16,18 +16,17 @@ limitations under the License. 
*/ namespace ops = paddle::operators; REGISTER_OP_CUDA_KERNEL( - eigh, ops::EighKernel, - ops::EighKernel, - ops::EighKernel, + ops::EighKernel, + ops::EighKernel>, - ops::EighKernel>); REGISTER_OP_CUDA_KERNEL( - eigh_grad, - ops::EighGradKernel, - ops::EighGradKernel, - ops::EighGradKernel, + ops::EighGradKernel, + ops::EighGradKernel>, - ops::EighGradKernel>); diff --git a/paddle/fluid/operators/eigh_op.h b/paddle/fluid/operators/eigh_op.h index 085e7531dd..ad9b0f5983 100644 --- a/paddle/fluid/operators/eigh_op.h +++ b/paddle/fluid/operators/eigh_op.h @@ -22,7 +22,7 @@ namespace operators { using Tensor = framework::Tensor; -template +template class EighKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { @@ -31,15 +31,16 @@ class EighKernel : public framework::OpKernel { auto output_v = ctx.Output("Eigenvectors"); std::string lower = ctx.Attr("UPLO"); bool is_lower = (lower == "L"); - math::MatrixEighFunctor functor; + math::MatrixEighFunctor functor; functor(ctx, *input, output_w, output_v, is_lower, true); } }; -template +template class EighGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { + using ValueType = math::Real; auto& x_grad = *ctx.Output(framework::GradVarName("X")); x_grad.mutable_data(ctx.GetPlace()); auto& output_w = *ctx.Input("Eigenvalues"); diff --git a/paddle/fluid/operators/math/eigen_values_vectors.h b/paddle/fluid/operators/math/eigen_values_vectors.h index 3c793c8906..01f05530e3 100644 --- a/paddle/fluid/operators/math/eigen_values_vectors.h +++ b/paddle/fluid/operators/math/eigen_values_vectors.h @@ -14,8 +14,8 @@ #pragma once -#include "Eigen/Core" #include "paddle/fluid/memory/memory.h" +#include "paddle/fluid/operators/math/lapack_function.h" #include "paddle/fluid/operators/svd_helper.h" #ifdef PADDLE_WITH_CUDA #include "paddle/fluid/platform/dynload/cusolver.h" @@ -25,84 +25,6 @@ namespace paddle { 
namespace operators { namespace math { -template -using InputMatrixMap = Eigen::Map< - const Eigen::Matrix>; - -template -using OutputMatrixMap = Eigen::Map< - Eigen::Matrix>; - -template -inline void ComputeFloatEigenvaluesAndVectors(ValueType *x_data, - ValueType *eigenvalues_data, - ValueType *eigenvectors_data, - int batches, int rows, int cols, - bool has_vectors) { - int stride = rows * cols; - for (int i = 0; i < batches; i++) { - auto m = InputMatrixMap(x_data + i * stride, rows, cols); - auto eigenvalues = - OutputMatrixMap(eigenvalues_data + i * rows, 1, rows); - auto eigenvectors = - OutputMatrixMap(eigenvectors_data + i * stride, rows, cols); - - Eigen::SelfAdjointEigenSolver> - eigen_solver(m, has_vectors ? Eigen::ComputeEigenvectors - : Eigen::EigenvaluesOnly); - PADDLE_ENFORCE_EQ( - eigen_solver.info(), Eigen::Success, - platform::errors::InvalidArgument( - "Self Adjoint Eigen decomposition is not successful. " - "The %d-th input matrice might not be not be positive definite.", - i)); - - eigenvalues = eigen_solver.eigenvalues().transpose(); - if (has_vectors) { - eigenvectors = eigen_solver.eigenvectors(); - } - } -} - -template -inline void ComputeComplexEigenvaluesAndVectors(T *x_data, - ValueType *eigenvalues_data, - T *eigenvectors_data, - int batches, int rows, int cols, - bool has_vectors) { - using Complex = std::complex; - Complex *input = reinterpret_cast(x_data); - Complex *eigenvectors_data_ = reinterpret_cast(eigenvectors_data); - - int stride = rows * cols; - for (int i = 0; i < batches; i++) { - auto m = InputMatrixMap(input + i * stride, rows, cols); - auto eigenvalues = - OutputMatrixMap(eigenvalues_data + i * rows, 1, rows); - auto eigenvectors = - OutputMatrixMap(eigenvectors_data_ + i * stride, rows, cols); - - Eigen::SelfAdjointEigenSolver< - Eigen::Matrix> - eigen_solver(m, has_vectors ? 
Eigen::ComputeEigenvectors - : Eigen::EigenvaluesOnly); - PADDLE_ENFORCE_EQ( - eigen_solver.info(), Eigen::Success, - platform::errors::InvalidArgument( - "Self Adjoint Eigen decomposition is not successful. " - "The %d-th input matrice might not be not be positive definite.", - i)); - - eigenvalues = eigen_solver.eigenvalues().transpose(); - if (has_vectors) { - eigenvectors = eigen_solver.eigenvectors(); - } - } -} - inline int64_t GetBatchSize(framework::DDim dims) { int64_t batch_size = 1; auto dim_size = dims.size(); @@ -112,7 +34,20 @@ inline int64_t GetBatchSize(framework::DDim dims) { return batch_size; } -template +static void CheckEighResult(const int batch, const int info) { + PADDLE_ENFORCE_LE( + info, 0, + platform::errors::PreconditionNotMet( + "For batch [%d]: the [%d] off-diagonal elements of an intermediate" + "tridiagonal form did not converge to zero", + batch, info)); + PADDLE_ENFORCE_GE( + info, 0, platform::errors::PreconditionNotMet( + "For batch [%d]: the [%d] argument had an illegal value", + batch, info)); +} + +template struct MatrixEighFunctor { void operator()(const framework::ExecutionContext &ctx, const Tensor &input, Tensor *eigen_values, Tensor *eigen_vectors, bool is_lower, @@ -122,43 +57,84 @@ struct MatrixEighFunctor { // Calculates the eigenvalues ​​and eigenvectors of Hermitian or real // symmetric matrices, and uses the variable has_vectors to // control whether to return the eigenvectors. 
-template -struct MatrixEighFunctor { +template +struct MatrixEighFunctor { public: void operator()(const framework::ExecutionContext &ctx, const Tensor &input, Tensor *eigen_values, Tensor *eigen_vectors, bool is_lower, bool has_vectors) { - auto dims = input.dims(); - auto output_value_dim = eigen_values->dims(); + using ValueType = math::Real; + auto *out_value = eigen_values->mutable_data(ctx.GetPlace()); - int64_t batch_size = 1; - int dim_size = dims.size(); - for (int64_t i = 0; i < dim_size - 2; i++) { - batch_size *= dims[i]; - } auto dito = - DeviceIndependenceTensorOperations(ctx); - Tensor input_tensor; - TensorCopy(input, ctx.GetPlace(), &input_tensor); - if (!is_lower) { - input_tensor = dito.Transpose(input); - } - int rows = dims[dims.size() - 2]; + math::DeviceIndependenceTensorOperations( + ctx); - auto *value_data = - eigen_values->mutable_data(output_value_dim, ctx.GetPlace()); + Tensor input_trans; + // lapack is a column-major storge, transpose make the input to + // have a continuous memory layout + input_trans = dito.Transpose(input); + auto *input_vector = input_trans.data(); - if (framework::IsComplexType(input_tensor.type())) { - auto *x_data = input_tensor.data(); - auto *vector_data = eigen_vectors->mutable_data(dims, ctx.GetPlace()); - ComputeComplexEigenvaluesAndVectors( - x_data, value_data, vector_data, batch_size, rows, rows, has_vectors); - } else { - auto *x_data = input_tensor.data(); - auto *vector_data = - eigen_vectors->mutable_data(dims, ctx.GetPlace()); - ComputeFloatEigenvaluesAndVectors( - x_data, value_data, vector_data, batch_size, rows, rows, has_vectors); + auto dims = input.dims(); + int dim_size = dims.size(); + int64_t batch_size = GetBatchSize(dims); + + int vector_stride = dims[dim_size - 1] * dims[dim_size - 2]; + int values_stride = dims[dim_size - 1]; + char uplo = is_lower ? 'L' : 'U'; + char jobz = has_vectors ? 
'V' : 'N'; + auto n = dims[dim_size - 1]; + auto lda = std::max(1, n); + // if work = -1, it means that you need to use the lapack function to query + // the optimal value + int lwork = -1; // The length of the array work + int lrwork = -1; // The dimension of the array rwork,rwork is REAL array + int liwork = -1; // The dimension of the array iwork + int iwork_opt = -1; // The optimal length of the array liwork + T lwork_opt = static_cast(-1); // The optimal length of the array work + ValueType rwork_opt = + static_cast(-1); // The optimal length of the array rwork + + int info = 0; + // Call lapackEigh to get the optimal size of work data + math::lapackEigh(jobz, uplo, n, input_vector, lda, out_value, + &lwork_opt, lwork, &rwork_opt, lrwork, + &iwork_opt, liwork, &info); + lwork = std::max(1, static_cast(lwork_opt)); + liwork = std::max(1, iwork_opt); + + Tensor rwork_tensor; + ValueType *rwork_data = nullptr; + + // complex type + if (framework::IsComplexType(input.type())) { + lrwork = std::max(1, static_cast(rwork_opt)); + rwork_data = rwork_tensor.mutable_data( + framework::make_ddim({lrwork}), ctx.GetPlace()); + } + Tensor iwork_tensor, work_tensor; + auto *iwork_data = iwork_tensor.mutable_data( + framework::make_ddim({liwork}), ctx.GetPlace()); + auto *work_data = work_tensor.mutable_data(framework::make_ddim({lwork}), + ctx.GetPlace()); + + for (auto i = 0; i < batch_size; i++) { + auto *value_data = out_value + i * values_stride; + auto *input_data = input_vector + i * vector_stride; + math::lapackEigh>(jobz, uplo, n, input_data, lda, value_data, + work_data, lwork, rwork_data, lrwork, + iwork_data, liwork, &info); + CheckEighResult(i, info); + } + if (has_vectors) { + PADDLE_ENFORCE_NOT_NULL(eigen_vectors, + platform::errors::InvalidArgument( + "When has_vectors is true," + "the eigenvectors needs to be calculated, " + "so the eigenvectors must be provided.")); + input_trans = dito.Transpose(input_trans); + eigen_vectors->ShareDataWith(input_trans); } } 
}; @@ -168,15 +144,22 @@ struct MatrixEighFunctor { // Calculates the eigenvalues ​​and eigenvectors of Hermitian or real // symmetric matrices on GPU, and uses the variable has_vectors // to control whether to return the eigenvectors. -template -struct MatrixEighFunctor { +template +struct MatrixEighFunctor { public: void operator()(const framework::ExecutionContext &ctx, const Tensor &input, Tensor *eigen_values, Tensor *eigen_vectors, bool is_lower, bool has_vectors) { + using ValueType = math::Real; auto *out_value = eigen_values->mutable_data(ctx.GetPlace()); - auto *out_vector = eigen_vectors->mutable_data(ctx.GetPlace()); + auto &dev_ctx = ctx.template device_context(); + auto dito = + math::DeviceIndependenceTensorOperations(ctx); + Tensor input_trans; + input_trans = dito.Transpose(input); + auto *input_vector = input_trans.data(); auto &dims = input.dims(); int dim_size = dims.size(); int64_t batch_size = GetBatchSize(dims); @@ -190,14 +173,6 @@ struct MatrixEighFunctor { int lda = std::max(1, n); auto vector_stride = dims[dim_size - 1] * dims[dim_size - 2]; auto values_stride = dims[dim_size - 1]; - - auto &dev_ctx = ctx.template device_context(); - auto dito = - math::DeviceIndependenceTensorOperations(ctx); - Tensor output_v_var_trans = dito.Transpose(input); - TensorCopy(output_v_var_trans, ctx.GetPlace(), eigen_vectors); - int lwork = 0; auto info = memory::Alloc(dev_ctx, sizeof(int) * batch_size); auto *info_ptr = reinterpret_cast(info->ptr()); @@ -205,10 +180,8 @@ struct MatrixEighFunctor { // When the input type is float32, and the feature value input dimension is // greater than or equal to [*,32,32] and less than or equal to // [*,512,512], Syevj has better performance. 
- bool use_syevj = - (eigen_vectors->type() == framework::proto::VarType::FP32 && - values_stride >= 32 && values_stride <= 512); - + bool use_syevj = (input.type() == framework::proto::VarType::FP32 && + values_stride >= 32 && values_stride <= 512); syevjInfo_t syevj_params; if (use_syevj) { PADDLE_ENFORCE_CUDA_SUCCESS( @@ -216,52 +189,52 @@ struct MatrixEighFunctor { PADDLE_ENFORCE_CUDA_SUCCESS( platform::dynload::cusolverDnSsyevj_bufferSize( dev_ctx.cusolver_dn_handle(), jobz, uplo, n, - reinterpret_cast(out_vector), lda, + reinterpret_cast(input_vector), lda, reinterpret_cast(out_value), &lwork, syevj_params)); } else { - EvdBuffer(dev_ctx.cusolver_dn_handle(), jobz, uplo, n, out_vector, lda, + EvdBuffer(dev_ctx.cusolver_dn_handle(), jobz, uplo, n, input_vector, lda, out_value, &lwork); } - auto work = memory::Alloc(dev_ctx, sizeof(T) * lwork); auto *work_ptr = reinterpret_cast(work->ptr()); - for (auto i = 0; i < batch_size; i++) { - auto vector_data = out_vector + i * vector_stride; - auto value_data = out_value + i * values_stride; + auto *input_data = input_vector + i * vector_stride; + auto *value_data = out_value + i * values_stride; auto handle = dev_ctx.cusolver_dn_handle(); if (use_syevj) { PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cusolverDnSsyevj( - handle, jobz, uplo, n, reinterpret_cast(vector_data), lda, + handle, jobz, uplo, n, reinterpret_cast(input_data), lda, reinterpret_cast(value_data), reinterpret_cast(work_ptr), lwork, info_ptr, syevj_params)); } else { - Evd(handle, jobz, uplo, n, vector_data, lda, value_data, work_ptr, - lwork, info_ptr); + Evd(handle, jobz, uplo, n, input_data, lda, value_data, work_ptr, lwork, + info_ptr); } - int error_info; + int error_info = 0; memory::Copy(platform::CPUPlace(), &error_info, BOOST_GET_CONST(platform::CUDAPlace, dev_ctx.GetPlace()), info_ptr, sizeof(int), dev_ctx.stream()); - PADDLE_ENFORCE_EQ( - error_info, 0, - platform::errors::PreconditionNotMet( - "For batch [%d]: the [%d] argument had an 
illegal value", i, - error_info)); + CheckEighResult(i, error_info); } if (use_syevj) { PADDLE_ENFORCE_CUDA_SUCCESS( platform::dynload::cusolverDnDestroySyevjInfo(syevj_params)); } - if (has_vectors) { - *eigen_vectors = dito.Transpose(*eigen_vectors); + PADDLE_ENFORCE_NOT_NULL(eigen_vectors, + platform::errors::InvalidArgument( + "When has_vectors is true," + "the eigenvectors needs to be calculated," + "so the eigenvectors must be provided.")); + input_trans = dito.Transpose(input_trans); + eigen_vectors->ShareDataWith(input_trans); } } + using ValueType = math::Real; inline void EvdBuffer(cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, int n, const T *A, int lda, const ValueType *W, int *lwork) const; @@ -271,15 +244,14 @@ struct MatrixEighFunctor { T *work, int lwork, int *devInfo) const; }; -#define FUNC_WITH_TYPES(m) \ - m(float, float, Ssy, float) m(double, double, Dsy, double) \ - m(float, paddle::platform::complex, Che, cuComplex) \ - m(double, paddle::platform::complex, Zhe, cuDoubleComplex) +#define FUNC_WITH_TYPES(m) \ + m(float, Ssy, float) m(double, Dsy, double) \ + m(paddle::platform::complex, Che, cuComplex) \ + m(paddle::platform::complex, Zhe, cuDoubleComplex) -#define EVDBUFFER_INSTANCE(ValueType, T, C, CastType) \ +#define EVDBUFFER_INSTANCE(T, C, CastType) \ template <> \ - inline void \ - MatrixEighFunctor::EvdBuffer( \ + inline void MatrixEighFunctor::EvdBuffer( \ cusolverDnHandle_t handle, cusolverEigMode_t jobz, \ cublasFillMode_t uplo, int n, const T *A, int lda, const ValueType *W, \ int *lwork) const { \ @@ -291,10 +263,9 @@ struct MatrixEighFunctor { FUNC_WITH_TYPES(EVDBUFFER_INSTANCE); -#define EVD_INSTANCE(ValueType, T, C, CastType) \ +#define EVD_INSTANCE(T, C, CastType) \ template <> \ - inline void \ - MatrixEighFunctor::Evd( \ + inline void MatrixEighFunctor::Evd( \ cusolverDnHandle_t handle, cusolverEigMode_t jobz, \ cublasFillMode_t uplo, int n, T *A, int lda, ValueType *W, T *work, \ int lwork, int 
*devInfo) const { \ diff --git a/paddle/fluid/operators/math/lapack_function.cc b/paddle/fluid/operators/math/lapack_function.cc index 5adb20b9a7..3ce2225420 100644 --- a/paddle/fluid/operators/math/lapack_function.cc +++ b/paddle/fluid/operators/math/lapack_function.cc @@ -31,6 +31,49 @@ void lapackLu(int m, int n, float *a, int lda, int *ipiv, int *info) { platform::dynload::sgetrf_(&m, &n, a, &lda, ipiv, info); } +// eigh +template <> +void lapackEigh(char jobz, char uplo, int n, float *a, int lda, float *w, + float *work, int lwork, float *rwork, int lrwork, + int *iwork, int liwork, int *info) { + (void)rwork; // unused + (void)lrwork; // unused + platform::dynload::ssyevd_(&jobz, &uplo, &n, a, &lda, w, work, &lwork, iwork, + &liwork, info); +} + +template <> +void lapackEigh(char jobz, char uplo, int n, double *a, int lda, + double *w, double *work, int lwork, double *rwork, + int lrwork, int *iwork, int liwork, int *info) { + (void)rwork; // unused + (void)lrwork; // unused + platform::dynload::dsyevd_(&jobz, &uplo, &n, a, &lda, w, work, &lwork, iwork, + &liwork, info); +} + +template <> +void lapackEigh, float>( + char jobz, char uplo, int n, platform::complex *a, int lda, float *w, + platform::complex *work, int lwork, float *rwork, int lrwork, + int *iwork, int liwork, int *info) { + platform::dynload::cheevd_(&jobz, &uplo, &n, + reinterpret_cast *>(a), &lda, + w, reinterpret_cast *>(work), + &lwork, rwork, &lrwork, iwork, &liwork, info); +} + +template <> +void lapackEigh, double>( + char jobz, char uplo, int n, platform::complex *a, int lda, + double *w, platform::complex *work, int lwork, double *rwork, + int lrwork, int *iwork, int liwork, int *info) { + platform::dynload::zheevd_(&jobz, &uplo, &n, + reinterpret_cast *>(a), &lda, + w, reinterpret_cast *>(work), + &lwork, rwork, &lrwork, iwork, &liwork, info); +} + // Eig template <> void lapackEig(char jobvl, char jobvr, int n, double *a, int lda, diff --git 
a/paddle/fluid/operators/math/lapack_function.h b/paddle/fluid/operators/math/lapack_function.h index a9cc2d2c00..a4c2c865c8 100644 --- a/paddle/fluid/operators/math/lapack_function.h +++ b/paddle/fluid/operators/math/lapack_function.h @@ -20,12 +20,17 @@ namespace math { // LU (for example) template -void lapackLu(int m, int n, T *a, int lda, int *ipiv, int *info); +void lapackLu(int m, int n, T* a, int lda, int* ipiv, int* info); + +template +void lapackEigh(char jobz, char uplo, int n, T* a, int lda, ValueType* w, + T* work, int lwork, ValueType* rwork, int lrwork, int* iwork, + int liwork, int* info); template -void lapackEig(char jobvl, char jobvr, int n, T1 *a, int lda, T1 *w, T1 *vl, - int ldvl, T1 *vr, int ldvr, T1 *work, int lwork, T2 *rwork, - int *info); +void lapackEig(char jobvl, char jobvr, int n, T1* a, int lda, T1* w, T1* vl, + int ldvl, T1* vr, int ldvr, T1* work, int lwork, T2* rwork, + int* info); } // namespace math } // namespace operators diff --git a/paddle/fluid/platform/dynload/lapack.h b/paddle/fluid/platform/dynload/lapack.h index db95e557eb..9b4dd3d9e3 100644 --- a/paddle/fluid/platform/dynload/lapack.h +++ b/paddle/fluid/platform/dynload/lapack.h @@ -16,6 +16,7 @@ limitations under the License. 
*/ #include #include +#include "paddle/fluid/platform/complex.h" #include "paddle/fluid/platform/dynload/dynamic_loader.h" #include "paddle/fluid/platform/port.h" @@ -28,6 +29,22 @@ extern "C" void dgetrf_(int *m, int *n, double *a, int *lda, int *ipiv, extern "C" void sgetrf_(int *m, int *n, float *a, int *lda, int *ipiv, int *info); +// evd +extern "C" void zheevd_(char *jobz, char *uplo, int *n, std::complex *a, + int *lda, double *w, std::complex *work, + int *lwork, double *rwork, int *lrwork, int *iwork, + int *liwork, int *info); +extern "C" void cheevd_(char *jobz, char *uplo, int *n, std::complex *a, + int *lda, float *w, std::complex *work, + int *lwork, float *rwork, int *lrwork, int *iwork, + int *liwork, int *info); +extern "C" void dsyevd_(char *jobz, char *uplo, int *n, double *a, int *lda, + double *w, double *work, int *lwork, int *iwork, + int *liwork, int *info); +extern "C" void ssyevd_(char *jobz, char *uplo, int *n, float *a, int *lda, + float *w, float *work, int *lwork, int *iwork, + int *liwork, int *info); + // geev extern "C" void dgeev_(char *jobvl, char *jobvr, int *n, double *a, int *lda, double *wr, double *wi, double *vl, int *ldvl, @@ -81,6 +98,10 @@ extern void *lapack_dso_handle; #define LAPACK_ROUTINE_EACH(__macro) \ __macro(dgetrf_); \ __macro(sgetrf_); \ + __macro(zheevd_); \ + __macro(cheevd_); \ + __macro(dsyevd_); \ + __macro(ssyevd_); \ __macro(dgeev_); \ __macro(sgeev_); \ __macro(zgeev_); \ diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index e09138ef09..e4f0860e3b 100755 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -106,7 +106,6 @@ from .tensor.linalg import slogdet # noqa: F401 from .tensor.linalg import multi_dot # noqa: F401 from .tensor.linalg import matrix_power # noqa: F401 from .tensor.linalg import svd # noqa: F401 -from .tensor.linalg import eigh # noqa: F401 from .tensor.linalg import pinv # noqa: F401 from .tensor.linalg import solve # noqa: F401 from .tensor.logic 
import equal # noqa: F401 diff --git a/python/paddle/tensor/linalg.py b/python/paddle/tensor/linalg.py index b9fb0e7c56..6c898f2d60 100644 --- a/python/paddle/tensor/linalg.py +++ b/python/paddle/tensor/linalg.py @@ -1759,7 +1759,7 @@ def eigh(x, UPLO='L', name=None): x_data = np.array([[1, -2j], [2j, 5]]) x = paddle.to_tensor(x_data) - out_value, out_vector = paddle.eigh(x, UPLO='L') + out_value, out_vector = paddle.linalg.eigh(x, UPLO='L') print(out_value) #[0.17157288, 5.82842712] print(out_vector) @@ -1780,7 +1780,7 @@ def eigh(x, UPLO='L', name=None): raise ValueError( "The input matrix must be batches of square matrices. But received x's dimention: {}". format(x_shape)) - if UPLO is not 'L' and UPLO is not 'U': + if UPLO != 'L' and UPLO != 'U': raise ValueError( "UPLO must be L or U. But received UPLO is: {}".format(UPLO)) -- GitLab