From 82cd8d2138e984fc5bf85d553a76668d2a5e3e6e Mon Sep 17 00:00:00 2001 From: WangZhen <23097963+0x45f@users.noreply.github.com> Date: Tue, 28 Jun 2022 10:36:53 +0800 Subject: [PATCH] Speed up matrix_rank_tol_kernel.cc compile time (#43856) --- .../phi/kernels/cpu/matrix_rank_tol_kernel.cc | 96 ++++++++++--------- 1 file changed, 49 insertions(+), 47 deletions(-) diff --git a/paddle/phi/kernels/cpu/matrix_rank_tol_kernel.cc b/paddle/phi/kernels/cpu/matrix_rank_tol_kernel.cc index af9b772838..4f941099c9 100644 --- a/paddle/phi/kernels/cpu/matrix_rank_tol_kernel.cc +++ b/paddle/phi/kernels/cpu/matrix_rank_tol_kernel.cc @@ -14,67 +14,66 @@ #include "paddle/phi/kernels/matrix_rank_tol_kernel.h" -#include -#include - #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/abs_kernel.h" #include "paddle/phi/kernels/elementwise_multiply_kernel.h" #include "paddle/phi/kernels/full_kernel.h" #include "paddle/phi/kernels/funcs/compare_functors.h" -#include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/elementwise_base.h" +#include "paddle/phi/kernels/funcs/lapack/lapack_function.h" +#include "paddle/phi/kernels/funcs/values_vectors_functor.h" #include "paddle/phi/kernels/impl/matrix_rank_kernel_impl.h" #include "paddle/phi/kernels/reduce_max_kernel.h" #include "paddle/phi/kernels/reduce_sum_kernel.h" +#include "paddle/phi/kernels/transpose_kernel.h" namespace phi { template -void BatchEigenvalues(const T* x_data, - T* eigenvalues_data, - int batches, - int rows, - int cols, - int k) { - // Eigen::Matrix API need non-const pointer. - T* input = const_cast(x_data); - int stride = rows * cols; - for (int i = 0; i < batches; i++) { - auto m = Eigen::Map< - Eigen::Matrix>( - input + i * stride, rows, rows); - Eigen::SelfAdjointEigenSolver< - Eigen::Matrix> - eigen_solver(m); - auto eigenvalues = eigen_solver.eigenvalues().cwiseAbs(); - for (int j = 0; j < k; j++) { - *(eigenvalues_data + i * k + j) = eigenvalues[j]; - } +void LapackSVD(const T* x_data, T* eigenvalues_data, int rows, int cols) { + char jobz = 'N'; + int mx = std::max(rows, cols); + int mn = std::min(rows, cols); + T* a = const_cast(x_data); + int lda = rows; + int lwork = 3 * mn + std::max(mx, 7 * mn); + std::vector work(lwork); + std::vector iwork(8 * mn); + int info; + + phi::funcs::lapackSvd(jobz, + rows, + cols, + a, + lda, + eigenvalues_data, + nullptr, + 1, + nullptr, + 1, + work.data(), + lwork, + iwork.data(), + &info); + + if (info < 0) { + PADDLE_THROW(phi::errors::InvalidArgument( + "This %s-th argument has an illegal value", info)); + } + if (info > 0) { + PADDLE_THROW(phi::errors::InvalidArgument( + "DBDSDC/SBDSDC did not converge, updating process failed. May be you " + "passes a invalid matrix.")); } } template -void BatchSVD(const T* x_data, - T* eigenvalues_data, - int batches, - int rows, - int cols, - int k) { - // Eigen::Matrix API need non-const pointer. - T* input = const_cast(x_data); +void BatchSVD( + const T* x_data, T* eigenvalues_data, int batches, int rows, int cols) { int stride = rows * cols; - Eigen::BDCSVD< - Eigen::Matrix> - svd; - for (int i = 0; i < batches; i++) { - auto m = Eigen::Map< - Eigen::Matrix>( - input + i * stride, rows, cols); - svd.compute(m); - auto res_s = svd.singularValues(); - for (int j = 0; j < k; j++) { - eigenvalues_data[i * k + j] = res_s[j]; - } + int k = std::min(rows, cols); + for (int i = 0; i < batches; ++i) { + LapackSVD(x_data + i * stride, eigenvalues_data + i * k, rows, cols); } } @@ -85,7 +84,6 @@ void MatrixRankTolKernel(const Context& dev_ctx, bool use_default_tol, bool hermitian, DenseTensor* out) { - auto* x_data = x.data(); dev_ctx.template Alloc(out); auto dim_x = x.dims(); auto dim_out = out->dims(); @@ -106,9 +104,13 @@ void MatrixRankTolKernel(const Context& dev_ctx, auto* eigenvalue_data = dev_ctx.template Alloc(&eigenvalue_tensor); if (hermitian) { - BatchEigenvalues(x_data, eigenvalue_data, batches, rows, cols, k); + phi::funcs::MatrixEighFunctor functor; + functor(dev_ctx, x, &eigenvalue_tensor, nullptr, true, false); + phi::AbsKernel(dev_ctx, eigenvalue_tensor, &eigenvalue_tensor); } else { - BatchSVD(x_data, eigenvalue_data, batches, rows, cols, k); + DenseTensor trans_x = phi::TransposeLast2Dim(dev_ctx, x); + auto* x_data = trans_x.data(); + BatchSVD(x_data, eigenvalue_data, batches, rows, cols); } DenseTensor max_eigenvalue_tensor; -- GitLab