Unverified · Commit 7024ade7 · authored by Chen Weihang · committed by GitHub

[Phi] Move matrix inverse into phi (#40237)

* move matrix inverse into phi

* change license year
Parent 975f99ab
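The change is mechanical at every call site: the fluid functor in `paddle::operators::math` is swapped for the relocated one in `phi::funcs`, with identical template arguments and call signature. The recurring pattern, shown as a fragment (identifiers exactly as they appear in the hunks below; not buildable outside Paddle):

```cpp
// Before this commit (fluid):
//   math::MatrixInverseFunctor<DeviceContext, T> mat_inv;
// After this commit (phi), with the same template arguments and call shape:
phi::funcs::MatrixInverseFunctor<DeviceContext, T> mat_inv;
mat_inv(dev_ctx, *input, &inverse_A);  // writes inverse(*input) into inverse_A
```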
@@ -19,11 +19,11 @@
 #include <cmath>
 #include <vector>
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/matrix_inverse.h"
 #include "paddle/fluid/operators/svd_helper.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/for_range.h"
 #include "paddle/phi/kernels/funcs/complex_functors.h"
+#include "paddle/phi/kernels/funcs/matrix_inverse.h"
 namespace paddle {
 namespace operators {
@@ -226,7 +226,7 @@ class DeterminantGradKernel : public framework::OpKernel<T> {
     inverse_A.Resize(input->dims());
     inverse_A.mutable_data<T>(context.GetPlace());
-    math::MatrixInverseFunctor<DeviceContext, T> mat_inv;
+    phi::funcs::MatrixInverseFunctor<DeviceContext, T> mat_inv;
     mat_inv(dev_ctx, *input, &inverse_A);
     VLOG(3) << "inverse(A) dims: " << inverse_A.dims();
@@ -381,7 +381,7 @@ class SlogDeterminantGradKernel : public framework::OpKernel<T> {
     inverse_A.Resize(input->dims());
     inverse_A.mutable_data<T>(context.GetPlace());
-    math::MatrixInverseFunctor<DeviceContext, T> mat_inv;
+    phi::funcs::MatrixInverseFunctor<DeviceContext, T> mat_inv;
     mat_inv(dev_ctx, *input, &inverse_A);
     VLOG(3) << "inverse(A) dims: " << inverse_A.dims();
......
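Background for the two hunks above (not part of the diff): `DeterminantGradKernel` and `SlogDeterminantGradKernel` materialize `inverse_A` because the determinant's gradient is expressed through the inverse, via Jacobi's formula:

$$\frac{\partial \det(A)}{\partial A} = \det(A)\,A^{-T}, \qquad \frac{\partial \log\lvert\det(A)\rvert}{\partial A} = A^{-T}.$$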
@@ -15,8 +15,8 @@ limitations under the License. */
 #pragma once
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/matrix_inverse.h"
 #include "paddle/phi/kernels/funcs/blas/blas.h"
+#include "paddle/phi/kernels/funcs/matrix_inverse.h"
 namespace paddle {
 namespace operators {
@@ -30,7 +30,7 @@ class InverseKernel : public framework::OpKernel<T> {
     output->mutable_data<T>(context.GetPlace());
     auto& dev_ctx = context.template device_context<DeviceContext>();
-    math::MatrixInverseFunctor<DeviceContext, T> mat_inv;
+    phi::funcs::MatrixInverseFunctor<DeviceContext, T> mat_inv;
     mat_inv(dev_ctx, *input, output);
   }
 };
......
@@ -46,7 +46,6 @@ math_library(vol2col)
 math_library(prelu)
 math_library(bert_encoder_functor)
 math_library(tree2col DEPS math_function)
-math_library(matrix_inverse)
 math_library(segment_pooling)
 math_library(matrix_solve)
......
-/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#include "paddle/fluid/operators/math/matrix_inverse.h"
-#include "paddle/phi/kernels/funcs/blas/blas.h"
-namespace paddle {
-namespace platform {
-class CUDADeviceContext;
-}  // namespace platform
-}  // namespace paddle
-namespace paddle {
-namespace operators {
-namespace math {
-template <typename DeviceContext, typename T>
-class MatrixInverseFunctor;
-template <typename T>
-class MatrixInverseFunctor<platform::CUDADeviceContext, T> {
- public:
-  void operator()(const platform::CUDADeviceContext& context,
-                  const framework::Tensor& a, framework::Tensor* a_inv) {
-#ifndef PADDLE_WITH_HIP
-    const auto& mat_dims = a.dims();
-    const int rank = mat_dims.size();
-    int n = mat_dims[rank - 1];
-    int batch_size = rank > 2 ? a.numel() / (n * n) : 1;
-    memory::allocation::AllocationPtr tmp_gpu_mat_data;
-    const T* gpu_mat = a.data<T>();
-    if (n >= 32) {
-      // Copy all elements of input matrix A to a temporary memory space to
-      // avoid being overriden by getrf.
-      tmp_gpu_mat_data = memory::Alloc(context, a.numel() * sizeof(T));
-      memory::Copy(context.GetPlace(), tmp_gpu_mat_data->ptr(),
-                   context.GetPlace(), a.data(), a.numel() * sizeof(T),
-                   context.stream());
-      gpu_mat = reinterpret_cast<const T*>(tmp_gpu_mat_data->ptr());
-    }
-    std::vector<const T*> cpu_ptrs(batch_size * 2);
-    for (int i = 0; i < batch_size; ++i) {
-      cpu_ptrs[i] = gpu_mat + i * n * n;
-      cpu_ptrs[i + batch_size] = a_inv->data<T>() + i * n * n;
-    }
-    // Copy the addresses of A and A_inv from host to device.
-    memory::allocation::AllocationPtr tmp_gpu_ptrs_data =
-        memory::Alloc(context, cpu_ptrs.size() * sizeof(T*));
-    memory::Copy(context.GetPlace(), tmp_gpu_ptrs_data->ptr(),
-                 platform::CPUPlace(), static_cast<void*>(cpu_ptrs.data()),
-                 cpu_ptrs.size() * sizeof(T*), context.stream());
-    T** gpu_inv_ptrs =
-        reinterpret_cast<T**>(tmp_gpu_ptrs_data->ptr()) + batch_size;
-    // Allocate device memory for info and pivots.
-    int num_ints = n < 32 ? batch_size : batch_size * (n + 1);
-    memory::allocation::AllocationPtr tmp_gpu_info_data =
-        memory::Alloc(context, num_ints * sizeof(int));
-    int* gpu_info_ptr = reinterpret_cast<int*>(tmp_gpu_info_data->ptr());
-    auto blas = phi::funcs::GetBlas<platform::CUDADeviceContext, T>(context);
-    std::vector<int> info;  // only for singular checking
-    info.resize(batch_size);
-    // This functions in cuBLAS is intended to be used for matrices of small
-    // sizes where the launch overhead is a significant factor.
-    // TODO(Xreki): call function in cusolver for large matrices.
-    if (n < 32) {
-      // cublas<S/D>matinvBatched is a short cut of cublas<S/D>getrfBatched
-      // plus cublas<S/D>getriBatched.
-      // However it only works if N is less than 32. If not, we need to
-      // go through cublas<S/D>getrfBatched and cublas<S/D>getriBatched.
-      blas.BatchedMatInv(n,
-                         reinterpret_cast<const T**>(tmp_gpu_ptrs_data->ptr()),
-                         gpu_inv_ptrs, gpu_info_ptr, batch_size);
-    } else {
-      // This function performs the LU factorization of each matrix A by the
-      // equation P * A = L * U. L and U are written back to original matrix A,
-      // and diagonal elements of L are discarded.
-      int* gpu_pivot_ptr =
-          reinterpret_cast<int*>(tmp_gpu_info_data->ptr()) + batch_size;
-      blas.BatchedGETRF(n, reinterpret_cast<T**>(tmp_gpu_ptrs_data->ptr()),
-                        gpu_pivot_ptr, gpu_info_ptr, batch_size);
-      blas.BatchedGETRI(n,
-                        reinterpret_cast<const T**>(tmp_gpu_ptrs_data->ptr()),
-                        gpu_pivot_ptr, gpu_inv_ptrs, gpu_info_ptr, batch_size);
-    }
-    memory::Copy(platform::CPUPlace(), info.data(), context.GetPlace(),
-                 gpu_info_ptr, sizeof(int) * batch_size, context.stream());
-    for (int i = 0; i < batch_size; ++i) {
-      PADDLE_ENFORCE_EQ(info[i], 0,
-                        platform::errors::PreconditionNotMet(
-                            "For batch [%d]: U(%d, %d) is zero, singular U. "
-                            "Please check the matrix value and change it to a "
-                            "non-singular matrix",
-                            i, info[i], info[i]));
-    }
-#else
-    compute_inverse_eigen<platform::CUDADeviceContext, T>(context, a, a_inv);
-#endif
-  }
-};
-template class MatrixInverseFunctor<platform::CUDADeviceContext, float>;
-template class MatrixInverseFunctor<platform::CUDADeviceContext, double>;
-}  // namespace math
-}  // namespace operators
-}  // namespace paddle
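Aside (not part of the diff): per the comments in the file above, the `BatchedMatInv`, `BatchedGETRF`, and `BatchedGETRI` wrappers map onto `cublas<S/D>matinvBatched`, `cublas<S/D>getrfBatched`, and `cublas<S/D>getriBatched`. A standalone sketch of the large-matrix path (n >= 32) against raw cuBLAS, with error handling omitted; this illustrates the underlying API, not Paddle's wrapper code:

```cpp
#include <cublas_v2.h>
#include <cuda_runtime.h>
#include <vector>

// Invert `batch` n x n float matrices already resident on the GPU.
// h_mats[i] / h_invs[i] are device pointers to matrix i and its output.
// Mirrors the n >= 32 branch above: LU factorize (getrf), then invert (getri).
void BatchedInverse(cublasHandle_t handle, float** h_mats, float** h_invs,
                    int n, int batch) {
  // cuBLAS batched routines take a *device* array of device pointers.
  float **d_mat_ptrs, **d_inv_ptrs;
  cudaMalloc(&d_mat_ptrs, batch * sizeof(float*));
  cudaMalloc(&d_inv_ptrs, batch * sizeof(float*));
  cudaMemcpy(d_mat_ptrs, h_mats, batch * sizeof(float*),
             cudaMemcpyHostToDevice);
  cudaMemcpy(d_inv_ptrs, h_invs, batch * sizeof(float*),
             cudaMemcpyHostToDevice);

  int *d_pivots, *d_info;  // n pivots per matrix, one info flag per matrix
  cudaMalloc(&d_pivots, batch * n * sizeof(int));
  cudaMalloc(&d_info, batch * sizeof(int));

  // P * A = L * U, written back into each A (hence the defensive copy the
  // code above makes before calling getrf).
  cublasSgetrfBatched(handle, n, d_mat_ptrs, n, d_pivots, d_info, batch);
  // Out-of-place inversion from the LU factors.
  cublasSgetriBatched(handle, n, d_mat_ptrs, n, d_pivots, d_inv_ptrs, n,
                      d_info, batch);
  // For n < 32 a single fused call does both steps:
  //   cublasSmatinvBatched(handle, n, d_mat_ptrs, n, d_inv_ptrs, n,
  //                        d_info, batch);

  // info[i] != 0 reports a zero pivot U(info[i], info[i]): matrix i is
  // singular, matching the PADDLE_ENFORCE_EQ check above.
  std::vector<int> h_info(batch);
  cudaMemcpy(h_info.data(), d_info, batch * sizeof(int),
             cudaMemcpyDeviceToHost);

  cudaFree(d_pivots);
  cudaFree(d_info);
  cudaFree(d_mat_ptrs);
  cudaFree(d_inv_ptrs);
}
```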
@@ -18,9 +18,9 @@ limitations under the License. */
 #include <vector>
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/tensor_util.h"
-#include "paddle/fluid/operators/math/matrix_inverse.h"
 #include "paddle/fluid/platform/for_range.h"
 #include "paddle/phi/kernels/funcs/blas/blas.h"
+#include "paddle/phi/kernels/funcs/matrix_inverse.h"
 namespace paddle {
 namespace operators {
@@ -67,7 +67,7 @@ void MatrixPowerFunction(const Tensor* X, const int n, Tensor* Out,
     framework::TensorCopy(*X, ctx.GetPlace(), dev_ctx, &new_x);
   } else {
     // newX = X^{-1}, n = -n
-    math::MatrixInverseFunctor<DeviceContext, T> mat_inv;
+    phi::funcs::MatrixInverseFunctor<DeviceContext, T> mat_inv;
     mat_inv(dev_ctx, *X, &new_x);
     new_n = -n;
   }
@@ -200,7 +200,7 @@ void MatrixPowerGradFunction(const Tensor* X, const Tensor* Out,
     framework::TensorCopy(*X, ctx.GetPlace(), dev_ctx, &new_x);
   } else {
     // newX = X^{-1}, n = -n
-    math::MatrixInverseFunctor<DeviceContext, T> mat_inv;
+    phi::funcs::MatrixInverseFunctor<DeviceContext, T> mat_inv;
     mat_inv(dev_ctx, *X, &new_x);
     new_n = -n;
  }
......
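Side note, not in the diff itself: `MatrixPowerFunction` and `MatrixPowerGradFunction` only need the functor on the negative-exponent branch, where one up-front inversion reduces a negative power to a positive one, since X^n = (X^{-1})^{-n} for n < 0.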
@@ -9,3 +9,4 @@ math_library(gru_compute DEPS activation_functions math_function)
 math_library(lstm_compute DEPS activation_functions)
 math_library(concat_and_split_functor DEPS dense_tensor)
 math_library(matrix_reduce DEPS dense_tensor)
+math_library(matrix_inverse DEPS dense_tensor eigen3 blas)
-/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -12,27 +12,26 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "paddle/fluid/operators/math/matrix_inverse.h"
-#include "Eigen/Core"
-#include "Eigen/LU"
+#include "paddle/phi/kernels/funcs/matrix_inverse.h"
 #include "paddle/phi/kernels/funcs/blas/blas.h"
-namespace paddle {
-namespace operators {
-namespace math {
-template <typename T>
-class MatrixInverseFunctor<platform::CPUDeviceContext, T> {
- public:
-  void operator()(const platform::CPUDeviceContext& context,
-                  const framework::Tensor& a, framework::Tensor* a_inv) {
-    compute_inverse_eigen<platform::CPUDeviceContext, T>(context, a, a_inv);
-  }
-};
-template class MatrixInverseFunctor<platform::CPUDeviceContext, float>;
-template class MatrixInverseFunctor<platform::CPUDeviceContext, double>;
-}  // namespace math
-}  // namespace operators
-}  // namespace paddle
+namespace phi {
+namespace funcs {
+template <typename Context, typename T>
+void MatrixInverseFunctor<Context, T>::operator()(const Context& dev_ctx,
+                                                  const DenseTensor& a,
+                                                  DenseTensor* a_inv) {
+  ComputeInverseEigen<Context, T>(dev_ctx, a, a_inv);
+}
+template class MatrixInverseFunctor<CPUContext, float>;
+template class MatrixInverseFunctor<CPUContext, double>;
+// TODO(chenweihang): remove these instantiations later
+template class MatrixInverseFunctor<paddle::platform::CPUDeviceContext, float>;
+template class MatrixInverseFunctor<paddle::platform::CPUDeviceContext, double>;
+}  // namespace funcs
+}  // namespace phi
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/phi/kernels/funcs/matrix_inverse.h"
+#include "paddle/phi/kernels/funcs/blas/blas.h"
+#include "paddle/fluid/memory/malloc.h"
+#include "paddle/fluid/memory/memcpy.h"
+namespace phi {
+namespace funcs {
+template <typename Context, typename T>
+void MatrixInverseFunctor<Context, T>::operator()(const Context& dev_ctx,
+                                                  const DenseTensor& a,
+                                                  DenseTensor* a_inv) {
+#ifndef PADDLE_WITH_HIP
+  const auto& mat_dims = a.dims();
+  const int rank = mat_dims.size();
+  int n = mat_dims[rank - 1];
+  int batch_size = rank > 2 ? a.numel() / (n * n) : 1;
+  paddle::memory::allocation::AllocationPtr tmp_gpu_mat_data;
+  const T* gpu_mat = a.data<T>();
+  if (n >= 32) {
+    // Copy all elements of input matrix A to a temporary memory space to
+    // avoid being overriden by getrf.
+    tmp_gpu_mat_data = paddle::memory::Alloc(dev_ctx, a.numel() * sizeof(T));
+    paddle::memory::Copy(dev_ctx.GetPlace(),
+                         tmp_gpu_mat_data->ptr(),
+                         dev_ctx.GetPlace(),
+                         a.data(),
+                         a.numel() * sizeof(T),
+                         dev_ctx.stream());
+    gpu_mat = reinterpret_cast<const T*>(tmp_gpu_mat_data->ptr());
+  }
+  std::vector<const T*> cpu_ptrs(batch_size * 2);
+  for (int i = 0; i < batch_size; ++i) {
+    cpu_ptrs[i] = gpu_mat + i * n * n;
+    cpu_ptrs[i + batch_size] = a_inv->data<T>() + i * n * n;
+  }
+  // Copy the addresses of A and A_inv from host to device.
+  paddle::memory::allocation::AllocationPtr tmp_gpu_ptrs_data =
+      paddle::memory::Alloc(dev_ctx, cpu_ptrs.size() * sizeof(T*));
+  paddle::memory::Copy(dev_ctx.GetPlace(),
+                       tmp_gpu_ptrs_data->ptr(),
+                       phi::CPUPlace(),
+                       static_cast<void*>(cpu_ptrs.data()),
+                       cpu_ptrs.size() * sizeof(T*),
+                       dev_ctx.stream());
+  T** gpu_inv_ptrs =
+      reinterpret_cast<T**>(tmp_gpu_ptrs_data->ptr()) + batch_size;
+  // Allocate device memory for info and pivots.
+  int num_ints = n < 32 ? batch_size : batch_size * (n + 1);
+  paddle::memory::allocation::AllocationPtr tmp_gpu_info_data =
+      paddle::memory::Alloc(dev_ctx, num_ints * sizeof(int));
+  int* gpu_info_ptr = reinterpret_cast<int*>(tmp_gpu_info_data->ptr());
+  auto blas = phi::funcs::GetBlas<Context, T>(dev_ctx);
+  std::vector<int> info;  // only for singular checking
+  info.resize(batch_size);
+  // This functions in cuBLAS is intended to be used for matrices of small
+  // sizes where the launch overhead is a significant factor.
+  // TODO(Xreki): call function in cusolver for large matrices.
+  if (n < 32) {
+    // cublas<S/D>matinvBatched is a short cut of cublas<S/D>getrfBatched
+    // plus cublas<S/D>getriBatched.
+    // However it only works if N is less than 32. If not, we need to
+    // go through cublas<S/D>getrfBatched and cublas<S/D>getriBatched.
+    blas.BatchedMatInv(n,
+                       reinterpret_cast<const T**>(tmp_gpu_ptrs_data->ptr()),
+                       gpu_inv_ptrs,
+                       gpu_info_ptr,
+                       batch_size);
+  } else {
+    // This function performs the LU factorization of each matrix A by the
+    // equation P * A = L * U. L and U are written back to original matrix A,
+    // and diagonal elements of L are discarded.
+    int* gpu_pivot_ptr =
+        reinterpret_cast<int*>(tmp_gpu_info_data->ptr()) + batch_size;
+    blas.BatchedGETRF(n,
+                      reinterpret_cast<T**>(tmp_gpu_ptrs_data->ptr()),
+                      gpu_pivot_ptr,
+                      gpu_info_ptr,
+                      batch_size);
+    blas.BatchedGETRI(n,
+                      reinterpret_cast<const T**>(tmp_gpu_ptrs_data->ptr()),
+                      gpu_pivot_ptr,
+                      gpu_inv_ptrs,
+                      gpu_info_ptr,
+                      batch_size);
+  }
+  paddle::memory::Copy(phi::CPUPlace(),
+                       info.data(),
+                       dev_ctx.GetPlace(),
+                       gpu_info_ptr,
+                       sizeof(int) * batch_size,
+                       dev_ctx.stream());
+  for (int i = 0; i < batch_size; ++i) {
+    PADDLE_ENFORCE_EQ(info[i],
+                      0,
+                      phi::errors::PreconditionNotMet(
+                          "For batch [%d]: U(%d, %d) is zero, singular U. "
+                          "Please check the matrix value and change it to a "
+                          "non-singular matrix",
+                          i,
+                          info[i],
+                          info[i]));
+  }
+#else
+  ComputeInverseEigen<Context, T>(dev_ctx, a, a_inv);
+#endif
+}
+template class MatrixInverseFunctor<GPUContext, float>;
+template class MatrixInverseFunctor<GPUContext, double>;
+// TODO(chenweihang): remove these instantiations later
+template class MatrixInverseFunctor<paddle::platform::CUDADeviceContext, float>;
+template class MatrixInverseFunctor<paddle::platform::CUDADeviceContext,
+                                    double>;
+}  // namespace funcs
+}  // namespace phi
-/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -17,17 +17,18 @@ limitations under the License. */
 #include <string>
 #include "Eigen/Core"
 #include "Eigen/LU"
-#include "paddle/fluid/framework/tensor.h"
-#include "paddle/fluid/platform/device_context.h"
-namespace paddle {
-namespace operators {
-namespace math {
+#include "paddle/phi/backends/all_context.h"
+#include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/enforce.h"
-template <typename DeviceContext, typename T>
-void compute_inverse_eigen(const DeviceContext& context,
-                           const framework::Tensor& a,
-                           framework::Tensor* a_inv) {
+namespace phi {
+namespace funcs {
+template <typename Context, typename T>
+void ComputeInverseEigen(const Context& dev_ctx,
+                         const DenseTensor& a,
+                         DenseTensor* a_inv) {
   using Matrix =
       Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
   using EigenMatrixMap = Eigen::Map<Matrix>;
@@ -38,7 +39,7 @@ void compute_inverse_eigen(const DeviceContext& context,
   int batch_size = rank > 2 ? a.numel() / (n * n) : 1;
   const T* a_ptr = a.data<T>();
-  T* a_inv_ptr = a_inv->mutable_data<T>(context.GetPlace());
+  T* a_inv_ptr = a_inv->mutable_data<T>(dev_ctx.GetPlace());
   for (int i = 0; i < batch_size; ++i) {
     ConstEigenMatrixMap mat(a_ptr + i * n * n, n, n);
@@ -47,20 +48,20 @@ void compute_inverse_eigen(const DeviceContext& context,
     lu.compute(mat);
     const T min_abs_pivot = lu.matrixLU().diagonal().cwiseAbs().minCoeff();
-    PADDLE_ENFORCE_GT(
-        min_abs_pivot, static_cast<T>(0),
-        platform::errors::InvalidArgument("Input is not invertible."));
+    PADDLE_ENFORCE_GT(min_abs_pivot,
+                      static_cast<T>(0),
+                      errors::InvalidArgument("Input is not invertible."));
     mat_inv.noalias() = lu.inverse();
   }
 }
-template <typename DeviceContext, typename T>
+template <typename Context, typename T>
 class MatrixInverseFunctor {
  public:
-  void operator()(const DeviceContext& context, const framework::Tensor& a,
-                  framework::Tensor* a_inv);
+  void operator()(const Context& dev_ctx,
+                  const DenseTensor& a,
+                  DenseTensor* a_inv);
 };
-}  // namespace math
-}  // namespace operators
-}  // namespace paddle
+}  // namespace funcs
+}  // namespace phi
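To see the CPU path in isolation: a minimal standalone sketch of the same technique `ComputeInverseEigen` uses (partial-pivot LU, singularity check on the LU diagonal, then the inverse), assuming only Eigen; the names here are illustrative, not Paddle's:

```cpp
#include <stdexcept>
#include <vector>
#include "Eigen/Core"
#include "Eigen/LU"

// Invert `batch_size` row-major n x n matrices stored contiguously in `a`,
// writing the results to `a_inv` (same layout), as ComputeInverseEigen does.
template <typename T>
void BatchedInverseEigen(const T* a, T* a_inv, int n, int batch_size) {
  using Matrix =
      Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
  for (int i = 0; i < batch_size; ++i) {
    Eigen::Map<const Matrix> mat(a + i * n * n, n, n);
    Eigen::Map<Matrix> mat_inv(a_inv + i * n * n, n, n);
    Eigen::PartialPivLU<Matrix> lu(mat);
    // A zero pivot on the LU diagonal means the matrix is singular; the phi
    // version raises InvalidArgument("Input is not invertible.") here.
    const T min_abs_pivot = lu.matrixLU().diagonal().cwiseAbs().minCoeff();
    if (!(min_abs_pivot > static_cast<T>(0))) {
      throw std::invalid_argument("Input is not invertible.");
    }
    mat_inv.noalias() = lu.inverse();
  }
}

int main() {
  std::vector<double> a = {4, 7, 2, 6};  // one 2x2 matrix, row-major
  std::vector<double> a_inv(4, 0.0);
  BatchedInverseEigen(a.data(), a_inv.data(), 2, 1);
  // a_inv now holds {0.6, -0.7, -0.2, 0.4}
  return 0;
}
```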