add API paddle.linalg.eig (#35674) (#36188)

向PaddlePaddle中的线性代数库添加eig算子，该算子计算一般方阵的特征分解。 cherry-pick 自#35674.

add API paddle.linalg.eig (#35674) (#36188)
向PaddlePaddle中的线性代数库添加eig算子，该算子计算一般方阵的特征分解。 cherry-pick 自#35674.
4e2daa9a · Lijunhui · GitHub · 96fd98bc · 4e2daa9a · 4e2daa9a
9 changed file
--- a/paddle/fluid/operators/eig_op.cc
+++ b/paddle/fluid/operators/eig_op.cc
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "paddle/fluid/operators/eig_op.h"
+#include <string>
+#include <vector>
+#include "paddle/fluid/framework/op_registry.h"
+namespace paddle {
+namespace operators {
+class EigOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "Eig");
+    OP_INOUT_CHECK(ctx->HasOutput("Eigenvalues"), "Output", "Eigenvalues",
+                   "Eig");
+    OP_INOUT_CHECK(ctx->HasOutput("Eigenvectors"), "Output", "Eigenvectors",
+                   "Eig");
+    auto x_dims = ctx->GetInputDim("X");
+    int rank = x_dims.size();
+    PADDLE_ENFORCE_GE(rank, 2, platform::errors::InvalidArgument(
+                                   "Expects input tensor x to be not less than "
+                                   "2 dimentions, but got dimention %d",
+                                   rank));
+    PADDLE_ENFORCE_EQ(x_dims[rank - 2], x_dims[rank - 1],
+                      platform::errors::InvalidArgument(
+                          "The input matrix must be a square matrix, "
+                          "but receive a matrix with %d rows and %d colums",
+                          x_dims[rank - 2], x_dims[rank - 1]));
+    std::vector<int> batch_dims_vec{};
+    for (int i = 0; i < rank - 1; ++i) {
+      batch_dims_vec.emplace_back(x_dims[i]);
+    }
+    ctx->SetOutputDim("Eigenvectors", x_dims);
+    ctx->SetOutputDim("Eigenvalues", framework::make_ddim(batch_dims_vec));
+  }
+ protected:
+  // The output of eig is always complex-valued even for real-valued inputs
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    auto dtype = OperatorWithKernel::IndicateVarDataType(ctx, "X");
+    if (dtype != framework::proto::VarType::FP32 &&
+        dtype != framework::proto::VarType::FP64 &&
+        dtype != framework::proto::VarType::COMPLEX64 &&
+        dtype != framework::proto::VarType::COMPLEX128) {
+      PADDLE_THROW(platform::errors::InvalidArgument(
+          "unsupported data type: %s!", dtype));
+    }
+    return framework::OpKernelType(dtype, ctx.GetPlace());
+  }
+};
+class EigOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override {
+    AddInput(
+        "X",
+        "(Tensor), A complex-valued or real-valued tensor with shape (*, "
+        "n, n). The accepted datatype is one of float32, float64, complex64 "
+        "or complex128");
+    AddOutput("Eigenvalues",
+              "(Tensor), The output eigenvalues tensor with shape (*, n). The "
+              "datatype is complex64 or complex128");
+    AddOutput("Eigenvectors",
+              "(Tensor), The output eigenvectors tensor with shape (*, n, n). "
+              "The datatype is complex64 or complex128");
+    AddComment(R"DOC(
+        Eig Operator.
+This API processes eigen decomposition for general square matrices.
+)DOC");
+  }
+};
+class EigGradOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    OP_INOUT_CHECK(ctx->HasInput("Eigenvalues"), "Input", "Eigenvalues",
+                   "EigGrad");
+    OP_INOUT_CHECK(ctx->HasInput("Eigenvectors"), "Input", "Eigenvectors",
+                   "EigGrad");
+    OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Eigenvalues")),
+                   "Input", "Eigenvalues@GRAD", "EigGrad");
+    OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Eigenvectors")),
+                   "Input", "Eigenvectors@GRAD", "EigGrad");
+    auto dims = ctx->GetInputDim("Eigenvectors");
+    auto x_grad_name = framework::GradVarName("X");
+    if (ctx->HasOutput(x_grad_name)) {
+      ctx->SetOutputDim(x_grad_name, dims);
+    }
+  }
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    return framework::OpKernelType(
+        OperatorWithKernel::IndicateVarDataType(
+            ctx, framework::GradVarName("Eigenvectors")),
+        ctx.device_context());
+  }
+};
+template <typename T>
+class EigGradOpMaker : public framework::SingleGradOpMaker<T> {
+ public:
+  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
+ protected:
+  void Apply(GradOpPtr<T> op) const override {
+    op->SetType(this->ForwardOpType() + "_grad");
+    op->SetInput("Eigenvalues", this->Output("Eigenvalues"));
+    op->SetInput("Eigenvectors", this->Output("Eigenvectors"));
+    op->SetInput(framework::GradVarName("Eigenvalues"),
+                 this->OutputGrad("Eigenvalues"));
+    op->SetInput(framework::GradVarName("Eigenvectors"),
+                 this->OutputGrad("Eigenvectors"));
+    op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
+  }
+};
+}  // namespace operators
+}  // namespace paddle
+using complex64 = paddle::platform::complex<float>;
+using complex128 = paddle::platform::complex<double>;
+namespace ops = paddle::operators;
+REGISTER_OPERATOR(eig, ops::EigOp, ops::EigOpMaker,
+                  ops::EigGradOpMaker<paddle::framework::OpDesc>,
+                  ops::EigGradOpMaker<paddle::imperative::OpBase>);
+REGISTER_OPERATOR(eig_grad, ops::EigGradOp);
+REGISTER_OP_CPU_KERNEL(
+    eig, ops::EigKernel<paddle::platform::CPUDeviceContext, float, complex64>,
+    ops::EigKernel<paddle::platform::CPUDeviceContext, double, complex128>,
+    ops::EigKernel<paddle::platform::CPUDeviceContext, complex64, complex64>,
+    ops::EigKernel<paddle::platform::CPUDeviceContext, complex128, complex128>);
+REGISTER_OP_CPU_KERNEL(
+    eig_grad,
+    ops::EigGradKernel<paddle::platform::CPUDeviceContext, float, complex64>,
+    ops::EigGradKernel<paddle::platform::CPUDeviceContext, double, complex128>,
+    ops::EigGradKernel<paddle::platform::CPUDeviceContext, complex64,
+                       complex64>,
+    ops::EigGradKernel<paddle::platform::CPUDeviceContext, complex128,
+                       complex128>);
--- a/paddle/fluid/operators/eig_op.h
+++ b/paddle/fluid/operators/eig_op.h
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+#include <math.h>
+#include <algorithm>
+#include <complex>
+#include "paddle/fluid/operators/math/complex_functors.h"
+#include "paddle/fluid/operators/math/lapack_function.h"
+#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/fluid/operators/math/matrix_solve.h"
+#include "paddle/fluid/operators/svd_helper.h"
+#include "paddle/fluid/operators/transpose_op.h"
+#include "paddle/fluid/platform/for_range.h"
+#define EPSILON 1e-6
+namespace paddle {
+namespace operators {
+using paddle::framework::Tensor;
+inline int BatchCount(const Tensor& matrix) {
+  int count = 1;
+  int num_dims = matrix.dims().size();
+  for (int i = 0; i < num_dims - 2; ++i) {
+    count *= matrix.dims()[i];
+  }
+  return count;
+}
+inline int MatrixStride(const Tensor& matrix) {
+  framework::DDim dims_list = matrix.dims();
+  int num_dims = dims_list.size();
+  return dims_list[num_dims - 1] * dims_list[num_dims - 2];
+}
+// Transpose two axis of a Tensor
+template <typename DeviceContext, typename T>
+void TransposeTwoAxis(const Tensor& input, Tensor* transposed_input,
+                      const int axis1, const int axis2,
+                      const framework::ExecutionContext& context) {
+  std::vector<int> permute(input.dims().size());
+  std::iota(permute.begin(), permute.end(), 0);
+  permute[axis1] = axis2;
+  permute[axis2] = axis1;
+  transposed_input->mutable_data<T>(input.dims(), context.GetPlace());
+  auto& dev_ctx = context.template device_context<platform::CPUDeviceContext>();
+  TransCompute<DeviceContext, T>(input.dims().size(), dev_ctx, input,
+                                 transposed_input, permute);
+}
+// Apply eig to a batch of matrices, values, vectors and (intermidiate
+// tensor) info are overritten
+template <typename T>
+void LapackEig(Tensor* input, Tensor* values, Tensor* vectors, int info,
+               const framework::ExecutionContext& context) {
+  char jobvl = 'N';
+  char jobvr = 'V';  // only right eigenvectors are computed
+  int num_dims = input->dims().size();
+  int order = input->dims()[num_dims - 1];
+  T* input_data = input->data<T>();
+  int lda = std::max<int>(1, order);
+  T* values_data = values->mutable_data<T>(context.GetPlace());
+  T* lvector_data = nullptr;
+  int ldvl = 1;
+  T* rvector_data = vectors->mutable_data<T>(context.GetPlace());
+  int ldvr = lda;
+  int lwork = -1;
+  int batch_count = BatchCount(*input);
+  int matrix_stride = MatrixStride(*input);
+  int values_stride = values->dims()[values->dims().size() - 1];
+  Tensor rwork;
+  math::Real<T>* rwork_data = nullptr;
+  rwork.Resize(framework::make_ddim({lda * 2}));
+  rwork_data = rwork.mutable_data<math::Real<T>>(context.GetPlace());
+  // call lapackEig once to compute the size of work;
+  T computed_work_size;
+  math::lapackEig<T, math::Real<T>>(
+      jobvl, jobvr, order, input_data, lda, values_data, lvector_data, ldvl,
+      rvector_data, ldvr, &computed_work_size, lwork, rwork_data, &info);
+  lwork = std::max<int>(1, static_cast<int>(math::Real<T>(computed_work_size)));
+  Tensor work;
+  work.Resize(framework::make_ddim({lwork}));
+  T* work_data = work.mutable_data<T>(context.GetPlace());
+  for (auto i = 0; i < batch_count; ++i) {
+    T* current_matrix = &input_data[i * matrix_stride];
+    T* current_values = &values_data[i * values_stride];
+    T* current_rvectors = &rvector_data[i * matrix_stride];
+    math::lapackEig<T, math::Real<T>>(
+        jobvl, jobvr, order, current_matrix, lda, current_values, lvector_data,
+        ldvl, current_rvectors, ldvr, work_data, lwork, rwork_data, &info);
+    PADDLE_ENFORCE_EQ(
+        info, 0,
+        platform::errors::PreconditionNotMet(
+            "current info is not 0, computation failed. "
+            "= 0:  successful exit."
+            "< 0:  if INFO = -i, the i-th argument had an illegal value."
+            "> 0:  if INFO = i, the QR algorithm failed to compute all the "
+            "eigenvalues, and no eigenvectors have been computed; "
+            "elements i+1:N of WR and WI contain eigenvalues which "
+            "have converged."));
+  }
+}
+template <typename DeviceContext, typename T>
+void ApplyEigKernel(const Tensor& input, Tensor* values, Tensor* vectors,
+                    const framework::ExecutionContext& context) {
+  Tensor input_column_major;
+  Tensor vectors_row_major;
+  int num_dims = input.dims().size();
+  // transfer to column-major memory layout i.e. make_ddim from tranposed_input:
+  // [batch,row,col]->[batch,col,row]
+  TransposeTwoAxis<DeviceContext, T>(input, &input_column_major, num_dims - 1,
+                                     num_dims - 2, context);
+  // make sure 'vectors_row_major' holds memory before passed to LapackEig()
+  vectors_row_major.Resize(input.dims());
+  int info = 0;
+  LapackEig<T>(&input_column_major, values, &vectors_row_major, info, context);
+  // transfer column-major layout back
+  // vectors_row_major: column-major layout
+  // vector: original layout
+  TransposeTwoAxis<DeviceContext, T>(vectors_row_major, vectors, num_dims - 1,
+                                     num_dims - 2, context);
+}
+template <typename T, typename Tout>
+void ConstructComplexVectors(Tensor* c_vectors, const Tensor& c_values,
+                             const Tensor& r_vectors,
+                             const framework::ExecutionContext& ctx,
+                             int batch_count, int order) {
+  int matrix_stride = MatrixStride(r_vectors);
+  auto* c_vectors_data = c_vectors->mutable_data<Tout>(ctx.GetPlace());
+  auto* c_values_data = c_values.data<Tout>();
+  auto* r_v_data = r_vectors.data<T>();
+  for (int b = 0; b < batch_count; b++) {
+    auto* vecs = &r_v_data[b * matrix_stride];
+    auto* res = &c_vectors_data[b * matrix_stride];
+    auto* vals = &c_values_data[b * order];
+    for (int j = 0; j < order; j++) {
+      if (vals[j].imag < EPSILON) {
+        for (int i = 0; i < order; i++) {
+          res[j * order + i] = platform::complex<T>(vecs[j * order + i], 0);
+        }
+      } else {
+        for (int i = 0; i < order; i++) {
+          res[j * order + i] = platform::complex<T>(vecs[j * order + i],
+                                                    vecs[(j + 1) * order + i]);
+          res[(j + 1) * order + i] = platform::complex<T>(
+              vecs[j * order + i], -vecs[(j + 1) * order + i]);
+        }
+        j++;
+      }
+    }
+  }
+}
+template <typename DeviceContext, typename T, typename Tout>
+class EigKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto* x = context.Input<Tensor>("X");
+    auto* out_values = context.Output<Tensor>("Eigenvalues");
+    auto* out_vectors = context.Output<Tensor>("Eigenvectors");
+    if (!framework::IsComplexType(x->type())) {
+      out_values->mutable_data<Tout>(context.GetPlace());
+      out_vectors->mutable_data<Tout>(context.GetPlace());
+      int batch_count = BatchCount(*x);
+      int order = x->dims()[x->dims().size() - 1];
+      Tensor real_values;
+      Tensor real_vectors;
+      // double the size of real_values, the first half stores the real part,
+      // the next half stores the imag part
+      std::vector<int> origin_dim =
+          framework::vectorize<int>(out_values->dims());
+      int last_item = origin_dim.back();
+      origin_dim.pop_back();
+      origin_dim.push_back(last_item * 2);
+      framework::DDim big_dim = framework::make_ddim(origin_dim);
+      real_values.mutable_data<math::Real<T>>(big_dim, context.GetPlace());
+      real_vectors.mutable_data<math::Real<T>>(x->dims(), context.GetPlace());
+      ApplyEigKernel<DeviceContext, math::Real<T>>(*x, &real_values,
+                                                   &real_vectors, context);
+      auto dito =
+          math::DeviceIndependenceTensorOperations<DeviceContext, math::Real<T>,
+                                                   Tout>(context);
+      // 1. extract real part & imag part from real_values
+      Tensor real_part = dito.Slice(real_values, {-1}, {0}, {order});
+      Tensor imag_part = dito.Slice(real_values, {-1}, {order}, {order * 2});
+      // 2. construct complex values
+      auto* real_part_data = real_part.data<math::Real<T>>();
+      auto* imag_part_data = imag_part.data<math::Real<T>>();
+      int out_values_numel = out_values->numel();
+      platform::ForRange<DeviceContext> for_range(
+          context.template device_context<DeviceContext>(), out_values_numel);
+      math::RealImagToComplexFunctor<Tout> functor(
+          real_part_data, imag_part_data,
+          out_values->mutable_data<Tout>(context.GetPlace()), out_values_numel);
+      for_range(functor);
+      // 3. construct complex vectors
+      Tensor real_vector_trans = dito.Transpose(real_vectors);
+      Tensor out_vectors_trans;
+      out_vectors_trans.mutable_data<Tout>(x->dims(), context.GetPlace());
+      ConstructComplexVectors<math::Real<T>, Tout>(
+          &out_vectors_trans, *out_values, real_vector_trans, context,
+          batch_count, order);
+      TransposeTwoAxis<DeviceContext, Tout>(out_vectors_trans, out_vectors,
+                                            x->dims().size() - 1,
+                                            x->dims().size() - 2, context);
+    } else {
+      out_values->mutable_data<T>(context.GetPlace());
+      out_vectors->mutable_data<T>(context.GetPlace());
+      ApplyEigKernel<DeviceContext, T>(*x, out_values, out_vectors, context);
+    }
+  }
+};
+template <typename DeviceContext, typename Tout>
+void ComputeBackwardForComplexInput(
+    const Tensor& V, const Tensor& L, const Tensor& gL, const Tensor& gV,
+    Tout* x_grad_data, int batch_count, int order,
+    const framework::ExecutionContext& context) {
+  auto dito =
+      math::DeviceIndependenceTensorOperations<DeviceContext, Tout, Tout>(
+          context);
+  Tensor trans_v = dito.Transpose(V);
+  Tensor Vh = dito.Conj(trans_v);
+  Tensor Lconj = dito.Conj(L);
+  Tensor Econj = dito.Sub(dito.Unsqueeze(Lconj, -2), dito.Unsqueeze(Lconj, -1));
+  Tensor VhgV = dito.Matmul(Vh, gV);
+  Tensor diag_real = dito.Real(VhgV);
+  Tensor diag_res = dito.BatchDiag(diag_real, batch_count);
+  Tensor diag_unsqueezed = dito.Unsqueeze(diag_res, -2);
+  // turn diag_unsqueezed into complex
+  auto numel = diag_unsqueezed.numel();
+  Tensor diag_unsqueezed_complex;
+  auto* data_diag_un = diag_unsqueezed.data<math::Real<Tout>>();
+  auto* data_diag_un_com = diag_unsqueezed_complex.mutable_data<Tout>(
+      diag_unsqueezed.dims(), context.GetPlace(),
+      static_cast<size_t>(numel * sizeof(Tout)));
+  auto& dev_ctx = context.template device_context<DeviceContext>();
+  platform::ForRange<DeviceContext> for_range(dev_ctx, numel);
+  math::RealToComplexFunctor<Tout> functor(data_diag_un, data_diag_un_com,
+                                           numel);
+  for_range(functor);
+  // real tensor multiply complex tensor in broadcast manner
+  Tensor res1 = dito.RealMulComplex(V, diag_unsqueezed_complex);
+  Tensor res2 = dito.Matmul(Vh, res1);
+  Tensor result = dito.Sub(VhgV, res2);
+  result.mutable_data<Tout>(V.dims(), context.GetPlace());
+  result = dito.Div(result, Econj);
+  result = dito.DiagFill(order, order, order, 0, gL, result);
+  Tensor rhs = dito.Matmul(result, Vh);
+  // solve linear system
+  // solve(Vh, rhs, out, m, k)
+  // Vh: matrix with shape [m,m]
+  // rhs: rhs with shape [m,k]
+  // x_grad: out
+  int m = Vh.dims()[Vh.dims().size() - 1];
+  int k = rhs.dims()[rhs.dims().size() - 1];
+  auto* matrix_data = Vh.data<Tout>();
+  auto* rhs_data = rhs.data<Tout>();
+  math::SolveLinearSystem<Tout>(matrix_data, rhs_data, x_grad_data, m, k,
+                                batch_count);
+}
+template <typename DeviceContext, typename T, typename Tout>
+class EigGradKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto& L = *context.Input<Tensor>("Eigenvalues");
+    auto& V = *context.Input<Tensor>("Eigenvectors");
+    auto& gL = *context.Input<Tensor>(framework::GradVarName("Eigenvalues"));
+    auto& gV = *context.Input<Tensor>(framework::GradVarName("Eigenvectors"));
+    auto& x_grad = *context.Output<Tensor>(framework::GradVarName("X"));
+    auto* x_grad_data = x_grad.mutable_data<Tout>(context.GetPlace());
+    auto& dims = V.dims();
+    framework::DDim dim_origin = dims;
+    int num_dims = dim_origin.size();
+    int batch_count = BatchCount(V);
+    const int order = dim_origin[num_dims - 1];
+    ComputeBackwardForComplexInput<DeviceContext, Tout>(
+        V, L, gL, gV, x_grad_data, batch_count, order, context);
+  }
+};
+}  // namespace operators
+}  // namespace paddle
--- a/paddle/fluid/operators/math/matrix_solve.h
+++ b/paddle/fluid/operators/math/matrix_solve.h
@@ -70,6 +70,46 @@ void compute_solve_eigen(const DeviceContext& context,
  }
 }
+// only used for complex input
+template <typename T>
+void SolveLinearSystem(T* matrix_data, T* rhs_data, T* out_data, int order,
+                       int rhs_cols, int batch) {
+  using Treal = typename Eigen::NumTraits<T>::Real;
+  // cast paddle::complex into std::complex
+  std::complex<Treal>* matrix_data_ =
+      reinterpret_cast<std::complex<Treal>*>(matrix_data);
+  std::complex<Treal>* rhs_data_ =
+      reinterpret_cast<std::complex<Treal>*>(rhs_data);
+  std::complex<Treal>* out_data_ =
+      reinterpret_cast<std::complex<Treal>*>(out_data);
+  using Matrix = Eigen::Matrix<std::complex<Treal>, Eigen::Dynamic,
+                               Eigen::Dynamic, Eigen::RowMajor>;
+  using InputMatrixMap = Eigen::Map<Matrix>;
+  using OutputMatrixMap = Eigen::Map<Matrix>;
+  for (int i = 0; i < batch; ++i) {
+    auto input_matrix =
+        InputMatrixMap(matrix_data_ + i * order * order, order, order);
+    auto input_rhs =
+        InputMatrixMap(rhs_data_ + i * order * rhs_cols, order, rhs_cols);
+    auto output =
+        OutputMatrixMap(out_data_ + i * order * rhs_cols, order, rhs_cols);
+    Eigen::PartialPivLU<Matrix> lu_decomposition(order);
+    lu_decomposition.compute(input_matrix);
+    const Treal min_abs_piv =
+        lu_decomposition.matrixLU().diagonal().cwiseAbs().minCoeff();
+    PADDLE_ENFORCE_GT(min_abs_piv, Treal(0),
+                      platform::errors::InvalidArgument(
+                          "Something's wrong with SolveLinearSystem. "));
+    output = lu_decomposition.solve(input_rhs);
+  }
+}
 template <typename DeviceContext, typename T>
 class MatrixSolveFunctor {
 public:

--- a/paddle/fluid/operators/svd_helper.h
+++ b/paddle/fluid/operators/svd_helper.h
@@ -96,6 +96,20 @@ struct PowFunctor {
  float exp_;
 };
+template <typename T>
+struct RealMulComplexFunctor {
+  // x: complex number (a+bj)
+  // y: complex number (c+0j) pretend to be a real number
+  // out: complex number (ac+bcj)
+  inline HOSTDEVICE T operator()(T x, T y) {
+    PADDLE_ENFORCE_LT(y.imag, 1e-6, platform::errors::InvalidArgument(
+                                        "The image part of y must to be 0"
+                                        "but got [%d]",
+                                        y.imag));
+    return platform::complex<Real<T>>(x.real * y.real, x.imag * y.real);
+  }
+};
 static std::vector<int> GetBroadcastShape(InTensors ins) {
  PADDLE_ENFORCE_EQ(ins.size(), 2, platform::errors::InvalidArgument(
                                       "GetBroadcastShape Receive 2 tensors"
@@ -286,6 +300,45 @@ struct DeviceIndependenceTensorOperations {
    for_range(DiagFunctor<T>(x.data<T>(), x.numel(), output));
    return ret;
  }
+  // batch_diag for CPU only
+  Tensor BatchDiag(const Tensor& x, int batch) {
+    Tensor out;
+    auto* x_data = x.data<math::Real<T>>();
+    auto numel = x.numel();
+    auto* out_data = out.mutable_data<math::Real<T>>(
+        x.dims(), context.GetPlace(),
+        static_cast<size_t>(numel * sizeof(math::Real<T>)));
+    auto x_dims = x.dims();
+    int num_dims = x_dims.size();
+    std::vector<int> out_shape;
+    for (int i = 0; i < num_dims - 1; ++i) {
+      out_shape.push_back(x.dims()[i]);
+    }
+    out.Resize(framework::make_ddim(out_shape));
+    int order = x.dims()[num_dims - 1];
+    int stride_out = order * order;
+    int stride_in = order + 1;
+    for (int i = 0; i < batch; ++i) {
+      for (int j = 0; j < order; ++j) {
+        out_data[i * order + j] = x_data[stride_out * i + stride_in * j];
+      }
+    }
+    return out;
+  }
+  // a complex number x times a real number y, which is represented as (a+0j)
+  Tensor RealMulComplex(const Tensor& x, const Tensor& y) {
+    framework::Tensor ret;
+    std::vector<int> out_shape = GetBroadcastShape({&x, &y});
+    ret.Resize(framework::make_ddim(out_shape));
+    ElementwiseComputeEx<RealMulComplexFunctor<T>, DeviceContext, T>(
+        context, &x, &y, -1, RealMulComplexFunctor<T>(), &ret);
+    return ret;
+  }
  framework::Tensor Div(const framework::Tensor& x,
                        const framework::Tensor& y) {
    framework::Tensor ret;
@@ -459,6 +512,19 @@ struct DeviceIndependenceTensorOperations {
    return out;
  }
+  Tensor Real(const Tensor& x) {
+    Tensor out;
+    auto numel = x.numel();
+    auto* out_data = out.mutable_data<math::Real<T>>(
+        x.dims(), context.GetPlace(),
+        static_cast<size_t>(numel * sizeof(math::Real<T>)));
+    auto* x_data = x.data<T>();
+    auto for_range = GetForRange(numel);
+    math::RealFunctor<T> functor(x_data, out_data, numel);
+    for_range(functor);
+    return out;
+  }
  Tensor DiagFill(const int m, const int n, const int num_lower_diags,
                  const int num_upper_diags, const Tensor& scale,
                  const Tensor& input) {

--- a/python/paddle/fluid/tests/unittests/op_test.py
+++ b/python/paddle/fluid/tests/unittests/op_test.py
@@ -134,6 +134,10 @@ def get_numeric_gradient(place,
        delta = np.array(delta).astype(np.float16)
    elif tensor_to_check_dtype == core.VarDesc.VarType.BF16:
        tensor_to_check_dtype = np.float32
+    elif tensor_to_check_dtype == core.VarDesc.VarType.COMPLEX64:
+        tensor_to_check_dtype = np.complex64
+    elif tensor_to_check_dtype == core.VarDesc.VarType.COMPLEX128:
+        tensor_tp_check_dtype = np.complex128
    else:
        raise ValueError("Not supported data type " + str(
            tensor_to_check_dtype))

--- a/python/paddle/fluid/tests/unittests/test_eig_op.py
+++ b/python/paddle/fluid/tests/unittests/test_eig_op.py
+#  Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+import paddle
+import paddle.fluid as fluid
+import paddle.fluid.core as core
+from op_test import OpTest, skip_check_grad_ci
+import unittest
+from paddle.fluid.op import Operator
+from paddle.fluid import compiler, Program, program_guard
+# cast output to complex for numpy.linalg.eig
+def cast_to_complex(input, output):
+    if (input.dtype == np.float32):
+        output = output.astype(np.complex64)
+    elif (input.dtype == np.float64):
+        output = output.astype(np.complex128)
+    return output
+# define eig backward function for a single square matrix
+def eig_backward(w, v, grad_w, grad_v):
+    v_tran = np.transpose(v)
+    v_tran = np.conjugate(v_tran)
+    w_conj = np.conjugate(w)
+    w_conj_l = w_conj.reshape(1, w.size)
+    w_conj_r = w_conj.reshape(w.size, 1)
+    w_conj_2d = w_conj_l - w_conj_r
+    vhgv = np.matmul(v_tran, grad_v)
+    real_vhgv = np.real(vhgv)
+    diag_real = real_vhgv.diagonal()
+    diag_2d = diag_real.reshape(1, w.size)
+    rhs = v * diag_2d
+    mid = np.matmul(v_tran, rhs)
+    result = vhgv - mid
+    res = np.divide(result, w_conj_2d)
+    row, col = np.diag_indices_from(res)
+    res[row, col] = 1.0
+    tmp = np.matmul(res, v_tran)
+    dx = np.linalg.solve(v_tran, tmp)
+    return dx
+class TestEigOp(OpTest):
+    def setUp(self):
+        paddle.enable_static()
+        paddle.device.set_device("cpu")
+        self.op_type = "eig"
+        self.__class__.op_type = self.op_type
+        self.init_input()
+        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(self.x)}
+        self.outputs = {'Eigenvalues': self.out[0], 'Eigenvectors': self.out[1]}
+    def init_input(self):
+        self.set_dtype()
+        self.set_dims()
+        self.x = np.random.random(self.shape).astype(self.dtype)
+        self.out = np.linalg.eig(self.x)
+        self.out = (cast_to_complex(self.x, self.out[0]),
+                    cast_to_complex(self.x, self.out[1]))
+    # for the real input, a customized checker is needed
+    def checker(self, outs):
+        actual_out_w = outs[0].flatten()
+        expect_out_w = self.out[0].flatten()
+        actual_out_v = outs[1].flatten()
+        expect_out_v = self.out[1].flatten()
+        length_w = len(expect_out_w)
+        act_w_real = np.sort(
+            np.array([np.abs(actual_out_w[i].real) for i in range(length_w)]))
+        act_w_imag = np.sort(
+            np.array([np.abs(actual_out_w[i].imag) for i in range(length_w)]))
+        exp_w_real = np.sort(
+            np.array([np.abs(expect_out_w[i].real) for i in range(length_w)]))
+        exp_w_imag = np.sort(
+            np.array([np.abs(expect_out_w[i].imag) for i in range(length_w)]))
+        for i in range(length_w):
+            self.assertTrue(
+                np.allclose(act_w_real[i], exp_w_real[i], 1e-6, 1e-5),
+                "The eigenvalues real part have diff: \nExpected " +
+                str(act_w_real[i]) + "\n" + "But got: " + str(exp_w_real[i]))
+            self.assertTrue(
+                np.allclose(act_w_imag[i], exp_w_imag[i], 1e-6, 1e-5),
+                "The eigenvalues image part have diff: \nExpected " +
+                str(act_w_imag[i]) + "\n" + "But got: " + str(exp_w_imag[i]))
+        length_v = len(expect_out_v)
+        act_v_real = np.sort(
+            np.array([np.abs(actual_out_v[i].real) for i in range(length_v)]))
+        act_v_imag = np.sort(
+            np.array([np.abs(actual_out_v[i].imag) for i in range(length_v)]))
+        exp_v_real = np.sort(
+            np.array([np.abs(expect_out_v[i].real) for i in range(length_v)]))
+        exp_v_imag = np.sort(
+            np.array([np.abs(expect_out_v[i].imag) for i in range(length_v)]))
+        for i in range(length_v):
+            self.assertTrue(
+                np.allclose(act_v_real[i], exp_v_real[i], 1e-6, 1e-5),
+                "The eigenvectors real part have diff: \nExpected " +
+                str(act_v_real[i]) + "\n" + "But got: " + str(exp_v_real[i]))
+            self.assertTrue(
+                np.allclose(act_v_imag[i], exp_v_imag[i], 1e-6, 1e-5),
+                "The eigenvectors image part have diff: \nExpected " +
+                str(act_v_imag[i]) + "\n" + "But got: " + str(exp_v_imag[i]))
+    def set_dtype(self):
+        self.dtype = np.complex64
+    def set_dims(self):
+        self.shape = (10, 10)
+    def init_grad(self):
+        # grad_w, grad_v complex dtype
+        gtype = self.dtype
+        if self.dtype == np.float32:
+            gtype = np.complex64
+        elif self.dtype == np.float64:
+            gtype = np.complex128
+        self.grad_w = np.ones(self.out[0].shape, gtype)
+        self.grad_v = np.ones(self.out[1].shape, gtype)
+        self.grad_x = eig_backward(self.out[0], self.out[1], self.grad_w,
+                                   self.grad_v)
+    def test_check_output(self):
+        self.check_output_with_place_customized(
+            checker=self.checker, place=core.CPUPlace())
+    def test_check_grad(self):
+        self.init_grad()
+        self.check_grad(
+            ['X'], ['Eigenvalues', 'Eigenvectors'],
+            user_defined_grads=[self.grad_x],
+            user_defined_grad_outputs=[self.grad_w, self.grad_v])
+class TestComplex128(TestEigOp):
+    def set_dtype(self):
+        self.dtype = np.complex128
+@skip_check_grad_ci(
+    reason="For float dtype, numpy.linalg.eig forward outputs real or complex when input is real, therefore the grad computation may be not the same with paddle.linalg.eig"
+)
+class TestDouble(TestEigOp):
+    def set_dtype(self):
+        self.dtype = np.float64
+    def test_check_grad(self):
+        pass
+@skip_check_grad_ci(
+    reason="For float dtype, numpy.linalg.eig forward outputs real or complex when input is real, therefore the grad computation may be not the same with paddle.linalg.eig"
+)
+class TestEigBatchMarices(TestEigOp):
+    def set_dtype(self):
+        self.dtype = np.float64
+    def set_dims(self):
+        self.shape = (3, 10, 10)
+    def test_check_grad(self):
+        pass
+@skip_check_grad_ci(
+    reason="For float dtype, numpy.linalg.eig forward outputs real or complex when input is real, therefore the grad computation may be not the same with paddle.linalg.eig"
+)
+class TestFloat(TestEigOp):
+    def set_dtype(self):
+        self.dtype = np.float32
+    def test_check_grad(self):
+        pass
+class TestEigStatic(TestEigOp):
+    def test_check_output_with_place(self):
+        paddle.enable_static()
+        place = core.CPUPlace()
+        input_np = np.random.random([3, 3]).astype('complex')
+        expect_val, expect_vec = np.linalg.eig(input_np)
+        with fluid.program_guard(fluid.Program(), fluid.Program()):
+            input = fluid.data(name="input", shape=[3, 3], dtype='complex')
+            act_val, act_vec = paddle.linalg.eig(input)
+            exe = fluid.Executor(place)
+            fetch_val, fetch_vec = exe.run(fluid.default_main_program(),
+                                           feed={"input": input_np},
+                                           fetch_list=[act_val, act_vec])
+        self.assertTrue(
+            np.allclose(expect_val, fetch_val, 1e-6, 1e-6),
+            "The eigen values have diff: \nExpected " + str(expect_val) + "\n" +
+            "But got: " + str(fetch_val))
+        self.assertTrue(
+            np.allclose(np.abs(expect_vec), np.abs(fetch_vec), 1e-6, 1e-6),
+            "The eigen vectors have diff: \nExpected " +
+            str(np.abs(expect_vec)) + "\n" + "But got: " +
+            str(np.abs(fetch_vec)))
+class TestEigWrongDimsError(unittest.TestCase):
+    def test_error(self):
+        paddle.device.set_device("cpu")
+        paddle.disable_static()
+        a = np.random.random((3)).astype('float32')
+        x = paddle.to_tensor(a)
+        self.assertRaises(ValueError, paddle.linalg.eig, x)
+class TestEigNotSquareError(unittest.TestCase):
+    def test_error(self):
+        paddle.device.set_device("cpu")
+        paddle.disable_static()
+        a = np.random.random((1, 2, 3)).astype('float32')
+        x = paddle.to_tensor(a)
+        self.assertRaises(ValueError, paddle.linalg.eig, x)
+class TestEigUnsupportedDtypeError(unittest.TestCase):
+    def test_error(self):
+        paddle.device.set_device("cpu")
+        paddle.disable_static()
+        a = (np.random.random((3, 3)) * 10).astype('int64')
+        x = paddle.to_tensor(a)
+        self.assertRaises(ValueError, paddle.linalg.eig, x)
+if __name__ == "__main__":
+    unittest.main()
--- a/python/paddle/linalg.py
+++ b/python/paddle/linalg.py
@@ -14,6 +14,7 @@
 from .tensor.linalg import cholesky  # noqa: F401
 from .tensor.linalg import norm  # noqa: F401
+from .tensor.linalg import eig  # noqa: F401
 from .tensor.linalg import cond  # noqa: F401
 from .tensor.linalg import matrix_power  # noqa: F401
 from .tensor.linalg import solve  # noqa: F401
@@ -32,6 +33,7 @@ __all__ = [
    'norm',
    'cond',
    'inv',
+    'eig',
    'eigvals',
    'multi_dot',
    'matrix_rank',

--- a/python/paddle/tensor/__init__.py
+++ b/python/paddle/tensor/__init__.py
@@ -45,6 +45,7 @@ from .linalg import cholesky  # noqa: F401
 from .linalg import bmm  # noqa: F401
 from .linalg import histogram  # noqa: F401
 from .linalg import mv  # noqa: F401
+from .linalg import eig  # noqa: F401
 from .linalg import matrix_power  # noqa: F401
 from .linalg import eigvals  # noqa: F401
 from .linalg import multi_dot  # noqa: F401
@@ -386,6 +387,7 @@ tensor_method_func  = [ #noqa
           'bitwise_xor',
           'bitwise_not',
           'broadcast_tensors',
+           'eig',
           'uniform_',
           'multi_dot',
           'solve',

--- a/python/paddle/tensor/linalg.py
+++ b/python/paddle/tensor/linalg.py
@@ -23,6 +23,7 @@ import paddle
 from paddle.common_ops_import import core
 from paddle.common_ops_import import VarDesc
 from paddle import _C_ops
+import paddle
 __all__ = []
@@ -1593,6 +1594,72 @@ def matrix_power(x, n, name=None):
    return out
+def eig(x, name=None):
+    """
+    This API performs the eigenvalue decomposition of a square matrix or a batch of square matrices.
+    .. note::
+        If the matrix is a Hermitian or a real symmetric matrix, please use :ref:`paddle.linalg.eigh` instead, which is much faster.
+        If only eigenvalues is needed, please use :ref:`paddle.linalg.eigvals` instead.
+        If the matrix is of any shape, please use :ref:`paddle.linalg.svd`.
+        This API is only supported on CPU device.
+        The output datatype is always complex for both real and complex input.
+    Args:
+        x (Tensor): A tensor with shape math:`[*, N, N]`, The data type of the x should be one of ``float32``,
+            ``float64``, ``compplex64`` or ``complex128``.
+        name (str, optional): The default value is `None`. Normally there is no need for user to set 
+            this property. For more information, please refer to :ref:`api_guide_Name`.
+    Returns:
+        Eigenvalues(Tensors): A tensor with shape math:`[*, N]` refers to the eigen values.
+        Eigenvectors(Tensors): A tensor with shape math:`[*, N, N]` refers to the eigen vectors.
+    Examples:
+        .. code-block:: python
+            import paddle
+            import numpy as np
+            paddle.device.set_device("cpu")
+            x_data = np.array([[1.6707249, 7.2249975, 6.5045543],
+                               [9.956216,  8.749598,  6.066444 ],
+                               [4.4251957, 1.7983172, 0.370647 ]]).astype("float32")
+            x = paddle.to_tensor(x_data)
+            w, v = paddle.linalg.eig(x)
+            print(w)
+            # Tensor(shape=[3, 3], dtype=complex128, place=CPUPlace, stop_gradient=False,
+            #       [[(-0.5061363550800655+0j) , (-0.7971760990842826+0j) ,
+            #         (0.18518077798279986+0j)],
+            #        [(-0.8308237755993192+0j) ,  (0.3463813401919749+0j) ,
+            #         (-0.6837005269141947+0j) ],
+            #        [(-0.23142567697893396+0j),  (0.4944999840400175+0j) ,
+            #         (0.7058765252952796+0j) ]])
+            print(v)
+            # Tensor(shape=[3], dtype=complex128, place=CPUPlace, stop_gradient=False,
+            #       [ (16.50471283351188+0j)  , (-5.5034820550763515+0j) ,
+            #         (-0.21026087843552282+0j)])
+    """
+    if in_dygraph_mode():
+        w, v = _C_ops.eig(x)
+        return w, v
+    check_variable_and_dtype(
+        x, 'X', ['float32', 'float64', 'complex64', 'complex128'], 'eig')
+    helper = LayerHelper('eig', **locals())
+    w = helper.create_variable_for_type_inference(x.dtype)
+    v = helper.create_variable_for_type_inference(x.dtype)
+    inputs = {'X': x}
+    outputs = {'Eigenvalues': w, 'Eigenvectors': v}
+    helper.append_op(type='eig', inputs=inputs, outputs=outputs)
+    return w, v
 def eigvals(x, name=None):
    """
    Compute the eigenvalues of one or more general matrices.