Unverified commit 319f95d0 authored by KP, committed by GitHub

Add complex type compatibility for stft api and stft op. (#40113)

* Add stft_op.

* Add stft_grad_op.

* Add stft_op unittest.

* [DLTP-45176] Add complex compatibility in static mode for stft api.

* [DLTP-45176] Add complex compatibility in static mode for stft api.

* Add doc.

* Update unittests of stft op.

* Update spectral helper.

* fix coding style.
Parent 3d0be938
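Taken together, these changes let paddle.signal.stft be called on complex inputs while building a static graph, where shapes may still be unknown. A minimal sketch of the intended usage, assuming the Paddle 2.x static-graph APIs (the shapes and variable names are illustrative, not part of this diff):

import paddle

paddle.enable_static()
main_prog, startup_prog = paddle.static.Program(), paddle.static.Program()
with paddle.static.program_guard(main_prog, startup_prog):
    # Complex input now passes the static-mode dtype checks of stft.
    # onesided=False, since a complex signal has no conjugate-symmetric half to drop.
    x = paddle.static.data(name='x', shape=[8, 48000], dtype='complex64')
    y = paddle.signal.stft(x, n_fft=512, hop_length=160, onesided=False)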
@@ -64,18 +64,26 @@ class FrameOp : public framework::OperatorWithKernel {
      end_axis = x_rank - 2;
    }

-    PADDLE_ENFORCE_LE(frame_length, seq_length,
-                      platform::errors::InvalidArgument(
-                          "Attribute(frame_length) of FrameOp should be less "
-                          "equal than sequence length, but got (%s) > (%s).",
-                          frame_length, seq_length));
+    bool contain_unknown_dim = phi::contain_unknown_dim(x_dims);
+    bool check = ctx->IsRuntime() || !contain_unknown_dim;
+    if (check) {
+      PADDLE_ENFORCE_LE(frame_length, seq_length,
+                        platform::errors::InvalidArgument(
+                            "Attribute(frame_length) of FrameOp should be less "
+                            "equal than sequence length, but got (%s) > (%s).",
+                            frame_length, seq_length));
+    }

    // It won't go into for loop when x_rank == 1U.
    for (int i = start_axis; i <= end_axis; i++) {
      output_shape.push_back(x_dims[i]);
    }

-    n_frames = 1 + (seq_length - frame_length) / hop_length;
+    if (seq_length == -1) {
+      n_frames = -1;
+    } else {
+      n_frames = 1 + (seq_length - frame_length) / hop_length;
+    }

    if (axis == 0) {
      // (n_frames, frame_length, ...)
...
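The FrameOp shape-inference change above can be summarized in plain Python: a sequence length that is unknown at graph-construction time (encoded as -1) now propagates to the frame count instead of failing the frame_length check. This is an illustrative sketch of the rule only, not Paddle code:

def infer_n_frames(seq_length, frame_length, hop_length):
    # -1 marks a dimension whose size is unknown until runtime
    if seq_length == -1:
        return -1
    return 1 + (seq_length - frame_length) // hop_length

assert infer_n_frames(100, 50, 15) == 4    # known length: sliding-window count
assert infer_n_frames(-1, 50, 15) == -1    # unknown length propagates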
@@ -98,9 +98,17 @@ REGISTER_OP_CPU_KERNEL(
    mean, ops::MeanKernel<paddle::platform::CPUDeviceContext, float>,
    ops::MeanKernel<paddle::platform::CPUDeviceContext, double>,
    ops::MeanKernel<paddle::platform::CPUDeviceContext,
-                    paddle::platform::bfloat16>);
+                    paddle::platform::bfloat16>,
+    ops::MeanKernel<paddle::platform::CPUDeviceContext,
+                    paddle::platform::complex<float>>,
+    ops::MeanKernel<paddle::platform::CPUDeviceContext,
+                    paddle::platform::complex<double>>);
REGISTER_OP_CPU_KERNEL(
    mean_grad, ops::MeanGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::MeanGradKernel<paddle::platform::CPUDeviceContext, double>,
    ops::MeanGradKernel<paddle::platform::CPUDeviceContext,
-                        paddle::platform::bfloat16>);
+                        paddle::platform::bfloat16>,
+    ops::MeanGradKernel<paddle::platform::CPUDeviceContext,
+                        paddle::platform::complex<float>>,
+    ops::MeanGradKernel<paddle::platform::CPUDeviceContext,
+                        paddle::platform::complex<double>>);
@@ -102,10 +102,17 @@ namespace plat = paddle::platform;
REGISTER_OP_CUDA_KERNEL(
    mean, ops::MeanCUDAKernel<paddle::platform::CUDADeviceContext, float>,
    ops::MeanCUDAKernel<paddle::platform::CUDADeviceContext, double>,
-    ops::MeanCUDAKernel<paddle::platform::CUDADeviceContext, plat::float16>);
+    ops::MeanCUDAKernel<paddle::platform::CUDADeviceContext, plat::float16>,
+    ops::MeanCUDAKernel<paddle::platform::CUDADeviceContext,
+                        paddle::platform::complex<float>>,
+    ops::MeanCUDAKernel<paddle::platform::CUDADeviceContext,
+                        paddle::platform::complex<double>>);
REGISTER_OP_CUDA_KERNEL(
    mean_grad,
    ops::MeanCUDAGradKernel<paddle::platform::CUDADeviceContext, float>,
    ops::MeanCUDAGradKernel<paddle::platform::CUDADeviceContext, double>,
-    ops::MeanCUDAGradKernel<paddle::platform::CUDADeviceContext,
-                            plat::float16>);
+    ops::MeanCUDAGradKernel<paddle::platform::CUDADeviceContext, plat::float16>,
+    ops::MeanCUDAGradKernel<paddle::platform::CUDADeviceContext,
+                            paddle::platform::complex<float>>,
+    ops::MeanCUDAGradKernel<paddle::platform::CUDADeviceContext,
+                            paddle::platform::complex<double>>);
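With the kernels above registered, mean over a complex tensor averages the real and imaginary parts independently, matching NumPy. A small sketch, assuming the Python-side dtype checks accept complex inputs as well:

import numpy as np
import paddle

x = paddle.to_tensor(np.array([1 + 2j, 3 + 4j], dtype=np.complex64))
# np.mean of the same data gives (2+3j); paddle.mean should agree on CPU and GPU.
print(paddle.mean(x))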
@@ -54,6 +54,7 @@ class OverlapAddOp : public framework::OperatorWithKernel {
    std::vector<int64_t> output_shape;
    int n_frames;
    int frame_length;
+    int seq_length;
    int start_axis;
    int end_axis;
@@ -69,14 +70,22 @@ class OverlapAddOp : public framework::OperatorWithKernel {
      end_axis = x_rank - 3;
    }

-    PADDLE_ENFORCE_LE(
-        hop_length, frame_length,
-        platform::errors::InvalidArgument(
-            "Attribute(hop_length) of OverlapAddOp should be less or equal "
-            "than frame_length, but got hop_length(%s) > frame_length(%s).",
-            hop_length, frame_length));
+    bool contain_unknown_dim = phi::contain_unknown_dim(x_dims);
+    bool check = ctx->IsRuntime() || !contain_unknown_dim;
+    if (check) {
+      PADDLE_ENFORCE_LE(
+          hop_length, frame_length,
+          platform::errors::InvalidArgument(
+              "Attribute(hop_length) of OverlapAddOp should be less or equal "
+              "than frame_length, but got hop_length(%s) > frame_length(%s).",
+              hop_length, frame_length));
+    }

-    const int seq_length = (n_frames - 1) * hop_length + frame_length;
+    if (n_frames == -1) {
+      seq_length = -1;
+    } else {
+      seq_length = (n_frames - 1) * hop_length + frame_length;
+    }

    // It won't go into for loop when x_rank == 2U.
    for (int i = start_axis; i <= end_axis; i++) {
...
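OverlapAddOp gets the mirror-image rule: an unknown number of frames yields an unknown output sequence length. Again a plain-Python illustration, not Paddle code:

def infer_seq_length(n_frames, frame_length, hop_length):
    if n_frames == -1:
        return -1
    return (n_frames - 1) * hop_length + frame_length

assert infer_seq_length(4, 50, 15) == 95    # inverse of the frame computation
assert infer_seq_length(-1, 50, 15) == -1   # unknown frame count propagates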
@@ -13,28 +13,7 @@
// limitations under the License.

#include "paddle/fluid/operators/spectral_op.h"
+#include "paddle/fluid/operators/spectral_helper.h"

-#include <algorithm>
-#include <functional>
-#include <memory>
-#include <numeric>
-#include <string>
-#include <vector>
-#include "paddle/fluid/framework/data_type.h"
-#include "paddle/fluid/framework/eigen.h"
-#include "paddle/fluid/operators/transpose_op.h"
-#include "paddle/fluid/platform/complex.h"
-#include "paddle/phi/kernels/funcs/complex_functors.h"
-#if defined(PADDLE_WITH_ONEMKL)
-#include "paddle/phi/backends/dynload/mklrt.h"
-#elif defined(PADDLE_WITH_POCKETFFT)
-#include "extern_pocketfft/pocketfft_hdronly.h"
-#endif
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/platform/for_range.h"

namespace paddle {
namespace operators {
@@ -355,465 +334,6 @@ FFTNormMode get_norm_from_string(const std::string& norm, bool forward) {
                        norm));
}
// FFT Functors
#if defined(PADDLE_WITH_ONEMKL)
#define MKL_DFTI_CHECK(expr) \
do { \
MKL_LONG status = (expr); \
if (!phi::dynload::DftiErrorClass(status, DFTI_NO_ERROR)) \
PADDLE_THROW( \
platform::errors::External(phi::dynload::DftiErrorMessage(status))); \
} while (0);
namespace {
struct DftiDescriptorDeleter {
void operator()(DFTI_DESCRIPTOR_HANDLE handle) {
if (handle != nullptr) {
MKL_DFTI_CHECK(phi::dynload::DftiFreeDescriptor(&handle));
}
}
};
// A RAII wrapper for MKL_DESCRIPTOR*
class DftiDescriptor {
public:
void init(DFTI_CONFIG_VALUE precision, DFTI_CONFIG_VALUE signal_type,
MKL_LONG signal_ndim, MKL_LONG* sizes) {
PADDLE_ENFORCE_EQ(desc_.get(), nullptr,
platform::errors::AlreadyExists(
"DftiDescriptor has already been initialized."));
DFTI_DESCRIPTOR* raw_desc;
MKL_DFTI_CHECK(phi::dynload::DftiCreateDescriptorX(
&raw_desc, precision, signal_type, signal_ndim, sizes));
desc_.reset(raw_desc);
}
DFTI_DESCRIPTOR* get() const {
DFTI_DESCRIPTOR* raw_desc = desc_.get();
PADDLE_ENFORCE_NOT_NULL(raw_desc,
platform::errors::PreconditionNotMet(
"DFTI DESCRIPTOR has not been initialized."));
return raw_desc;
}
private:
std::unique_ptr<DFTI_DESCRIPTOR, DftiDescriptorDeleter> desc_;
};
DftiDescriptor _plan_mkl_fft(const framework::proto::VarType::Type& in_dtype,
const framework::proto::VarType::Type& out_dtype,
const framework::DDim& in_strides,
const framework::DDim& out_strides,
const std::vector<int>& signal_sizes,
FFTNormMode normalization, bool forward) {
const DFTI_CONFIG_VALUE precision = [&] {
switch (in_dtype) {
case framework::proto::VarType::FP32:
return DFTI_SINGLE;
case framework::proto::VarType::COMPLEX64:
return DFTI_SINGLE;
case framework::proto::VarType::FP64:
return DFTI_DOUBLE;
case framework::proto::VarType::COMPLEX128:
return DFTI_DOUBLE;
default:
PADDLE_THROW(platform::errors::InvalidArgument(
"Invalid input datatype (%s), input data type should be FP32, "
"FP64, COMPLEX64 or COMPLEX128.",
framework::DataTypeToString(in_dtype)));
}
}();
// C2C, R2C, C2R
const FFTTransformType fft_type = GetFFTTransformType(in_dtype, out_dtype);
const DFTI_CONFIG_VALUE domain =
(fft_type == FFTTransformType::C2C) ? DFTI_COMPLEX : DFTI_REAL;
DftiDescriptor descriptor;
std::vector<MKL_LONG> fft_sizes(signal_sizes.cbegin(), signal_sizes.cend());
const MKL_LONG signal_ndim = fft_sizes.size() - 1;
descriptor.init(precision, domain, signal_ndim, fft_sizes.data() + 1);
// placement inplace or not inplace
MKL_DFTI_CHECK(phi::dynload::DftiSetValue(descriptor.get(), DFTI_PLACEMENT,
DFTI_NOT_INPLACE));
// number of transformations
const MKL_LONG batch_size = fft_sizes[0];
MKL_DFTI_CHECK(phi::dynload::DftiSetValue(
descriptor.get(), DFTI_NUMBER_OF_TRANSFORMS, batch_size));
// input & output distance
const MKL_LONG idist = in_strides[0];
const MKL_LONG odist = out_strides[0];
MKL_DFTI_CHECK(
phi::dynload::DftiSetValue(descriptor.get(), DFTI_INPUT_DISTANCE, idist));
MKL_DFTI_CHECK(phi::dynload::DftiSetValue(descriptor.get(),
DFTI_OUTPUT_DISTANCE, odist));
// input & output stride
std::vector<MKL_LONG> mkl_in_stride(1 + signal_ndim, 0);
std::vector<MKL_LONG> mkl_out_stride(1 + signal_ndim, 0);
for (MKL_LONG i = 1; i <= signal_ndim; i++) {
mkl_in_stride[i] = in_strides[i];
mkl_out_stride[i] = out_strides[i];
}
MKL_DFTI_CHECK(phi::dynload::DftiSetValue(
descriptor.get(), DFTI_INPUT_STRIDES, mkl_in_stride.data()));
MKL_DFTI_CHECK(phi::dynload::DftiSetValue(
descriptor.get(), DFTI_OUTPUT_STRIDES, mkl_out_stride.data()));
// conjugate even storage
if (!(fft_type == FFTTransformType::C2C)) {
MKL_DFTI_CHECK(phi::dynload::DftiSetValue(
descriptor.get(), DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX));
}
MKL_LONG signal_numel =
std::accumulate(fft_sizes.cbegin() + 1, fft_sizes.cend(), 1UL,
std::multiplies<MKL_LONG>());
if (normalization != FFTNormMode::none) {
const double scale =
((normalization == FFTNormMode::by_sqrt_n)
? 1.0 / std::sqrt(static_cast<double>(signal_numel))
: 1.0 / static_cast<double>(signal_numel));
const auto scale_direction = [&]() {
if (fft_type == FFTTransformType::R2C ||
(fft_type == FFTTransformType::C2C && forward)) {
return DFTI_FORWARD_SCALE;
} else {
// (fft_type == FFTTransformType::C2R ||
// (fft_type == FFTTransformType::C2C && !forward))
return DFTI_BACKWARD_SCALE;
}
}();
MKL_DFTI_CHECK(
phi::dynload::DftiSetValue(descriptor.get(), scale_direction, scale));
}
// commit the descriptor
MKL_DFTI_CHECK(phi::dynload::DftiCommitDescriptor(descriptor.get()));
return descriptor;
}
// Execute a general fft operation (can be c2c, onesided r2c or onesided c2r)
template <typename DeviceContext, typename Ti, typename To>
void exec_fft(const DeviceContext& ctx, const Tensor* x, Tensor* out,
const std::vector<int64_t>& axes, FFTNormMode normalization,
bool forward) {
const framework::DDim& in_sizes = x->dims();
const int ndim = in_sizes.size();
const int signal_ndim = axes.size();
const int batch_ndim = ndim - signal_ndim;
const framework::DDim& out_sizes = out->dims();
// make a dim permutation
std::vector<int> dim_permute(ndim);
std::iota(dim_permute.begin(), dim_permute.end(), 0);
std::vector<bool> is_transformed_dim(ndim, false);
for (const auto& d : axes) {
is_transformed_dim[d] = true;
}
const auto batch_end =
std::partition(dim_permute.begin(), dim_permute.end(),
[&](size_t axis) { return !is_transformed_dim[axis]; });
std::copy(axes.cbegin(), axes.cend(), batch_end);
// transpose input according to that permutation
framework::DDim transposed_input_shape = in_sizes.transpose(dim_permute);
std::vector<int64_t> transposed_input_shape_ =
phi::vectorize(transposed_input_shape);
framework::Tensor transposed_input;
transposed_input.Resize(transposed_input_shape);
const auto place = ctx.GetPlace();
transposed_input.mutable_data<Ti>(place);
TransCompute<platform::CPUDeviceContext, Ti>(ndim, ctx, *x, &transposed_input,
dim_permute);
// make a collapsed input: collapse batch axes for input
const int batch_size = std::accumulate(
transposed_input_shape.Get(), transposed_input_shape.Get() + batch_ndim,
1L, std::multiplies<int64_t>());
std::vector<int> collapsed_input_shape_(1 + signal_ndim);
collapsed_input_shape_[0] = batch_size;
std::copy(transposed_input_shape_.begin() + batch_ndim,
transposed_input_shape_.end(), collapsed_input_shape_.begin() + 1);
const framework::DDim collapsed_input_shape =
phi::make_ddim(collapsed_input_shape_);
transposed_input.Resize(collapsed_input_shape);
framework::Tensor& collapsed_input = transposed_input;
// make a collapsed output
std::vector<int> collapsed_output_shape_(1 + signal_ndim);
collapsed_output_shape_[0] = batch_size;
for (int i = 0; i < signal_ndim; i++) {
collapsed_output_shape_[1 + i] = out_sizes[axes[i]];
}
const framework::DDim collapsed_output_shape =
phi::make_ddim(collapsed_output_shape_);
framework::Tensor collapsed_output;
collapsed_output.Resize(collapsed_output_shape);
collapsed_output.mutable_data(place, out->type());
// signal sizes
std::vector<int> signal_sizes(1 + signal_ndim);
signal_sizes[0] = batch_size;
for (int i = 0; i < signal_ndim; i++) {
signal_sizes[1 + i] =
std::max(collapsed_input_shape[1 + i], collapsed_output_shape[1 + i]);
}
// input & output stride
const framework::DDim input_stride = phi::stride(collapsed_input_shape);
const framework::DDim output_stride = phi::stride(collapsed_output_shape);
// make a DFTI_DESCRIPTOR
DftiDescriptor desc =
_plan_mkl_fft(framework::TransToProtoVarType(x->dtype()),
framework::TransToProtoVarType(out->dtype()), input_stride,
output_stride, signal_sizes, normalization, forward);
const FFTTransformType fft_type =
GetFFTTransformType(framework::TransToProtoVarType(x->dtype()),
framework::TransToProtoVarType(out->type()));
if (fft_type == FFTTransformType::C2R && forward) {
framework::Tensor collapsed_input_conj(collapsed_input.dtype());
collapsed_input_conj.mutable_data<Ti>(collapsed_input.dims(),
ctx.GetPlace());
// conjugate the input
platform::ForRange<DeviceContext> for_range(ctx, collapsed_input.numel());
phi::funcs::ConjFunctor<Ti> functor(collapsed_input.data<Ti>(),
collapsed_input.numel(),
collapsed_input_conj.data<Ti>());
for_range(functor);
MKL_DFTI_CHECK(phi::dynload::DftiComputeBackward(
desc.get(), collapsed_input_conj.data(), collapsed_output.data()));
} else if (fft_type == FFTTransformType::R2C && !forward) {
framework::Tensor collapsed_output_conj(collapsed_output.dtype());
collapsed_output_conj.mutable_data<To>(collapsed_output.dims(),
ctx.GetPlace());
MKL_DFTI_CHECK(phi::dynload::DftiComputeForward(
desc.get(), collapsed_input.data(), collapsed_output_conj.data()));
// conjugate the output
platform::ForRange<DeviceContext> for_range(ctx, collapsed_output.numel());
phi::funcs::ConjFunctor<To> functor(collapsed_output_conj.data<To>(),
collapsed_output.numel(),
collapsed_output.data<To>());
for_range(functor);
} else {
if (forward) {
MKL_DFTI_CHECK(phi::dynload::DftiComputeForward(
desc.get(), collapsed_input.data(), collapsed_output.data()));
} else {
MKL_DFTI_CHECK(phi::dynload::DftiComputeBackward(
desc.get(), collapsed_input.data(), collapsed_output.data()));
}
}
// resize for the collapsed output
framework::DDim transposed_output_shape = out_sizes.transpose(dim_permute);
collapsed_output.Resize(transposed_output_shape);
framework::Tensor& transposed_output = collapsed_output;
// reverse the transposition
std::vector<int> reverse_dim_permute(ndim);
for (int i = 0; i < ndim; i++) {
reverse_dim_permute[dim_permute[i]] = i;
}
TransCompute<platform::CPUDeviceContext, To>(ndim, ctx, transposed_output,
out, reverse_dim_permute);
}
} // anonymous namespace
template <typename Ti, typename To>
struct FFTC2CFunctor<platform::CPUDeviceContext, Ti, To> {
void operator()(const platform::CPUDeviceContext& ctx, const Tensor* x,
Tensor* out, const std::vector<int64_t>& axes,
FFTNormMode normalization, bool forward) {
exec_fft<platform::CPUDeviceContext, Ti, To>(ctx, x, out, axes,
normalization, forward);
}
};
template <typename Ti, typename To>
struct FFTR2CFunctor<platform::CPUDeviceContext, Ti, To> {
void operator()(const platform::CPUDeviceContext& ctx, const Tensor* x,
Tensor* out, const std::vector<int64_t>& axes,
FFTNormMode normalization, bool forward) {
exec_fft<platform::CPUDeviceContext, Ti, To>(ctx, x, out, axes,
normalization, forward);
}
};
template <typename Ti, typename To>
struct FFTC2RFunctor<platform::CPUDeviceContext, Ti, To> {
void operator()(const platform::CPUDeviceContext& ctx, const Tensor* x,
Tensor* out, const std::vector<int64_t>& axes,
FFTNormMode normalization, bool forward) {
if (axes.size() > 1) {
const std::vector<int64_t> c2c_dims(axes.begin(), axes.end() - 1);
Tensor temp;
temp.mutable_data<Ti>(x->dims(), ctx.GetPlace());
FFTC2CFunctor<platform::CPUDeviceContext, Ti, Ti> c2c_functor;
c2c_functor(ctx, x, &temp, c2c_dims, normalization, forward);
const std::vector<int64_t> new_axes{axes.back()};
exec_fft<platform::CPUDeviceContext, Ti, To>(ctx, &temp, out, new_axes,
normalization, forward);
} else {
exec_fft<platform::CPUDeviceContext, Ti, To>(ctx, x, out, axes,
normalization, forward);
}
}
};
#elif defined(PADDLE_WITH_POCKETFFT)
namespace {
template <typename T>
T compute_factor(int64_t size, FFTNormMode normalization) {
constexpr auto one = static_cast<T>(1);
switch (normalization) {
case FFTNormMode::none:
return one;
case FFTNormMode::by_n:
return one / static_cast<T>(size);
case FFTNormMode::by_sqrt_n:
return one / std::sqrt(static_cast<T>(size));
}
PADDLE_THROW(
platform::errors::InvalidArgument("Unsupported normalization type"));
}
} // anonymous namespace
template <typename Ti, typename To>
struct FFTC2CFunctor<platform::CPUDeviceContext, Ti, To> {
void operator()(const platform::CPUDeviceContext& ctx, const Tensor* x,
Tensor* out, const std::vector<int64_t>& axes,
FFTNormMode normalization, bool forward) {
using R = typename Ti::value_type;
using C = std::complex<R>;
const auto& input_dim = x->dims();
const std::vector<size_t> in_sizes = phi::vectorize<size_t>(input_dim);
std::vector<std::ptrdiff_t> in_strides =
phi::vectorize<std::ptrdiff_t>(phi::stride(input_dim));
const int64_t data_size = sizeof(C);
std::transform(in_strides.begin(), in_strides.end(), in_strides.begin(),
[&](std::ptrdiff_t s) { return s * data_size; });
const auto* in_data = reinterpret_cast<const C*>(x->data<Ti>());
auto* out_data = reinterpret_cast<C*>(out->data<To>());
// pocketfft requires std::vector<size_t>
std::vector<size_t> axes_(axes.size());
std::copy(axes.begin(), axes.end(), axes_.begin());
// compute factor
int64_t signal_numel = 1;
for (auto i : axes) {
signal_numel *= in_sizes[i];
}
R factor = compute_factor<R>(signal_numel, normalization);
pocketfft::c2c(in_sizes, in_strides, in_strides, axes_, forward, in_data,
out_data, factor);
}
};
template <typename Ti, typename To>
struct FFTR2CFunctor<platform::CPUDeviceContext, Ti, To> {
void operator()(const platform::CPUDeviceContext& ctx, const Tensor* x,
Tensor* out, const std::vector<int64_t>& axes,
FFTNormMode normalization, bool forward) {
using R = Ti;
using C = std::complex<R>;
const auto& input_dim = x->dims();
const std::vector<size_t> in_sizes = phi::vectorize<size_t>(input_dim);
std::vector<std::ptrdiff_t> in_strides =
phi::vectorize<std::ptrdiff_t>(phi::stride(input_dim));
{
const int64_t data_size = sizeof(R);
std::transform(in_strides.begin(), in_strides.end(), in_strides.begin(),
[&](std::ptrdiff_t s) { return s * data_size; });
}
const auto& output_dim = out->dims();
const std::vector<size_t> out_sizes = phi::vectorize<size_t>(output_dim);
std::vector<std::ptrdiff_t> out_strides =
phi::vectorize<std::ptrdiff_t>(phi::stride(output_dim));
{
const int64_t data_size = sizeof(C);
std::transform(out_strides.begin(), out_strides.end(),
out_strides.begin(),
[&](std::ptrdiff_t s) { return s * data_size; });
}
const auto* in_data = x->data<R>();
auto* out_data = reinterpret_cast<C*>(out->data<To>());
// pocketfft requires std::vector<size_t>
std::vector<size_t> axes_(axes.size());
std::copy(axes.begin(), axes.end(), axes_.begin());
// compute normalization factor
int64_t signal_numel = 1;
for (auto i : axes) {
signal_numel *= in_sizes[i];
}
R factor = compute_factor<R>(signal_numel, normalization);
pocketfft::r2c(in_sizes, in_strides, out_strides, axes_, forward, in_data,
out_data, factor);
}
};
template <typename Ti, typename To>
struct FFTC2RFunctor<platform::CPUDeviceContext, Ti, To> {
void operator()(const platform::CPUDeviceContext& ctx, const Tensor* x,
Tensor* out, const std::vector<int64_t>& axes,
FFTNormMode normalization, bool forward) {
using R = To;
using C = std::complex<R>;
const auto& input_dim = x->dims();
const std::vector<size_t> in_sizes = phi::vectorize<size_t>(input_dim);
std::vector<std::ptrdiff_t> in_strides =
phi::vectorize<std::ptrdiff_t>(phi::stride(input_dim));
{
const int64_t data_size = sizeof(C);
std::transform(in_strides.begin(), in_strides.end(), in_strides.begin(),
[&](std::ptrdiff_t s) { return s * data_size; });
}
const auto& output_dim = out->dims();
const std::vector<size_t> out_sizes = phi::vectorize<size_t>(output_dim);
std::vector<std::ptrdiff_t> out_strides =
phi::vectorize<std::ptrdiff_t>(phi::stride(output_dim));
{
const int64_t data_size = sizeof(R);
std::transform(out_strides.begin(), out_strides.end(),
out_strides.begin(),
[&](std::ptrdiff_t s) { return s * data_size; });
}
const auto* in_data = reinterpret_cast<const C*>(x->data<Ti>());
auto* out_data = out->data<R>();
// pocketfft requires std::vector<size_t>
std::vector<size_t> axes_(axes.size());
std::copy(axes.begin(), axes.end(), axes_.begin());
// compute normalization factor
int64_t signal_numel = 1;
for (auto i : axes) {
signal_numel *= out_sizes[i];
}
R factor = compute_factor<R>(signal_numel, normalization);
pocketfft::c2r(out_sizes, in_strides, out_strides, axes_, forward, in_data,
out_data, factor);
}
};
#endif
} // namespace operators
} // namespace paddle
...
(This file's diff is collapsed and not shown.)
@@ -11,8 +11,11 @@
#pragma once

#define NOMINMAX // to use std::min std::max correctly on windows
+#include <algorithm>
+#include <functional>
#include <iostream>
#include <memory>
+#include <numeric>
#include <string>
#include <vector>
#include "paddle/fluid/framework/convert_utils.h"
@@ -23,8 +26,10 @@
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/conj_op.h"
#include "paddle/fluid/operators/eigen/eigen_function.h"
+#include "paddle/fluid/operators/transpose_op.h"
#include "paddle/fluid/platform/complex.h"
#include "paddle/fluid/platform/for_range.h"
+#include "paddle/phi/kernels/funcs/complex_functors.h"
#include "paddle/phi/kernels/funcs/padding.h"
#if defined(__NVCC__) || defined(__HIPCC__)
#include "thrust/device_vector.h"
...
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/stft_op.h"
#include "paddle/fluid/operators/spectral_helper.h"
namespace paddle {
namespace operators {
class StftOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "stft");
OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "stft");
const int n_fft = ctx->Attrs().Get<int>("n_fft");
const int hop_length = ctx->Attrs().Get<int>("hop_length");
const auto x_dims = ctx->GetInputDim("X");
const int x_rank = x_dims.size();
const bool onesided = ctx->Attrs().Get<bool>("onesided");
PADDLE_ENFORCE_EQ(
x_rank, 2,
platform::errors::InvalidArgument(
"Input(X) of StftOp should be a tensor with shape [N, T], "
"but got rank %s.",
x_rank));
PADDLE_ENFORCE_GT(
hop_length, 0,
platform::errors::InvalidArgument(
"Attribute(hop_length) should be greater than 0, but got %s.",
hop_length));
int seq_length = x_dims[x_rank - 1];
int n_frames = 1 + (seq_length - n_fft) / hop_length;
PADDLE_ENFORCE_LE(n_fft, seq_length,
platform::errors::InvalidArgument(
"Attribute(frame_length) should be less equal than "
"sequence length, but got (%s) > (%s).",
n_fft, seq_length));
std::vector<int64_t> output_shape;
output_shape.push_back(x_dims[0]);
if (onesided) {
output_shape.push_back(n_fft / 2 + 1);
} else {
output_shape.push_back(n_fft);
}
output_shape.push_back(n_frames);
ctx->SetOutputDim("Out", phi::make_ddim(output_shape));
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
const auto in_dtype = OperatorWithKernel::IndicateVarDataType(ctx, "X");
return framework::OpKernelType(in_dtype, ctx.GetPlace());
}
};
class StftOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X", "Input waveforms with shape (N, T)");
AddOutput("Out",
"The complex STFT output tensor with shape (N, n_fft, "
"num_frames) or (N, n_fft/2 + 1, num_frames)");
AddAttr<int>("n_fft", "The number of input samples to perform FFT");
AddAttr<int>("hop_length", "Number of samples between adjacent frames");
AddAttr<bool>("normalized",
"Control whether to scale the output by 1/sqrt(n_fft)");
AddAttr<bool>("onesided",
"Control whether to return half of the FFT output");
AddComment(R"DOC(
Short-time Fourier transform (STFT).
)DOC");
}
};
template <typename T>
class StftGradOpMaker : public framework::SingleGradOpMaker<T> {
public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
protected:
void Apply(GradOpPtr<T> grad_op) const override {
grad_op->SetType("stft_grad");
grad_op->SetInput("X", this->Input("X"));
grad_op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
grad_op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
grad_op->SetAttrMap(this->Attrs());
}
};
class StftGradOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
const auto out_grad_name = framework::GradVarName("Out");
OP_INOUT_CHECK(ctx->HasInput(out_grad_name), "Input", out_grad_name,
"stft_grad");
OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "stft_grad");
const auto x_grad_name = framework::GradVarName("X");
OP_INOUT_CHECK(ctx->HasOutput(x_grad_name), "Output", x_grad_name,
"stft_grad");
ctx->ShareDim("X", /*->*/ x_grad_name);
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
const auto in_dtype = OperatorWithKernel::IndicateVarDataType(
ctx, framework::GradVarName("Out"));
const auto kernel_dtype = framework::ToRealType(in_dtype);
return framework::OpKernelType(kernel_dtype, ctx.GetPlace());
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(stft, ops::StftOp, ops::StftOpMaker,
ops::StftGradOpMaker<paddle::framework::OpDesc>,
ops::StftGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(stft_grad, ops::StftGradOp);
REGISTER_OP_CPU_KERNEL(
stft, ops::StftKernel<paddle::platform::CPUDeviceContext, float>,
ops::StftKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
stft_grad, ops::StftGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::StftGradKernel<paddle::platform::CPUDeviceContext, double>);
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/spectral_op.cu.h"
#include "paddle/fluid/operators/stft_op.h"
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
stft, ops::StftKernel<paddle::platform::CUDADeviceContext, float>,
ops::StftKernel<paddle::platform::CUDADeviceContext, double>);
REGISTER_OP_CUDA_KERNEL(
stft_grad, ops::StftGradKernel<paddle::platform::CUDADeviceContext, float>,
ops::StftGradKernel<paddle::platform::CUDADeviceContext, double>);
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/frame_op.h"
#include "paddle/fluid/operators/spectral_op.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
template <typename DeviceContext, typename T>
class StftKernel : public framework::OpKernel<T> {
public:
/*
Batch Signals (N, T) -> Frames (N, n_fft, num_frames) -> FFTR2C -> (N,
n_fft/2 + 1, num_frames) or (N, n_fft, num_frames)
*/
void Compute(const framework::ExecutionContext& ctx) const override {
using C = paddle::platform::complex<T>;
const Tensor* x = ctx.Input<Tensor>("X");
Tensor* out = ctx.Output<Tensor>("Out");
out->mutable_data<C>(ctx.GetPlace());
const size_t x_rank = x->dims().size();
const size_t out_rank = out->dims().size();
const int n_fft = ctx.Attr<int>("n_fft");
const int hop_length = ctx.Attr<int>("hop_length");
const bool normalized = ctx.Attr<bool>("normalized");
const bool onesided = ctx.Attr<bool>("onesided");
const int n_frames = out->dims()[out_rank - 1];
const int seq_length = x->dims()[x_rank - 1];
auto& dev_ctx = ctx.device_context<DeviceContext>();
std::vector<int64_t> axes = {1};
// Frame
Tensor frames;
framework::DDim frames_dims(out->dims());
frames_dims.at(axes.back()) = n_fft;
frames.mutable_data<T>(frames_dims, ctx.GetPlace());
FrameFunctor<DeviceContext, T>()(dev_ctx, x, &frames, seq_length, n_fft,
n_frames, hop_length, /*is_grad*/ false);
// FFTR2C
FFTNormMode normalization;
if (normalized) {
normalization = get_norm_from_string("ortho", true);
} else {
normalization = get_norm_from_string("backward", true);
}
FFTR2CFunctor<DeviceContext, T, C> fft_r2c_func;
if (onesided) {
fft_r2c_func(dev_ctx, &frames, out, axes, normalization, true);
} else {
framework::DDim onesided_dims(out->dims());
const int64_t onesided_axis_size = out->dims().at(axes.back()) / 2 + 1;
onesided_dims.at(axes.back()) = onesided_axis_size;
Tensor onesided_out;
onesided_out.mutable_data<C>(onesided_dims, ctx.GetPlace());
fft_r2c_func(dev_ctx, &frames, &onesided_out, axes, normalization, true);
fill_conj<DeviceContext, C>(dev_ctx, &onesided_out, out, axes);
}
}
};
template <typename DeviceContext, typename T>
class StftGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
using C = paddle::platform::complex<T>;
auto& dev_ctx = ctx.device_context<DeviceContext>();
const auto* dy = ctx.Input<Tensor>(framework::GradVarName("Out"));
auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
dx->mutable_data<T>(ctx.GetPlace());
const size_t dy_rank = dy->dims().size();
const size_t dx_rank = dx->dims().size();
const int n_fft = ctx.Attr<int>("n_fft");
const int hop_length = ctx.Attr<int>("hop_length");
const bool normalized = ctx.Attr<bool>("normalized");
const bool onesided = ctx.Attr<bool>("onesided");
const int n_frames = dy->dims()[dy_rank - 1];
const int seq_length = dx->dims()[dx_rank - 1];
std::vector<int64_t> axes = {1};
Tensor d_frames;
framework::DDim d_frames_dims(dy->dims());
d_frames_dims.at(axes.back()) = n_fft;
d_frames.mutable_data<T>(d_frames_dims, ctx.GetPlace());
Tensor complex_d_frames;
complex_d_frames.mutable_data<C>(d_frames_dims, ctx.GetPlace());
// dy -> d_frames
FFTNormMode normalization;
if (normalized) {
normalization = get_norm_from_string("ortho", true);
} else {
normalization = get_norm_from_string("backward", true);
}
FFTC2CFunctor<DeviceContext, C, C> fft_c2c_func;
if (!onesided) {
fft_c2c_func(dev_ctx, dy, &complex_d_frames, axes, normalization, false);
} else {
Tensor full_dy;
full_dy.mutable_data<C>(d_frames_dims, ctx.GetPlace());
auto zero_length = static_cast<int>(full_dy.dims().at(axes.back()) -
dy->dims().at(axes.back()));
auto rank = dy->dims().size();
std::vector<int> pads(rank * 2, 0);
pads[axes.back() * 2 + 1] = zero_length;
phi::funcs::PaddingFunctor<DeviceContext, C>(
rank, ctx.template device_context<DeviceContext>(), pads,
static_cast<C>(0), *dy, &full_dy);
fft_c2c_func(dev_ctx, &full_dy, &complex_d_frames, axes, normalization,
false);
}
framework::TransComplexToReal(
framework::TransToProtoVarType(d_frames.dtype()),
framework::TransToProtoVarType(complex_d_frames.dtype()),
complex_d_frames, &d_frames);
// d_frames -> dx
FrameFunctor<DeviceContext, T>()(dev_ctx, &d_frames, dx, seq_length, n_fft,
n_frames, hop_length, /*is_grad*/ true);
}
};
} // namespace operators
} // namespace paddle
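The StftKernel above follows the pipeline described in its comment: frame the batch of real signals, then apply a real-to-complex FFT along the n_fft axis, reconstructing the redundant half of the spectrum when onesided is False. An equivalent NumPy sketch for orientation (the helper name and shapes are assumptions, not Paddle code):

import numpy as np

def stft_kernel_sketch(x, n_fft, hop_length, onesided=True):
    # x: (N, T) real signals -> frames: (N, n_fft, num_frames)
    num_frames = 1 + (x.shape[-1] - n_fft) // hop_length
    frames = np.stack(
        [x[:, i * hop_length:i * hop_length + n_fft] for i in range(num_frames)],
        axis=-1)
    # FFTR2C along axis 1; the full two-sided spectrum of a real signal is just
    # its plain FFT, which is what fill_conj reconstructs from the one-sided result.
    return np.fft.rfft(frames, axis=1) if onesided else np.fft.fft(frames, axis=1)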
@@ -15,6 +15,7 @@
#include "paddle/phi/kernels/pad3d_kernel.h"

#include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/common/complex.h"
#include "paddle/phi/core/kernel_registry.h"

namespace phi {
@@ -574,5 +575,13 @@ void Pad3dKernel(const Context& dev_ctx,

} // namespace phi

-PD_REGISTER_KERNEL(
-    pad3d, CPU, ALL_LAYOUT, phi::Pad3dKernel, float, double, int, int64_t) {}
+PD_REGISTER_KERNEL(pad3d,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::Pad3dKernel,
+                   float,
+                   double,
+                   int,
+                   int64_t,
+                   phi::dtype::complex<float>,
+                   phi::dtype::complex<double>) {}
@@ -19,6 +19,7 @@
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/common/complex.h"
#include "paddle/phi/core/kernel_registry.h"

namespace phi {
@@ -585,4 +586,6 @@ PD_REGISTER_KERNEL(pad3d,
                   float,
                   double,
                   int,
-                   int64_t) {}
+                   int64_t,
+                   phi::dtype::complex<float>,
+                   phi::dtype::complex<double>) {}
@@ -266,9 +266,10 @@ def generate_activation_fn(op_type):
                op_type)
        else:
            # abs exp square ops support dtype(int32, int64, float16, float32, float64)
-            check_variable_and_dtype(
-                x, 'x', ['int32', 'int64', 'float16', 'float32', 'float64'],
-                op_type)
+            check_variable_and_dtype(x, 'x', [
+                'int32', 'int64', 'float16', 'float32', 'float64', 'complex64',
+                'complex128'
+            ], op_type)

    helper = LayerHelper(op_type, **locals())
...
@@ -5616,9 +5616,10 @@ def transpose(x, perm, name=None):
        out, _ = _C_ops.transpose2(x, 'axis', perm)
        return out

-    check_variable_and_dtype(
-        x, 'x', ['bool', 'float16', 'float32', 'float64', 'int32', 'int64'],
-        'transpose')
+    check_variable_and_dtype(x, 'x', [
+        'bool', 'float16', 'float32', 'float64', 'int32', 'int64', 'complex64',
+        'complex128'
+    ], 'transpose')
    check_type(perm, 'perm', (list, tuple), 'transpose')
    if isinstance(perm, tuple):
        perm = list(perm)
@@ -6410,10 +6411,10 @@ def squeeze(input, axes, name=None):
        return out

    helper = LayerHelper("squeeze", **locals())
-    check_variable_and_dtype(
-        input, 'input',
-        ['float16', 'float32', 'float64', 'bool', 'int8', 'int32', 'int64'],
-        'squeeze')
+    check_variable_and_dtype(input, 'input', [
+        'float16', 'float32', 'float64', 'bool', 'int8', 'int32', 'int64',
+        'complex64', 'complex128'
+    ], 'squeeze')
    check_type(axes, 'axis/axes', (list, tuple), 'squeeze')
    out = helper.create_variable_for_type_inference(dtype=input.dtype)
    x_shape = helper.create_variable_for_type_inference(dtype=input.dtype)
@@ -6471,8 +6472,16 @@ def unsqueeze(input, axes, name=None):
    check_type(axes, 'axis/axes', (int, list, tuple, Variable), 'unsqueeze')
    check_variable_and_dtype(input, 'input', [
-        'float16', 'float32', 'float64', 'bool', 'int8', 'int16', 'int32',
-        'int64'
+        'float16',
+        'float32',
+        'float64',
+        'bool',
+        'int8',
+        'int16',
+        'int32',
+        'int64',
+        'complex64',
+        'complex128',
    ], 'unsqueeze')
    helper = LayerHelper("unsqueeze2", **locals())
    inputs = {"X": input}
...
@@ -756,7 +756,7 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None, name=None):
    check_shape(shape)
    check_dtype(dtype, 'dtype', [
        'bool', 'float16', 'float32', 'float64', 'uint8', 'int16', 'int32',
-        'int64'
+        'int64', 'complex64', 'complex128'
    ], 'fill_constant')
    check_type(shape, 'shape', (Variable, list, tuple), 'fill_constant')
...
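With complex64 and complex128 accepted by fill_constant, constant initializers can produce complex tensors in a static graph. A brief sketch, under the assumption that the corresponding fill kernels handle the dtype at runtime:

import paddle

paddle.enable_static()
with paddle.static.program_guard(paddle.static.Program()):
    # paddle.zeros routes through fill_constant; complex dtypes now pass its dtype check.
    z = paddle.zeros(shape=[2, 3], dtype='complex64')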
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from numpy.lib.stride_tricks import as_strided
import paddle
import unittest
from op_test import OpTest
def frame_from_librosa(x, frame_length, hop_length, axis=-1):
if axis == -1 and not x.flags["C_CONTIGUOUS"]:
x = np.ascontiguousarray(x)
elif axis == 0 and not x.flags["F_CONTIGUOUS"]:
x = np.asfortranarray(x)
n_frames = 1 + (x.shape[axis] - frame_length) // hop_length
strides = np.asarray(x.strides)
if axis == -1:
shape = list(x.shape)[:-1] + [frame_length, n_frames]
strides = list(strides) + [hop_length * x.itemsize]
elif axis == 0:
shape = [n_frames, frame_length] + list(x.shape)[1:]
strides = [hop_length * x.itemsize] + list(strides)
else:
raise ValueError("Frame axis={} must be either 0 or -1".format(axis))
return as_strided(x, shape=shape, strides=strides)
def stft_np(x, n_fft, hop_length, **kwargs):
frames = frame_from_librosa(x, n_fft, hop_length)
res = np.fft.rfft(frames, axis=1)
return res
class TestStftOp(OpTest):
def setUp(self):
self.op_type = "stft"
self.shape, self.type, self.attrs = self.initTestCase()
self.inputs = {
'X': np.random.random(size=self.shape).astype(self.type),
}
self.outputs = {'Out': stft_np(x=self.inputs['X'], **self.attrs)}
def initTestCase(self):
input_shape = (2, 100)
input_type = 'float64'
attrs = {
'n_fft': 50,
'hop_length': 15,
'normalized': False,
'onesided': True,
}
return input_shape, input_type, attrs
def test_check_output(self):
paddle.enable_static()
self.check_output()
paddle.disable_static()
def test_check_grad_normal(self):
paddle.enable_static()
self.check_grad(['X'], 'Out')
paddle.disable_static()
if __name__ == '__main__':
unittest.main()
@@ -119,10 +119,11 @@ def frame(x, frame_length, hop_length, axis=-1, name=None):
            f'Unexpected hop_length: {hop_length}. It should be an positive integer.'
        )

-    if frame_length > x.shape[axis]:
-        raise ValueError(
-            f'Attribute frame_length should be less equal than sequence length, '
-            f'but got ({frame_length}) > ({x.shape[axis]}).')
+    if in_dygraph_mode():
+        if frame_length > x.shape[axis]:
+            raise ValueError(
+                f'Attribute frame_length should be less equal than sequence length, '
+                f'but got ({frame_length}) > ({x.shape[axis]}).')

    op_type = 'frame'
@@ -306,8 +307,7 @@ def stft(x,
            y1 = stft(x, n_fft=512, center=False, onesided=False) # [8, 512, 372]
    """
    check_variable_and_dtype(
-        x, 'x', ['float16', 'float32', 'float64', 'complex64', 'complex128'],
-        'stft')
+        x, 'x', ['float32', 'float64', 'complex64', 'complex128'], 'stft')

    x_rank = len(x.shape)
    assert x_rank in [1, 2], \
@@ -325,8 +325,9 @@ def stft(x,
    if win_length is None:
        win_length = n_fft

-    assert 0 < n_fft <= x.shape[-1], \
-        f'n_fft should be in (0, seq_length({x.shape[-1]})], but got {n_fft}.'
+    if in_dygraph_mode():
+        assert 0 < n_fft <= x.shape[-1], \
+            f'n_fft should be in (0, seq_length({x.shape[-1]})], but got {n_fft}.'

    assert 0 < win_length <= n_fft, \
        f'win_length should be in (0, n_fft({n_fft})], but got {win_length}.'
@@ -359,7 +360,7 @@ def stft(x,
    x_frames = x_frames.transpose(
        perm=[0, 2,
              1]) # switch n_fft to last dim, egs: (batch, num_frames, n_fft)
-    x_frames = x_frames * window
+    x_frames = paddle.multiply(x_frames, window)

    norm = 'ortho' if normalized else 'backward'
    if is_complex(x_frames):
@@ -495,18 +496,22 @@ def istft(x,
    n_frames = x.shape[-1]
    fft_size = x.shape[-2]

-    if onesided:
-        assert (fft_size == n_fft // 2 + 1), \
-            'fft_size should be equal to n_fft // 2 + 1({}) when onesided is True, but got {}.'.format(n_fft // 2 + 1, fft_size)
-    else:
-        assert (fft_size == n_fft), \
-            'fft_size should be equal to n_fft({}) when onesided is False, but got {}.'.format(n_fft, fft_size)
+    if in_dygraph_mode():
+        if onesided:
+            assert (fft_size == n_fft // 2 + 1), \
+                'fft_size should be equal to n_fft // 2 + 1({}) when onesided is True, but got {}.'.format(n_fft // 2 + 1, fft_size)
+        else:
+            assert (fft_size == n_fft), \
+                'fft_size should be equal to n_fft({}) when onesided is False, but got {}.'.format(n_fft, fft_size)

    if window is not None:
        assert len(window.shape) == 1 and len(window) == win_length, \
            'expected a 1D window tensor of size equal to win_length({}), but got window with shape {}.'.format(win_length, window.shape)
    else:
-        window = paddle.ones(shape=(win_length, ))
+        window_dtype = paddle.float32 if x.dtype in [
+            paddle.float32, paddle.complex64
+        ] else paddle.float64
+        window = paddle.ones(shape=(win_length, ), dtype=window_dtype)

    if win_length < n_fft:
        pad_left = (n_fft - win_length) // 2
@@ -534,15 +539,15 @@ def istft(x,
        x = x[:, :, :n_fft // 2 + 1]
    out = fft_c2r(x=x, n=None, axis=-1, norm=norm, forward=False, name=None)

+    out = paddle.multiply(out, window).transpose(
+        perm=[0, 2, 1]) # (batch, n_fft, num_frames)
    out = overlap_add(
-        x=(out * window).transpose(
-            perm=[0, 2, 1]), # (batch, n_fft, num_frames)
-        hop_length=hop_length,
-        axis=-1) # (batch, seq_length)
+        x=out, hop_length=hop_length, axis=-1) # (batch, seq_length)

    window_envelop = overlap_add(
        x=paddle.tile(
-            x=window * window, repeat_times=[n_frames, 1]).transpose(
+            x=paddle.multiply(window, window).unsqueeze(0),
+            repeat_times=[n_frames, 1]).transpose(
            perm=[1, 0]), # (n_fft, num_frames)
        hop_length=hop_length,
        axis=-1) # (seq_length, )
@@ -561,7 +566,7 @@ def istft(x,
    window_envelop = window_envelop[start:start + length]

    # Check whether the Nonzero Overlap Add (NOLA) constraint is met.
-    if window_envelop.abs().min().item() < 1e-11:
+    if in_dygraph_mode() and window_envelop.abs().min().item() < 1e-11:
        raise ValueError(
            'Abort istft because Nonzero Overlap Add (NOLA) condition failed. For more information about NOLA constraint please see `scipy.signal.check_NOLA`(https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.check_NOLA.html).'
        )
...
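For orientation, a round trip through the updated Python APIs in dynamic mode; the signal length and FFT parameters here are arbitrary:

import paddle

x = paddle.randn([4, 16000])                              # (batch, seq_length), float32
spec = paddle.signal.stft(x, n_fft=512, hop_length=160)   # complex64, roughly (4, 257, num_frames)
x_rec = paddle.signal.istft(spec, n_fft=512, hop_length=160, length=16000)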
@@ -147,7 +147,9 @@ def matmul(x, y, transpose_x=False, transpose_y=False, name=None):
        var_names = {'x': x, 'y': y}
        for name, val in var_names.items():
            check_variable_and_dtype(
-                val, name, ['float16', 'float32', 'float64'], 'matmul')
+                val, name,
+                ['float16', 'float32', 'float64', 'complex64', 'complex128'],
+                'matmul')
    __check_input(x, y)
...
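matmul's dtype check now admits complex operands in static mode as well. A sketch of what this enables when building a program, assuming complex matmul kernels are available at runtime:

import paddle

paddle.enable_static()
with paddle.static.program_guard(paddle.static.Program()):
    a = paddle.static.data(name='a', shape=[2, 3], dtype='complex64')
    b = paddle.static.data(name='b', shape=[3, 4], dtype='complex64')
    c = paddle.matmul(a, b)   # previously rejected by check_variable_and_dtype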