From e9994f2e66fc8d94011f10a00beca24f156047e4 Mon Sep 17 00:00:00 2001
From: xiaoting <31891223+tink2123@users.noreply.github.com>
Date: Thu, 4 Aug 2022 15:12:41 +0800
Subject: [PATCH] mv fold & unpool to phi  (#44836)

* fix conflicts

* mv unused file

* revert backward.h

* revert lu_unpack kernel

* rm .cu file

* Update lu_unpack_kernel.cc

* format phi yaml
---
 paddle/fluid/operators/fold_op.cc             | 253 ++----------------
 paddle/fluid/operators/fold_op.cu             |  25 --
 paddle/fluid/operators/fold_op.h              | 140 ----------
 paddle/fluid/operators/unpool_op.cc           | 141 ++--------
 paddle/fluid/operators/unpool_op.cu.cc        |  29 --
 paddle/fluid/operators/unpool_op.h            | 123 ---------
 paddle/phi/api/yaml/legacy_api.yaml           |  32 +++
 paddle/phi/api/yaml/legacy_backward.yaml      |  38 +++
 paddle/phi/infermeta/backward.h               |   0
 paddle/phi/infermeta/binary.cc                |  83 ++++++
 paddle/phi/infermeta/binary.h                 |  20 ++
 paddle/phi/infermeta/unary.cc                 | 205 ++++++++++++++
 paddle/phi/infermeta/unary.h                  |   8 +
 paddle/phi/kernels/cpu/fold_grad_kernel.cc    |  22 ++
 paddle/phi/kernels/cpu/fold_kernel.cc         |  21 ++
 paddle/phi/kernels/cpu/unpool_grad_kernel.cc  | 137 ++++++++++
 paddle/phi/kernels/cpu/unpool_kernel.cc       | 132 +++++++++
 paddle/phi/kernels/fold_grad_kernel.h         |  31 +++
 paddle/phi/kernels/fold_kernel.h              |  30 +++
 paddle/phi/kernels/gpu/fold_grad_kernel.cu    |  22 ++
 paddle/phi/kernels/gpu/fold_kernel.cu         |  21 ++
 paddle/phi/kernels/gpu/unpool_grad_kernel.cu  | 203 ++++++++++++++
 paddle/phi/kernels/gpu/unpool_kernel.cu       | 188 +++++++++++++
 .../phi/kernels/impl/fold_grad_kernel_impl.h  |  75 ++++++
 paddle/phi/kernels/impl/fold_kernel_impl.h    |  77 ++++++
 paddle/phi/kernels/unpool_grad_kernel.h       |  47 ++++
 paddle/phi/kernels/unpool_kernel.h            |  43 +++
 paddle/phi/ops/compat/fold_sig.cc             |  26 ++
 paddle/phi/ops/compat/unpool3d_sig.cc         |  37 +++
 paddle/phi/ops/compat/unpool_sig.cc           |  36 +++
 .../fluid/tests/unittests/test_fold_op.py     |   6 +-
 .../fluid/tests/unittests/test_pad_op.py      |   1 +
 .../fluid/tests/unittests/test_unpool3d_op.py |  24 +-
 .../fluid/tests/unittests/test_unpool_op.py   |  29 +-
 python/paddle/nn/functional/common.py         |   5 +-
 python/paddle/nn/functional/pooling.py        |  18 +-
 36 files changed, 1650 insertions(+), 678 deletions(-)
 delete mode 100644 paddle/fluid/operators/fold_op.cu
 delete mode 100644 paddle/fluid/operators/fold_op.h
 delete mode 100644 paddle/fluid/operators/unpool_op.cu.cc
 delete mode 100644 paddle/fluid/operators/unpool_op.h
 mode change 100755 => 100644 paddle/phi/infermeta/backward.h
 create mode 100644 paddle/phi/kernels/cpu/fold_grad_kernel.cc
 create mode 100644 paddle/phi/kernels/cpu/fold_kernel.cc
 create mode 100644 paddle/phi/kernels/cpu/unpool_grad_kernel.cc
 create mode 100644 paddle/phi/kernels/cpu/unpool_kernel.cc
 create mode 100644 paddle/phi/kernels/fold_grad_kernel.h
 create mode 100644 paddle/phi/kernels/fold_kernel.h
 create mode 100644 paddle/phi/kernels/gpu/fold_grad_kernel.cu
 create mode 100644 paddle/phi/kernels/gpu/fold_kernel.cu
 create mode 100644 paddle/phi/kernels/gpu/unpool_grad_kernel.cu
 create mode 100644 paddle/phi/kernels/gpu/unpool_kernel.cu
 create mode 100644 paddle/phi/kernels/impl/fold_grad_kernel_impl.h
 create mode 100644 paddle/phi/kernels/impl/fold_kernel_impl.h
 create mode 100644 paddle/phi/kernels/unpool_grad_kernel.h
 create mode 100644 paddle/phi/kernels/unpool_kernel.h
 create mode 100644 paddle/phi/ops/compat/fold_sig.cc
 create mode 100644 paddle/phi/ops/compat/unpool3d_sig.cc
 create mode 100644 paddle/phi/ops/compat/unpool_sig.cc

diff --git a/paddle/fluid/operators/fold_op.cc b/paddle/fluid/operators/fold_op.cc
index 5ec5a93ada4..149d2bdac3c 100644
--- a/paddle/fluid/operators/fold_op.cc
+++ b/paddle/fluid/operators/fold_op.cc
@@ -12,7 +12,13 @@
  *     See the License for the specific language governing permissions and
  *     limitations under the License. */
 
-#include "paddle/fluid/operators/fold_op.h"
+#include <memory>
+#include <vector>
+
+#include "paddle/fluid/framework/infershape_utils.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/phi/infermeta/backward.h"
+#include "paddle/phi/infermeta/unary.h"
 
 namespace paddle {
 namespace operators {
@@ -20,216 +26,6 @@ namespace operators {
 class FoldOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInput("X"),
-        true,
-        platform::errors::NotFound("Input(X) of FoldOp should not be null"));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasOutput("Y"),
-        true,
-        platform::errors::NotFound("Output(Y) of FoldOp should not be null"));
-    auto in_dims = ctx->GetInputDim("X");
-    std::vector<int> output_sizes =
-        ctx->Attrs().Get<std::vector<int>>("output_sizes");
-    std::vector<int> kernel_sizes =
-        ctx->Attrs().Get<std::vector<int>>("kernel_sizes");
-    std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
-    std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
-    std::vector<int> dilations =
-        ctx->Attrs().Get<std::vector<int>>("dilations");
-
-    PADDLE_ENFORCE_EQ(
-        output_sizes.size(),
-        2,
-        platform::errors::InvalidArgument(
-            "It is expected output_size equals to 2, but got size %d",
-            output_sizes.size()));
-    PADDLE_ENFORCE_EQ(
-        kernel_sizes.size(),
-        2,
-        platform::errors::InvalidArgument(
-            "It is expected kernel_size equals to 2, but got size %d",
-            kernel_sizes.size()));
-    PADDLE_ENFORCE_EQ(
-        strides.size(),
-        2,
-        platform::errors::InvalidArgument(
-            "It is expected strides_size equals to 2, but got size %d",
-            strides.size()));
-    PADDLE_ENFORCE_EQ(
-        paddings.size(),
-        4,
-        platform::errors::InvalidArgument(
-            "It is expected paddings_size equals to 4, but got size %d",
-            paddings.size()));
-    PADDLE_ENFORCE_EQ(
-        dilations.size(),
-        2,
-        platform::errors::InvalidArgument(
-            "It is expected dilations_size equals to 2, but got size %d",
-            dilations.size()));
-
-    int output_height = output_sizes[0];
-    int output_width = output_sizes[1];
-    int kernel_height = kernel_sizes[0];
-    int kernel_width = kernel_sizes[1];
-    int dilation_height = dilations[0];
-    int dilation_width = dilations[1];
-    int stride_height = strides[0];
-    int stride_width = strides[1];
-
-    // check kernel_sizes
-    PADDLE_ENFORCE_GT(kernel_height,
-                      0,
-                      platform::errors::InvalidArgument(
-                          "The `kernel_sizes` should be greater than zero, "
-                          "but received kernel_height: %d kernel_width: %d.",
-                          kernel_sizes[0],
-                          kernel_sizes[1]));
-    PADDLE_ENFORCE_GT(kernel_width,
-                      0,
-                      platform::errors::InvalidArgument(
-                          "The `kernel_sizes` should be greater than zero, "
-                          "but received kernel_height: %d kernel_width: %d.",
-                          kernel_sizes[0],
-                          kernel_sizes[1]));
-    // check strides
-    PADDLE_ENFORCE_GT(stride_height,
-                      0,
-                      platform::errors::InvalidArgument(
-                          "The `strides` should be greater than zero, "
-                          "but received strides_height: %d strides_width: %d.",
-                          strides[0],
-                          strides[1]));
-    PADDLE_ENFORCE_GT(stride_width,
-                      0,
-                      platform::errors::InvalidArgument(
-                          "The `strides` should be greater than zero, "
-                          "but received strides_height: %d strides_width: %d.",
-                          strides[0],
-                          strides[1]));
-    // check dilations
-    PADDLE_ENFORCE_GT(output_height,
-                      1,
-                      platform::errors::InvalidArgument(
-                          "The `output_height` should be greater than one, "
-                          "but received output_height: %d .",
-                          output_height));
-    PADDLE_ENFORCE_GT(output_width,
-                      1,
-                      platform::errors::InvalidArgument(
-                          "The `output_width` should be greater than one, "
-                          "but received output_width: %d .",
-                          output_width));
-    // check output size
-    PADDLE_ENFORCE_GT(
-        dilation_height,
-        0,
-        platform::errors::InvalidArgument(
-            "The `dilations` should be greater than zero, "
-            "but received dilations_height: %d dilations_width: %d.",
-            dilations[0],
-            dilations[1]));
-    PADDLE_ENFORCE_GT(
-        dilation_width,
-        0,
-        platform::errors::InvalidArgument(
-            "The `dilations` should be greater than zero, "
-            "but received dilations_height: %d dilations_width: %d.",
-            dilations[0],
-            dilations[1]));
-
-    std::vector<int> out_dims;
-    // batch_size
-    out_dims.push_back(in_dims[0]);
-    // output_plane
-    int output_channels = in_dims[1] / (kernel_width * kernel_height);
-    out_dims.push_back(output_channels);
-
-    int blocks_height = (output_sizes[0] + 2 * paddings[0] -
-                         (dilations[0] * (kernel_sizes[0] - 1) + 1)) /
-                            strides[0] +
-                        1;
-    int blocks_width = (output_sizes[1] + 2 * paddings[1] -
-                        (dilations[1] * (kernel_sizes[1] - 1) + 1)) /
-                           strides[1] +
-                       1;
-
-    // check output height and width
-    PADDLE_ENFORCE_GT(
-        blocks_height,
-        0,
-        platform::errors::InvalidArgument(
-            "The sliding blocks calculated from input spatial size (%d, %d), "
-            "kernel_sizes (%d, %d), strides (%d, %d), dilations (%d, %d), "
-            "is (%d, %d), which should be a positive integer.",
-            in_dims[2],
-            in_dims[3],
-            kernel_sizes[0],
-            kernel_sizes[1],
-            strides[0],
-            strides[1],
-            dilations[0],
-            dilations[1],
-            output_height,
-            output_width));
-
-    PADDLE_ENFORCE_GT(
-        blocks_width,
-        0,
-        platform::errors::InvalidArgument(
-            "The sliding blocks calculated from input spatial size (%d, %d), "
-            "kernel_sizes (%d, %d), strides (%d, %d), dilations (%d, %d), "
-            "is (%d, %d), which should be a positive integer.",
-            in_dims[2],
-            in_dims[3],
-            kernel_sizes[0],
-            kernel_sizes[1],
-            strides[0],
-            strides[1],
-            dilations[0],
-            dilations[1],
-            output_height,
-            output_width));
-
-    PADDLE_ENFORCE_EQ(
-        blocks_height * blocks_width,
-        in_dims[2],
-        platform::errors::InvalidArgument(
-            "Given input output_size (%d, %d), "
-            "kernel_sizes (%d, %d), strides (%d, %d), dilations (%d, %d), "
-            "which should be expected size of input's dimension "
-            "2 to match the calculated number of %d * %d = %d, but got %d",
-            output_height,
-            output_width,
-            kernel_sizes[0],
-            kernel_sizes[1],
-            strides[0],
-            strides[1],
-            dilations[0],
-            dilations[1],
-            blocks_height,
-            blocks_width,
-            blocks_height * blocks_width,
-            in_dims[2]));
-
-    PADDLE_ENFORCE_EQ(
-        in_dims[1] % (kernel_sizes[0] * kernel_sizes[1]),
-        0,
-        platform::errors::InvalidArgument(
-            "Expected size of input's dimension 1 to be divisible by the"
-            "product of kernel_size, but got input.size(1)=%d and "
-            "kernel_size=( %d"
-            ", %d).",
-            in_dims[1],
-            kernel_sizes[0],
-            kernel_sizes[1]));
-
-    out_dims.push_back(output_height);
-    out_dims.push_back(output_width);
-    ctx->SetOutputDim("Y", phi::make_ddim(out_dims));
-  }
 
  protected:
   framework::OpKernelType GetExpectedKernelType(
@@ -286,22 +82,6 @@ class FoldGradOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
 
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInput(framework::GradVarName("Y")),
-        true,
-        platform::errors::NotFound("The gradient of Y should not be null"));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInput("X"),
-        true,
-        platform::errors::NotFound("The input X should not be null"));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasOutput(framework::GradVarName("X")),
-        true,
-        platform::errors::NotFound("The gradient of X should not be null"));
-    ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
-  }
-
  protected:
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
@@ -332,18 +112,19 @@ DECLARE_NO_NEED_BUFFER_VARS_INFERER(FoldGradOpNoNeedBufferVarsInferer, "X");
 }  // namespace paddle
 
 namespace ops = paddle::operators;
+DECLARE_INFER_SHAPE_FUNCTOR(fold,
+                            FoldInferShapeFunctor,
+                            PD_INFER_META(phi::FoldInferMeta));
 REGISTER_OPERATOR(fold,
                   ops::FoldOp,
                   ops::FoldOpMaker,
                   ops::FoldGradMaker<paddle::framework::OpDesc>,
-                  ops::FoldGradMaker<paddle::imperative::OpBase>);
+                  ops::FoldGradMaker<paddle::imperative::OpBase>,
+                  FoldInferShapeFunctor);
+DECLARE_INFER_SHAPE_FUNCTOR(fold_grad,
+                            FoldGradInferShapeFunctor,
+                            PD_INFER_META(phi::UnchangedInferMeta));
 REGISTER_OPERATOR(fold_grad,
                   ops::FoldGradOp,
-                  ops::FoldGradOpNoNeedBufferVarsInferer);
-
-REGISTER_OP_CPU_KERNEL(fold,
-                       ops::FoldOpKernel<phi::CPUContext, float>,
-                       ops::FoldOpKernel<phi::CPUContext, double>);
-REGISTER_OP_CPU_KERNEL(fold_grad,
-                       ops::FoldGradOpKernel<phi::CPUContext, float>,
-                       ops::FoldGradOpKernel<phi::CPUContext, double>);
+                  ops::FoldGradOpNoNeedBufferVarsInferer,
+                  FoldGradInferShapeFunctor);
diff --git a/paddle/fluid/operators/fold_op.cu b/paddle/fluid/operators/fold_op.cu
deleted file mode 100644
index 7728d57a276..00000000000
--- a/paddle/fluid/operators/fold_op.cu
+++ /dev/null
@@ -1,25 +0,0 @@
-/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-Indicesou may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/operators/fold_op.h"
-
-namespace ops = paddle::operators;
-
-REGISTER_OP_CUDA_KERNEL(fold,
-                        ops::FoldOpKernel<phi::GPUContext, float>,
-                        ops::FoldOpKernel<phi::GPUContext, double>);
-
-REGISTER_OP_CUDA_KERNEL(fold_grad,
-                        ops::FoldGradOpKernel<phi::GPUContext, float>,
-                        ops::FoldGradOpKernel<phi::GPUContext, double>);
diff --git a/paddle/fluid/operators/fold_op.h b/paddle/fluid/operators/fold_op.h
deleted file mode 100644
index 704e4de1a69..00000000000
--- a/paddle/fluid/operators/fold_op.h
+++ /dev/null
@@ -1,140 +0,0 @@
-/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-#include <memory>
-#include <vector>
-
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/im2col.h"
-#include "paddle/phi/kernels/funcs/math_function.h"
-
-namespace paddle {
-namespace operators {
-
-using Tensor = framework::Tensor;
-
-template <typename DeviceContext, typename T>
-class FoldOpKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    const Tensor* input = ctx.Input<Tensor>("X");
-    const int batch_size = static_cast<int>(input->dims()[0]);
-    Tensor* output = ctx.Output<Tensor>("Y");
-    output->mutable_data<T>(ctx.GetPlace());
-
-    std::vector<int> output_sizes = ctx.Attr<std::vector<int>>("output_sizes");
-    std::vector<int> kernel_sizes = ctx.Attr<std::vector<int>>("kernel_sizes");
-    std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
-    std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
-    std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
-
-    math::Col2ImFunctor<math::ColFormat::kCFO, DeviceContext, T> col2im;
-
-    auto& dev_ctx = ctx.template device_context<DeviceContext>();
-
-    auto input_dims = input->dims();
-
-    int output_height = (output_sizes[0] + 2 * paddings[0] -
-                         (dilations[0] * (kernel_sizes[0] - 1) + 1)) /
-                            strides[0] +
-                        1;
-    int output_width = (output_sizes[1] + 2 * paddings[1] -
-                        (dilations[1] * (kernel_sizes[1] - 1) + 1)) /
-                           strides[1] +
-                       1;
-
-    int n_input_plane = input_dims[1];
-    int n_output_plane = n_input_plane / (kernel_sizes[0] * kernel_sizes[1]);
-
-    framework::DDim output_shape(
-        {n_output_plane, output_sizes[0], output_sizes[1]});
-
-    framework::DDim input_matrix_shape({input_dims[0],
-                                        kernel_sizes[0],
-                                        kernel_sizes[1],
-                                        output_height,
-                                        output_width});
-    phi::funcs::SetConstant<DeviceContext, T> set_zero;
-    set_zero(dev_ctx, output, static_cast<T>(0));
-
-    for (int i = 0; i < batch_size; i++) {
-      Tensor out_batch =
-          output->Slice(i, i + 1).Resize(output_shape);  // im size=3
-      Tensor in_batch =
-          input->Slice(i, i + 1).Resize(input_matrix_shape);  // col size=5
-      col2im(dev_ctx, in_batch, dilations, strides, paddings, &out_batch);
-    }
-  }
-};
-
-template <typename DeviceContext, typename T>
-class FoldGradOpKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    const Tensor* output_grad = ctx.Input<Tensor>(framework::GradVarName("Y"));
-    Tensor* input_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
-    input_grad->mutable_data<T>(ctx.GetPlace());
-
-    if ((!output_grad) || (!input_grad)) return;
-
-    std::vector<int> output_sizes = ctx.Attr<std::vector<int>>("output_sizes");
-    std::vector<int> kernel_sizes = ctx.Attr<std::vector<int>>("kernel_sizes");
-    std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
-    std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
-    std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
-
-    const int batch_size = static_cast<int>(input_grad->dims()[0]);
-
-    auto input_dims = input_grad->dims();
-
-    int output_height = (output_sizes[0] + 2 * paddings[0] -
-                         (dilations[0] * (kernel_sizes[0] - 1) + 1)) /
-                            strides[0] +
-                        1;
-    int output_width = (output_sizes[1] + 2 * paddings[1] -
-                        (dilations[1] * (kernel_sizes[1] - 1) + 1)) /
-                           strides[1] +
-                       1;
-
-    int n_input_plane = input_dims[1];
-    int n_output_plane = n_input_plane / (kernel_sizes[0] * kernel_sizes[1]);
-
-    framework::DDim output_shape(
-        {n_output_plane, output_sizes[0], output_sizes[1]});
-    framework::DDim input_matrix_shape({input_dims[0],
-                                        kernel_sizes[0],
-                                        kernel_sizes[1],
-                                        output_height,
-                                        output_width});
-
-    math::Im2ColFunctor<math::ColFormat::kCFO, DeviceContext, T> im2col;
-    auto& dev_ctx = ctx.template device_context<DeviceContext>();
-
-    for (int i = 0; i < batch_size; i++) {
-      Tensor out_grad_batch = output_grad->Slice(i, i + 1).Resize(output_shape);
-      Tensor in_grad_batch =
-          input_grad->Slice(i, i + 1).Resize(input_matrix_shape);
-      im2col(dev_ctx,
-             out_grad_batch,
-             dilations,
-             strides,
-             paddings,
-             &in_grad_batch);
-    }
-  }
-};
-}  // namespace operators
-}  // namespace paddle
diff --git a/paddle/fluid/operators/unpool_op.cc b/paddle/fluid/operators/unpool_op.cc
index 47679ca57f5..b1a0d68c12e 100644
--- a/paddle/fluid/operators/unpool_op.cc
+++ b/paddle/fluid/operators/unpool_op.cc
@@ -12,11 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/fluid/operators/unpool_op.h"
-
 #include <memory>
 #include <string>
 #include <vector>
+
+#include "paddle/fluid/framework/infershape_utils.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/phi/infermeta/backward.h"
+#include "paddle/phi/infermeta/binary.h"
+
 namespace paddle {
 namespace operators {
 
@@ -152,45 +156,6 @@ class UnpoolOp : public framework::OperatorWithKernel {
 
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "Unpool");
-    OP_INOUT_CHECK(ctx->HasInput("Indices"), "Input", "Indices", "Unpool");
-    OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "Unpool");
-    auto in_x_dims = ctx->GetInputDim("X");
-    auto in_y_dims = ctx->GetInputDim("Indices");
-    std::string unpooling_type =
-        ctx->Attrs().Get<std::string>("unpooling_type");
-    std::vector<int> ksize = ctx->Attrs().Get<std::vector<int>>("ksize");
-    std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
-    std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
-    std::vector<int> output_size =
-        ctx->Attrs().Get<std::vector<int>>("output_size");
-    PADDLE_ENFORCE_EQ(in_x_dims.size() == 4,
-                      true,
-                      platform::errors::InvalidArgument(
-                          "Unpool Intput(X) must be of 4-dimensional, but "
-                          "received Input(X)'s dimensions is %d.",
-                          in_x_dims.size()));
-    PADDLE_ENFORCE_EQ(in_x_dims,
-                      in_y_dims,
-                      platform::errors::InvalidArgument(
-                          "The dimensions of Input(X) must equal to be"
-                          "the dimensions of Input(Indices), but received"
-                          "dimensions of Input(X) is [%d], received dimensions"
-                          "of Input(Indices) is [%d]",
-                          in_x_dims,
-                          in_y_dims));
-
-    std::vector<int64_t> output_shape({in_x_dims[0], in_x_dims[1]});
-    for (size_t i = 0; i < ksize.size(); ++i) {
-      if (!ctx->IsRuntime() && in_x_dims[i + 2] <= 0) {
-        output_shape.push_back(-1);
-      } else {
-        output_shape.push_back(output_size[i]);
-      }
-    }
-    ctx->SetOutputDim("Out", phi::make_ddim(output_shape));
-  }
 };
 
 class Unpool3dOp : public framework::OperatorWithKernel {
@@ -204,45 +169,6 @@ class Unpool3dOp : public framework::OperatorWithKernel {
 
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "Unpool3d");
-    OP_INOUT_CHECK(ctx->HasInput("Indices"), "Input", "Indices", "Unpool3d");
-    OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "Unpool3d");
-    auto in_x_dims = ctx->GetInputDim("X");
-    auto in_y_dims = ctx->GetInputDim("Indices");
-    std::string unpooling_type =
-        ctx->Attrs().Get<std::string>("unpooling_type");
-    std::vector<int> ksize = ctx->Attrs().Get<std::vector<int>>("ksize");
-    std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
-    std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
-    std::vector<int> output_size =
-        ctx->Attrs().Get<std::vector<int>>("output_size");
-    PADDLE_ENFORCE_EQ(in_x_dims.size() == 5,
-                      true,
-                      platform::errors::InvalidArgument(
-                          "Unpool Intput(X) must be of 5-dimensional, but "
-                          "received Input(X)'s dimensions is %d.",
-                          in_x_dims.size()));
-    PADDLE_ENFORCE_EQ(in_x_dims,
-                      in_y_dims,
-                      platform::errors::InvalidArgument(
-                          "The dimensions of Input(X) must equal to be"
-                          "the dimensions of Input(Indices), but received"
-                          "dimensions of Input(X) is [%d], received dimensions"
-                          "of Input(Indices) is [%d]",
-                          in_x_dims,
-                          in_y_dims));
-
-    std::vector<int64_t> output_shape({in_x_dims[0], in_x_dims[1]});
-    for (size_t i = 0; i < ksize.size(); ++i) {
-      if (!ctx->IsRuntime() && in_x_dims[i + 2] <= 0) {
-        output_shape.push_back(-1);
-      } else {
-        output_shape.push_back(output_size[i]);
-      }
-    }
-    ctx->SetOutputDim("Out", phi::make_ddim(output_shape));
-  }
 };
 
 template <typename T>
@@ -286,14 +212,6 @@ class UnpoolOpGrad : public framework::OperatorWithKernel {
 
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "UnpoolGrad");
-    OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("X")),
-                   "Output",
-                   framework::GradVarName("X"),
-                   "UnpoolGrad");
-    ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
-  }
 };
 
 class Unpool3dOpGrad : public framework::OperatorWithKernel {
@@ -307,44 +225,43 @@ class Unpool3dOpGrad : public framework::OperatorWithKernel {
 
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "Unpool3dGrad");
-    OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("X")),
-                   "Output",
-                   framework::GradVarName("X"),
-                   "Unpool3dGrad");
-    ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
-  }
 };
 
 }  // namespace operators
 }  // namespace paddle
 
 namespace ops = paddle::operators;
+DECLARE_INFER_SHAPE_FUNCTOR(unpool,
+                            UnpoolInferShapeFunctor,
+                            PD_INFER_META(phi::UnpoolInferMeta));
 REGISTER_OPERATOR(unpool,
                   ops::UnpoolOp,
                   ops::Unpool2dOpMaker,
                   ops::UnpoolOpGradMaker<paddle::framework::OpDesc>,
-                  ops::UnpoolOpGradMaker<paddle::imperative::OpBase>);
+                  ops::UnpoolOpGradMaker<paddle::imperative::OpBase>,
+                  UnpoolInferShapeFunctor);
 
-REGISTER_OPERATOR(unpool_grad, ops::UnpoolOpGrad);
-REGISTER_OP_CPU_KERNEL(unpool,
-                       ops::UnpoolKernel<phi::CPUContext, float>,
-                       ops::UnpoolKernel<phi::CPUContext, double>);
-REGISTER_OP_CPU_KERNEL(unpool_grad,
-                       ops::UnpoolGradKernel<phi::CPUContext, float>,
-                       ops::UnpoolGradKernel<phi::CPUContext, double>);
+DECLARE_INFER_SHAPE_FUNCTOR(unpool_grad,
+                            UnpoolGradInferShapeFunctor,
+                            PD_INFER_META(phi::UnchangedInferMeta));
+
+REGISTER_OPERATOR(unpool_grad, ops::UnpoolOpGrad, UnpoolGradInferShapeFunctor);
+
+DECLARE_INFER_SHAPE_FUNCTOR(unpool3d,  // must match the op registered below
+                            Unpool3dInferShapeFunctor,
+                            PD_INFER_META(phi::Unpool3dInferMeta));
 
 REGISTER_OPERATOR(unpool3d,
                   ops::Unpool3dOp,
                   ops::Unpool3dOpMaker,
                   ops::Unpool3dOpGradMaker<paddle::framework::OpDesc>,
-                  ops::Unpool3dOpGradMaker<paddle::imperative::OpBase>);
+                  ops::Unpool3dOpGradMaker<paddle::imperative::OpBase>,
+                  Unpool3dInferShapeFunctor);
+
+DECLARE_INFER_SHAPE_FUNCTOR(unpool3d_grad,
+                            Unpool3dGradInferShapeFunctor,
+                            PD_INFER_META(phi::UnchangedInferMeta));
 
-REGISTER_OPERATOR(unpool3d_grad, ops::Unpool3dOpGrad);
-REGISTER_OP_CPU_KERNEL(unpool3d,
-                       ops::Unpool3dKernel<phi::CPUContext, float>,
-                       ops::Unpool3dKernel<phi::CPUContext, double>);
-REGISTER_OP_CPU_KERNEL(unpool3d_grad,
-                       ops::Unpool3dGradKernel<phi::CPUContext, float>,
-                       ops::Unpool3dGradKernel<phi::CPUContext, double>);
+REGISTER_OPERATOR(unpool3d_grad,
+                  ops::Unpool3dOpGrad,
+                  Unpool3dGradInferShapeFunctor);
diff --git a/paddle/fluid/operators/unpool_op.cu.cc b/paddle/fluid/operators/unpool_op.cu.cc
deleted file mode 100644
index 82890419daf..00000000000
--- a/paddle/fluid/operators/unpool_op.cu.cc
+++ /dev/null
@@ -1,29 +0,0 @@
-/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-Indicesou may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/operators/unpool_op.h"
-
-namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(unpool,
-                        ops::UnpoolKernel<phi::GPUContext, float>,
-                        ops::UnpoolKernel<phi::GPUContext, double>);
-REGISTER_OP_CUDA_KERNEL(unpool_grad,
-                        ops::UnpoolGradKernel<phi::GPUContext, float>,
-                        ops::UnpoolGradKernel<phi::GPUContext, double>);
-REGISTER_OP_CUDA_KERNEL(unpool3d,
-                        ops::Unpool3dKernel<phi::GPUContext, float>,
-                        ops::Unpool3dKernel<phi::GPUContext, double>);
-REGISTER_OP_CUDA_KERNEL(unpool3d_grad,
-                        ops::Unpool3dGradKernel<phi::GPUContext, float>,
-                        ops::Unpool3dGradKernel<phi::GPUContext, double>);
diff --git a/paddle/fluid/operators/unpool_op.h b/paddle/fluid/operators/unpool_op.h
deleted file mode 100644
index 062008f95ea..00000000000
--- a/paddle/fluid/operators/unpool_op.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-Indicesou may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-#include <string>
-#include <vector>
-
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/unpooling.h"
-#include "paddle/phi/kernels/funcs/math_function.h"
-
-namespace paddle {
-namespace operators {
-template <typename DeviceContext, typename T>
-class UnpoolKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& context) const override {
-    const framework::Tensor* in_x = context.Input<framework::Tensor>("X");
-    const framework::Tensor* in_y = context.Input<framework::Tensor>("Indices");
-    auto* out = context.Output<framework::Tensor>("Out");
-    std::string unpooling_type = context.Attr<std::string>("unpooling_type");
-    std::vector<int> ksize = context.Attr<std::vector<int>>("ksize");
-    std::vector<int> strides = context.Attr<std::vector<int>>("strides");
-    std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
-    T* output_data = out->mutable_data<T>(context.GetPlace());
-    auto& dev_ctx = context.template device_context<DeviceContext>();
-    if (output_data) {
-      phi::funcs::SetConstant<DeviceContext, T> set_zero;
-      set_zero(dev_ctx, out, static_cast<T>(0));
-    }
-    math::Unpool2dMaxFunctor<DeviceContext, T> unpool2d_max_forward;
-    unpool2d_max_forward(dev_ctx, *in_x, *in_y, out);
-  }
-};
-template <typename DeviceContext, typename T>
-class UnpoolGradKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& context) const override {
-    const framework::Tensor* in_x = context.Input<framework::Tensor>("X");
-    const framework::Tensor* in_y = context.Input<framework::Tensor>("Indices");
-    const framework::Tensor* out = context.Input<framework::Tensor>("Out");
-    const framework::Tensor* out_grad =
-        context.Input<framework::Tensor>(framework::GradVarName("Out"));
-    framework::Tensor* in_x_grad =
-        context.Output<framework::Tensor>(framework::GradVarName("X"));
-    std::string unpooling_type = context.Attr<std::string>("unpooling_type");
-    std::vector<int> ksize = context.Attr<std::vector<int>>("ksize");
-    std::vector<int> strides = context.Attr<std::vector<int>>("strides");
-    std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
-
-    auto& device_ctx = context.template device_context<DeviceContext>();
-    phi::funcs::SetConstant<DeviceContext, T> zero;
-
-    in_x_grad->mutable_data<T>(context.GetPlace());
-    zero(device_ctx, in_x_grad, static_cast<T>(0));
-
-    math::Unpool2dMaxGradFunctor<DeviceContext, T> unpool2d_max_backward;
-    unpool2d_max_backward(device_ctx, *in_x, *in_y, *out, *out_grad, in_x_grad);
-  }
-};
-
-template <typename DeviceContext, typename T>
-class Unpool3dKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& context) const override {
-    const framework::Tensor* in_x = context.Input<framework::Tensor>("X");
-    const framework::Tensor* in_y = context.Input<framework::Tensor>("Indices");
-    auto* out = context.Output<framework::Tensor>("Out");
-    std::string unpooling_type = context.Attr<std::string>("unpooling_type");
-    std::vector<int> ksize = context.Attr<std::vector<int>>("ksize");
-    std::vector<int> strides = context.Attr<std::vector<int>>("strides");
-    std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
-    T* output_data = out->mutable_data<T>(context.GetPlace());
-    auto& dev_ctx = context.template device_context<DeviceContext>();
-    if (output_data) {
-      phi::funcs::SetConstant<DeviceContext, T> set_zero;
-      set_zero(dev_ctx, out, static_cast<T>(0));
-    }
-    math::Unpool3dMaxFunctor<DeviceContext, T> unpool3d_max_forward;
-    unpool3d_max_forward(dev_ctx, *in_x, *in_y, out);
-  }
-};
-
-template <typename DeviceContext, typename T>
-class Unpool3dGradKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& context) const override {
-    const framework::Tensor* in_x = context.Input<framework::Tensor>("X");
-    const framework::Tensor* in_y = context.Input<framework::Tensor>("Indices");
-    const framework::Tensor* out = context.Input<framework::Tensor>("Out");
-    const framework::Tensor* out_grad =
-        context.Input<framework::Tensor>(framework::GradVarName("Out"));
-    framework::Tensor* in_x_grad =
-        context.Output<framework::Tensor>(framework::GradVarName("X"));
-    std::string unpooling_type = context.Attr<std::string>("unpooling_type");
-    std::vector<int> ksize = context.Attr<std::vector<int>>("ksize");
-    std::vector<int> strides = context.Attr<std::vector<int>>("strides");
-    std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
-
-    auto& device_ctx = context.template device_context<DeviceContext>();
-    phi::funcs::SetConstant<DeviceContext, T> zero;
-
-    in_x_grad->mutable_data<T>(context.GetPlace());
-    zero(device_ctx, in_x_grad, static_cast<T>(0));
-
-    math::Unpool3dMaxGradFunctor<DeviceContext, T> unpool3d_max_backward;
-    unpool3d_max_backward(device_ctx, *in_x, *in_y, *out, *out_grad, in_x_grad);
-  }
-};
-}  // namespace operators
-}  // namespace paddle
diff --git a/paddle/phi/api/yaml/legacy_api.yaml b/paddle/phi/api/yaml/legacy_api.yaml
index b4ca7148a40..61c9eba30ec 100755
--- a/paddle/phi/api/yaml/legacy_api.yaml
+++ b/paddle/phi/api/yaml/legacy_api.yaml
@@ -2781,6 +2781,16 @@
     func: eig
   backward: eig_grad
 
+# fold
+- api: fold
+  args: (Tensor x, int[] output_sizes, int[] kernel_sizes,  int[] strides, int[] paddings, int[] dilations)
+  output: Tensor(out)
+  infer_meta:
+    func: FoldInferMeta
+  kernel:
+    func: fold
+  backward: fold_grad
+
 # overlap_add
 - api: overlap_add
   args: (Tensor x, int hop_length, int axis)
@@ -2801,3 +2811,25 @@
     data_type: x
   inplace: (x -> out)
   backward: uniform_random_inplace_grad
+
+# unpool
+- api: unpool
+  args: (Tensor x, Tensor indices, int[] ksize, int[] strides, int[] padding, int[] output_size, str data_format)
+  output: Tensor(out)
+  infer_meta:
+    func: UnpoolInferMeta
+  kernel:
+    func: unpool
+    data_type: x
+  backward: unpool_grad
+
+# unpool3d
+- api: unpool3d
+  args: (Tensor x, Tensor indices, int[] ksize, int[] strides, int[] padding, int[] output_size, str data_format)
+  output: Tensor(out)
+  infer_meta:
+    func: Unpool3dInferMeta
+  kernel:
+    func: unpool3d
+    data_type: x
+  backward: unpool3d_grad
diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml
index 8b43f7643c7..892e824e60a 100755
--- a/paddle/phi/api/yaml/legacy_backward.yaml
+++ b/paddle/phi/api/yaml/legacy_backward.yaml
@@ -2625,3 +2625,41 @@
   kernel :
     func : yolov3_loss_grad
   optional : gt_score
+
+# fold
+- backward_api: fold_grad
+  forward: fold (Tensor x, int[] output_sizes, int[] kernel_sizes, int[] strides, int[] paddings, int[] dilations) -> Tensor(out)
+  args: (Tensor x, Tensor out_grad, int[] output_sizes, int[] kernel_sizes, int[] strides, int[] paddings, int[] dilations)
+  output: Tensor(x_grad)
+  infer_meta:
+    func: UnchangedInferMeta
+    param : [x]
+  kernel:
+    func: fold_grad
+  no_need_buffer : x
+
+# unpool3d
+- backward_api: unpool3d_grad
+  forward: unpool3d (Tensor x, Tensor indices, int[] ksize, int[] strides, int[] padding, int[] output_size, str data_format) -> Tensor(out)
+  args: (Tensor x, Tensor indices, Tensor out, Tensor out_grad, int[] ksize, int[] strides, int[] padding, int[] output_size, str data_format)
+  output: Tensor(x_grad)
+  infer_meta:
+    func: UnchangedInferMeta
+    param : [x]
+  kernel:
+    func: unpool3d_grad
+    data_type: x
+  no_need_buffer : x
+
+# unpool
+- backward_api: unpool_grad
+  forward: unpool (Tensor x, Tensor indices, int[] ksize, int[] strides, int[] padding,  int[] output_size, str data_format) -> Tensor(out)
+  args: (Tensor x, Tensor indices, Tensor out, Tensor out_grad, int[] ksize, int[] strides, int[] padding, int[] output_size, str data_format)
+  output: Tensor(x_grad)
+  infer_meta:
+    func: UnchangedInferMeta
+    param : [x]
+  kernel:
+    func: unpool_grad
+    data_type: x
+  no_need_buffer : x
diff --git a/paddle/phi/infermeta/backward.h b/paddle/phi/infermeta/backward.h
old mode 100755
new mode 100644
diff --git a/paddle/phi/infermeta/binary.cc b/paddle/phi/infermeta/binary.cc
index 8ba4290e69f..48b76f96c01 100644
--- a/paddle/phi/infermeta/binary.cc
+++ b/paddle/phi/infermeta/binary.cc
@@ -2554,6 +2554,89 @@ void SolveInferMeta(const MetaTensor& x, const MetaTensor& y, MetaTensor* out) {
   out->share_lod(x);
 }
 
+void UnpoolInferMeta(const MetaTensor& x,
+                     const MetaTensor& indices,
+                     const std::vector<int>& ksize,
+                     const std::vector<int>& strides,
+                     const std::vector<int>& paddings,
+                     const std::vector<int>& output_size,
+                     const std::string& data_format,
+                     MetaTensor* out,
+                     MetaConfig config) {
+  auto in_x_dims = x.dims();
+  auto in_y_dims = indices.dims();
+  // x must be 4-D and indices must have exactly the same dims as x.
+  PADDLE_ENFORCE_EQ(in_x_dims.size() == 4,
+                    true,
+                    phi::errors::InvalidArgument(
+                        "Unpool Input(X) must be of 4-dimensional, but "
+                        "received Input(X)'s dimensions is %d.",
+                        in_x_dims.size()));
+  PADDLE_ENFORCE_EQ(in_x_dims,
+                    in_y_dims,
+                    phi::errors::InvalidArgument(
+                        "The dimensions of Input(X) must be equal to "
+                        "the dimensions of Input(Indices), but received "
+                        "dimensions of Input(X) is [%d], received dimensions "
+                        "of Input(Indices) is [%d]",
+                        in_x_dims,
+                        in_y_dims));
+  // Dims 0-1 copy x; the rest use output_size, or -1 if unknown pre-runtime.
+  std::vector<int64_t> output_shape({in_x_dims[0], in_x_dims[1]});
+  for (size_t i = 0; i < ksize.size(); ++i) {
+    if (!config.is_runtime && in_x_dims[i + 2] <= 0) {
+      output_shape.push_back(-1);
+    } else {
+      output_shape.push_back(output_size[i]);
+    }
+  }
+  if (out != nullptr) {
+    out->set_dims(phi::make_ddim(output_shape));
+    out->set_dtype(x.dtype());
+  }
+}
+void Unpool3dInferMeta(const MetaTensor& x,
+                       const MetaTensor& indices,
+                       const std::vector<int>& ksize,
+                       const std::vector<int>& strides,
+                       const std::vector<int>& paddings,
+                       const std::vector<int>& output_size,
+                       const std::string& data_format,
+                       MetaTensor* out,
+                       MetaConfig config) {
+  auto in_x_dims = x.dims();
+  auto in_y_dims = indices.dims();
+  // x must be 5-D and indices must have exactly the same dims as x.
+  PADDLE_ENFORCE_EQ(in_x_dims.size() == 5,
+                    true,
+                    phi::errors::InvalidArgument(
+                        "Unpool3d Input(X) must be of 5-dimensional, but "
+                        "received Input(X)'s dimensions is %d.",
+                        in_x_dims.size()));
+  PADDLE_ENFORCE_EQ(in_x_dims,
+                    in_y_dims,
+                    phi::errors::InvalidArgument(
+                        "The dimensions of Input(X) must be equal to "
+                        "the dimensions of Input(Indices), but received "
+                        "dimensions of Input(X) is [%d], received dimensions "
+                        "of Input(Indices) is [%d]",
+                        in_x_dims,
+                        in_y_dims));
+  // Dims 0-1 copy x; the rest use output_size, or -1 if unknown pre-runtime.
+  std::vector<int64_t> output_shape({in_x_dims[0], in_x_dims[1]});
+  for (size_t i = 0; i < ksize.size(); ++i) {
+    if (!config.is_runtime && in_x_dims[i + 2] <= 0) {
+      output_shape.push_back(-1);
+    } else {
+      output_shape.push_back(output_size[i]);
+    }
+  }
+  if (out != nullptr) {
+    out->set_dims(phi::make_ddim(output_shape));
+    out->set_dtype(x.dtype());
+  }
+}
+
 }  // namespace phi
 
 PD_REGISTER_INFER_META_FN(add_raw, phi::ElementwiseRawInferMeta);
diff --git a/paddle/phi/infermeta/binary.h b/paddle/phi/infermeta/binary.h
index 9f548256f4f..68c020cc68a 100644
--- a/paddle/phi/infermeta/binary.h
+++ b/paddle/phi/infermeta/binary.h
@@ -360,4 +360,24 @@ void ValueCompareInferMeta(const MetaTensor& x,
 
 void SolveInferMeta(const MetaTensor& x, const MetaTensor& y, MetaTensor* out);
 
+void UnpoolInferMeta(const MetaTensor& x,
+                     const MetaTensor& indices,
+                     const std::vector<int>& ksize,
+                     const std::vector<int>& strides,
+                     const std::vector<int>& paddings,
+                     const std::vector<int>& output_size,
+                     const std::string& data_format,
+                     MetaTensor* out,
+                     MetaConfig config = MetaConfig());
+
+void Unpool3dInferMeta(const MetaTensor& x,
+                       const MetaTensor& indices,
+                       const std::vector<int>& ksize,
+                       const std::vector<int>& strides,
+                       const std::vector<int>& paddings,
+                       const std::vector<int>& output_size,
+                       const std::string& data_format,
+                       MetaTensor* out,
+                       MetaConfig config = MetaConfig());
+
 }  // namespace phi
diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc
index 74705c3759d..a7ed6e71d05 100644
--- a/paddle/phi/infermeta/unary.cc
+++ b/paddle/phi/infermeta/unary.cc
@@ -4113,6 +4113,211 @@ void IdentityLossInferMeta(const MetaTensor& x,
   }
 }
 
+void FoldInferMeta(const MetaTensor& x,
+                   const std::vector<int>& output_sizes,
+                   const std::vector<int>& kernel_sizes,
+                   const std::vector<int>& strides,
+                   const std::vector<int>& paddings,
+                   const std::vector<int>& dilations,
+                   MetaTensor* out) {
+  auto in_dims = x.dims();
+  // validate the length of every attribute before indexing into it
+  PADDLE_ENFORCE_EQ(
+      output_sizes.size(),
+      2,
+      phi::errors::InvalidArgument(
+          "It is expected output_size equals to 2, but got size %d",
+          output_sizes.size()));
+  PADDLE_ENFORCE_EQ(
+      kernel_sizes.size(),
+      2,
+      phi::errors::InvalidArgument(
+          "It is expected kernel_size equals to 2, but got size %d",
+          kernel_sizes.size()));
+  PADDLE_ENFORCE_EQ(
+      strides.size(),
+      2,
+      phi::errors::InvalidArgument(
+          "It is expected strides_size equals to 2, but got size %d",
+          strides.size()));
+  PADDLE_ENFORCE_EQ(
+      paddings.size(),
+      4,
+      phi::errors::InvalidArgument(
+          "It is expected paddings_size equals to 4, but got size %d",
+          paddings.size()));
+
+  PADDLE_ENFORCE_EQ(
+      dilations.size(),
+      2,
+      phi::errors::InvalidArgument(
+          "It is expected dilations_size equals to 2, but got size %d",
+          dilations.size()));
+
+  int output_height = output_sizes[0];
+  int output_width = output_sizes[1];
+  int kernel_height = kernel_sizes[0];
+  int kernel_width = kernel_sizes[1];
+  int dilation_height = dilations[0];
+  int dilation_width = dilations[1];
+  int stride_height = strides[0];
+  int stride_width = strides[1];
+
+  // check kernel_sizes
+  PADDLE_ENFORCE_GT(kernel_height,
+                    0,
+                    phi::errors::InvalidArgument(
+                        "The `kernel_sizes` should be greater than zero, "
+                        "but received kernel_height: %d kernel_width: %d.",
+                        kernel_sizes[0],
+                        kernel_sizes[1]));
+  PADDLE_ENFORCE_GT(kernel_width,
+                    0,
+                    phi::errors::InvalidArgument(
+                        "The `kernel_sizes` should be greater than zero, "
+                        "but received kernel_height: %d kernel_width: %d.",
+                        kernel_sizes[0],
+                        kernel_sizes[1]));
+  // check strides
+  PADDLE_ENFORCE_GT(stride_height,
+                    0,
+                    phi::errors::InvalidArgument(
+                        "The `strides` should be greater than zero, "
+                        "but received strides_height: %d strides_width: %d.",
+                        strides[0],
+                        strides[1]));
+  PADDLE_ENFORCE_GT(stride_width,
+                    0,
+                    phi::errors::InvalidArgument(
+                        "The `strides` should be greater than zero, "
+                        "but received strides_height: %d strides_width: %d.",
+                        strides[0],
+                        strides[1]));
+  // check output size
+  PADDLE_ENFORCE_GT(output_height,
+                    1,
+                    phi::errors::InvalidArgument(
+                        "The `output_height` should be greater than one, "
+                        "but received output_height: %d .",
+                        output_height));
+  PADDLE_ENFORCE_GT(output_width,
+                    1,
+                    phi::errors::InvalidArgument(
+                        "The `output_width` should be greater than one, "
+                        "but received output_width: %d .",
+                        output_width));
+  // check dilations
+  PADDLE_ENFORCE_GT(
+      dilation_height,
+      0,
+      phi::errors::InvalidArgument(
+          "The `dilations` should be greater than zero, "
+          "but received dilations_height: %d dilations_width: %d.",
+          dilations[0],
+          dilations[1]));
+  PADDLE_ENFORCE_GT(
+      dilation_width,
+      0,
+      phi::errors::InvalidArgument(
+          "The `dilations` should be greater than zero, "
+          "but received dilations_height: %d dilations_width: %d.",
+          dilations[0],
+          dilations[1]));
+
+  std::vector<int> out_dims;
+  // batch_size
+  out_dims.push_back(in_dims[0]);
+  // output_plane
+  int output_channels = in_dims[1] / (kernel_width * kernel_height);
+  out_dims.push_back(output_channels);
+  // number of sliding blocks along the output height and width
+  int blocks_height = (output_sizes[0] + 2 * paddings[0] -
+                       (dilations[0] * (kernel_sizes[0] - 1) + 1)) /
+                          strides[0] +
+                      1;
+  int blocks_width = (output_sizes[1] + 2 * paddings[1] -
+                      (dilations[1] * (kernel_sizes[1] - 1) + 1)) /
+                         strides[1] +
+                     1;
+
+  // check the number of sliding blocks along height and width
+  PADDLE_ENFORCE_GT(
+      blocks_height,
+      0,
+      phi::errors::InvalidArgument(
+          "The sliding blocks calculated from output spatial size (%d, %d), "
+          "kernel_sizes (%d, %d), strides (%d, %d), dilations (%d, %d), "
+          "is (%d, %d), which should be a positive integer.",
+          output_height,
+          output_width,
+          kernel_sizes[0],
+          kernel_sizes[1],
+          strides[0],
+          strides[1],
+          dilations[0],
+          dilations[1],
+          blocks_height,
+          blocks_width));
+
+  PADDLE_ENFORCE_GT(
+      blocks_width,
+      0,
+      phi::errors::InvalidArgument(
+          "The sliding blocks calculated from output spatial size (%d, %d), "
+          "kernel_sizes (%d, %d), strides (%d, %d), dilations (%d, %d), "
+          "is (%d, %d), which should be a positive integer.",
+          output_height,
+          output_width,
+          kernel_sizes[0],
+          kernel_sizes[1],
+          strides[0],
+          strides[1],
+          dilations[0],
+          dilations[1],
+          blocks_height,
+          blocks_width));
+
+  PADDLE_ENFORCE_EQ(
+      blocks_height * blocks_width,
+      in_dims[2],
+      phi::errors::InvalidArgument(
+          "Given input output_size (%d, %d), "
+          "kernel_sizes (%d, %d), strides (%d, %d), dilations (%d, %d), "
+          "which should be expected size of input's dimension "
+          "2 to match the calculated number of %d * %d = %d, but got %d",
+          output_height,
+          output_width,
+          kernel_sizes[0],
+          kernel_sizes[1],
+          strides[0],
+          strides[1],
+          dilations[0],
+          dilations[1],
+          blocks_height,
+          blocks_width,
+          blocks_height * blocks_width,
+          in_dims[2]));
+
+  PADDLE_ENFORCE_EQ(
+      in_dims[1] % (kernel_sizes[0] * kernel_sizes[1]),
+      0,
+      phi::errors::InvalidArgument(
+          "Expected size of input's dimension 1 to be divisible by the "
+          "product of kernel_size, but got input.size(1)=%d and "
+          "kernel_size=( %d"
+          ", %d).",
+          in_dims[1],
+          kernel_sizes[0],
+          kernel_sizes[1]));
+
+  out_dims.push_back(output_height);
+  out_dims.push_back(output_width);
+  if (out != nullptr) {
+    out->set_dims(phi::make_ddim(out_dims));
+    out->set_dtype(x.dtype());
+  }
+}
+
 }  // namespace phi
 
 PD_REGISTER_INFER_META_FN(flatten, phi::FlattenInferMeta);
diff --git a/paddle/phi/infermeta/unary.h b/paddle/phi/infermeta/unary.h
index bd35855a431..5a8bcfb2b70 100644
--- a/paddle/phi/infermeta/unary.h
+++ b/paddle/phi/infermeta/unary.h
@@ -583,4 +583,12 @@ void ChannelShuffleInferMeta(const MetaTensor& x,
 
 void IdentityLossInferMeta(const MetaTensor& x, int reduction, MetaTensor* out);
 
+void FoldInferMeta(const MetaTensor& x,
+                   const std::vector<int>& output_sizes,
+                   const std::vector<int>& kernel_sizes,
+                   const std::vector<int>& strides,
+                   const std::vector<int>& paddings,
+                   const std::vector<int>& dilations,
+                   MetaTensor* out);
+
 }  // namespace phi
diff --git a/paddle/phi/kernels/cpu/fold_grad_kernel.cc b/paddle/phi/kernels/cpu/fold_grad_kernel.cc
new file mode 100644
index 00000000000..0c3f1dda03e
--- /dev/null
+++ b/paddle/phi/kernels/cpu/fold_grad_kernel.cc
@@ -0,0 +1,22 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/fold_grad_kernel.h"
+
+#include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/impl/fold_grad_kernel_impl.h"
+
+PD_REGISTER_KERNEL(
+    fold_grad, CPU, ALL_LAYOUT, phi::FoldGradKernel, float, double) {}
diff --git a/paddle/phi/kernels/cpu/fold_kernel.cc b/paddle/phi/kernels/cpu/fold_kernel.cc
new file mode 100644
index 00000000000..e22ac4c771e
--- /dev/null
+++ b/paddle/phi/kernels/cpu/fold_kernel.cc
@@ -0,0 +1,21 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/fold_kernel.h"
+
+#include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/impl/fold_kernel_impl.h"
+
+PD_REGISTER_KERNEL(fold, CPU, ALL_LAYOUT, phi::FoldKernel, float, double) {}
diff --git a/paddle/phi/kernels/cpu/unpool_grad_kernel.cc b/paddle/phi/kernels/cpu/unpool_grad_kernel.cc
new file mode 100644
index 00000000000..e09082f7ba8
--- /dev/null
+++ b/paddle/phi/kernels/cpu/unpool_grad_kernel.cc
@@ -0,0 +1,137 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/unpool_grad_kernel.h"
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/funcs/math_function.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void UnpoolGradKernel(const Context& dev_ctx,
+                      const DenseTensor& x,
+                      const DenseTensor& indices,
+                      const DenseTensor& out,
+                      const DenseTensor& out_grad,
+                      const std::vector<int>& ksize,
+                      const std::vector<int>& strides,
+                      const std::vector<int>& paddings,
+                      const std::vector<int>& output_size,
+                      const std::string& data_format,
+                      DenseTensor* x_grad) {
+  T* input_grad_data = dev_ctx.template Alloc<T>(x_grad);
+  const T* output_grad_data = out_grad.data<T>();
+  phi::funcs::SetConstant<Context, T> zero;
+  zero(dev_ctx, x_grad, static_cast<T>(0));
+  const int batch_size = x.dims()[0];
+  const int input_height = x.dims()[2];
+  const int input_width = x.dims()[3];
+  const int output_channels = out.dims()[1];
+  const int output_height = out.dims()[2];
+  const int output_width = out.dims()[3];
+  int input_feasize = input_height * input_width;
+  int output_feasize = output_height * output_width;
+  const int* indices_data = indices.data<int>();
+  // For each (batch, channel) plane: x_grad[i] = out_grad[indices[i]].
+  for (int b = 0; b < batch_size; ++b) {
+    for (int c = 0; c < output_channels; ++c) {
+      for (int i = 0; i < input_feasize; ++i) {
+        int index = indices_data[i];
+        PADDLE_ENFORCE_EQ(
+            index >= 0 && index < output_feasize,
+            true,
+            phi::errors::InvalidArgument(
+                "index should be in [0, output tensor height * output tensor "
+                "width). Expected 0 <= %d < %d, but got "
+                "index %d with bound %d. Please check input value.",
+                index,
+                output_feasize,
+                index,
+                output_feasize));
+        input_grad_data[i] = output_grad_data[index];
+      }
+      input_grad_data += input_feasize;
+      indices_data += input_feasize;
+      output_grad_data += output_feasize;
+    }
+  }
+}
+
+template <typename T, typename Context>
+void Unpool3dGradKernel(const Context& dev_ctx,
+                        const DenseTensor& x,
+                        const DenseTensor& indices,
+                        const DenseTensor& out,
+                        const DenseTensor& out_grad,
+                        const std::vector<int>& ksize,
+                        const std::vector<int>& strides,
+                        const std::vector<int>& paddings,
+                        const std::vector<int>& output_size,
+                        const std::string& data_format,
+                        DenseTensor* x_grad) {
+  T* input_grad_data = dev_ctx.template Alloc<T>(x_grad);
+  const T* output_grad_data = out_grad.data<T>();
+  phi::funcs::SetConstant<Context, T> zero;
+  zero(dev_ctx, x_grad, static_cast<T>(0));
+
+  const int batch_size = x.dims()[0];
+  const int input_depth = x.dims()[2];
+  const int input_height = x.dims()[3];
+  const int input_width = x.dims()[4];
+  const int output_channels = out.dims()[1];
+  const int output_depth = out.dims()[2];
+  const int output_height = out.dims()[3];
+  const int output_width = out.dims()[4];
+  int input_feasize = input_depth * input_height * input_width;
+  int output_feasize = output_depth * output_height * output_width;
+  const int* indices_data = indices.data<int>();
+  // For each (batch, channel) volume: x_grad[i] = out_grad[indices[i]].
+  for (int b = 0; b < batch_size; ++b) {
+    for (int c = 0; c < output_channels; ++c) {
+      for (int i = 0; i < input_feasize; ++i) {
+        int index = indices_data[i];
+        PADDLE_ENFORCE_EQ(
+            index >= 0 && index < output_feasize,
+            true,
+            phi::errors::InvalidArgument(
+                "index should be in [0, output tensor depth * output tensor "
+                "height "
+                "* output tensor width). Expected 0 <= %d < %d, but got "
+                "index %d with bound %d. Please check input value.",
+                index,
+                output_feasize,
+                index,
+                output_feasize));
+        input_grad_data[i] = output_grad_data[index];
+      }
+      input_grad_data += input_feasize;
+      indices_data += input_feasize;
+      output_grad_data += output_feasize;
+    }
+  }
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL(
+    unpool_grad, CPU, ALL_LAYOUT, phi::UnpoolGradKernel, float, double) {}
+
+PD_REGISTER_KERNEL(
+    unpool3d_grad, CPU, ALL_LAYOUT, phi::Unpool3dGradKernel, float, double) {}
diff --git a/paddle/phi/kernels/cpu/unpool_kernel.cc b/paddle/phi/kernels/cpu/unpool_kernel.cc
new file mode 100644
index 00000000000..3ec0c622234
--- /dev/null
+++ b/paddle/phi/kernels/cpu/unpool_kernel.cc
@@ -0,0 +1,132 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/unpool_kernel.h"
+
+#include <algorithm>
+#include <vector>
+
+#include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/funcs/math_function.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void UnpoolKernel(const Context& dev_ctx,
+                  const DenseTensor& x,
+                  const DenseTensor& indices,
+                  const std::vector<int>& ksize,        // not read here
+                  const std::vector<int>& strides,      // not read here
+                  const std::vector<int>& paddings,     // not read here
+                  const std::vector<int>& output_size,  // not read here
+                  const std::string& data_format,       // not read here
+                  DenseTensor* out) {
+  // Zero-fills out, then per (batch, channel) plane: out[indices[i]] = x[i].
+  T* output_data = dev_ctx.template Alloc<T>(out);
+  if (output_data) {
+    phi::funcs::SetConstant<Context, T> set_zero;
+    set_zero(dev_ctx, out, static_cast<T>(0));
+  }
+  const int64_t plane_count = x.dims()[0] * out->dims()[1];
+  const int input_feasize = x.dims()[2] * x.dims()[3];
+  const int output_feasize = out->dims()[2] * out->dims()[3];
+  const T* input_data = x.data<T>();
+  const int* indices_data = indices.data<int>();
+  for (int64_t plane = 0; plane < plane_count; ++plane) {
+    for (int i = 0; i < input_feasize; ++i) {
+      const int index = indices_data[i];
+      // A negative offset would write before the plane, so reject it too.
+      PADDLE_ENFORCE_GE(
+          index,
+          0,
+          phi::errors::InvalidArgument(
+              "index should be non-negative, but got %d.", index));
+      PADDLE_ENFORCE_LT(
+          index,
+          output_feasize,
+          phi::errors::InvalidArgument(
+              "index should less than output tensor height * output tensor "
+              "width. Expected %ld < %ld, but got "
+              "%ld >= %ld. Please check input value.",
+              index,
+              output_feasize,
+              index,
+              output_feasize));
+      output_data[index] = input_data[i];
+    }
+    input_data += input_feasize;
+    indices_data += input_feasize;
+    output_data += output_feasize;
+  }
+}
+
+template <typename T, typename Context>
+void Unpool3dKernel(const Context& dev_ctx,
+                    const DenseTensor& x,
+                    const DenseTensor& indices,
+                    const std::vector<int>& ksize,        // not read here
+                    const std::vector<int>& strides,      // not read here
+                    const std::vector<int>& paddings,     // not read here
+                    const std::vector<int>& output_size,  // not read here
+                    const std::string& data_format,       // not read here
+                    DenseTensor* out) {
+  // Zero-fills out, then per (batch, channel) volume writes
+  // out[indices[i]] = x[i]; indices are offsets inside that volume.
+  T* output_data = dev_ctx.template Alloc<T>(out);
+  if (output_data) {
+    phi::funcs::SetConstant<Context, T> set_zero;
+    set_zero(dev_ctx, out, static_cast<T>(0));
+  }
+  const int64_t plane_count = x.dims()[0] * out->dims()[1];
+  const int input_feasize = x.dims()[2] * x.dims()[3] * x.dims()[4];
+  const int output_feasize = out->dims()[2] * out->dims()[3] * out->dims()[4];
+  const T* input_data = x.data<T>();
+  const int* indices_data = indices.data<int>();
+  for (int64_t plane = 0; plane < plane_count; ++plane) {
+    for (int i = 0; i < input_feasize; ++i) {
+      const int index = indices_data[i];
+      // A negative offset would write before the volume, so reject it
+      // as well as offsets past the end.
+      PADDLE_ENFORCE_GE(
+          index,
+          0,
+          phi::errors::InvalidArgument(
+              "index should be non-negative, but got %d.", index));
+      PADDLE_ENFORCE_LT(
+          index,
+          output_feasize,
+          phi::errors::InvalidArgument(
+              "index should less than output tensor depth * output tensor "
+              "height "
+              "* output tensor width. Expected %ld < %ld, but got "
+              "%ld >= %ld. Please check input value.",
+              index,
+              output_feasize,
+              index,
+              output_feasize));
+      output_data[index] = input_data[i];
+    }
+    input_data += input_feasize;
+    indices_data += input_feasize;
+    output_data += output_feasize;
+  }
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL(unpool, CPU, ALL_LAYOUT, phi::UnpoolKernel, float, double) {}
+// CPU registrations (2-D above, 3-D below) list float and double only.
+PD_REGISTER_KERNEL(
+    unpool3d, CPU, ALL_LAYOUT, phi::Unpool3dKernel, float, double) {}
diff --git a/paddle/phi/kernels/fold_grad_kernel.h b/paddle/phi/kernels/fold_grad_kernel.h
new file mode 100644
index 00000000000..2e8614484aa
--- /dev/null
+++ b/paddle/phi/kernels/fold_grad_kernel.h
@@ -0,0 +1,31 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/phi/core/dense_tensor.h"
+
+namespace phi {
+
+template <typename T, typename Context>  // declaration only
+void FoldGradKernel(const Context& dev_ctx,
+                    const DenseTensor& x,
+                    const DenseTensor& out_grad,
+                    const std::vector<int>& output_sizes,  // {h, w}
+                    const std::vector<int>& kernel_sizes,  // {h, w}
+                    const std::vector<int>& strides,       // {h, w}
+                    const std::vector<int>& paddings,
+                    const std::vector<int>& dilations,     // {h, w}
+                    DenseTensor* x_grad);  // receives the result
+}  // namespace phi
diff --git a/paddle/phi/kernels/fold_kernel.h b/paddle/phi/kernels/fold_kernel.h
new file mode 100644
index 00000000000..3fd6281b2cc
--- /dev/null
+++ b/paddle/phi/kernels/fold_kernel.h
@@ -0,0 +1,30 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/phi/core/dense_tensor.h"
+
+namespace phi {
+
+template <typename T, typename Context>  // declaration only
+void FoldKernel(const Context& dev_ctx,
+                const DenseTensor& x,
+                const std::vector<int>& output_sizes,  // {h, w}
+                const std::vector<int>& kernel_sizes,  // {h, w}
+                const std::vector<int>& strides,       // {h, w}
+                const std::vector<int>& paddings,
+                const std::vector<int>& dilations,     // {h, w}
+                DenseTensor* out);  // receives the result
+}  // namespace phi
diff --git a/paddle/phi/kernels/gpu/fold_grad_kernel.cu b/paddle/phi/kernels/gpu/fold_grad_kernel.cu
new file mode 100644
index 00000000000..ad469dd7981
--- /dev/null
+++ b/paddle/phi/kernels/gpu/fold_grad_kernel.cu
@@ -0,0 +1,22 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/fold_grad_kernel.h"
+
+#include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/impl/fold_grad_kernel_impl.h"
+// Registers phi::FoldGradKernel as "fold_grad" for GPU (float, double).
+PD_REGISTER_KERNEL(
+    fold_grad, GPU, ALL_LAYOUT, phi::FoldGradKernel, float, double) {}
diff --git a/paddle/phi/kernels/gpu/fold_kernel.cu b/paddle/phi/kernels/gpu/fold_kernel.cu
new file mode 100644
index 00000000000..b53ef402150
--- /dev/null
+++ b/paddle/phi/kernels/gpu/fold_kernel.cu
@@ -0,0 +1,21 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/fold_kernel.h"
+
+#include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/impl/fold_kernel_impl.h"
+// Registers phi::FoldKernel as "fold" for GPU (float, double).
+PD_REGISTER_KERNEL(fold, GPU, ALL_LAYOUT, phi::FoldKernel, float, double) {}
diff --git a/paddle/phi/kernels/gpu/unpool_grad_kernel.cu b/paddle/phi/kernels/gpu/unpool_grad_kernel.cu
new file mode 100644
index 00000000000..24d4193ed6c
--- /dev/null
+++ b/paddle/phi/kernels/gpu/unpool_grad_kernel.cu
@@ -0,0 +1,203 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/unpool_grad_kernel.h"
+
+#include <algorithm>
+#include <vector>
+
+#include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/funcs/math_function.h"
+
+namespace phi {
+
+// Max-unpool 2-D backward: input_grad[i] = plane_grad[indices_data[i]].
+template <typename T>
+__global__ void KernelUnpool2dMaxGrad(const int nthreads,
+                                      const T* input_data,  // not read
+                                      const int* indices_data,
+                                      const int input_height,
+                                      const int input_width,
+                                      const int channels,  // not read
+                                      const T* output_data,  // not read
+                                      const T* output_grad,
+                                      const int output_height,
+                                      const int output_width,
+                                      T* input_grad) {
+  CUDA_KERNEL_LOOP(linearIndex, nthreads) {
+    // plane == n * channels + c; kept local so output_grad never drifts.
+    const int plane = linearIndex / input_width / input_height;
+    const T* plane_grad = output_grad + plane * output_height * output_width;
+    input_grad[linearIndex] = plane_grad[indices_data[linearIndex]];
+  }
+}
+
+// Max-unpool 3-D backward: input_grad[i] = plane_grad[indices_data[i]].
+template <typename T>
+__global__ void KernelUnpool3dMaxGrad(const int nthreads,
+                                      const T* input_data,  // not read
+                                      const int* indices_data,
+                                      const int input_depth,
+                                      const int input_height,
+                                      const int input_width,
+                                      const int channels,  // not read
+                                      const T* output_data,  // not read
+                                      const T* output_grad,
+                                      const int output_depth,
+                                      const int output_height,
+                                      const int output_width,
+                                      T* input_grad) {
+  CUDA_KERNEL_LOOP(linearIndex, nthreads) {
+    // plane == n * channels + c; kept local so output_grad never drifts.
+    const int plane = linearIndex / input_depth / input_width / input_height;
+    const T* plane_grad =
+        output_grad + plane * output_depth * output_height * output_width;
+    input_grad[linearIndex] = plane_grad[indices_data[linearIndex]];
+  }
+}
+
+template <typename T, typename Context>
+class Unpool2dMaxGradFunctor {
+ public:
+  void operator()(const Context& dev_ctx,
+                  const DenseTensor& input,
+                  const DenseTensor& indices,
+                  const DenseTensor& output,
+                  const DenseTensor& output_grad,
+                  DenseTensor* input_grad) {
+    // Launches KernelUnpool2dMaxGrad with grid * threads >= input.numel().
+    const int in_h = input.dims()[2];
+    const int in_w = input.dims()[3];
+    const int out_c = output.dims()[1];
+    const int out_h = output.dims()[2];
+    const int out_w = output.dims()[3];
+    const T* input_data = input.data<T>();
+    const int* indices_data = indices.data<int>();
+    const T* output_data = output.data<T>();
+    const T* output_grad_data = output_grad.data<T>();
+    T* input_grad_data = dev_ctx.template Alloc<T>(input_grad);
+#ifdef __HIPCC__
+    const int threads = 256;
+#else
+    const int threads = 1024;
+#endif
+    const int grid = (input.numel() + threads - 1) / threads;
+    KernelUnpool2dMaxGrad<T>
+        <<<grid, threads, 0, dev_ctx.stream()>>>(input.numel(),
+                                                 input_data,
+                                                 indices_data,
+                                                 in_h,
+                                                 in_w,
+                                                 out_c,
+                                                 output_data,
+                                                 output_grad_data,
+                                                 out_h,
+                                                 out_w,
+                                                 input_grad_data);
+  }
+};
+
+template <typename T, typename Context>
+class Unpool3dMaxGradFunctor {
+ public:
+  void operator()(const Context& dev_ctx,
+                  const DenseTensor& input,
+                  const DenseTensor& indices,
+                  const DenseTensor& output,
+                  const DenseTensor& output_grad,
+                  DenseTensor* input_grad) {
+    // Launches KernelUnpool3dMaxGrad with grid * threads >= input.numel().
+    const int in_d = input.dims()[2];
+    const int in_h = input.dims()[3];
+    const int in_w = input.dims()[4];
+    const int out_c = output.dims()[1];
+    const int out_d = output.dims()[2];
+    const int out_h = output.dims()[3];
+    const int out_w = output.dims()[4];
+    const T* input_data = input.data<T>();
+    const int* indices_data = indices.data<int>();
+    const T* output_data = output.data<T>();
+    const T* output_grad_data = output_grad.data<T>();
+    T* input_grad_data = dev_ctx.template Alloc<T>(input_grad);
+#ifdef __HIPCC__
+    const int threads = 256;
+#else
+    const int threads = 1024;
+#endif
+    const int grid = (input.numel() + threads - 1) / threads;
+    KernelUnpool3dMaxGrad<T>
+        <<<grid, threads, 0, dev_ctx.stream()>>>(input.numel(),
+                                                 input_data,
+                                                 indices_data,
+                                                 in_d,
+                                                 in_h,
+                                                 in_w,
+                                                 out_c,
+                                                 output_data,
+                                                 output_grad_data,
+                                                 out_d,
+                                                 out_h,
+                                                 out_w,
+                                                 input_grad_data);
+  }
+};
+
+template <typename T, typename Context>
+void UnpoolGradKernel(const Context& dev_ctx,
+                      const DenseTensor& x,
+                      const DenseTensor& indices,
+                      const DenseTensor& out,
+                      const DenseTensor& out_grad,
+                      const std::vector<int>& ksize,        // not read here
+                      const std::vector<int>& strides,      // not read here
+                      const std::vector<int>& paddings,     // not read here
+                      const std::vector<int>& output_size,  // not read here
+                      const std::string& data_format,       // not read here
+                      DenseTensor* x_grad) {
+  // Allocate x_grad, zero it, then let the functor launch the 2-D kernel.
+  dev_ctx.template Alloc<T>(x_grad);
+  phi::funcs::SetConstant<Context, T> zero;
+  zero(dev_ctx, x_grad, static_cast<T>(0));
+  Unpool2dMaxGradFunctor<T, Context> unpool2d_max_backward;
+  unpool2d_max_backward(dev_ctx, x, indices, out, out_grad, x_grad);
+}
+
+template <typename T, typename Context>
+void Unpool3dGradKernel(const Context& dev_ctx,
+                        const DenseTensor& x,
+                        const DenseTensor& indices,
+                        const DenseTensor& out,
+                        const DenseTensor& out_grad,
+                        const std::vector<int>& ksize,        // not read here
+                        const std::vector<int>& strides,      // not read here
+                        const std::vector<int>& paddings,     // not read here
+                        const std::vector<int>& output_size,  // not read here
+                        const std::string& data_format,       // not read here
+                        DenseTensor* x_grad) {
+  // Allocate x_grad, zero it, then let the functor launch the 3-D kernel.
+  dev_ctx.template Alloc<T>(x_grad);
+  phi::funcs::SetConstant<Context, T> zero;
+  zero(dev_ctx, x_grad, static_cast<T>(0));
+  Unpool3dMaxGradFunctor<T, Context> unpool3d_max_backward;
+  unpool3d_max_backward(dev_ctx, x, indices, out, out_grad, x_grad);
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL(
+    unpool_grad, GPU, ALL_LAYOUT, phi::UnpoolGradKernel, float, double) {}
+// GPU registrations (2-D above, 3-D below) list float and double only.
+PD_REGISTER_KERNEL(
+    unpool3d_grad, GPU, ALL_LAYOUT, phi::Unpool3dGradKernel, float, double) {}
diff --git a/paddle/phi/kernels/gpu/unpool_kernel.cu b/paddle/phi/kernels/gpu/unpool_kernel.cu
new file mode 100644
index 00000000000..c9ded2fd822
--- /dev/null
+++ b/paddle/phi/kernels/gpu/unpool_kernel.cu
@@ -0,0 +1,188 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/unpool_kernel.h"
+
+#include <algorithm>
+#include <vector>
+
+#include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/funcs/math_function.h"
+
+namespace phi {
+
+// Max-unpool 2-D forward: plane_out[indices_data[i]] = input_data[i].
+template <typename T>
+__global__ void KernelUnpool2dMax(const int nthreads,
+                                  const T* input_data,
+                                  const int* indices_data,
+                                  const int input_height,
+                                  const int input_width,
+                                  const int channels,  // not read
+                                  T* output_data,
+                                  const int output_height,
+                                  const int output_width) {
+  CUDA_KERNEL_LOOP(linearIndex, nthreads) {
+    // plane == n * channels + c; kept local so output_data never drifts.
+    const int plane = linearIndex / input_width / input_height;
+    T* plane_out = output_data + plane * output_height * output_width;
+    const int maxind = indices_data[linearIndex];
+    plane_out[maxind] = input_data[linearIndex];
+  }
+}
+
+// Max-unpool 3-D forward: plane_out[indices_data[i]] = input_data[i].
+template <typename T>
+__global__ void KernelUnpool3dMax(const int nthreads,
+                                  const T* input_data,
+                                  const int* indices_data,
+                                  const int input_depth,
+                                  const int input_height,
+                                  const int input_width,
+                                  const int channels,  // not read
+                                  T* output_data,
+                                  const int output_depth,
+                                  const int output_height,
+                                  const int output_width) {
+  CUDA_KERNEL_LOOP(linearIndex, nthreads) {
+    // plane == n * channels + c; kept local so output_data never drifts.
+    const int plane = linearIndex / input_depth / input_width / input_height;
+    T* plane_out =
+        output_data + plane * output_depth * output_height * output_width;
+    plane_out[indices_data[linearIndex]] = input_data[linearIndex];
+  }
+}
+
+template <typename T, typename Context>
+class Unpool2dMaxFunctor {
+ public:
+  void operator()(const Context& dev_ctx,
+                  const DenseTensor& input,
+                  const DenseTensor& indices,
+                  DenseTensor* output) {
+    // Launches KernelUnpool2dMax with grid * threads >= input.numel().
+    const int in_h = input.dims()[2];
+    const int in_w = input.dims()[3];
+    const int out_c = output->dims()[1];
+    const int out_h = output->dims()[2];
+    const int out_w = output->dims()[3];
+    const T* input_data = input.data<T>();
+    const int* indices_data = indices.data<int>();
+    T* output_data = dev_ctx.template Alloc<T>(output);
+#ifdef __HIPCC__
+    const int threads = 256;
+#else
+    const int threads = 1024;
+#endif
+    const int grid = (input.numel() + threads - 1) / threads;
+    KernelUnpool2dMax<T>
+        <<<grid, threads, 0, dev_ctx.stream()>>>(input.numel(),
+                                                 input_data,
+                                                 indices_data,
+                                                 in_h,
+                                                 in_w,
+                                                 out_c,
+                                                 output_data,
+                                                 out_h,
+                                                 out_w);
+  }
+};
+
+template <typename T, typename Context>
+class Unpool3dMaxFunctor {
+ public:
+  void operator()(const Context& dev_ctx,
+                  const DenseTensor& input,
+                  const DenseTensor& indices,
+                  DenseTensor* output) {
+    // Launches KernelUnpool3dMax with grid * threads >= input.numel().
+    const int in_d = input.dims()[2];
+    const int in_h = input.dims()[3];
+    const int in_w = input.dims()[4];
+    const int out_c = output->dims()[1];
+    const int out_d = output->dims()[2];
+    const int out_h = output->dims()[3];
+    const int out_w = output->dims()[4];
+    const T* input_data = input.data<T>();
+    const int* indices_data = indices.data<int>();
+    T* output_data = dev_ctx.template Alloc<T>(output);
+#ifdef __HIPCC__
+    const int threads = 256;
+#else
+    const int threads = 1024;
+#endif
+    const int grid = (input.numel() + threads - 1) / threads;
+    KernelUnpool3dMax<T>
+        <<<grid, threads, 0, dev_ctx.stream()>>>(input.numel(),
+                                                 input_data,
+                                                 indices_data,
+                                                 in_d,
+                                                 in_h,
+                                                 in_w,
+                                                 out_c,
+                                                 output_data,
+                                                 out_d,
+                                                 out_h,
+                                                 out_w);
+  }
+};
+
+template <typename T, typename Context>
+void UnpoolKernel(const Context& dev_ctx,
+                  const DenseTensor& x,
+                  const DenseTensor& indices,
+                  const std::vector<int>& ksize,
+                  const std::vector<int>& strides,
+                  const std::vector<int>& paddings,
+                  const std::vector<int>& output_size,
+                  const std::string& data_format,
+                  DenseTensor* out) {
+  // GPU entry point: zero-fill out (when allocated), then run the functor.
+  T* const out_buffer = dev_ctx.template Alloc<T>(out);
+  if (out_buffer != nullptr) {
+    phi::funcs::SetConstant<Context, T> fill;
+    fill(dev_ctx, out, static_cast<T>(0));
+  }
+  Unpool2dMaxFunctor<T, Context> scatter;
+  scatter(dev_ctx, x, indices, out);
+}
+
+template <typename T, typename Context>
+void Unpool3dKernel(const Context& dev_ctx,
+                    const DenseTensor& x,
+                    const DenseTensor& indices,
+                    const std::vector<int>& ksize,
+                    const std::vector<int>& strides,
+                    const std::vector<int>& paddings,
+                    const std::vector<int>& output_size,
+                    const std::string& data_format,
+                    DenseTensor* out) {
+  // GPU entry point: zero-fill out (when allocated), then run the functor.
+  T* const out_buffer = dev_ctx.template Alloc<T>(out);
+  if (out_buffer != nullptr) {
+    phi::funcs::SetConstant<Context, T> fill;
+    fill(dev_ctx, out, static_cast<T>(0));
+  }
+  Unpool3dMaxFunctor<T, Context> scatter;
+  scatter(dev_ctx, x, indices, out);
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL(
+    unpool, GPU, ALL_LAYOUT, phi::UnpoolKernel, int, float, double) {}
+// NOTE(review): int is listed here but not in the CPU unpool registrations.
+PD_REGISTER_KERNEL(
+    unpool3d, GPU, ALL_LAYOUT, phi::Unpool3dKernel, int, float, double) {}
diff --git a/paddle/phi/kernels/impl/fold_grad_kernel_impl.h b/paddle/phi/kernels/impl/fold_grad_kernel_impl.h
new file mode 100644
index 00000000000..b9320eab850
--- /dev/null
+++ b/paddle/phi/kernels/impl/fold_grad_kernel_impl.h
@@ -0,0 +1,75 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <vector>
+
+#include "paddle/fluid/operators/math/im2col.h"
+#include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/kernels/funcs/math_function.h"
+#include "paddle/phi/kernels/funcs/unfold_functor.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void FoldGradKernel(const Context& ctx,
+                    const DenseTensor& x,  // not read; x_grad supplies dims
+                    const DenseTensor& out_grad,
+                    const std::vector<int>& output_sizes,
+                    const std::vector<int>& kernel_sizes,
+                    const std::vector<int>& strides,
+                    const std::vector<int>& paddings,
+                    const std::vector<int>& dilations,
+                    DenseTensor* x_grad) {
+  // Per batch item i: im2col(out_grad[i]) is written into x_grad[i].
+  // The null guard has to run before Alloc, which already uses x_grad.
+  if (!x_grad) return;
+  ctx.template Alloc<T>(x_grad);
+  const auto& x_dims = x_grad->dims();
+  const int batch_size = static_cast<int>(x_dims[0]);
+  // Index 0 of each attribute vector is used for height, index 1 for width.
+  int output_height = (output_sizes[0] + 2 * paddings[0] -
+                       (dilations[0] * (kernel_sizes[0] - 1) + 1)) /
+                          strides[0] +
+                      1;
+  int output_width = (output_sizes[1] + 2 * paddings[1] -
+                      (dilations[1] * (kernel_sizes[1] - 1) + 1)) /
+                         strides[1] +
+                     1;
+
+  int n_input_plane = x_dims[1];
+  int n_output_plane = n_input_plane / (kernel_sizes[0] * kernel_sizes[1]);
+
+  DDim out_shape =
+      make_ddim({n_output_plane, output_sizes[0], output_sizes[1]});
+  DDim input_matrix_shape = make_ddim({x_dims[0],
+                                       kernel_sizes[0],
+                                       kernel_sizes[1],
+                                       output_height,
+                                       output_width});
+
+  paddle::operators::math::
+      Im2ColFunctor<paddle::operators::math::ColFormat::kCFO, Context, T>
+          im2col;
+  // Each slice is reshaped to the layouts built above before im2col runs.
+  for (int i = 0; i < batch_size; i++) {
+    DenseTensor out_grad_batch = out_grad.Slice(i, i + 1).Resize(out_shape);
+    DenseTensor x_grad_batch =
+        x_grad->Slice(i, i + 1).Resize(input_matrix_shape);
+    im2col(ctx, out_grad_batch, dilations, strides, paddings, &x_grad_batch);
+  }
+}
+
+}  // namespace phi
diff --git a/paddle/phi/kernels/impl/fold_kernel_impl.h b/paddle/phi/kernels/impl/fold_kernel_impl.h
new file mode 100644
index 00000000000..415beca7bd9
--- /dev/null
+++ b/paddle/phi/kernels/impl/fold_kernel_impl.h
@@ -0,0 +1,77 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <vector>
+
+#include "paddle/fluid/operators/math/im2col.h"
+#include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/enforce.h"
+#include "paddle/phi/kernels/funcs/math_function.h"
+#include "paddle/phi/kernels/funcs/unfold_functor.h"
+
+namespace phi {
+
+// Folds the sliding-block columns in x back into out: out is zero-filled,
+// then Col2ImFunctor (kCFO) runs on per-batch-item views of x and out.
+template <typename T, typename Context>
+void FoldKernel(const Context& ctx,
+                const DenseTensor& x,
+                const std::vector<int>& output_sizes,
+                const std::vector<int>& kernel_sizes,
+                const std::vector<int>& strides,
+                const std::vector<int>& paddings,
+                const std::vector<int>& dilations,
+                DenseTensor* out) {
+  const auto& x_dims = x.dims();
+  const int batch_size = static_cast<int>(x_dims[0]);
+  ctx.template Alloc<T>(out);
+
+  // Sliding-block positions per axis; index 0 is height, index 1 is width.
+  const int span_h = dilations[0] * (kernel_sizes[0] - 1) + 1;
+  const int span_w = dilations[1] * (kernel_sizes[1] - 1) + 1;
+  int output_height =
+      (output_sizes[0] + 2 * paddings[0] - span_h) / strides[0] + 1;
+  int output_width =
+      (output_sizes[1] + 2 * paddings[1] - span_w) / strides[1] + 1;
+
+  int n_input_plane = static_cast<int>(x_dims[1]);
+  int n_output_plane = n_input_plane / (kernel_sizes[0] * kernel_sizes[1]);
+  DDim output_shape =
+      make_ddim({n_output_plane, output_sizes[0], output_sizes[1]});
+
+  // Lead with the channel count, not x_dims[0] (the batch size): one slice
+  // holds n_input_plane = n_output_plane * kh * kw rows, whatever the batch.
+  DDim input_matrix_shape = make_ddim({n_output_plane,
+                                       kernel_sizes[0],
+                                       kernel_sizes[1],
+                                       output_height,
+                                       output_width});
+
+  // Zero out first; presumably col2im accumulates overlapping blocks.
+  phi::funcs::SetConstant<Context, T> set_zero;
+  set_zero(ctx, out, static_cast<T>(0));
+
+  paddle::operators::math::
+      Col2ImFunctor<paddle::operators::math::ColFormat::kCFO, Context, T>
+          col2im;
+  for (int i = 0; i < batch_size; i++) {
+    DenseTensor out_batch = out->Slice(i, i + 1).Resize(output_shape);
+    DenseTensor in_batch = x.Slice(i, i + 1).Resize(input_matrix_shape);
+    col2im(ctx, in_batch, dilations, strides, paddings, &out_batch);
+  }
+}
+
+}  // namespace phi
diff --git a/paddle/phi/kernels/unpool_grad_kernel.h b/paddle/phi/kernels/unpool_grad_kernel.h
new file mode 100644
index 00000000000..a270d700a1c
--- /dev/null
+++ b/paddle/phi/kernels/unpool_grad_kernel.h
@@ -0,0 +1,47 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/phi/core/dense_tensor.h"
+
+namespace phi {
+
+template <typename T, typename Context>  // declaration only
+void UnpoolGradKernel(const Context& dev_ctx,  // names: cf. unpool_sig.cc
+                      const DenseTensor& x,         // op input "X"
+                      const DenseTensor& indices,   // op input "Indices"
+                      const DenseTensor& out,       // op input "Out"
+                      const DenseTensor& out_grad,  // op input "Out@GRAD"
+                      const std::vector<int>& ksize,
+                      const std::vector<int>& strides,
+                      const std::vector<int>& paddings,
+                      const std::vector<int>& output_size,
+                      const std::string& data_format,
+                      DenseTensor* x_grad);  // op output "X@GRAD"
+
+template <typename T, typename Context>  // declaration only
+void Unpool3dGradKernel(const Context& dev_ctx,  // names: cf. unpool3d_sig.cc
+                        const DenseTensor& x,         // op input "X"
+                        const DenseTensor& indices,   // op input "Indices"
+                        const DenseTensor& out,       // op input "Out"
+                        const DenseTensor& out_grad,  // op input "Out@GRAD"
+                        const std::vector<int>& ksize,
+                        const std::vector<int>& strides,
+                        const std::vector<int>& paddings,
+                        const std::vector<int>& output_size,
+                        const std::string& data_format,
+                        DenseTensor* x_grad);  // op output "X@GRAD"
+
+}  // namespace phi
diff --git a/paddle/phi/kernels/unpool_kernel.h b/paddle/phi/kernels/unpool_kernel.h
new file mode 100644
index 00000000000..fb537f27667
--- /dev/null
+++ b/paddle/phi/kernels/unpool_kernel.h
@@ -0,0 +1,43 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/phi/core/dense_tensor.h"
+
+namespace phi {
+
+template <typename T, typename Context>  // declaration only
+void UnpoolKernel(const Context& dev_ctx,  // names: cf. unpool_sig.cc
+                  const DenseTensor& x,        // op input "X"
+                  const DenseTensor& indices,  // op input "Indices"
+                  const std::vector<int>& ksize,
+                  const std::vector<int>& strides,
+                  const std::vector<int>& paddings,
+                  const std::vector<int>& output_size,
+                  const std::string& data_format,
+                  DenseTensor* out);  // op output "Out"
+
+template <typename T, typename Context>  // declaration only
+void Unpool3dKernel(const Context& dev_ctx,  // names: cf. unpool3d_sig.cc
+                    const DenseTensor& x,        // op input "X"
+                    const DenseTensor& indices,  // op input "Indices"
+                    const std::vector<int>& ksize,
+                    const std::vector<int>& strides,
+                    const std::vector<int>& paddings,
+                    const std::vector<int>& output_size,
+                    const std::string& data_format,
+                    DenseTensor* out);  // op output "Out"
+
+}  // namespace phi
diff --git a/paddle/phi/ops/compat/fold_sig.cc b/paddle/phi/ops/compat/fold_sig.cc
new file mode 100644
index 00000000000..ed8ac084ba0
--- /dev/null
+++ b/paddle/phi/ops/compat/fold_sig.cc
@@ -0,0 +1,26 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/phi/core/compat/op_utils.h"
+
+namespace phi {
+
+KernelSignature FoldGradOpArgumentMapping(const ArgumentMappingContext& ctx) {
+  return KernelSignature(  // (kernel, inputs, attrs, outputs); ctx unused
+      "fold_grad",
+      {"X", "Y@GRAD"},
+      {"output_sizes", "kernel_sizes", "strides", "paddings", "dilations"},
+      {"X@GRAD"});
+}
+
+}  // namespace phi
+
+PD_REGISTER_ARG_MAPPING_FN(fold_grad, phi::FoldGradOpArgumentMapping);
diff --git a/paddle/phi/ops/compat/unpool3d_sig.cc b/paddle/phi/ops/compat/unpool3d_sig.cc
new file mode 100644
index 00000000000..c73aca837d5
--- /dev/null
+++ b/paddle/phi/ops/compat/unpool3d_sig.cc
@@ -0,0 +1,37 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/core/compat/op_utils.h"
+namespace phi {
+
+KernelSignature Unpool3dOpArgumentMapping(const ArgumentMappingContext& ctx) {
+  return KernelSignature(  // (kernel, inputs, attrs, outputs); ctx unused
+      "unpool3d",
+      {"X", "Indices"},
+      {"ksize", "strides", "paddings", "output_size", "data_format"},
+      {"Out"});
+}
+
+KernelSignature Unpool3dGradOpArgumentMapping(
+    const ArgumentMappingContext& ctx) {  // ctx is never read
+  return KernelSignature(  // (kernel, inputs, attrs, outputs)
+      "unpool3d_grad",
+      {"X", "Indices", "Out", "Out@GRAD"},
+      {"ksize", "strides", "paddings", "output_size", "data_format"},
+      {"X@GRAD"});
+}
+
+}  // namespace phi
+PD_REGISTER_ARG_MAPPING_FN(unpool3d, phi::Unpool3dOpArgumentMapping);
+PD_REGISTER_ARG_MAPPING_FN(unpool3d_grad, phi::Unpool3dGradOpArgumentMapping);
diff --git a/paddle/phi/ops/compat/unpool_sig.cc b/paddle/phi/ops/compat/unpool_sig.cc
new file mode 100644
index 00000000000..fb751b4b4b6
--- /dev/null
+++ b/paddle/phi/ops/compat/unpool_sig.cc
@@ -0,0 +1,36 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/core/compat/op_utils.h"
+namespace phi {
+
+KernelSignature UnpoolOpArgumentMapping(const ArgumentMappingContext& ctx) {
+  return KernelSignature(  // (kernel, inputs, attrs, outputs); ctx unused
+      "unpool",
+      {"X", "Indices"},
+      {"ksize", "strides", "paddings", "output_size", "data_format"},
+      {"Out"});
+}
+
+KernelSignature UnpoolGradOpArgumentMapping(const ArgumentMappingContext& ctx) {
+  return KernelSignature(  // (kernel, inputs, attrs, outputs); ctx unused
+      "unpool_grad",
+      {"X", "Indices", "Out", "Out@GRAD"},
+      {"ksize", "strides", "paddings", "output_size", "data_format"},
+      {"X@GRAD"});
+}
+
+}  // namespace phi
+PD_REGISTER_ARG_MAPPING_FN(unpool, phi::UnpoolOpArgumentMapping);
+PD_REGISTER_ARG_MAPPING_FN(unpool_grad, phi::UnpoolGradOpArgumentMapping);
diff --git a/python/paddle/fluid/tests/unittests/test_fold_op.py b/python/paddle/fluid/tests/unittests/test_fold_op.py
index a919cac6b7d..fc873cda95b 100644
--- a/python/paddle/fluid/tests/unittests/test_fold_op.py
+++ b/python/paddle/fluid/tests/unittests/test_fold_op.py
@@ -91,13 +91,14 @@ class TestFoldOp(OpTest):
 
     def setUp(self):
         self.op_type = 'fold'
+        self.python_api = paddle.nn.functional.fold
         self.set_data()
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_eager=True)
 
     def test_check_grad(self):
-        self.check_grad(['X'], 'Y')
+        self.check_grad(['X'], 'Y', check_eager=True)
 
 
 class TestFoldAPI(TestFoldOp):
@@ -106,6 +107,7 @@ class TestFoldAPI(TestFoldOp):
 
     def setUp(self):
         self.op_type = 'fold'
+        self.python_api = paddle.nn.functional.fold
         self.set_data()
         self.places = [fluid.CPUPlace()]
         if core.is_compiled_with_cuda():
diff --git a/python/paddle/fluid/tests/unittests/test_pad_op.py b/python/paddle/fluid/tests/unittests/test_pad_op.py
index 30044fec755..15b33bc9700 100644
--- a/python/paddle/fluid/tests/unittests/test_pad_op.py
+++ b/python/paddle/fluid/tests/unittests/test_pad_op.py
@@ -17,6 +17,7 @@ from __future__ import print_function
 import unittest
 import numpy as np
 from op_test import OpTest
+import paddle
 import paddle.fluid.core as core
 import paddle.fluid as fluid
 from paddle.fluid import Program, program_guard
diff --git a/python/paddle/fluid/tests/unittests/test_unpool3d_op.py b/python/paddle/fluid/tests/unittests/test_unpool3d_op.py
index 1fbff100a3d..ec04ca56a54 100644
--- a/python/paddle/fluid/tests/unittests/test_unpool3d_op.py
+++ b/python/paddle/fluid/tests/unittests/test_unpool3d_op.py
@@ -63,10 +63,30 @@ def unpool3dmax_forward_naive(input, indices, ksize, strides, paddings,
     return out
 
 
+def max_unpool3d_wrapper(x,
+                         indices,
+                         kernel_size,
+                         stride=None,
+                         padding=0,
+                         output_size=None,
+                         data_format="NCDHW",
+                         name=None):
+    """Forward to max_unpool3d, passing all optional arguments by keyword."""
+    # Keywords make the call independent of this wrapper's parameter order.
+    optional_args = dict(stride=stride,
+                         padding=padding,
+                         output_size=output_size,
+                         data_format=data_format,
+                         name=name)
+    return paddle.nn.functional.max_unpool3d(x, indices, kernel_size,
+                                             **optional_args)
+
+
 class TestUnpool3DOp(OpTest):
 
     def setUp(self):
         self.op_type = "unpool3d"
+        self.python_api = max_unpool3d_wrapper
         self.init_test_case()
         inputs = np.random.randint(0, 100, self.shape)
         nsize, csize, dsize, hsize, wsize = inputs.shape
@@ -102,10 +122,10 @@ class TestUnpool3DOp(OpTest):
         self.outputs = {'Out': output.astype('float64')}
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_eager=True)
 
     def test_check_grad(self):
-        self.check_grad(['X'], 'Out')
+        self.check_grad(['X'], 'Out', check_eager=True)
 
     def init_test_case(self):
         self.unpool3d_forward_naive = unpool3dmax_forward_naive
diff --git a/python/paddle/fluid/tests/unittests/test_unpool_op.py b/python/paddle/fluid/tests/unittests/test_unpool_op.py
index 1b6d3d9dfb7..07e44ce65d6 100644
--- a/python/paddle/fluid/tests/unittests/test_unpool_op.py
+++ b/python/paddle/fluid/tests/unittests/test_unpool_op.py
@@ -17,6 +17,7 @@ from __future__ import print_function
 import unittest
 import numpy as np
 from op_test import OpTest
+import paddle
 
 
 def _unpool_output_size(x, kernel_size, stride, padding, output_size):
@@ -53,10 +54,30 @@ def unpool2dmax_forward_naive(input, indices, ksize, strides, paddings,
     return out
 
 
+def max_unpool2d_wrapper(x,
+                         indices,
+                         kernel_size,
+                         stride=None,
+                         padding=0,
+                         output_size=None,
+                         data_format="NCHW",
+                         name=None):
+    """Forward to max_unpool2d, passing all optional arguments by keyword."""
+    # Keywords make the call independent of this wrapper's parameter order.
+    optional_args = dict(stride=stride,
+                         padding=padding,
+                         output_size=output_size,
+                         data_format=data_format,
+                         name=name)
+    return paddle.nn.functional.max_unpool2d(x, indices, kernel_size,
+                                             **optional_args)
+
+
 class TestUnpoolOp(OpTest):
 
     def setUp(self):
         self.op_type = "unpool"
+        self.python_api = max_unpool2d_wrapper
         self.init_test_case()
         input = np.random.randint(0, 100, self.shape)
         nsize, csize, hsize, wsize = input.shape
@@ -91,10 +112,10 @@ class TestUnpoolOp(OpTest):
         self.outputs = {'Out': output.astype('float64')}
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_eager=True)
 
     def test_check_grad(self):
-        self.check_grad(['X'], 'Out')
+        self.check_grad(['X'], 'Out', check_eager=True)
 
     def init_test_case(self):
         self.unpool2d_forward_naive = unpool2dmax_forward_naive
@@ -127,7 +148,7 @@ class TestUnpoolOpOutputsize(TestUnpoolOp):
         self.ksize = [4, 4]
         self.strides = [2, 2]
         self.paddings = [0, 0]
-        self.output_size = [9, 9]
+        self.output_size = [12, 12]
 
 
 class TestUnpoolOpOutput(TestUnpoolOp):
@@ -139,7 +160,7 @@ class TestUnpoolOpOutput(TestUnpoolOp):
         self.ksize = [4, 4]
         self.strides = [2, 2]
         self.paddings = [0, 0]
-        self.output_size = [9, 9]
+        self.output_size = [12, 12]
 
 
 class TestUnpoolOpException(unittest.TestCase):
diff --git a/python/paddle/nn/functional/common.py b/python/paddle/nn/functional/common.py
index b5e34199aaf..90762e7e439 100644
--- a/python/paddle/nn/functional/common.py
+++ b/python/paddle/nn/functional/common.py
@@ -2103,7 +2103,10 @@ def fold(x,
             "Unexpected type of paddings, it should be either an integer or a list"
             "of 2 or 4 integers")
 
-    if in_dynamic_mode():
+    if in_dygraph_mode():
+        out = _C_ops.final_state_fold(x, output_sizes, kernel_sizes, strides,
+                                      paddings, dilations)
+    elif in_dynamic_mode():
         out = _C_ops.fold(x, "output_sizes", output_sizes, "kernel_sizes",
                           kernel_sizes, "strides", strides, "paddings",
                           paddings, "dilations", dilations)
diff --git a/python/paddle/nn/functional/pooling.py b/python/paddle/nn/functional/pooling.py
index 4bb53e1737b..c89c0aee50f 100755
--- a/python/paddle/nn/functional/pooling.py
+++ b/python/paddle/nn/functional/pooling.py
@@ -782,7 +782,11 @@ def max_unpool1d(x,
     output_size = _unpool_output_size(x, kernel_size, stride, padding,
                                       output_size)
 
-    if in_dynamic_mode():
+    if in_dygraph_mode():
+        output = _C_ops.final_state_unpool(x, indices, kernel_size, stride,
+                                           padding, output_size, data_format)
+        return squeeze(output, [2])
+    elif in_dynamic_mode():
         output = _C_ops.unpool(x, indices, 'unpooling_type', 'max', 'ksize',
                                kernel_size, 'strides', stride, 'paddings',
                                padding, "output_size", output_size,
@@ -838,7 +842,6 @@ def max_unpool2d(x,
             it must contain an integer.
         stride (int|list|tuple): The unpool stride size. If unpool stride size is a tuple or list,
             it must contain an integer.
-        kernel_size (int|tuple): Size of the max unpooling window.
         padding (int | tuple): Padding that was added to the input.
         output_size(list|tuple, optional): The target output size. If output_size is not specified, 
                            the actual output shape will be automatically calculated by (input_shape,
@@ -898,7 +901,11 @@ def max_unpool2d(x,
     output_size = _unpool_output_size(x, kernel_size, stride, padding,
                                       output_size)
 
-    if in_dynamic_mode():
+    if in_dygraph_mode():
+        output = _C_ops.final_state_unpool(x, indices, kernel_size, stride,
+                                           padding, output_size, data_format)
+
+    elif in_dynamic_mode():
         output = _C_ops.unpool(x, indices, 'unpooling_type', 'max', 'ksize',
                                kernel_size, 'strides', stride, 'paddings',
                                padding, "output_size", output_size,
@@ -1011,7 +1018,10 @@ def max_unpool3d(x,
     output_size = _unpool_output_size(x, kernel_size, stride, padding,
                                       output_size)
 
-    if in_dynamic_mode():
+    if in_dygraph_mode():
+        output = _C_ops.final_state_unpool3d(x, indices, kernel_size, stride,
+                                             padding, output_size, data_format)
+    elif in_dynamic_mode():
         output = _C_ops.unpool3d(x, indices, 'unpooling_type', 'max', 'ksize',
                                  kernel_size, 'strides', stride, 'paddings',
                                  padding, "output_size", output_size,
-- 
GitLab