From 445fce62f27d965a3c0ede195238bb9719ea2921 Mon Sep 17 00:00:00 2001
From: taixiurong
Date: Fri, 2 Sep 2022 15:51:45 +0800
Subject: [PATCH] xpu-paddlepaddle-38 [Task] Migrate bilinear_interp,
 nearest_interp to phi test=kunlun (#45608)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paddle/fluid/operators/interpolate_op_xpu.cc  | 292 ----------------
 .../fluid/operators/interpolate_v2_op_xpu.cc  | 324 ------------------
 .../phi/kernels/funcs/interpolate_function.h  |   8 +
 .../kernels/xpu/interpolate_grad_kernel.cc    | 236 +++++++++++++
 paddle/phi/kernels/xpu/interpolate_kernel.cc  | 234 +++++++++++++
 5 files changed, 478 insertions(+), 616 deletions(-)
 delete mode 100644 paddle/fluid/operators/interpolate_op_xpu.cc
 delete mode 100644 paddle/fluid/operators/interpolate_v2_op_xpu.cc
 create mode 100644 paddle/phi/kernels/xpu/interpolate_grad_kernel.cc
 create mode 100644 paddle/phi/kernels/xpu/interpolate_kernel.cc
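Note for reviewers: both the removed fluid kernels and their phi replacements below fold `align_corners` and `align_mode` into a single integer `trans_mode` argument of `xpu::interpolate2d` / `xpu::interpolate2d_grad`, with a special case for nearest interpolation in the backward pass. A minimal sketch of that selection logic, lifted into a standalone helper for clarity (the helper name is ours, and the meaning of the numeric values is defined by the XDNN library, not by this patch):

#include <string>

// Hypothetical helper mirroring the trans_mode selection in this patch.
// align_corners wins (mode 0); otherwise align_mode picks mode 1 or 2,
// and the nearest backward path never uses mode 1.
inline int GetXPUTransMode(bool align_corners,
                           int align_mode,
                           const std::string& interp_method,
                           bool is_grad) {
  int trans_mode = align_corners ? 0 : ((align_mode == 0) ? 1 : 2);
  if (is_grad && interp_method == "nearest") {
    trans_mode = align_corners ? 0 : 2;
  }
  return trans_mode;
}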
diff --git a/paddle/fluid/operators/interpolate_op_xpu.cc b/paddle/fluid/operators/interpolate_op_xpu.cc
deleted file mode 100644
index 7f8fb5a23d3..00000000000
--- a/paddle/fluid/operators/interpolate_op_xpu.cc
+++ /dev/null
@@ -1,292 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-   http://www.apache.org/licenses/LICENSE-2.0
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License. */
-
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/interpolate_op.h"
-
-#ifdef PADDLE_WITH_XPU
-
-namespace paddle {
-namespace operators {
-
-using framework::Tensor;
-using DataLayout = framework::DataLayout;
-
-inline std::vector<int> get_new_shape_xpu(
-    const std::vector<const Tensor*>& list_new_shape_tensor) {
-  // get tensor from
-  std::vector<int> vec_new_shape;
-  for (size_t i = 0; i < list_new_shape_tensor.size(); ++i) {
-    auto tensor = list_new_shape_tensor[i];
-    PADDLE_ENFORCE_EQ(
-        tensor->dims(),
-        phi::make_ddim({1}),
-        platform::errors::InvalidArgument("shape of dim tensor should be [1]"));
-    if (platform::is_xpu_place(tensor->place())) {
-      framework::Tensor temp;
-      paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp);
-      vec_new_shape.push_back(static_cast<int32_t>(*temp.data<int32_t>()));
-    } else {
-      vec_new_shape.push_back(static_cast<int32_t>(*tensor->data<int32_t>()));
-    }
-  }
-
-  return vec_new_shape;
-}
-
-template <typename T>
-inline std::vector<T> get_new_data_from_tensor_xpu(
-    const Tensor* new_data_tensor) {
-  std::vector<T> vec_new_data;
-  auto* new_data = new_data_tensor->data<T>();
-  framework::Tensor cpu_starts_tensor;
-  if (platform::is_xpu_place(new_data_tensor->place())) {
-    paddle::framework::TensorCopySync(
-        *new_data_tensor, platform::CPUPlace(), &cpu_starts_tensor);
-    new_data = cpu_starts_tensor.data<T>();
-  }
-  vec_new_data = std::vector<T>(new_data, new_data + new_data_tensor->numel());
-  return vec_new_data;
-}
-
-template <typename T>
-class InterpolateXPUKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* input = ctx.Input<Tensor>("X");
-    auto* output = ctx.Output<Tensor>("Out");
-
-    auto input_dims = input->dims();
-    PADDLE_ENFORCE_EQ(
-        input_dims.size(),
-        4,
-        platform::errors::External("XPU Interpolate kernel only support 2d"));
-
-    const std::string data_layout_str = ctx.Attr<std::string>("data_layout");
-    const DataLayout data_layout =
-        framework::StringToDataLayout(data_layout_str);
-    int n, c, in_d, in_h, in_w;
-    ExtractNCDWH(input_dims, data_layout, &n, &c, &in_d, &in_h, &in_w);
-
-    auto interp_method = ctx.Attr<std::string>("interp_method");
-    bool align_corners = ctx.Attr<bool>("align_corners");
-    int align_mode = ctx.Attr<int>("align_mode");
-
-    int out_h = ctx.Attr<int>("out_h");
-    int out_w = ctx.Attr<int>("out_w");
-
-    auto list_new_size_tensor = ctx.MultiInput<framework::Tensor>("SizeTensor");
-    if (list_new_size_tensor.size() > 0) {
-      // have size tensor
-      auto new_size = get_new_shape_xpu(list_new_size_tensor);
-      out_h = new_size[0];
-      out_w = new_size[1];
-    } else {
-      float scale;
-      auto scale_tensor = ctx.Input<Tensor>("Scale");
-      if (scale_tensor != nullptr) {
-        auto scale_data = get_new_data_from_tensor_xpu<float>(scale_tensor);
-        scale = scale_data[0];
-      } else {
-        scale = ctx.Attr<float>("scale");
-      }
-      if (scale > 0) {
-        out_h = static_cast<int>(in_h * scale);
-        out_w = static_cast<int>(in_w * scale);
-      }
-      auto out_size = ctx.Input<Tensor>("OutSize");
-      if (out_size != nullptr) {
-        auto out_size_data = get_new_data_from_tensor_xpu<int>(out_size);
-        out_h = out_size_data[0];
-        out_w = out_size_data[1];
-      }
-    }
-    PADDLE_ENFORCE_GT(
-        out_h,
-        0,
-        platform::errors::InvalidArgument("out_h in Attr(out_shape) of "
-                                          "Op(interpolate) "
-                                          "should be greater than 0."));
-    PADDLE_ENFORCE_GT(
-        out_w,
-        0,
-        platform::errors::InvalidArgument("out_w in Attr(out_shape) of "
-                                          "Op(interpolate) "
-                                          "should be greater than 0."));
-    framework::DDim dim_out;
-    if (data_layout == DataLayout::kNCHW) {
-      dim_out = {n, c, out_h, out_w};
-    } else {
-      dim_out = {n, out_h, out_w, c};
-    }
-    output->mutable_data<T>(dim_out, ctx.GetPlace());
-
-    if (in_h == out_h && in_w == out_w) {
-      framework::TensorCopy(*input, ctx.GetPlace(), output);
-      return;
-    }
-    bool nearest = "nearest" == interp_method;
-    int trans_mode = (align_corners) ? (0) : ((align_mode == 0) ? (1) : (2));
-    auto& dev_ctx = ctx.template device_context<platform::XPUDeviceContext>();
-    if (nearest) {
-      PADDLE_ENFORCE_EQ((data_layout == DataLayout::kNCHW),
-                        true,
-                        platform::errors::InvalidArgument(
-                            "XPU nearest is only support NCHW"));
-    }
-    int r = xpu::interpolate2d(dev_ctx.x_context(),
-                               input->data<T>(),
-                               output->data<T>(),
-                               n,
-                               c,
-                               in_h,
-                               in_w,
-                               out_h,
-                               out_w,
-                               nearest,
-                               trans_mode,
-                               (data_layout == DataLayout::kNCHW));
-    PADDLE_ENFORCE_EQ(r,
-                      XPU_SUCCESS,
-                      platform::errors::External("XPU interpolate2d kernel "
-                                                 "return wrong value[%d %s]",
-                                                 r,
-                                                 XPUAPIErrorMsg[r]));
-  }
-};
-
-template <typename T>
-class InterpolateGradXPUKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* input_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
-    auto* output_grad = ctx.Input<Tensor>(framework::GradVarName("Out"));
-
-    auto output_grad_dims = output_grad->dims();
-
-    PADDLE_ENFORCE_EQ(output_grad_dims.size(),
-                      4,
-                      platform::errors::External(
-                          "XPU Interpolategrad kernel only support 2d"));
-
-    auto* input = ctx.Input<Tensor>("X");
-    const std::string data_layout_str = ctx.Attr<std::string>("data_layout");
-    const DataLayout data_layout =
-        framework::StringToDataLayout(data_layout_str);
-    int n, c, in_d, in_h, in_w;
-    ExtractNCDWH(input->dims(), data_layout, &n, &c, &in_d, &in_h, &in_w);
-
-    auto interp_method = ctx.Attr<std::string>("interp_method");
-    bool align_corners = ctx.Attr<bool>("align_corners");
-    int align_mode = ctx.Attr<int>("align_mode");
-
-    int out_h = ctx.Attr<int>("out_h");
-    int out_w = ctx.Attr<int>("out_w");
-    float scale;
-    auto scale_tensor = ctx.Input<Tensor>("Scale");
-    if (scale_tensor != nullptr) {
-      auto scale_data = get_new_data_from_tensor_xpu<float>(scale_tensor);
-      scale = scale_data[0];
-    } else {
-      scale = ctx.Attr<float>("scale");
-    }
-    if (scale > 0) {
-      out_h = static_cast<int>(in_h * scale);
-      out_w = static_cast<int>(in_w * scale);
-    }
-    auto out_size = ctx.Input<Tensor>("OutSize");
-    if (out_size != nullptr) {
-      auto out_size_data = get_new_data_from_tensor_xpu<int>(out_size);
-      out_h = out_size_data[0];
-      out_w = out_size_data[1];
-    }
-    auto list_new_size_tensor = ctx.MultiInput<framework::Tensor>("SizeTensor");
-    if (list_new_size_tensor.size() > 0) {
-      // have size tensor
-      auto new_size = get_new_shape_xpu(list_new_size_tensor);
-      out_h = new_size[0];
-      out_w = new_size[1];
-    }
-
-    framework::DDim dim_grad;
-    if (data_layout == DataLayout::kNCHW) {
-      dim_grad = {n, c, in_h, in_w};
-    } else {
-      dim_grad = {n, in_h, in_w, c};
-    }
-    input_grad->mutable_data<T>(dim_grad, ctx.GetPlace());
-
-    auto& dev_ctx = ctx.template device_context<platform::XPUDeviceContext>();
-
-    int r = XPU_SUCCESS;
-    r = xpu::constant(dev_ctx.x_context(),
-                      input_grad->data<T>(),
-                      input_grad->numel(),
-                      static_cast<T>(0.0));
-    PADDLE_ENFORCE_EQ(r,
-                      XPU_SUCCESS,
-                      platform::errors::External(
-                          "XPU constant in interpolate2d_grad kernel return "
-                          "wrong value[%d %s]",
-                          r,
-                          XPUAPIErrorMsg[r]));
-
-    if (in_h == out_h && in_w == out_w) {
-      framework::TensorCopy(*output_grad, ctx.GetPlace(), input_grad);
-      return;
-    }
-
-    bool nearest = "nearest" == interp_method;
-    int trans_mode = (align_corners) ? (0) : ((align_mode == 0) ? (1) : (2));
-
-    if (nearest) {
-      trans_mode = (align_corners) ? (0) : (2);
-    }
-
-    r = xpu::interpolate2d_grad(dev_ctx.x_context(),
-                                output_grad->data<T>(),
-                                input_grad->data<T>(),
-                                n,
-                                c,
-                                in_h,
-                                in_w,
-                                out_h,
-                                out_w,
-                                nearest,
-                                trans_mode,
-                                (data_layout == DataLayout::kNCHW));
-    PADDLE_ENFORCE_EQ(
-        r,
-        XPU_SUCCESS,
-        platform::errors::External("XPU interpolate2d_grad kernel return "
-                                   "wrong value[%d %s]",
-                                   r,
-                                   XPUAPIErrorMsg[r]));
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-
-REGISTER_OP_XPU_KERNEL(bilinear_interp, ops::InterpolateXPUKernel<float>);
-REGISTER_OP_XPU_KERNEL(nearest_interp, ops::InterpolateXPUKernel<float>);
-
-REGISTER_OP_XPU_KERNEL(bilinear_interp_grad,
-                       ops::InterpolateGradXPUKernel<float>);
-REGISTER_OP_XPU_KERNEL(nearest_interp_grad,
-                       ops::InterpolateGradXPUKernel<float>);
-#endif
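The v1 operator deleted above reads a single scalar `scale` attribute, while the v2 operator deleted below accepts separate height/width factors (a `Scale` tensor or a `std::vector<float>` attribute), with `SizeTensor` taking priority and `OutSize` applied last. A hedged sketch of that v2-style resolution order (the helper and its name are illustrative only, not part of the patch):

#include <vector>

// Illustrative only: how the v2 kernels pick out_h/out_w.
// Priority: SizeTensor (if present) > scale (tensor or attr) > OutSize.
struct OutHW { int h; int w; };

inline OutHW ResolveOutHW(int in_h, int in_w, int attr_out_h, int attr_out_w,
                          const std::vector<int>& size_tensor,   // "SizeTensor"
                          const std::vector<float>& scale,       // tensor or attr
                          const std::vector<int>& out_size) {    // "OutSize"
  if (!size_tensor.empty()) return {size_tensor[0], size_tensor[1]};
  int out_h = attr_out_h, out_w = attr_out_w;
  if (!scale.empty()) {
    float scale_h = scale[0];
    float scale_w = scale.size() > 1 ? scale[1] : scale[0];
    if (scale_h > 0.f && scale_w > 0.f) {
      out_h = static_cast<int>(in_h * scale_h);
      out_w = static_cast<int>(in_w * scale_w);
    }
  }
  if (!out_size.empty()) { out_h = out_size[0]; out_w = out_size[1]; }
  return {out_h, out_w};
}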
diff --git a/paddle/fluid/operators/interpolate_v2_op_xpu.cc b/paddle/fluid/operators/interpolate_v2_op_xpu.cc
deleted file mode 100644
index b2fc5ca48c2..00000000000
--- a/paddle/fluid/operators/interpolate_v2_op_xpu.cc
+++ /dev/null
@@ -1,324 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-   http://www.apache.org/licenses/LICENSE-2.0
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License. */
-
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/phi/kernels/funcs/interpolate_function.h"
-#ifdef PADDLE_WITH_XPU
-
-namespace paddle {
-namespace operators {
-
-using framework::Tensor;
-using DataLayout = framework::DataLayout;
-
-inline std::vector<int> get_new_shape_xpu(
-    const std::vector<const Tensor*>& list_new_shape_tensor) {
-  // get tensor from
-  std::vector<int> vec_new_shape;
-  for (size_t i = 0; i < list_new_shape_tensor.size(); ++i) {
-    auto tensor = list_new_shape_tensor[i];
-    PADDLE_ENFORCE_EQ(
-        tensor->dims(),
-        phi::make_ddim({1}),
-        platform::errors::InvalidArgument("shape of dim tensor should be [1]"));
-    framework::Tensor temp;
-    paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp);
-    vec_new_shape.push_back(static_cast<int32_t>(*temp.data<int32_t>()));
-  }
-
-  return vec_new_shape;
-}
-
-template <typename T>
-class InterpolateV2XPUKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* input = ctx.Input<Tensor>("X");
-    auto* output = ctx.Output<Tensor>("Out");
-
-    auto input_dims = input->dims();
-    PADDLE_ENFORCE_EQ(
-        input_dims.size(),
-        4,
-        platform::errors::External("XPU Interpolate kernel only support 2d"));
-
-    const std::string data_layout_str = ctx.Attr<std::string>("data_layout");
-    const DataLayout data_layout =
-        framework::StringToDataLayout(data_layout_str);
-    int n, c, in_d, in_h, in_w;
-    phi::funcs::ExtractNCDWH(
-        input_dims, data_layout, &n, &c, &in_d, &in_h, &in_w);
-
-    auto interp_method = ctx.Attr<std::string>("interp_method");
-    bool align_corners = ctx.Attr<bool>("align_corners");
-    int align_mode = ctx.Attr<int>("align_mode");
-
-    int out_h = ctx.Attr<int>("out_h");
-    int out_w = ctx.Attr<int>("out_w");
-    float scale_h = -1;
-    float scale_w = -1;
-
-    auto list_new_size_tensor = ctx.MultiInput<framework::Tensor>("SizeTensor");
-    if (list_new_size_tensor.size() > 0) {
-      // have size tensor
-      auto new_size = get_new_shape_xpu(list_new_size_tensor);
-      out_h = new_size[0];
-      out_w = new_size[1];
-    } else {
-      auto scale_tensor = ctx.Input<Tensor>("Scale");
-      auto scale = ctx.Attr<std::vector<float>>("scale");
-      if (scale_tensor != nullptr) {
-        auto scale_data =
-            phi::funcs::get_new_data_from_tensor<float>(scale_tensor);
-        if (scale_data.size() > 1) {
-          scale_h = scale_data[0];
-          scale_w = scale_data[1];
-        } else {
-          scale_h = scale_data[0];
-          scale_w = scale_data[0];
-        }
-        PADDLE_ENFORCE_EQ(
-            scale_w > 0 && scale_h > 0,
-            true,
-            platform::errors::InvalidArgument("scale of Op(interpolate) "
-                                              "should be greater than 0."));
-      } else {
-        if (scale.size() > 1) {
-          scale_h = scale[0];
-          scale_w = scale[1];
-
-          PADDLE_ENFORCE_EQ(
-              scale_w > 0 && scale_h > 0,
-              true,
-              platform::errors::InvalidArgument("scale of Op(interpolate) "
                                                "should be greater than 0."));
-        }
-      }
-      if (scale_h > 0. && scale_w > 0.) {
-        out_h = static_cast<int>(in_h * scale_h);
-        out_w = static_cast<int>(in_w * scale_w);
-      }
-      auto out_size = ctx.Input<Tensor>("OutSize");
-      if (out_size != nullptr) {
-        auto out_size_data =
-            phi::funcs::get_new_data_from_tensor<int>(out_size);
-        out_h = out_size_data[0];
-        out_w = out_size_data[1];
-      }
-    }
-    PADDLE_ENFORCE_GT(
-        out_h,
-        0,
-        platform::errors::InvalidArgument("out_h in Attr(out_shape) of "
-                                          "Op(interpolate) "
-                                          "should be greater than 0."));
-    PADDLE_ENFORCE_GT(
-        out_w,
-        0,
-        platform::errors::InvalidArgument("out_w in Attr(out_shape) of "
-                                          "Op(interpolate) "
-                                          "should be greater than 0."));
-    framework::DDim dim_out;
-    if (data_layout == DataLayout::kNCHW) {
-      dim_out = {n, c, out_h, out_w};
-    } else {
-      dim_out = {n, out_h, out_w, c};
-    }
-    output->mutable_data<T>(dim_out, ctx.GetPlace());
-
-    if (in_h == out_h && in_w == out_w) {
-      framework::TensorCopy(*input, ctx.GetPlace(), output);
-      return;
-    }
-    bool nearest = "nearest" == interp_method;
-    int trans_mode = (align_corners) ? (0) : ((align_mode == 0) ? (1) : (2));
-    auto& dev_ctx = ctx.template device_context<platform::XPUDeviceContext>();
-    if (nearest) {
-      PADDLE_ENFORCE_EQ((data_layout == DataLayout::kNCHW),
-                        true,
-                        platform::errors::InvalidArgument(
-                            "XPU nearest is only support NCHW"));
-    }
-    int r = xpu::interpolate2d(dev_ctx.x_context(),
-                               input->data<T>(),
-                               output->data<T>(),
-                               n,
-                               c,
-                               in_h,
-                               in_w,
-                               out_h,
-                               out_w,
-                               nearest,
-                               trans_mode,
-                               (data_layout == DataLayout::kNCHW));
-    PADDLE_ENFORCE_EQ(r,
-                      XPU_SUCCESS,
-                      platform::errors::External("XPU interpolate2d kernel "
-                                                 "return wrong value[%d %s]",
-                                                 r,
-                                                 XPUAPIErrorMsg[r]));
-  }
-};
-
-template <typename T>
-class InterpolateV2GradXPUKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* input_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
-    auto* output_grad = ctx.Input<Tensor>(framework::GradVarName("Out"));
-
-    auto output_grad_dims = output_grad->dims();
-
-    PADDLE_ENFORCE_EQ(output_grad_dims.size(),
-                      4,
-                      platform::errors::External(
-                          "XPU Interpolategrad kernel only support 2d"));
-
-    auto* input = ctx.Input<Tensor>("X");
-    const std::string data_layout_str = ctx.Attr<std::string>("data_layout");
-    const DataLayout data_layout =
-        framework::StringToDataLayout(data_layout_str);
-    int n, c, in_d, in_h, in_w;
-    phi::funcs::ExtractNCDWH(
-        input->dims(), data_layout, &n, &c, &in_d, &in_h, &in_w);
-
-    auto interp_method = ctx.Attr<std::string>("interp_method");
-    bool align_corners = ctx.Attr<bool>("align_corners");
-    int align_mode = ctx.Attr<int>("align_mode");
-
-    int out_h = ctx.Attr<int>("out_h");
-    int out_w = ctx.Attr<int>("out_w");
-    float scale_h = -1;
-    float scale_w = -1;
-
-    auto list_new_size_tensor = ctx.MultiInput<framework::Tensor>("SizeTensor");
-    if (list_new_size_tensor.size() > 0) {
-      // have size tensor
-      auto new_size = get_new_shape_xpu(list_new_size_tensor);
-      out_h = new_size[0];
-      out_w = new_size[1];
-    } else {
-      auto scale_tensor = ctx.Input<Tensor>("Scale");
-      auto scale = ctx.Attr<std::vector<float>>("scale");
-      if (scale_tensor != nullptr) {
-        auto scale_data =
-            phi::funcs::get_new_data_from_tensor<float>(scale_tensor);
-        if (scale_data.size() > 1) {
-          scale_h = scale_data[0];
-          scale_w = scale_data[1];
-        } else {
-          scale_h = scale_data[0];
-          scale_w = scale_data[0];
-        }
-        PADDLE_ENFORCE_EQ(
-            scale_w > 0 && scale_h > 0,
-            true,
-            platform::errors::InvalidArgument("scale of Op(interpolate) "
-                                              "should be greater than 0."));
-      } else {
-        if (scale.size() > 1) {
-          scale_h = scale[0];
-          scale_w = scale[1];
-
-          PADDLE_ENFORCE_EQ(
-              scale_w > 0 && scale_h > 0,
-              true,
-              platform::errors::InvalidArgument("scale of Op(interpolate) "
                                                "should be greater than 0."));
-        }
-      }
-      if (scale_h > 0. && scale_w > 0.) {
-        out_h = static_cast<int>(in_h * scale_h);
-        out_w = static_cast<int>(in_w * scale_w);
-      }
-      auto out_size = ctx.Input<Tensor>("OutSize");
-      if (out_size != nullptr) {
-        auto out_size_data =
-            phi::funcs::get_new_data_from_tensor<int>(out_size);
-        out_h = out_size_data[0];
-        out_w = out_size_data[1];
-      }
-    }
-
-    framework::DDim dim_grad;
-    if (data_layout == DataLayout::kNCHW) {
-      dim_grad = {n, c, in_h, in_w};
-    } else {
-      dim_grad = {n, in_h, in_w, c};
-    }
-    input_grad->mutable_data<T>(dim_grad, ctx.GetPlace());
-
-    auto& dev_ctx = ctx.template device_context<platform::XPUDeviceContext>();
-
-    int r = XPU_SUCCESS;
-    r = xpu::constant(dev_ctx.x_context(),
-                      input_grad->data<T>(),
-                      input_grad->numel(),
-                      static_cast<T>(0.0));
-    PADDLE_ENFORCE_EQ(r,
-                      XPU_SUCCESS,
-                      platform::errors::External(
-                          "XPU constant in interpolate2d_grad kernel return "
-                          "wrong value[%d %s]",
-                          r,
-                          XPUAPIErrorMsg[r]));
-
-    if (in_h == out_h && in_w == out_w) {
-      framework::TensorCopy(*output_grad, ctx.GetPlace(), input_grad);
-      return;
-    }
-
-    bool nearest = "nearest" == interp_method;
-    int trans_mode = (align_corners) ? (0) : ((align_mode == 0) ? (1) : (2));
-
-    if (nearest) {
-      trans_mode = (align_corners) ? (0) : (2);
-    }
-
-    r = xpu::interpolate2d_grad(dev_ctx.x_context(),
-                                output_grad->data<T>(),
-                                input_grad->data<T>(),
-                                n,
-                                c,
-                                in_h,
-                                in_w,
-                                out_h,
-                                out_w,
-                                nearest,
-                                trans_mode,
-                                (data_layout == DataLayout::kNCHW));
-    PADDLE_ENFORCE_EQ(
-        r,
-        XPU_SUCCESS,
-        platform::errors::External("XPU interpolate2d_grad kernel return "
-                                   "wrong value[%d %s]",
-                                   r,
-                                   XPUAPIErrorMsg[r]));
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-
-REGISTER_OP_XPU_KERNEL(bilinear_interp_v2, ops::InterpolateV2XPUKernel<float>);
-REGISTER_OP_XPU_KERNEL(nearest_interp_v2, ops::InterpolateV2XPUKernel<float>);
-
-REGISTER_OP_XPU_KERNEL(bilinear_interp_v2_grad,
-                       ops::InterpolateV2GradXPUKernel<float>);
-REGISTER_OP_XPU_KERNEL(nearest_interp_v2_grad,
-                       ops::InterpolateV2GradXPUKernel<float>);
-#endif
diff --git a/paddle/phi/kernels/funcs/interpolate_function.h b/paddle/phi/kernels/funcs/interpolate_function.h
index ecc2d0f893c..42adc94a642 100644
--- a/paddle/phi/kernels/funcs/interpolate_function.h
+++ b/paddle/phi/kernels/funcs/interpolate_function.h
@@ -91,6 +91,14 @@ inline std::vector<int> get_new_shape(
         errors::InvalidArgument("The shape of dimension tensor should be [1],"
                                 "but received d%.",
                                 tensor->dims()));
+#ifdef PADDLE_WITH_XPU
+    if (tensor->place().GetType() == phi::AllocationType::XPU) {
+      DenseTensor temp;
+      paddle::framework::TensorCopySync(*tensor, phi::CPUPlace(), &temp);
+      vec_new_shape.push_back(static_cast<int32_t>(*temp.data<int32_t>()));
+      continue;
+    }
+#endif
     if (paddle::platform::is_gpu_place(tensor->place())) {
       DenseTensor temp;
       paddle::framework::TensorCopySync(
diff --git a/paddle/phi/kernels/xpu/interpolate_grad_kernel.cc b/paddle/phi/kernels/xpu/interpolate_grad_kernel.cc
new file mode 100644
index 00000000000..33f0e3e948f
--- /dev/null
+++ b/paddle/phi/kernels/xpu/interpolate_grad_kernel.cc
@@ -0,0 +1,236 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/interpolate_grad_kernel.h"
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/backends/xpu/xpu_context.h"
+#include "paddle/phi/common/layout.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/funcs/interpolate_function.h"
+#include "paddle/phi/kernels/funcs/math_function.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void InterpolateGradKernel(
+    const Context& dev_ctx,
+    const DenseTensor& x,
+    const paddle::optional<DenseTensor>& out_size,
+    const paddle::optional<std::vector<const DenseTensor*>>& size_tensor,
+    const paddle::optional<DenseTensor>& scale_tensor,
+    const DenseTensor& output_grad,
+    const std::string& data_layout_str,
+    int out_d,
+    int out_h,
+    int out_w,
+    const std::vector<float>& scale,
+    const std::string& interp_method,
+    bool align_corners,
+    int align_mode,
+    DenseTensor* x_grad) {
+  const DataLayout data_layout =
+      paddle::framework::StringToDataLayout(data_layout_str);
+  int n, c, in_d, in_h, in_w;
+  funcs::ExtractNCDWH(x.dims(), data_layout, &n, &c, &in_d, &in_h, &in_w);
+
+  float scale_h = -1;
+  float scale_w = -1;
+  if (scale_tensor) {
+    auto scale_data =
+        funcs::get_new_data_from_tensor<float>(scale_tensor.get_ptr());
+    if (scale_data.size() > 1) {
+      scale_h = scale_data[0];
+      scale_w = scale_data[1];
+    } else {
+      scale_w = scale_data[0];
+      scale_h = scale_data[0];
+    }
+    PADDLE_ENFORCE_EQ(
+        scale_w > 0,
+        true,
+        errors::InvalidArgument(
+            "The scale_w in input 'Scale' Tensor of Operator(interpolate) "
+            "should be greater than 0, but received value is %f.",
+            scale_w));
+    PADDLE_ENFORCE_EQ(
+        scale_h > 0,
+        true,
+        errors::InvalidArgument(
+            "The scale_h in input 'Scale' Tensor of Operator(interpolate) "
+            "should be greater than 0, but received value is %f.",
+            scale_h));
+  } else {
+    if (scale.size() > 1) {
+      scale_h = scale[0];
+      scale_w = scale[1];
+      PADDLE_ENFORCE_EQ(
+          scale_w > 0,
+          true,
+          errors::InvalidArgument(
+              "The scale_w in Attr(scale) of Operator(interpolate) "
+              "should be greater than 0, but received value is %f.",
+              scale_w));
+      PADDLE_ENFORCE_EQ(
+          scale_h > 0,
+          true,
+          errors::InvalidArgument(
+              "The scale_h in Attr(scale) of Operator(interpolate) "
+              "should be greater than 0, but received value is %f.",
+              scale_h));
+    }
+  }
+  if (scale_h > 0. && scale_w > 0.) {
+    out_h = static_cast<int>(in_h * scale_h);
+    out_w = static_cast<int>(in_w * scale_w);
+  }
+  if (out_size) {
+    auto out_size_data =
+        funcs::get_new_data_from_tensor<int>(out_size.get_ptr());
+    out_h = out_size_data[0];
+    out_w = out_size_data[1];
+  }
+  if (size_tensor && size_tensor->size() > 0) {
+    // have size tensor
+    auto new_size = funcs::get_new_shape(size_tensor.get());
+    out_h = new_size[0];
+    out_w = new_size[1];
+  }
+
+  phi::DDim dim_grad;
+  if (data_layout == DataLayout::kNCHW) {
+    dim_grad = {n, c, in_h, in_w};
+  } else {
+    dim_grad = {n, in_h, in_w, c};
+  }
+
+  x_grad->Resize(dim_grad);
+  dev_ctx.template Alloc<T>(x_grad);
+
+  int r = XPU_SUCCESS;
+  r = xpu::constant(dev_ctx.x_context(),
+                    x_grad->data<T>(),
+                    x_grad->numel(),
+                    static_cast<T>(0.0));
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant");
+
+  if (in_h == out_h && in_w == out_w) {
+    phi::Copy(dev_ctx, output_grad, dev_ctx.GetPlace(), false, x_grad);
+    return;
+  }
+
+  bool nearest = "nearest" == interp_method;
+  int trans_mode = (align_corners) ? (0) : ((align_mode == 0) ? (1) : (2));
+
+  if (nearest) {
+    trans_mode = (align_corners) ? (0) : (2);
+  }
+
+  r = xpu::interpolate2d_grad(dev_ctx.x_context(),
+                              output_grad.data<T>(),
+                              x_grad->data<T>(),
+                              n,
+                              c,
+                              in_h,
+                              in_w,
+                              out_h,
+                              out_w,
+                              nearest,
+                              trans_mode,
+                              (data_layout == DataLayout::kNCHW));
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "interpolate2d_grad");
+}
+
+template <typename T, typename Context>
+void BilinearInterpGradKernel(
+    const Context& dev_ctx,
+    const DenseTensor& x,
+    const paddle::optional<DenseTensor>& out_size,
+    const paddle::optional<std::vector<const DenseTensor*>>& size_tensor,
+    const paddle::optional<DenseTensor>& scale_tensor,
+    const DenseTensor& out_grad,
+    const std::string& data_layout,
+    int out_d,
+    int out_h,
+    int out_w,
+    const std::vector<float>& scale,
+    const std::string& interp_method,
+    bool align_corners,
+    int align_mode,
+    DenseTensor* x_grad) {
+  InterpolateGradKernel<T, Context>(dev_ctx,
+                                    x,
+                                    out_size,
+                                    size_tensor,
+                                    scale_tensor,
+                                    out_grad,
+                                    data_layout,
+                                    out_d,
+                                    out_h,
+                                    out_w,
+                                    scale,
+                                    interp_method,
+                                    align_corners,
+                                    align_mode,
+                                    x_grad);
+}
+
+template <typename T, typename Context>
+void NearestInterpGradKernel(
+    const Context& dev_ctx,
+    const DenseTensor& x,
+    const paddle::optional<DenseTensor>& out_size,
+    const paddle::optional<std::vector<const DenseTensor*>>& size_tensor,
+    const paddle::optional<DenseTensor>& scale_tensor,
+    const DenseTensor& out_grad,
+    const std::string& data_layout,
+    int out_d,
+    int out_h,
+    int out_w,
+    const std::vector<float>& scale,
+    const std::string& interp_method,
+    bool align_corners,
+    int align_mode,
+    DenseTensor* x_grad) {
+  InterpolateGradKernel<T, Context>(dev_ctx,
+                                    x,
+                                    out_size,
+                                    size_tensor,
+                                    scale_tensor,
+                                    out_grad,
+                                    data_layout,
+                                    out_d,
+                                    out_h,
+                                    out_w,
+                                    scale,
+                                    interp_method,
+                                    align_corners,
+                                    align_mode,
+                                    x_grad);
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL(bilinear_interp_grad,
+                   XPU,
+                   ALL_LAYOUT,
+                   phi::BilinearInterpGradKernel,
+                   float) {
+  kernel->InputAt(2).SetBackend(phi::Backend::ALL_BACKEND);
+  kernel->InputAt(3).SetBackend(phi::Backend::ALL_BACKEND);
+}
+PD_REGISTER_KERNEL(
+    nearest_interp_grad, XPU, ALL_LAYOUT, phi::NearestInterpGradKernel, float) {
+  kernel->InputAt(2).SetBackend(phi::Backend::ALL_BACKEND);
+  kernel->InputAt(3).SetBackend(phi::Backend::ALL_BACKEND);
+}
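One readability win of the migration visible in both new files: every XDNN call returns an int status, and the hand-rolled `PADDLE_ENFORCE_EQ(r, XPU_SUCCESS, ...)` blocks of the fluid kernels become one-line `PADDLE_ENFORCE_XDNN_SUCCESS(r, "api_name")` checks. A self-contained mock of the convention (the macro below is a stand-in for illustration, not Paddle's actual implementation):

#include <cstdio>
#include <cstdlib>

// Stand-in for PADDLE_ENFORCE_XDNN_SUCCESS: abort with the failing API's
// name and status code when an XDNN call does not return XPU_SUCCESS (0).
#define CHECK_XDNN(r, api)                                              \
  do {                                                                  \
    if ((r) != 0 /* XPU_SUCCESS */) {                                   \
      std::fprintf(stderr, "XPU %s failed with code %d\n", api, (r));   \
      std::exit(1);                                                     \
    }                                                                   \
  } while (0)

int fake_xdnn_call() { return 0; }  // pretend XDNN API returning XPU_SUCCESS

int main() {
  int r = fake_xdnn_call();
  CHECK_XDNN(r, "interpolate2d_grad");
  return 0;
}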
diff --git a/paddle/phi/kernels/xpu/interpolate_kernel.cc b/paddle/phi/kernels/xpu/interpolate_kernel.cc
new file mode 100644
index 00000000000..e84ede46158
--- /dev/null
+++ b/paddle/phi/kernels/xpu/interpolate_kernel.cc
@@ -0,0 +1,234 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/interpolate_kernel.h"
+
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/backends/xpu/xpu_context.h"
+#include "paddle/phi/common/layout.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/funcs/interpolate_function.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void InterpolateKernel(
+    const Context& ctx,
+    const DenseTensor& x,
+    const paddle::optional<DenseTensor>& out_size,
+    const paddle::optional<std::vector<const DenseTensor*>>& size_tensor,
+    const paddle::optional<DenseTensor>& scale_tensor,
+    const std::string& data_layout_str,
+    int out_d,
+    int out_h,
+    int out_w,
+    const std::vector<float>& scale,
+    const std::string& interp_method,
+    bool align_corners,
+    int align_mode,
+    DenseTensor* output) {
+  const DataLayout data_layout =
+      paddle::framework::StringToDataLayout(data_layout_str);
+  int n, c, in_d, in_h, in_w;
+  phi::funcs::ExtractNCDWH(x.dims(), data_layout, &n, &c, &in_d, &in_h, &in_w);
+
+  float scale_h = -1;
+  float scale_w = -1;
+
+  if (size_tensor && size_tensor->size() > 0) {
+    // have size tensor
+    auto new_size = funcs::get_new_shape(size_tensor.get());
+    out_h = new_size[0];
+    out_w = new_size[1];
+  } else {
+    if (scale_tensor) {
+      auto scale_data =
+          funcs::get_new_data_from_tensor<float>(scale_tensor.get_ptr());
+      if (scale_data.size() > 1) {
+        scale_h = scale_data[0];
+        scale_w = scale_data[1];
+      } else {
+        scale_h = scale_data[0];
+        scale_w = scale_data[0];
+      }
+      PADDLE_ENFORCE_EQ(
+          scale_w > 0,
+          true,
+          errors::InvalidArgument(
+              "The scale_w in input 'Scale' Tensor of Operator(interpolate) "
+              "should be greater than 0, but received value is %f.",
+              scale_w));
+      PADDLE_ENFORCE_EQ(
+          scale_h > 0,
+          true,
+          errors::InvalidArgument(
+              "The scale_h in input 'Scale' Tensor of Operator(interpolate) "
+              "should be greater than 0, but received value is %f.",
+              scale_h));
+    } else {
+      if (scale.size() > 1) {
+        scale_h = scale[0];
+        scale_w = scale[1];
+
+        PADDLE_ENFORCE_EQ(
+            scale_w > 0,
+            true,
+            errors::InvalidArgument(
+                "The scale_w in Attr(scale) of Operator(interpolate) "
+                "should be greater than 0, but received value is %f.",
+                scale_w));
+        PADDLE_ENFORCE_EQ(
+            scale_h > 0,
+            true,
+            errors::InvalidArgument(
+                "The scale_h in Attr(scale) of Operator(interpolate) "
+                "should be greater than 0, but received value is %f.",
+                scale_h));
+      }
+    }
+    if (scale_h > 0. && scale_w > 0.) {
+      out_h = static_cast<int>(in_h * scale_h);
+      out_w = static_cast<int>(in_w * scale_w);
+    }
+    if (out_size) {
+      auto out_size_data =
+          funcs::get_new_data_from_tensor<int>(out_size.get_ptr());
+      out_h = out_size_data[0];
+      out_w = out_size_data[1];
+    }
+  }
+  PADDLE_ENFORCE_GT(
+      out_h,
+      0,
+      errors::InvalidArgument("out_h in Attr(out_shape) of Op(interpolate) "
+                              "should be greater than 0."));
+  PADDLE_ENFORCE_GT(
+      out_w,
+      0,
+      errors::InvalidArgument("out_w in Attr(out_shape) of Op(interpolate) "
+                              "should be greater than 0."));
+
+  phi::DDim dim_out;
+  if (data_layout == DataLayout::kNCHW) {
+    dim_out = {n, c, out_h, out_w};
+  } else {
+    dim_out = {n, out_h, out_w, c};
+  }
+  output->Resize(dim_out);
+  ctx.template Alloc<T>(output);
+
+  if (in_h == out_h && in_w == out_w) {
+    phi::Copy(ctx, x, ctx.GetPlace(), false, output);
+    return;
+  }
+  bool nearest = "nearest" == interp_method;
+  int trans_mode = (align_corners) ? (0) : ((align_mode == 0) ? (1) : (2));
+  if (nearest) {
+    PADDLE_ENFORCE_EQ(
+        (data_layout == DataLayout::kNCHW),
+        true,
+        errors::InvalidArgument("XPU nearest interpolation only supports NCHW"));
+  }
+
+  int r = xpu::interpolate2d(ctx.x_context(),
+                             x.data<T>(),
+                             output->data<T>(),
+                             n,
+                             c,
+                             in_h,
+                             in_w,
+                             out_h,
+                             out_w,
+                             nearest,
+                             trans_mode,
+                             (data_layout == DataLayout::kNCHW));
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "interpolate2d");
+}
+
+template <typename T, typename Context>
+void BilinearInterpKernel(
+    const Context& ctx,
+    const DenseTensor& x,
+    const paddle::optional<DenseTensor>& out_size,
+    const paddle::optional<std::vector<const DenseTensor*>>& size_tensor,
+    const paddle::optional<DenseTensor>& scale_tensor,
+    const std::string& data_layout,
+    int out_d,
+    int out_h,
+    int out_w,
+    const std::vector<float>& scale,
+    const std::string& interp_method,
+    bool align_corners,
+    int align_mode,
+    DenseTensor* output) {
+  InterpolateKernel<T, Context>(ctx,
+                                x,
+                                out_size,
+                                size_tensor,
+                                scale_tensor,
+                                data_layout,
+                                out_d,
+                                out_h,
+                                out_w,
+                                scale,
+                                interp_method,
+                                align_corners,
+                                align_mode,
+                                output);
+}
+
+template <typename T, typename Context>
+void NearestInterpKernel(
+    const Context& ctx,
+    const DenseTensor& x,
+    const paddle::optional<DenseTensor>& out_size,
+    const paddle::optional<std::vector<const DenseTensor*>>& size_tensor,
+    const paddle::optional<DenseTensor>& scale_tensor,
+    const std::string& data_layout,
+    int out_d,
+    int out_h,
+    int out_w,
+    const std::vector<float>& scale,
+    const std::string& interp_method,
+    bool align_corners,
+    int align_mode,
+    DenseTensor* output) {
+  InterpolateKernel<T, Context>(ctx,
+                                x,
+                                out_size,
+                                size_tensor,
+                                scale_tensor,
+                                data_layout,
+                                out_d,
+                                out_h,
+                                out_w,
+                                scale,
+                                interp_method,
+                                align_corners,
+                                align_mode,
+                                output);
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL(
+    bilinear_interp, XPU, ALL_LAYOUT, phi::BilinearInterpKernel, float) {
+  kernel->InputAt(2).SetBackend(phi::Backend::ALL_BACKEND);
+  kernel->InputAt(3).SetBackend(phi::Backend::ALL_BACKEND);
+}
+PD_REGISTER_KERNEL(
+    nearest_interp, XPU, ALL_LAYOUT, phi::NearestInterpKernel, float) {
+  kernel->InputAt(2).SetBackend(phi::Backend::ALL_BACKEND);
+  kernel->InputAt(3).SetBackend(phi::Backend::ALL_BACKEND);
+}
-- 
GitLab