support auto-gen psroi_pool,roi_pool,roi_align (#54958)

bae4cbec · lzydev · GitHub · e6b3e283 · e6b3e283 · e6b3e283
12 changed files
--- a/paddle/fluid/operators/psroi_pool_op.cc
+++ b/paddle/fluid/operators/psroi_pool_op.cc
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/framework/infershape_utils.h"
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/phi/core/infermeta_utils.h"
-#include "paddle/phi/infermeta/backward.h"
-#include "paddle/phi/infermeta/multiary.h"
-
-namespace paddle {
-namespace operators {
-
-class PSROIPoolOpMaker : public framework::OpProtoAndCheckerMaker {
- public:
-  void Make() override {
-    AddInput("X",
-             "(Tensor), "
-             "the input of PSROIPoolOp. "
-             "The format of input tensor is NCHW. Where N is the batch size, "
-             "C is the number of input channels, "
-             "H is the height of the input feature map, and "
-             "W is the width. The data type can be float32 or float64");
-    AddInput("ROIs",
-             "(phi::DenseTensor), "
-             "ROIs (Regions of Interest) to pool over. "
-             "should be a 2-D phi::DenseTensor of shape (num_rois, 4) "
-             "given as [(x1, y1, x2, y2), ...]. "
-             "where (x1, y1) is the top left coordinates, and "
-             "(x2, y2) is the bottom right coordinates. "
-             "The roi batch index can be calculated from LoD.");
-    AddInput("RoisNum",
-             "(Tensor), "
-             "The number of RoIs in each image.")
-        .AsDispensable();
-    AddOutput("Out",
-              "(Tensor), "
-              "the output of PSROIPoolOp is a 4-D Tensor with shape "
-              "(num_rois, output_channels, pooled_h, pooled_w). "
-              "The data type is the same as `x` ");
-    AddAttr<int>(
-        "output_channels",
-        "(int), "
-        "the number of channels of the output feature map. "
-        "For a task of C classes of objects, output_channels should be "
-        "(C + 1) for classification only.");
-    AddAttr<float>("spatial_scale",
-                   "(float, default 1.0), "
-                   "Multiplicative spatial scale factor "
-                   "to translate ROI coords from their input scale "
-                   "to the scale used when pooling.")
-        .SetDefault(1.0);
-    AddAttr<int>("pooled_height",
-                 "(int, default 1), "
-                 "the pooled output height.")
-        .SetDefault(1);
-    AddAttr<int>("pooled_width",
-                 "(int, default 1), "
-                 "the pooled output width.")
-        .SetDefault(1);
-    AddComment(R"Doc(
-Position sensitive region of interest pooling (also known as PSROIPooling) is to perform
-position-sensitive average pooling on regions of interest specified by input, takes as
-input N position-sensitive score maps and a list of num_rois regions of interest.
-
-PSROIPooling for R-FCN. Please refer to https://arxiv.org/abs/1605.06409 for more details.
-    )Doc");
-  }
-};
-
-class PSROIPoolOp : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-
- protected:
-  phi::KernelKey GetExpectedKernelType(
-      const framework::ExecutionContext& ctx) const override {
-    return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"),
-                          ctx.GetPlace());
-  }
-};
-
-class PSROIPoolGradOp : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-
- protected:
-  phi::KernelKey GetExpectedKernelType(
-      const framework::ExecutionContext& ctx) const override {
-    return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"),
-                          ctx.GetPlace());
-  }
-};
-
-template <typename T>
-class PSROIPoolGradMaker : public framework::SingleGradOpMaker<T> {
- public:
-  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
-
- protected:
-  void Apply(GradOpPtr<T> op) const override {
-    op->SetType("psroi_pool_grad");
-    op->SetInput("X", this->Input("X"));
-    op->SetInput("ROIs", this->Input("ROIs"));
-    op->SetInput("RoisNum", this->Input("RoisNum"));
-    op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
-    op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
-    op->SetAttrMap(this->Attrs());
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-DECLARE_INFER_SHAPE_FUNCTOR(psroi_pool,
-                            PsroiPoolInferShapeFunctor,
-                            PD_INFER_META(phi::PsroiPoolInferMeta));
-DECLARE_INFER_SHAPE_FUNCTOR(psroi_pool_grad,
-                            PsroiPoolGradInferShapeFunctor,
-                            PD_INFER_META(phi::PsroiPoolGradInferMeta));
-REGISTER_OPERATOR(psroi_pool,
-                  ops::PSROIPoolOp,
-                  ops::PSROIPoolOpMaker,
-                  ops::PSROIPoolGradMaker<paddle::framework::OpDesc>,
-                  ops::PSROIPoolGradMaker<paddle::imperative::OpBase>,
-                  PsroiPoolInferShapeFunctor);
-REGISTER_OPERATOR(psroi_pool_grad,
-                  ops::PSROIPoolGradOp,
-                  PsroiPoolGradInferShapeFunctor);
--- a/paddle/fluid/operators/roi_align_op.cc
+++ b/paddle/fluid/operators/roi_align_op.cc
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include <memory>
-
-#include "paddle/fluid/framework/infershape_utils.h"
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/phi/core/infermeta_utils.h"
-#include "paddle/phi/infermeta/ternary.h"
-
-namespace paddle {
-namespace operators {
-
-class ROIAlignOp : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-
- protected:
-  phi::KernelKey GetExpectedKernelType(
-      const framework::ExecutionContext& ctx) const override {
-    return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"),
-                          ctx.GetPlace());
-  }
-};
-
-class ROIAlignGradOp : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInput(framework::GradVarName("Out")),
-        true,
-        platform::errors::NotFound("The GRAD@Out of ROIAlignGradOp "
-                                   "is not found."));
-    PADDLE_ENFORCE_EQ(ctx->HasOutputs(framework::GradVarName("X")),
-                      true,
-                      platform::errors::NotFound("The GRAD@X of ROIAlignGradOp "
-                                                 "is not found."));
-    ctx->SetOutputsDim(framework::GradVarName("X"), ctx->GetInputsDim("X"));
-  }
-
- protected:
-  phi::KernelKey GetExpectedKernelType(
-      const framework::ExecutionContext& ctx) const override {
-    return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "ROIs"),
-                          ctx.GetPlace());
-  }
-};
-
-class ROIAlignOpMaker : public framework::OpProtoAndCheckerMaker {
- public:
-  void Make() override {
-    AddInput("X",
-             "(Tensor), "
-             "The input of ROIAlignOp. The data type is float32 or float64."
-             "The format of input tensor is NCHW. Where N is batch size, "
-             "C is the number of input channels, "
-             "H is the height of the feature, and "
-             "W is the width of the feature.");
-    AddInput("ROIs",
-             "(phi::DenseTensor), "
-             "ROIs (Regions of Interest) to pool over. "
-             "should be a 2-D phi::DenseTensor of shape (num_rois, 4)"
-             "given as [[x1, y1, x2, y2], ...]. "
-             "(x1, y1) is the top left coordinates, and "
-             "(x2, y2) is the bottom right coordinates.");
-    AddInput("RoisNum",
-             "(Tensor), "
-             "The number of RoIs in each image.")
-        .AsDispensable();
-    AddOutput("Out",
-              "(Tensor), "
-              "The output of ROIAlignOp is a 4-D tensor with shape "
-              "(num_rois, channels, pooled_h, pooled_w). The data type is "
-              "float32 or float64.");
-    AddAttr<float>("spatial_scale",
-                   "(float, default 1.0), "
-                   "Multiplicative spatial scale factor "
-                   "to translate ROI coords from their input scale "
-                   "to the scale used when pooling.")
-        .SetDefault(1.0);
-    AddAttr<int>("pooled_height",
-                 "(int, default 1), "
-                 "The pooled output height.")
-        .SetDefault(1);
-    AddAttr<int>("pooled_width",
-                 "(int, default 1), "
-                 "The pooled output width.")
-        .SetDefault(1);
-    AddAttr<int>("sampling_ratio",
-                 "(int,default -1),"
-                 "number of sampling points in the interpolation grid"
-                 "If <=0, then grid points are adaptive to roi_width "
-                 "and pooled_w, likewise for height")
-        .SetDefault(-1);
-    AddAttr<bool>("aligned",
-                  "(bool, default False),"
-                  "If true, pixel shift it by -0.5 for align more perfectly")
-        .SetDefault(false);
-    AddComment(R"DOC(
-**RoIAlign Operator**
-
-Region of interest align (also known as RoI align) is to perform
-bilinear interpolation on inputs of nonuniform sizes to obtain
-fixed-size feature maps (e.g. 7*7)
-
-Dividing each region proposal into equal-sized sections with
-the pooled_width and pooled_height. Location remains the origin
-result.
-
-In each ROI bin, the value of the four regularly sampled locations
-are computed directly through bilinear interpolation. The output is
-the mean of four locations.
-Thus avoid the misaligned problem.
-    )DOC");
-  }
-};
-
-template <typename T>
-class ROIAlignGradMaker : public framework::SingleGradOpMaker<T> {
- public:
-  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
-
- protected:
-  void Apply(GradOpPtr<T> op) const override {
-    op->SetType("roi_align_grad");
-    op->SetInput("X", this->Input("X"));
-    op->SetInput("ROIs", this->Input("ROIs"));
-    op->SetInput("RoisNum", this->Input("RoisNum"));
-    op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
-    op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
-    op->SetAttrMap(this->Attrs());
-  }
-};
-
-DECLARE_NO_NEED_BUFFER_VARS_INFERER(RoiAlignGradNoNeedBufVarsInferer, "X");
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-DECLARE_INFER_SHAPE_FUNCTOR(roi_align,
-                            RoiAlignInferShapeFunctor,
-                            PD_INFER_META(phi::RoiAlignInferMeta));
-
-REGISTER_OPERATOR(roi_align,
-                  ops::ROIAlignOp,
-                  ops::ROIAlignOpMaker,
-                  ops::ROIAlignGradMaker<paddle::framework::OpDesc>,
-                  ops::ROIAlignGradMaker<paddle::imperative::OpBase>,
-                  RoiAlignInferShapeFunctor);
-REGISTER_OPERATOR(roi_align_grad,
-                  ops::ROIAlignGradOp,
-                  ops::RoiAlignGradNoNeedBufVarsInferer);
-
-REGISTER_OP_VERSION(roi_align)
-    .AddCheckpoint(
-        R"ROC(
-              Incompatible upgrade of input [RpnRoisLod])ROC",
-        paddle::framework::compatible::OpVersionDesc().DeleteInput(
-            "RpnRoisLod",
-            "Delete RpnRoisLod due to incorrect input name and "
-            "it is not used in object detection models yet."))
-    .AddCheckpoint(
-        R"ROC(
-             Upgrade roi_align add a new input [RoisNum])ROC",
-        paddle::framework::compatible::OpVersionDesc().NewInput(
-            "RoisNum",
-            "The number of RoIs in each image. RoisNum is dispensable."))
-    .AddCheckpoint(
-        R"ROC(
-             Upgrade roi_align add a new input [aligned])ROC",
-        paddle::framework::compatible::OpVersionDesc().NewAttr(
-            "aligned",
-            "If true, pixel shift it by -0.5 for align more perfectly.",
-            false));
--- a/paddle/fluid/operators/roi_pool_op.cc
+++ b/paddle/fluid/operators/roi_pool_op.cc
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include <memory>
-
-#include "paddle/fluid/framework/infershape_utils.h"
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/phi/core/infermeta_utils.h"
-#include "paddle/phi/infermeta/ternary.h"
-
-namespace paddle {
-namespace operators {
-
-class ROIPoolOp : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-
- protected:
-  phi::KernelKey GetExpectedKernelType(
-      const framework::ExecutionContext& ctx) const override {
-    return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"),
-                          ctx.GetPlace());
-  }
-};
-
-class ROIPoolGradOp : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Out")),
-                   "Input",
-                   framework::GradVarName("Out"),
-                   "roi_pool");
-    OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("X")),
-                   "Output",
-                   framework::GradVarName("X"),
-                   "roi_pool");
-    ctx->SetOutputsDim(framework::GradVarName("X"), ctx->GetInputsDim("X"));
-  }
-
- protected:
-  phi::KernelKey GetExpectedKernelType(
-      const framework::ExecutionContext& ctx) const override {
-    return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"),
-                          ctx.GetPlace());
-  }
-};
-
-class ROIPoolOpMaker : public framework::OpProtoAndCheckerMaker {
- public:
-  void Make() override {
-    AddInput("X",
-             "(Tensor), "
-             "the input of ROIPoolOp. "
-             "The format of input tensor is NCHW. Where N is batch size, "
-             "C is the number of input channels, "
-             "H is the height of the feature, and "
-             "W is the width of the feature.");
-    AddInput("ROIs",
-             "(phi::DenseTensor), "
-             "ROIs (Regions of Interest) to pool over. "
-             "should be a 2-D phi::DenseTensor of shape (num_rois, 4)"
-             "given as [[x1, y1, x2, y2], ...]. "
-             "Where batch_id is the id of the data, "
-             "(x1, y1) is the top left coordinates, and "
-             "(x2, y2) is the bottom right coordinates.");
-    AddInput("RoisNum", "(Tensor), The number of RoIs in each image.")
-        .AsDispensable();
-    AddOutput("Out",
-              "(Tensor), "
-              "The output of ROIPoolOp is a 4-D tensor with shape "
-              "(num_rois, channels, pooled_h, pooled_w).");
-    AddOutput("Argmax",
-              "(Tensor), "
-              "Argmaxes corresponding to indices in X used "
-              "for gradient computation. Only output "
-              "if arg \"is_test\" is false.")
-        .AsIntermediate();
-    AddAttr<float>("spatial_scale",
-                   "(float, default 1.0), "
-                   "Multiplicative spatial scale factor "
-                   "to translate ROI coords from their input scale "
-                   "to the scale used when pooling.")
-        .SetDefault(1.0);
-    AddAttr<int>("pooled_height",
-                 "(int, default 1), "
-                 "The pooled output height.")
-        .SetDefault(1);
-    AddAttr<int>("pooled_width",
-                 "(int, default 1), "
-                 "The pooled output width.")
-        .SetDefault(1);
-    AddComment(R"DOC(
-**ROIPool Operator**
-
-Region of interest pooling (also known as RoI pooling) is to perform
-is to perform max pooling on inputs of nonuniform sizes to obtain
-fixed-size feature maps (e.g. 7*7).
-
-The operator has three steps:
-
-1. Dividing each region proposal into equal-sized sections with
-   the pooled_width and pooled_height
-
-2. Finding the largest value in each section
-
-3. Copying these max values to the output buffer
-
-ROI Pooling for Faster-RCNN. The link below is a further introduction:
-https://stackoverflow.com/questions/43430056/what-is-roi-layer-in-fast-rcnn
-    )DOC");
-  }
-};
-
-template <typename T>
-class ROIPoolGradMaker : public framework::SingleGradOpMaker<T> {
- public:
-  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
-
- protected:
-  void Apply(GradOpPtr<T> op) const override {
-    op->SetType("roi_pool_grad");
-    op->SetInput("X", this->Input("X"));
-    op->SetInput("ROIs", this->Input("ROIs"));
-    op->SetInput("RoisNum", this->Input("RoisNum"));
-    op->SetInput("Argmax", this->Output("Argmax"));
-    op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
-    op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
-    op->SetAttrMap(this->Attrs());
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-DECLARE_INFER_SHAPE_FUNCTOR(roi_pool,
-                            RoiPoolInferShapeFunctor,
-                            PD_INFER_META(phi::RoiPoolInferMeta));
-
-REGISTER_OPERATOR(roi_pool,
-                  ops::ROIPoolOp,
-                  ops::ROIPoolOpMaker,
-                  ops::ROIPoolGradMaker<paddle::framework::OpDesc>,
-                  ops::ROIPoolGradMaker<paddle::imperative::OpBase>,
-                  RoiPoolInferShapeFunctor);
-REGISTER_OPERATOR(roi_pool_grad, ops::ROIPoolGradOp);
-
-REGISTER_OP_VERSION(roi_pool)
-    .AddCheckpoint(
-        R"ROC(
-              Incompatible upgrade of input [RpnRoisLod])ROC",
-        paddle::framework::compatible::OpVersionDesc().DeleteInput(
-            "RpnRoisLod",
-            "Delete RpnRoisLod due to incorrect input name and "
-            "it is not used in object detection models yet."))
-    .AddCheckpoint(
-        R"ROC(
-              Upgrade roi_pool add a new input [RoisNum])ROC",
-        paddle::framework::compatible::OpVersionDesc().NewInput(
-            "RoisNum",
-            "The number of RoIs in each image. RoisNum is dispensable."));
--- a/paddle/phi/api/yaml/backward.yaml
+++ b/paddle/phi/api/yaml/backward.yaml
@@ -1645,6 +1645,18 @@
    func : prelu_grad
    data_type : x

+- backward_op : psroi_pool_grad
+  forward : psroi_pool (Tensor x, Tensor boxes, Tensor boxes_num, int pooled_height=1, int pooled_width=1, int output_channels=1, float spatial_scale=1.0) -> Tensor(out)
+  args : (Tensor x, Tensor boxes, Tensor boxes_num, Tensor out_grad, int pooled_height, int pooled_width, int output_channels, float spatial_scale)
+  output : Tensor(x_grad)
+  infer_meta :
+    func : GeneralUnaryGradInferMeta
+    param : [x]
+  kernel :
+    func : psroi_pool_grad
+    data_type : x
+  optional : boxes_num
+
 - backward_op : put_along_axis_grad
  forward : put_along_axis (Tensor arr, Tensor indices, Tensor value, int axis, str reduce = "assign") -> Tensor(out)
  args : (Tensor arr, Tensor indices, Tensor out_grad, int axis, str reduce)
@@ -1726,6 +1738,31 @@
  output : Tensor(x_grad)
  invoke : reverse(out_grad, axis)

+- backward_op : roi_align_grad
+  forward : roi_align (Tensor x, Tensor boxes, Tensor boxes_num, int pooled_height=1, int pooled_width=1, float spatial_scale=1.0, int sampling_ratio=-1, bool aligned=false) -> Tensor(out)
+  args : (Tensor x, Tensor boxes, Tensor boxes_num, Tensor out_grad, int pooled_height, int pooled_width, float spatial_scale, int sampling_ratio, bool aligned)
+  output : Tensor(x_grad)
+  infer_meta :
+    func : UnchangedInferMeta
+    param : [x]
+  kernel :
+    func : roi_align_grad
+    data_type : boxes
+  no_need_buffer : x
+  optional : boxes_num
+
+- backward_op : roi_pool_grad
+  forward : roi_pool (Tensor x, Tensor boxes, Tensor boxes_num, int pooled_height=1, int pooled_width=1, float spatial_scale=1.0) -> Tensor(out), Tensor(arg_max)
+  args : (Tensor x, Tensor boxes, Tensor boxes_num, Tensor arg_max, Tensor out_grad, int pooled_height, int pooled_width, float spatial_scale)
+  output : Tensor(x_grad)
+  infer_meta :
+    func : UnchangedInferMeta
+    param : [x]
+  kernel :
+    func : roi_pool_grad
+    data_type : x
+  optional : boxes_num
+
 - backward_op : roll_grad
  forward : roll(Tensor x, IntArray shifts, int64_t[] axis) -> Tensor(out)
  args : (Tensor x, Tensor out_grad, IntArray shifts, int64_t[] axis)

--- a/paddle/phi/api/yaml/legacy_backward.yaml
+++ b/paddle/phi/api/yaml/legacy_backward.yaml
@@ -535,18 +535,6 @@
    func : prod_grad
  composite: prod_grad(x, out, out_grad, dims, keep_dim, reduce_all, x_grad)

-- backward_op : psroi_pool_grad
-  forward : psroi_pool (Tensor x, Tensor boxes, Tensor boxes_num, int pooled_height, int pooled_width, int output_channels, float spatial_scale) -> Tensor(out)
-  args : (Tensor x, Tensor boxes, Tensor boxes_num, Tensor out_grad, int pooled_height, int pooled_width, int output_channels, float spatial_scale)
-  output : Tensor(x_grad)
-  infer_meta :
-    func : GeneralUnaryGradInferMeta
-    param : [x]
-  kernel :
-    func : psroi_pool_grad
-    data_type : x
-  optional : boxes_num
-
 - backward_op : relu6_grad
  forward : relu6 (Tensor x) -> Tensor(out)
  args : (Tensor out, Tensor out_grad)
@@ -619,31 +607,6 @@
    data_type: out_grad
  optional : sequence_length

-- backward_op : roi_align_grad
-  forward : roi_align (Tensor x, Tensor boxes, Tensor boxes_num, int pooled_height, int pooled_width, float spatial_scale, int sampling_ratio, bool aligned) -> Tensor(out)
-  args : (Tensor x, Tensor boxes, Tensor boxes_num, Tensor out_grad, int pooled_height, int pooled_width, float spatial_scale, int sampling_ratio, bool aligned)
-  output : Tensor(x_grad)
-  infer_meta :
-    func : UnchangedInferMeta
-    param : [x]
-  kernel :
-    func : roi_align_grad
-    data_type : boxes
-  no_need_buffer : x
-  optional : boxes_num
-
-- backward_op : roi_pool_grad
-  forward : roi_pool (Tensor x, Tensor boxes, Tensor boxes_num, int pooled_height, int pooled_width, float spatial_scale) -> Tensor(out), Tensor(arg_max)
-  args : (Tensor x, Tensor boxes, Tensor boxes_num, Tensor arg_max, Tensor out_grad, int pooled_height, int pooled_width, float spatial_scale)
-  output : Tensor(x_grad)
-  infer_meta :
-    func : UnchangedInferMeta
-    param : [x]
-  kernel :
-    func : roi_pool_grad
-    data_type : x
-  optional : boxes_num
-
 - backward_op : rrelu_grad
  forward : rrelu (Tensor x, float lower, float upper, bool is_test) -> Tensor(out), Tensor(noise)
  args : (Tensor x, Tensor noise, Tensor out_grad)

--- a/paddle/phi/api/yaml/legacy_ops.yaml
+++ b/paddle/phi/api/yaml/legacy_ops.yaml
@@ -717,17 +717,6 @@
    func : prod
  backward : prod_grad

-- op : psroi_pool
-  args : (Tensor x, Tensor boxes, Tensor boxes_num, int pooled_height, int pooled_width, int output_channels, float spatial_scale)
-  output : Tensor
-  infer_meta :
-    func : PsroiPoolInferMeta
-  kernel :
-    func : psroi_pool
-    data_type : x
-  optional : boxes_num
-  backward : psroi_pool_grad
-
 - op : randint
  args : (int low, int high, IntArray shape, DataType dtype=DataType::INT64, Place place={})
  output : Tensor(out)
@@ -817,29 +806,6 @@
  intermediate : reserve
  view : (dropout_state_in -> dropout_state_out)

-- op : roi_align
-  args : (Tensor x, Tensor boxes, Tensor boxes_num, int pooled_height, int pooled_width, float spatial_scale, int sampling_ratio, bool aligned)
-  output : Tensor
-  infer_meta :
-    func : RoiAlignInferMeta
-  kernel :
-    func : roi_align
-    data_type : x
-  optional : boxes_num
-  backward : roi_align_grad
-
-- op : roi_pool
-  args : (Tensor x, Tensor boxes, Tensor boxes_num, int pooled_height, int pooled_width, float spatial_scale)
-  output : Tensor(out), Tensor(arg_max)
-  infer_meta :
-    func : RoiPoolInferMeta
-  kernel :
-    func : roi_pool
-    data_type : x
-  optional : boxes_num
-  intermediate : arg_max
-  backward : roi_pool_grad
-
 - op : rrelu
  args : (Tensor x, float lower, float upper, bool is_test)
  output : Tensor(out), Tensor(noise)

--- a/paddle/phi/api/yaml/op_compat.yaml
+++ b/paddle/phi/api/yaml/op_compat.yaml
@@ -2054,6 +2054,13 @@
    prod_grad : GetReduceGradExpectedKernelType
  manual_signature : [prod]

+- op : psroi_pool
+  backward : psroi_pool_grad
+  inputs :
+    {x : X, boxes : ROIs, boxes_num : RoisNum}
+  outputs :
+    out : Out
+
 - op : put_along_axis
  backward : put_along_axis_grad
  inputs :
@@ -2178,6 +2185,20 @@
    { out : Out, dropout_state_out : DropoutState, state : State, reserve : Reserve}
  drop_empty_grad : [pre_state_grad, weight_list_grad]

+- op : roi_align
+  backward : roi_align_grad
+  inputs :
+    {x : X, boxes : ROIs, boxes_num : RoisNum}
+  outputs :
+    out : Out
+
+- op : roi_pool
+  backward : roi_pool_grad
+  inputs :
+    {x : X, boxes : ROIs, boxes_num : RoisNum}
+  outputs :
+    {out : Out, arg_max : Argmax}
+
 - op : roll
  backward : roll_grad
  inputs :

--- a/paddle/phi/api/yaml/op_version.yaml
+++ b/paddle/phi/api/yaml/op_version.yaml
@@ -360,6 +360,33 @@
          comment : Specify the data format of the input data
          default : "true"

+- op : roi_align
+  version :
+    - checkpoint : Incompatible upgrade of input [RpnRoisLod]
+      action :
+        - delete_input : RpnRoisLod
+          comment : Delete RpnRoisLod due to incorrect input name and it is not used in object detection models yet.
+    - checkpoint :  Upgrade roi_align add a new input [RoisNum]
+      action :
+        - add_input : RoisNum
+          comment : The number of RoIs in each image. RoisNum is dispensable.
+    - checkpoint :  Upgrade roi_align add a new input [aligned]
+      action :
+        - add_attr : aligned
+          comment : If true, pixel shift it by -0.5 for align more perfectly.
+          default : "false"
+
+- op : roi_pool
+  version :
+    - checkpoint :  Incompatible upgrade of input [RpnRoisLod]
+      action :
+        - delete_input : RpnRoisLod
+          comment : Delete RpnRoisLod due to incorrect input name and it is not used in object detection models yet.
+    - checkpoint :  Upgrade roi_pool add a new input [RoisNum]
+      action :
+        - add_input : RoisNum
+          comment : The number of RoIs in each image. RoisNum is dispensable.
+
 - op : roll
  version :
    - checkpoint : Upgrade roll add 1 attribute [axis], delete 1 attribute[dims].

--- a/paddle/phi/api/yaml/ops.yaml
+++ b/paddle/phi/api/yaml/ops.yaml
@@ -1864,6 +1864,17 @@
    func : prior_box
    data_type : input

+- op : psroi_pool
+  args : (Tensor x, Tensor boxes, Tensor boxes_num, int pooled_height=1, int pooled_width=1, int output_channels=1, float spatial_scale=1.0)
+  output : Tensor
+  infer_meta :
+    func : PsroiPoolInferMeta
+  kernel :
+    func : psroi_pool
+    data_type : x
+  optional : boxes_num
+  backward : psroi_pool_grad
+
 - op : put_along_axis
  args : (Tensor arr, Tensor indices, Tensor values, int axis, str reduce = "assign")
  output : Tensor(out)
@@ -1956,6 +1967,29 @@
  optional : mean_grad, master_param, master_param_outs
  inplace : (param -> param_out), (moment -> moment_out), (mean_square -> mean_square_out), (mean_grad -> mean_grad_out), (master_param->master_param_outs)

+- op : roi_align
+  args : (Tensor x, Tensor boxes, Tensor boxes_num, int pooled_height=1, int pooled_width=1, float spatial_scale=1.0, int sampling_ratio=-1, bool aligned=false)
+  output : Tensor
+  infer_meta :
+    func : RoiAlignInferMeta
+  kernel :
+    func : roi_align
+    data_type : x
+  optional : boxes_num
+  backward : roi_align_grad
+
+- op : roi_pool
+  args : (Tensor x, Tensor boxes, Tensor boxes_num, int pooled_height=1, int pooled_width=1, float spatial_scale=1.0)
+  output : Tensor(out), Tensor(arg_max)
+  infer_meta :
+    func : RoiPoolInferMeta
+  kernel :
+    func : roi_pool
+    data_type : x
+  optional : boxes_num
+  intermediate : arg_max
+  backward : roi_pool_grad
+
 - op : roll
  args : (Tensor x, IntArray shifts={}, int64_t[] axis={})
  output : Tensor(out)

--- a/paddle/phi/ops/compat/psroi_pool_sig.cc
+++ b/paddle/phi/ops/compat/psroi_pool_sig.cc
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "paddle/phi/core/compat/op_utils.h"
-
-namespace phi {
-
-KernelSignature PsroiPoolOpArgumentMapping(const ArgumentMappingContext& ctx) {
-  return KernelSignature(
-      "psroi_pool",
-      {"X", "ROIs", "RoisNum"},
-      {"pooled_height", "pooled_width", "output_channels", "spatial_scale"},
-      {"Out"});
-}
-
-KernelSignature PsroiPoolGradOpArgumentMapping(
-    const ArgumentMappingContext& ctx UNUSED) {
-  return KernelSignature(
-      "psroi_pool_grad",
-      {"X", "ROIs", "RoisNum", "Out@GRAD"},
-      {"pooled_height", "pooled_width", "output_channels", "spatial_scale"},
-      {"X@GRAD"});
-}
-
-}  // namespace phi
-
-PD_REGISTER_ARG_MAPPING_FN(psroi_pool, phi::PsroiPoolOpArgumentMapping);
-PD_REGISTER_ARG_MAPPING_FN(psroi_pool_grad,
-                           phi::PsroiPoolGradOpArgumentMapping);
--- a/paddle/phi/ops/compat/roi_align_sig.cc
+++ b/paddle/phi/ops/compat/roi_align_sig.cc
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "paddle/phi/core/compat/op_utils.h"
-
-namespace phi {
-
-KernelSignature RoiAlignOpArgumentMapping(
-    const ArgumentMappingContext& ctx UNUSED) {
-  return KernelSignature("roi_align",
-                         {"X", "ROIs", "RoisNum"},
-                         {"pooled_height",
-                          "pooled_width",
-                          "spatial_scale",
-                          "sampling_ratio",
-                          "aligned"},
-                         {"Out"});
-}
-
-KernelSignature RoiAlignGradOpArgumentMapping(
-    const ArgumentMappingContext& ctx UNUSED) {
-  return KernelSignature("roi_align_grad",
-                         {"X", "ROIs", "RoisNum", "Out@GRAD"},
-                         {"pooled_height",
-                          "pooled_width",
-                          "spatial_scale",
-                          "sampling_ratio",
-                          "aligned"},
-                         {"X@GRAD"});
-}
-
-}  // namespace phi
-
-PD_REGISTER_ARG_MAPPING_FN(roi_align, phi::RoiAlignOpArgumentMapping);
-PD_REGISTER_ARG_MAPPING_FN(roi_align_grad, phi::RoiAlignGradOpArgumentMapping);
--- a/paddle/phi/ops/compat/roi_pool_sig.cc
+++ b/paddle/phi/ops/compat/roi_pool_sig.cc
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "paddle/phi/core/compat/op_utils.h"
-
-namespace phi {
-
-KernelSignature RoiPoolOpArgumentMapping(
-    const ArgumentMappingContext& ctx UNUSED) {
-  return KernelSignature("roi_pool",
-                         {"X", "ROIs", "RoisNum"},
-                         {"pooled_height", "pooled_width", "spatial_scale"},
-                         {"Out", "Argmax"});
-}
-
-KernelSignature RoiPoolOpGradArgumentMapping(
-    const ArgumentMappingContext& ctx UNUSED) {
-  return KernelSignature("roi_pool_grad",
-                         {"X", "ROIs", "RoisNum", "Argmax", "Out@GRAD"},
-                         {"pooled_height", "pooled_width", "spatial_scale"},
-                         {"X@GRAD"});
-}
-
-}  // namespace phi
-
-PD_REGISTER_ARG_MAPPING_FN(roi_pool, phi::RoiPoolOpArgumentMapping);
-PD_REGISTER_ARG_MAPPING_FN(roi_pool_grad, phi::RoiPoolOpGradArgumentMapping);