diff --git a/paddle/fluid/operators/detection/generate_proposals_op.cc b/paddle/fluid/operators/detection/generate_proposals_op.cc
index 2ab094cd8b88702bc27826cfaeb2cd9ba73f8ba3..f9b82b66185002b1aca1c0d76430ef1a09b72854 100644
--- a/paddle/fluid/operators/detection/generate_proposals_op.cc
+++ b/paddle/fluid/operators/detection/generate_proposals_op.cc
@@ -341,6 +341,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
     lod0.push_back(0);
     anchors.Resize({anchors.numel() / 4, 4});
     variances.Resize({variances.numel() / 4, 4});
+    std::vector<int64_t> tmp_lod;

     int64_t num_proposals = 0;
     for (int64_t i = 0; i < num; ++i) {
@@ -362,6 +363,16 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
       AppendProposals(rpn_roi_probs, num_proposals, scores);
       num_proposals += proposals.dims()[0];
       lod0.push_back(num_proposals);
+      tmp_lod.push_back(num_proposals);
+    }
+    if (context.HasOutput("RpnRoisLod")) {
+      auto *rpn_rois_lod = context.Output<Tensor>("RpnRoisLod");
+      rpn_rois_lod->mutable_data<int64_t>({num}, context.GetPlace());
+      int64_t *lod_data = rpn_rois_lod->data<int64_t>();
+      for (int i = 0; i < num; i++) {
+        lod_data[i] = tmp_lod[i];
+      }
+      rpn_rois_lod->Resize({num, 1});
     }
     rpn_rois->set_lod(lod);
     rpn_roi_probs->set_lod(lod);
@@ -464,6 +475,7 @@ class GenerateProposalsOpMaker : public framework::OpProtoAndCheckerMaker {
               "(LoDTensor), Output proposals with shape (rois_num, 4).");
     AddOutput("RpnRoiProbs",
               "(LoDTensor) Scores of proposals with shape (rois_num, 1).");
+    AddOutput("RpnRoisLod", "(Tensor), rpn rois's lod info").AsDispensable();
     AddAttr<int>("pre_nms_topN",
                  "Number of top scoring RPN proposals to keep before "
                  "applying NMS.");
diff --git a/paddle/fluid/operators/detection/generate_proposals_op.cu b/paddle/fluid/operators/detection/generate_proposals_op.cu
index 10e111d6673cf43041c4267bebc089f834ddb99c..1144bff68da61b440a1b333bce5afe39c47ea4da 100644
--- a/paddle/fluid/operators/detection/generate_proposals_op.cu
+++ b/paddle/fluid/operators/detection/generate_proposals_op.cu
@@ -416,9 +416,12 @@ class CUDAGenerateProposalsKernel : public framework::OpKernel<T> {
     T *rpn_roi_probs_data = rpn_roi_probs->data<T>();

     auto place = boost::get<platform::CUDAPlace>(dev_ctx.GetPlace());
+    auto cpu_place = platform::CPUPlace();

     int64_t num_proposals = 0;
     std::vector<size_t> offset(1, 0);
+    std::vector<int64_t> tmp_lod;
+
     for (int64_t i = 0; i < num; ++i) {
       Tensor im_info_slice = im_info->Slice(i, i + 1);
       Tensor bbox_deltas_slice = bbox_deltas_swap.Slice(i, i + 1);
@@ -444,6 +447,15 @@ class CUDAGenerateProposalsKernel : public framework::OpKernel<T> {
       dev_ctx.Wait();
       num_proposals += proposals.dims()[0];
       offset.emplace_back(num_proposals);
+      tmp_lod.push_back(num_proposals);
+    }
+    if (context.HasOutput("RpnRoisLod")) {
+      auto *rpn_rois_lod = context.Output<Tensor>("RpnRoisLod");
+      rpn_rois_lod->mutable_data<int64_t>({num}, context.GetPlace());
+      int64_t *lod_data = rpn_rois_lod->data<int64_t>();
+      memory::Copy(place, lod_data, cpu_place, &tmp_lod[0],
+                   sizeof(int64_t) * num, dev_ctx.stream());
+      rpn_rois_lod->Resize({num});
     }
     framework::LoD lod;
     lod.emplace_back(offset);
diff --git a/paddle/fluid/operators/roi_align_op.cc b/paddle/fluid/operators/roi_align_op.cc
index b8fa1caadae545f851033dd41c6b0a70841c6321..1c8d8d3a392be46689f88c5b4c44f394c03fc56b 100644
--- a/paddle/fluid/operators/roi_align_op.cc
+++ b/paddle/fluid/operators/roi_align_op.cc
@@ -35,6 +35,14 @@ class ROIAlignOp : public framework::OperatorWithKernel {
     auto input_dims = ctx->GetInputDim("X");
     auto rois_dims = ctx->GetInputDim("ROIs");

+    if (ctx->HasInput("RoisLod")) {
+      auto rois_lod_dims = ctx->GetInputDim("RoisLod");
+      PADDLE_ENFORCE_EQ(
+          rois_lod_dims.size(), 1,
+          platform::errors::InvalidArgument("The RoisLod dimension should be 1"
+                                            ", but got dim = %d",
+                                            rois_lod_dims.size()));
+    }
     PADDLE_ENFORCE_EQ(
         input_dims.size(), 4,
         platform::errors::InvalidArgument(
@@ -136,6 +144,10 @@ class ROIAlignOpMaker : public framework::OpProtoAndCheckerMaker {
              "given as [[x1, y1, x2, y2], ...]. "
              "(x1, y1) is the top left coordinates, and "
              "(x2, y2) is the bottom right coordinates.");
+    AddInput("RoisLod",
+             "(Tensor), "
+             "The lod info of rois.")
+        .AsDispensable();
     AddOutput("Out",
               "(Tensor), "
               "The output of ROIAlignOp is a 4-D tensor with shape "
@@ -190,6 +202,7 @@ class ROIAlignGradMaker : public framework::SingleGradOpMaker<T> {
     op->SetType("roi_align_grad");
     op->SetInput("X", this->Input("X"));
     op->SetInput("ROIs", this->Input("ROIs"));
+    op->SetInput("RoisLod", this->Input("RoisLod"));
     op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
     op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
     op->SetAttrMap(this->Attrs());
@@ -210,8 +223,10 @@ REGISTER_OPERATOR(roi_align_grad, ops::ROIAlignGradOp,
 REGISTER_OP_CPU_KERNEL(
     roi_align,
     ops::CPUROIAlignOpKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::CPUROIAlignOpKernel<paddle::platform::CPUDeviceContext, double>);
+    ops::CPUROIAlignOpKernel<paddle::platform::CPUDeviceContext, double>,
+    ops::CPUROIAlignOpKernel<paddle::platform::CPUDeviceContext, int>);
 REGISTER_OP_CPU_KERNEL(
     roi_align_grad,
     ops::CPUROIAlignGradOpKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::CPUROIAlignGradOpKernel<paddle::platform::CPUDeviceContext, double>);
+    ops::CPUROIAlignGradOpKernel<paddle::platform::CPUDeviceContext, double>,
+    ops::CPUROIAlignGradOpKernel<paddle::platform::CPUDeviceContext, int>);
diff --git a/paddle/fluid/operators/roi_align_op.cu b/paddle/fluid/operators/roi_align_op.cu
index 6ebae590177fa1c44a7d0a5bbeb9edc9dc4f4f96..6e477892d91ee6a0295c968e96c4c34657f3b12b 100644
--- a/paddle/fluid/operators/roi_align_op.cu
+++ b/paddle/fluid/operators/roi_align_op.cu
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

+#include <vector>
 #include "paddle/fluid/memory/memory.h"
 #include "paddle/fluid/operators/roi_align_op.h"
 #include "paddle/fluid/platform/cuda_primitives.h"
@@ -258,32 +259,53 @@ class GPUROIAlignOpKernel : public framework::OpKernel<T> {
     roi_batch_id_list.Resize({rois_num});
     auto cplace = platform::CPUPlace();
     int* roi_batch_id_data = roi_batch_id_list.mutable_data<int>(cplace);
-    auto lod = rois->lod();
-    PADDLE_ENFORCE_EQ(
-        lod.empty(), false,
-        "Input(ROIs) Tensor of ROIAlignOp does not contain LoD information.");
-    auto rois_lod = lod.back();
-    int rois_batch_size = rois_lod.size() - 1;
-    PADDLE_ENFORCE_EQ(
-        rois_batch_size, batch_size,
-        platform::errors::InvalidArgument(
-            "The rois_batch_size and imgs "
-            "batch_size must be the same. But received rois_batch_size = %d, "
-            "batch_size = %d",
-            rois_batch_size, batch_size));
-    int rois_num_with_lod = rois_lod[rois_batch_size];
-    PADDLE_ENFORCE_EQ(rois_num, rois_num_with_lod,
-                      "The rois_num from input and lod must be the same.");
-    for (int n = 0; n < rois_batch_size; ++n) {
-      for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) {
-        roi_batch_id_data[i] = n;
+    auto& dev_ctx = ctx.cuda_device_context();
+    auto gplace = boost::get<platform::CUDAPlace>(ctx.GetPlace());
+    if (ctx.HasInput("RoisLod")) {
+      auto* rois_lod = ctx.Input<Tensor>("RoisLod");
+      int rois_batch_size = rois_lod->numel();
+      PADDLE_ENFORCE_EQ(
+          rois_batch_size - 1, batch_size,
+          platform::errors::InvalidArgument(
+              "The rois_batch_size and imgs "
+              "batch_size must be the same. But received rois_batch_size = %d, "
+              "batch_size = %d",
+              rois_batch_size, batch_size));
+
+      std::vector<int64_t> rois_lod_(rois_batch_size);
+      memory::Copy(cplace, rois_lod_.data(), gplace, rois_lod->data<int64_t>(),
+                   sizeof(int64_t) * rois_batch_size, 0);
+      for (int n = 0; n < rois_batch_size - 1; ++n) {
+        for (size_t i = rois_lod_[n]; i < rois_lod_[n + 1]; ++i) {
+          roi_batch_id_data[i] = n;
+        }
+      }
+    } else {
+      auto lod = rois->lod();
+      PADDLE_ENFORCE_EQ(
+          lod.empty(), false,
+          "Input(ROIs) Tensor of ROIAlignOp does not contain LoD information.");
+      auto rois_lod = lod.back();
+      int rois_batch_size = rois_lod.size() - 1;
+      PADDLE_ENFORCE_EQ(
+          rois_batch_size, batch_size,
+          platform::errors::InvalidArgument(
+              "The rois_batch_size and imgs "
+              "batch_size must be the same. But received rois_batch_size = %d, "
+              "batch_size = %d",
+              rois_batch_size, batch_size));
+      int rois_num_with_lod = rois_lod[rois_batch_size];
+      PADDLE_ENFORCE_EQ(rois_num, rois_num_with_lod,
+                        "The rois_num from input and lod must be the same.");
+      for (int n = 0; n < rois_batch_size; ++n) {
+        for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) {
+          roi_batch_id_data[i] = n;
+        }
       }
     }
-    auto& dev_ctx = ctx.cuda_device_context();
     int bytes = roi_batch_id_list.numel() * sizeof(int);
     auto roi_ptr = memory::Alloc(dev_ctx, bytes);
     int* roi_id_data = reinterpret_cast<int*>(roi_ptr->ptr());
-    const auto gplace = boost::get<platform::CUDAPlace>(ctx.GetPlace());
     memory::Copy(gplace, roi_id_data, cplace, roi_batch_id_data, bytes,
                  dev_ctx.stream());
     GPUROIAlignForward<T><<<blocks, threads, 0, dev_ctx.stream()>>>(
@@ -320,19 +342,33 @@ class GPUROIAlignGradOpKernel : public framework::OpKernel<T> {
     roi_batch_id_list.Resize({rois_num});
     auto cplace = platform::CPUPlace();
     int* roi_batch_id_data = roi_batch_id_list.mutable_data<int>(cplace);
-    auto rois_lod = rois->lod().back();
-    int rois_batch_size = rois_lod.size() - 1;
-    for (int n = 0; n < rois_batch_size; ++n) {
-      for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) {
-        roi_batch_id_data[i] = n;
+
+    auto& dev_ctx = ctx.cuda_device_context();
+    auto gplace = boost::get<platform::CUDAPlace>(ctx.GetPlace());
+    if (ctx.HasInput("RoisLod")) {
+      auto* rois_lod = ctx.Input<Tensor>("RoisLod");
+      int rois_batch_size = rois_lod->numel();
+      std::vector<int64_t> rois_lod_(rois_batch_size);
+      memory::Copy(cplace, rois_lod_.data(), gplace, rois_lod->data<int64_t>(),
+                   sizeof(int64_t) * rois_batch_size, 0);
+      for (int n = 0; n < rois_batch_size - 1; ++n) {
+        for (size_t i = rois_lod_[n]; i < rois_lod_[n + 1]; ++i) {
+          roi_batch_id_data[i] = n;
+        }
+      }
+    } else {
+      auto rois_lod = rois->lod().back();
+      int rois_batch_size = rois_lod.size() - 1;
+      for (int n = 0; n < rois_batch_size; ++n) {
+        for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) {
+          roi_batch_id_data[i] = n;
+        }
       }
     }
-    auto& dev_ctx = ctx.cuda_device_context();
     auto roi_ptr =
         memory::Alloc(dev_ctx, roi_batch_id_list.numel() * sizeof(int));
     int* roi_id_data = reinterpret_cast<int*>(roi_ptr->ptr());
     int bytes = roi_batch_id_list.numel() * sizeof(int);
-    const auto gplace = boost::get<platform::CUDAPlace>(ctx.GetPlace());
     memory::Copy(gplace, roi_id_data, cplace, roi_batch_id_data, bytes,
                  dev_ctx.stream());
     in_grad->mutable_data<T>(ctx.GetPlace());
diff --git a/paddle/fluid/operators/roi_align_op.h b/paddle/fluid/operators/roi_align_op.h
index e40bc1d031266c53f31efe66fcc573d5fbdb674b..26ef5e396a086d545be2bec86f88048db1c86f8a 100644
--- a/paddle/fluid/operators/roi_align_op.h
+++ b/paddle/fluid/operators/roi_align_op.h
@@ -12,6 +12,7 @@ limitations under the License. */
 #pragma once
 #include <algorithm>
 #include <limits>
+#include <vector>
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/math/math_function.h"

@@ -163,26 +164,44 @@ class CPUROIAlignOpKernel : public framework::OpKernel<T> {
     roi_batch_id_list.Resize({rois_num});
     int* roi_batch_id_data =
         roi_batch_id_list.mutable_data<int>(ctx.GetPlace());
-
-    auto lod = rois->lod();
-    PADDLE_ENFORCE_EQ(
-        lod.empty(), false,
-        "Input(ROIs) Tensor of ROIAlignOp does not contain LoD information.");
-    auto rois_lod = lod.back();
-    int rois_batch_size = rois_lod.size() - 1;
-    PADDLE_ENFORCE_EQ(
-        rois_batch_size, batch_size,
-        platform::errors::InvalidArgument(
-            "The rois_batch_size and imgs "
-            "batch_size must be the same. But received rois_batch_size = %d, "
-            "batch_size = %d",
-            rois_batch_size, batch_size));
-    int rois_num_with_lod = rois_lod[rois_batch_size];
-    PADDLE_ENFORCE_EQ(rois_num, rois_num_with_lod,
-                      "The rois_num from input and lod must be the same.");
-    for (int n = 0; n < rois_batch_size; ++n) {
-      for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) {
-        roi_batch_id_data[i] = n;
+    int rois_batch_size;
+    if (ctx.HasInput("RoisLod")) {
+      auto* rois_lod_t = ctx.Input<framework::Tensor>("RoisLod");
+      rois_batch_size = rois_lod_t->numel();
+      PADDLE_ENFORCE_EQ(
+          rois_batch_size - 1, batch_size,
+          platform::errors::InvalidArgument(
+              "The rois_batch_size and imgs "
+              "batch_size must be the same. But received rois_batch_size = %d, "
+              "batch_size = %d",
+              rois_batch_size, batch_size));
+      auto* rois_lod = rois_lod_t->data<int64_t>();
+      for (int n = 0; n < rois_batch_size - 1; ++n) {
+        for (int i = rois_lod[n]; i < rois_lod[n + 1]; ++i) {
+          roi_batch_id_data[i] = n;
+        }
+      }
+    } else {
+      auto lod = rois->lod();
+      PADDLE_ENFORCE_EQ(
+          lod.empty(), false,
+          "Input(ROIs) Tensor of ROIAlignOp does not contain LoD information.");
+      auto rois_lod = lod.back();
+      int rois_batch_size = rois_lod.size() - 1;
+      PADDLE_ENFORCE_EQ(
+          rois_batch_size, batch_size,
+          platform::errors::InvalidArgument(
+              "The rois_batch_size and imgs "
+              "batch_size must be the same. But received rois_batch_size = %d, "
+              "batch_size = %d",
+              rois_batch_size, batch_size));
+      int rois_num_with_lod = rois_lod[rois_batch_size];
+      PADDLE_ENFORCE_EQ(rois_num, rois_num_with_lod,
+                        "The rois_num from input and lod must be the same.");
+      for (int n = 0; n < rois_batch_size; ++n) {
+        for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) {
+          roi_batch_id_data[i] = n;
+        }
       }
     }
     T* output_data = out->mutable_data<T>(ctx.GetPlace());
@@ -276,11 +295,23 @@ class CPUROIAlignGradOpKernel : public framework::OpKernel<T> {
     int* roi_batch_id_data =
         roi_batch_id_list.mutable_data<int>(ctx.GetPlace());

-    auto rois_lod = rois->lod().back();
-    int rois_batch_size = rois_lod.size() - 1;
-    for (int n = 0; n < rois_batch_size; ++n) {
-      for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) {
-        roi_batch_id_data[i] = n;
+    int rois_batch_size;
+    if (ctx.HasInput("RoisLod")) {
+      auto* rois_lod_t = ctx.Input<framework::Tensor>("RoisLod");
+      rois_batch_size = rois_lod_t->numel();
+      auto* rois_lod = rois_lod_t->data<int64_t>();
+      for (int n = 0; n < rois_batch_size - 1; ++n) {
+        for (int i = rois_lod[n]; i < rois_lod[n + 1]; ++i) {
+          roi_batch_id_data[i] = n;
+        }
+      }
+    } else {
+      auto rois_lod = rois->lod().back();
+      rois_batch_size = rois_lod.size() - 1;
+      for (int n = 0; n < rois_batch_size; ++n) {
+        for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) {
+          roi_batch_id_data[i] = n;
+        }
       }
     }
     in_grad->mutable_data<T>(ctx.GetPlace());
diff --git a/paddle/fluid/operators/roi_pool_op.cc b/paddle/fluid/operators/roi_pool_op.cc
index e72f0dd751ba679552307005aa9e51ebb70bff80..acfb46db65304c952abfbaa2ad7ce22fea5c53ce 100644
--- a/paddle/fluid/operators/roi_pool_op.cc
+++ b/paddle/fluid/operators/roi_pool_op.cc
@@ -36,7 +36,10 @@ class ROIPoolOp : public framework::OperatorWithKernel {
                    "Output(Argmax) of ROIPoolOp should not be null.");
     auto input_dims = ctx->GetInputDim("X");
     auto rois_dims = ctx->GetInputDim("ROIs");
-
+    if (ctx->HasInput("RoisLod")) {
+      auto rois_lod_dims = ctx->GetInputDim("RoisLod");
+      PADDLE_ENFORCE(rois_lod_dims.size() == 1, "");
+    }
     PADDLE_ENFORCE(input_dims.size() == 4,
                    "The format of input tensor is NCHW.");
     PADDLE_ENFORCE(rois_dims.size() == 2,
@@ -115,6 +118,7 @@ class ROIPoolOpMaker : public framework::OpProtoAndCheckerMaker {
              "Where batch_id is the id of the data, "
              "(x1, y1) is the top left coordinates, and "
              "(x2, y2) is the bottom right coordinates.");
+    AddInput("RoisLod", "(Tensor), The lod info of rois.").AsDispensable();
     AddOutput("Out",
               "(Tensor), "
               "The output of ROIPoolOp is a 4-D tensor with shape "
@@ -171,6 +175,7 @@ class ROIPoolGradMaker : public framework::SingleGradOpMaker<T> {
     op->SetType("roi_pool_grad");
     op->SetInput("X", this->Input("X"));
     op->SetInput("ROIs", this->Input("ROIs"));
+    op->SetInput("RoisLod", this->Input("RoisLod"));
     op->SetInput("Argmax", this->Output("Argmax"));
     op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
     op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
@@ -189,8 +194,10 @@ REGISTER_OPERATOR(roi_pool_grad, ops::ROIPoolGradOp);
 REGISTER_OP_CPU_KERNEL(
     roi_pool,
     ops::CPUROIPoolOpKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::CPUROIPoolOpKernel<paddle::platform::CPUDeviceContext, double>);
+    ops::CPUROIPoolOpKernel<paddle::platform::CPUDeviceContext, double>,
+    ops::CPUROIPoolOpKernel<paddle::platform::CPUDeviceContext, int>);
 REGISTER_OP_CPU_KERNEL(
     roi_pool_grad,
     ops::CPUROIPoolGradOpKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::CPUROIPoolGradOpKernel<paddle::platform::CPUDeviceContext, double>);
+    ops::CPUROIPoolGradOpKernel<paddle::platform::CPUDeviceContext, double>,
+    ops::CPUROIPoolGradOpKernel<paddle::platform::CPUDeviceContext, int>);
diff --git a/paddle/fluid/operators/roi_pool_op.cu b/paddle/fluid/operators/roi_pool_op.cu
index da8088d2ea70f589b6a5b8a443f16429cd0d1034..cf53c642a144903619cc89d45d5abd9364903c17 100644
--- a/paddle/fluid/operators/roi_pool_op.cu
+++ b/paddle/fluid/operators/roi_pool_op.cu
@@ -11,7 +11,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-
+#include <vector>
 #include "paddle/fluid/memory/memory.h"
 #include "paddle/fluid/operators/roi_pool_op.h"
 #include "paddle/fluid/platform/cuda_primitives.h"
@@ -155,25 +155,40 @@ class GPUROIPoolOpKernel : public framework::OpKernel<T> {
     roi_batch_id_list.Resize({rois_num});
     auto cplace = platform::CPUPlace();
     int* roi_batch_id_data = roi_batch_id_list.mutable_data<int>(cplace);
-    auto rois_lod = rois->lod().back();
-    int rois_batch_size = rois_lod.size() - 1;
-    PADDLE_ENFORCE_EQ(
-        rois_batch_size, batch_size,
-        "The rois_batch_size and imgs batch_size must be the same.");
-    int rois_num_with_lod = rois_lod[rois_batch_size];
-    PADDLE_ENFORCE_EQ(rois_num, rois_num_with_lod,
-                      "The rois_num from input and lod must be the same.");
-    for (int n = 0; n < rois_batch_size; ++n) {
-      for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) {
-        roi_batch_id_data[i] = n;
+    auto& dev_ctx = ctx.cuda_device_context();
+    auto gplace = boost::get<platform::CUDAPlace>(ctx.GetPlace());
+    if (ctx.HasInput("RoisLod")) {
+      auto* rois_lod = ctx.Input<Tensor>("RoisLod");
+      int rois_batch_size = rois_lod->numel();
+      PADDLE_ENFORCE_EQ(
+          rois_batch_size - 1, batch_size,
+          "The rois_batch_size and imgs batch_size must be the same.");
+      std::vector<int64_t> rois_lod_(rois_batch_size);
+      memory::Copy(cplace, rois_lod_.data(), gplace, rois_lod->data<int64_t>(),
+                   sizeof(int64_t) * rois_batch_size, 0);
+      for (int n = 0; n < rois_batch_size - 1; ++n) {
+        for (size_t i = rois_lod_[n]; i < rois_lod_[n + 1]; ++i) {
+          roi_batch_id_data[i] = n;
+        }
+      }
+    } else {
+      auto rois_lod = rois->lod().back();
+      int rois_batch_size = rois_lod.size() - 1;
+      PADDLE_ENFORCE_EQ(
+          rois_batch_size, batch_size,
+          "The rois_batch_size and imgs batch_size must be the same.");
+      int rois_num_with_lod = rois_lod[rois_batch_size];
+      PADDLE_ENFORCE_EQ(rois_num, rois_num_with_lod,
+                        "The rois_num from input and lod must be the same.");
+      for (int n = 0; n < rois_batch_size; ++n) {
+        for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) {
+          roi_batch_id_data[i] = n;
+        }
       }
     }
-
-    auto& dev_ctx = ctx.cuda_device_context();
     int bytes = roi_batch_id_list.numel() * sizeof(int);
     auto roi_ptr = memory::Alloc(dev_ctx, bytes);
     int* roi_id_data = reinterpret_cast<int*>(roi_ptr->ptr());
-    const auto gplace = boost::get<platform::CUDAPlace>(ctx.GetPlace());
     memory::Copy(gplace, roi_id_data, cplace, roi_batch_id_data, bytes,
                  dev_ctx.stream());

@@ -191,6 +206,7 @@ class GPUROIPoolGradOpKernel : public framework::OpKernel<T> {
   void Compute(const framework::ExecutionContext& ctx) const override {
     auto* in = ctx.Input<Tensor>("X");
     auto* rois = ctx.Input<LoDTensor>("ROIs");
+    auto* rois_lod = ctx.Input<Tensor>("RoisLod");
     auto* argmax = ctx.Input<Tensor>("Argmax");
     auto* out_grad = ctx.Input<Tensor>(framework::GradVarName("Out"));
@@ -210,19 +226,33 @@ class GPUROIPoolGradOpKernel : public framework::OpKernel<T> {
     roi_batch_id_list.Resize({rois_num});
     auto cplace = platform::CPUPlace();
     int* roi_batch_id_data = roi_batch_id_list.mutable_data<int>(cplace);
-    auto rois_lod = rois->lod().back();
-    int rois_batch_size = rois_lod.size() - 1;
-    for (int n = 0; n < rois_batch_size; ++n) {
-      for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) {
-        roi_batch_id_data[i] = n;
-      }
-    }

     auto& dev_ctx = ctx.cuda_device_context();
+    auto gplace = boost::get<platform::CUDAPlace>(ctx.GetPlace());
(ctx.HasInput("RoisLod")) { + auto* rois_lod = ctx.Input("RoisLod"); + int rois_batch_size = rois_lod->numel(); + std::vector rois_lod_(rois_batch_size); + memory::Copy(cplace, rois_lod_.data(), gplace, + rois_lod->data(), + sizeof(int64_t) * rois_batch_size, 0); + for (int n = 0; n < rois_batch_size - 1; ++n) { + for (size_t i = rois_lod_[n]; i < rois_lod_[n + 1]; ++i) { + roi_batch_id_data[i] = n; + } + } + } else { + auto rois_lod = rois->lod().back(); + int rois_batch_size = rois_lod.size() - 1; + for (int n = 0; n < rois_batch_size; ++n) { + for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) { + roi_batch_id_data[i] = n; + } + } + } int bytes = roi_batch_id_list.numel() * sizeof(int); auto roi_ptr = memory::Alloc(dev_ctx, bytes); int* roi_id_data = reinterpret_cast(roi_ptr->ptr()); - const auto gplace = boost::get(ctx.GetPlace()); memory::Copy(gplace, roi_id_data, cplace, roi_batch_id_data, bytes, dev_ctx.stream()); diff --git a/paddle/fluid/operators/roi_pool_op.h b/paddle/fluid/operators/roi_pool_op.h index 07de7c9f0e070cef7c6f38f8d564ab76910842db..4a369bbb4213e786f82abdad0a4cf8facb2c12b2 100644 --- a/paddle/fluid/operators/roi_pool_op.h +++ b/paddle/fluid/operators/roi_pool_op.h @@ -15,7 +15,9 @@ limitations under the License. */ #pragma once #include #include +#include #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/operators/math/math_function.h" namespace paddle { @@ -55,17 +57,32 @@ class CPUROIPoolOpKernel : public framework::OpKernel { int* roi_batch_id_data = roi_batch_id_list.mutable_data(ctx.GetPlace()); - auto rois_lod = rois->lod().back(); - int rois_batch_size = rois_lod.size() - 1; - PADDLE_ENFORCE_EQ( - rois_batch_size, batch_size, - "The rois_batch_size and imgs batch_size must be the same."); - int rois_num_with_lod = rois_lod[rois_batch_size]; - PADDLE_ENFORCE_EQ(rois_num, rois_num_with_lod, - "The rois_num from input and lod must be the same."); - for (int n = 0; n < rois_batch_size; ++n) { - for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) { - roi_batch_id_data[i] = n; + int rois_batch_size; + if (ctx.HasInput("RoisLod")) { + auto* rois_lod_t = ctx.Input("RoisLod"); + rois_batch_size = rois_lod_t->numel(); + PADDLE_ENFORCE_EQ( + rois_batch_size - 1, batch_size, + "The rois_batch_size and imgs batch_size must be the same."); + auto* rois_lod = rois_lod_t->data(); + for (int n = 0; n < rois_batch_size - 1; ++n) { + for (int i = rois_lod[n]; i < rois_lod[n + 1]; ++i) { + roi_batch_id_data[i] = n; + } + } + } else { + auto rois_lod = rois->lod().back(); + rois_batch_size = rois_lod.size() - 1; + PADDLE_ENFORCE_EQ( + rois_batch_size, batch_size, + "The rois_batch_size and imgs batch_size must be the same."); + int rois_num_with_lod = rois_lod[rois_batch_size]; + PADDLE_ENFORCE_EQ(rois_num, rois_num_with_lod, + "The rois_num from input and lod must be the same."); + for (int n = 0; n < rois_batch_size; ++n) { + for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) { + roi_batch_id_data[i] = n; + } } } @@ -163,11 +180,23 @@ class CPUROIPoolGradOpKernel : public framework::OpKernel { int* roi_batch_id_data = roi_batch_id_list.mutable_data(ctx.GetPlace()); - auto rois_lod = rois->lod().back(); - int rois_batch_size = rois_lod.size() - 1; - for (int n = 0; n < rois_batch_size; ++n) { - for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) { - roi_batch_id_data[i] = n; + int rois_batch_size; + if (ctx.HasInput("RoisLod")) { + auto* rois_lod_t = ctx.Input("RoisLod"); + rois_batch_size = 
rois_lod_t->numel(); + auto* rois_lod = rois_lod_t->data(); + for (int n = 0; n < rois_batch_size - 1; ++n) { + for (int i = rois_lod[n]; i < rois_lod[n + 1]; ++i) { + roi_batch_id_data[i] = n; + } + } + } else { + auto rois_lod = rois->lod().back(); + rois_batch_size = rois_lod.size() - 1; + for (int n = 0; n < rois_batch_size; ++n) { + for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) { + roi_batch_id_data[i] = n; + } } } diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index 142edf40ad9a7a032f76cd8789be23a9f60486e6..4cf114f79cac598397147f534e40c2d2a2e31911 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -2779,6 +2779,8 @@ def generate_proposals(scores, dtype=bbox_deltas.dtype) rpn_roi_probs = helper.create_variable_for_type_inference( dtype=scores.dtype) + rpn_rois_lod = helper.create_variable_for_type_inference(dtype='int32') + helper.append_op( type="generate_proposals", inputs={ @@ -2795,12 +2797,16 @@ def generate_proposals(scores, 'min_size': min_size, 'eta': eta }, - outputs={'RpnRois': rpn_rois, - 'RpnRoiProbs': rpn_roi_probs}) + outputs={ + 'RpnRois': rpn_rois, + 'RpnRoiProbs': rpn_roi_probs, + 'RpnRoisLod': rpn_rois_lod + }) rpn_rois.stop_gradient = True rpn_roi_probs.stop_gradient = True + rpn_rois_lod.stop_gradient = True - return rpn_rois, rpn_roi_probs + return rpn_rois, rpn_roi_probs, rpn_rois_lod def box_clip(input, im_info, name=None): diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index a0eb6c93bc436161182834717385dc9161f2e4ca..ee7d355508fe4ab93bf646d5661652f9eba392fb 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -6606,7 +6606,12 @@ def label_smooth(label, @templatedoc() -def roi_pool(input, rois, pooled_height=1, pooled_width=1, spatial_scale=1.0): +def roi_pool(input, + rois, + pooled_height=1, + pooled_width=1, + spatial_scale=1.0, + rois_lod=None): """ This operator implements the roi_pooling layer. Region of interest pooling (also known as RoI pooling) is to perform max pooling on inputs of nonuniform sizes to obtain fixed-size feature maps (e.g. 7*7). @@ -6622,6 +6627,7 @@ def roi_pool(input, rois, pooled_height=1, pooled_width=1, spatial_scale=1.0): Args: input (Variable): Input feature, 4D-Tensor with the shape of [N,C,H,W], where N is the batch size, C is the input channel, H is Height, W is weight. The data type is float32 or float64. rois (Variable): ROIs (Regions of Interest) to pool over. 2D-LoDTensor with the shape of [num_rois,4], the lod level is 1. Given as [[x1, y1, x2, y2], ...], (x1, y1) is the top left coordinates, and (x2, y2) is the bottom right coordinates. + rois_lod (Variable): The lod info of rois. Default: None pooled_height (int, optional): The pooled output height, data type is int32. Default: 1 pooled_width (int, optional): The pooled output height, data type is int32. Default: 1 spatial_scale (float, optional): Multiplicative spatial scale factor to translate ROI coords from their input scale to the scale used when pooling. 
        spatial_scale (float, optional): Multiplicative spatial scale factor to translate ROI coords from their input scale to the scale used when pooling. Default: 1.0
@@ -6644,19 +6650,22 @@ def roi_pool(input, rois, pooled_height=1, pooled_width=1, spatial_scale=1.0):

            input_data = np.array([i for i in range(1,17)]).reshape(1,1,4,4).astype(DATATYPE)
            roi_data =fluid.create_lod_tensor(np.array([[1., 1., 2., 2.], [1.5, 1.5, 3., 3.]]).astype(DATATYPE),[[2]], place)
-
+            rois_lod_data = np.array([0, 2])
+
            x = fluid.data(name='input', shape=[None,1,4,4], dtype=DATATYPE)
            rois = fluid.data(name='roi', shape=[None,4], dtype=DATATYPE)
-
+            rois_lod = fluid.data(name='rois_lod', shape=[None], dtype='int64')
+
            pool_out = fluid.layers.roi_pool(
                    input=x,
                    rois=rois,
                    pooled_height=1,
                    pooled_width=1,
-                    spatial_scale=1.0)
+                    spatial_scale=1.0,
+                    rois_lod=rois_lod)

            exe = fluid.Executor(place)
-            out, = exe.run(feed={'input':input_data ,'roi':roi_data}, fetch_list=[pool_out.name])
+            out, = exe.run(feed={'input':input_data ,'roi':roi_data, 'rois_lod': rois_lod_data}, fetch_list=[pool_out.name])
            print(out)   #array([[[[11.]]], [[[16.]]]], dtype=float32)
            print(np.array(out).shape)  # (2, 1, 1, 1)
    """
@@ -6667,7 +6676,8 @@ def roi_pool(input, rois, pooled_height=1, pooled_width=1, spatial_scale=1.0):
     helper.append_op(
         type="roi_pool",
         inputs={"X": input,
-                "ROIs": rois},
+                "ROIs": rois,
+                "RoisLod": rois_lod},
         outputs={"Out": pool_out,
                  "Argmax": argmaxes},
         attrs={
@@ -6685,7 +6695,8 @@ def roi_align(input,
               pooled_width=1,
               spatial_scale=1.0,
               sampling_ratio=-1,
-              name=None):
+              name=None,
+              rois_lod=None):
     """
     ${comment}

@@ -6695,7 +6706,8 @@ def roi_align(input,
        a 2-D LoDTensor of shape (num_rois, 4), the lod level is 1. The
        data type is float32 or float64. Given as [[x1, y1, x2, y2], ...],
        (x1, y1) is the top left coordinates, and (x2, y2) is the bottom
-        right coordinates.
+        right coordinates.
+        rois_lod (Variable): The lod info of rois. Default: None
        pooled_height (int32, optional): ${pooled_height_comment} Default: 1
        pooled_width (int32, optional): ${pooled_width_comment} Default: 1
        spatial_scale (float32, optional): ${spatial_scale_comment} Default: 1.0
@@ -6718,12 +6730,14 @@ def roi_align(input,
                name='data', shape=[None, 256, 32, 32], dtype='float32')
            rois = fluid.data(
                name='rois', shape=[None, 4], dtype='float32')
+            rois_lod = fluid.data(name='rois_lod', shape=[None], dtype='int64')
            align_out = fluid.layers.roi_align(input=x,
                                               rois=rois,
                                               pooled_height=7,
                                               pooled_width=7,
                                               spatial_scale=0.5,
-                                               sampling_ratio=-1)
+                                               sampling_ratio=-1,
+                                               rois_lod=rois_lod)
    """
    check_variable_and_dtype(input, 'input', ['float32', 'float64'],
                             'roi_align')
@@ -6734,7 +6748,8 @@ def roi_align(input,
     helper.append_op(
         type="roi_align",
         inputs={"X": input,
-                "ROIs": rois},
+                "ROIs": rois,
+                "RoisLod": rois_lod},
         outputs={"Out": align_out},
         attrs={
             "pooled_height": pooled_height,
diff --git a/python/paddle/fluid/tests/test_detection.py b/python/paddle/fluid/tests/test_detection.py
index cb798c8ed595d13dd8ff5e33323d6e796aaac6f9..0a220fe18f42b27021be4091293c8baa8f038e53 100644
--- a/python/paddle/fluid/tests/test_detection.py
+++ b/python/paddle/fluid/tests/test_detection.py
@@ -480,7 +480,7 @@ class TestGenerateProposals(unittest.TestCase):
             name='bbox_deltas',
             shape=[num_anchors * 4, 8, 8],
             dtype='float32')
-        rpn_rois, rpn_roi_probs = fluid.layers.generate_proposals(
+        rpn_rois, rpn_roi_probs, _ = fluid.layers.generate_proposals(
             name='generate_proposals',
             scores=scores,
             bbox_deltas=bbox_deltas,
diff --git a/python/paddle/fluid/tests/unittests/test_generate_proposals_op.py b/python/paddle/fluid/tests/unittests/test_generate_proposals_op.py
index fceaa0c14c4dcfb75bcb597cade1dc838a71c065..5d4b453fa64205cb13b88ccfc5bf622348bbd9fa 100644
--- a/python/paddle/fluid/tests/unittests/test_generate_proposals_op.py
+++ b/python/paddle/fluid/tests/unittests/test_generate_proposals_op.py
@@ -281,7 +281,9 @@ class TestGenerateProposalsOp(OpTest):

         self.outputs = {
             'RpnRois': (self.rpn_rois[0], [self.lod]),
-            'RpnRoiProbs': (self.rpn_roi_probs[0], [self.lod])
+            'RpnRoiProbs': (self.rpn_roi_probs[0], [self.lod]),
+            'RpnRoisLod': (np.asarray(
+                self.lod, dtype=np.int32))
         }

     def test_check_output(self):
diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py
index 8059720312b54319f09726cc5219fd3f7e46e491..fdf8931bdaf55e469c7854ef0a342c4d797dcc3e 100644
--- a/python/paddle/fluid/tests/unittests/test_layers.py
+++ b/python/paddle/fluid/tests/unittests/test_layers.py
@@ -3217,7 +3217,9 @@ class TestBook(LayerTest):
             x = layers.data(name="x", shape=[256, 30, 30], dtype="float32")
             rois = layers.data(
                 name="rois", shape=[4], dtype="float32", lod_level=1)
-            output = layers.roi_pool(x, rois, 7, 7, 0.6)
+            rois_lod = layers.data(
+                name="rois_lod", shape=[None, ], dtype="int", lod_level=1)
+            output = layers.roi_pool(x, rois, 7, 7, 0.6, rois_lod)
             return (output)

     def test_sequence_enumerate(self):
@@ -3232,7 +3234,10 @@ class TestBook(LayerTest):
             x = layers.data(name="x", shape=[256, 30, 30], dtype="float32")
             rois = layers.data(
                 name="rois", shape=[4], dtype="float32", lod_level=1)
-            output = layers.roi_align(x, rois, 14, 14, 0.5, 2)
+            rois_lod = layers.data(
+                name="rois_lod", shape=[None, ], dtype="int", lod_level=1)
+            output = layers.roi_align(x, rois, 14, 14, 0.5, 2, 'roi_align',
+                                      rois_lod)
             return (output)

     def test_roi_perspective_transform(self):
diff --git a/python/paddle/fluid/tests/unittests/test_roi_align_op.py b/python/paddle/fluid/tests/unittests/test_roi_align_op.py
index c8d5e4c00597a1f6c0c6c313a17dbdbb1535fbe7..b01863880866e247f2aee4b94ae3121c9d891f92 100644
--- a/python/paddle/fluid/tests/unittests/test_roi_align_op.py
+++ b/python/paddle/fluid/tests/unittests/test_roi_align_op.py
@@ -26,7 +26,11 @@ class TestROIAlignOp(OpTest):
         self.init_test_case()
         self.make_rois()
         self.calc_roi_align()
-        self.inputs = {'X': self.x, 'ROIs': (self.rois[:, 1:5], self.rois_lod)}
+
+        self.inputs = {
+            'X': self.x,
+            'ROIs': (self.rois[:, 1:5], self.rois_lod),
+        }
         self.attrs = {
             'spatial_scale': self.spatial_scale,
             'pooled_height': self.pooled_height,
@@ -170,5 +174,34 @@ class TestROIAlignOp(OpTest):
         self.check_grad(['X'], 'Out')


+class TestROIAlignInLodOp(TestROIAlignOp):
+    def set_data(self):
+        self.init_test_case()
+        self.make_rois()
+        self.calc_roi_align()
+
+        seq_len = self.rois_lod[0]
+        cur_len = 0
+        lod = [cur_len]
+        for l in seq_len:
+            cur_len += l
+            lod.append(cur_len)
+
+        self.inputs = {
+            'X': self.x,
+            'ROIs': (self.rois[:, 1:5], self.rois_lod),
+            'RoisLod': np.asarray(lod).astype('int64')
+        }
+
+        self.attrs = {
+            'spatial_scale': self.spatial_scale,
+            'pooled_height': self.pooled_height,
+            'pooled_width': self.pooled_width,
+            'sampling_ratio': self.sampling_ratio
+        }
+
+        self.outputs = {'Out': self.out_data}
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_roi_pool_op.py b/python/paddle/fluid/tests/unittests/test_roi_pool_op.py
index 363f465b9d4645e6bcf33c6ba85d6ef60e104afa..79f9127760bc2978d8771049c7ca66f29fd654c1 100644
--- a/python/paddle/fluid/tests/unittests/test_roi_pool_op.py
+++ b/python/paddle/fluid/tests/unittests/test_roi_pool_op.py
@@ -28,7 +28,10 @@ class TestROIPoolOp(OpTest):
         self.make_rois()
         self.calc_roi_pool()

-        self.inputs = {'X': self.x, 'ROIs': (self.rois[:, 1:5], self.rois_lod)}
+        self.inputs = {
+            'X': self.x,
+            'ROIs': (self.rois[:, 1:5], self.rois_lod),
+        }

         self.attrs = {
             'spatial_scale': self.spatial_scale,
@@ -138,5 +141,33 @@ class TestROIPoolOp(OpTest):
         self.check_grad(['X'], 'Out')


+class TestROIPoolInLodOp(TestROIPoolOp):
+    def set_data(self):
+        self.init_test_case()
+        self.make_rois()
+        self.calc_roi_pool()
+
+        seq_len = self.rois_lod[0]
+        cur_len = 0
+        lod = [cur_len]
+        for l in seq_len:
+            cur_len += l
+            lod.append(cur_len)
+
+        self.inputs = {
+            'X': self.x,
+            'ROIs': (self.rois[:, 1:5], self.rois_lod),
+            'RoisLod': np.asarray(lod).astype('int64')
+        }
+
+        self.attrs = {
+            'spatial_scale': self.spatial_scale,
+            'pooled_height': self.pooled_height,
+            'pooled_width': self.pooled_width
+        }
+
+        self.outputs = {'Out': self.outs, 'Argmax': self.argmaxes}
+
+
 if __name__ == '__main__':
     unittest.main()
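
Note on the RoisLod convention used by the roi_pool/roi_align changes above: the new dispensable input is a 1-D int64 tensor of cumulative RoI offsets that starts at 0, so its length is batch_size + 1 (the kernels check rois_lod->numel() - 1 == batch_size). Below is a minimal sketch of building such a tensor from per-image RoI counts, mirroring the construction in the new TestROIAlignInLodOp/TestROIPoolInLodOp cases; the counts are illustrative, not taken from the patch.

    import numpy as np

    # Per-image RoI counts for a batch of 3 images (illustrative values).
    seq_len = [2, 0, 3]

    # Cumulative offsets expected by the RoisLod input: [0, 2, 2, 5].
    cur_len = 0
    lod = [cur_len]
    for l in seq_len:
        cur_len += l
        lod.append(cur_len)
    rois_lod_data = np.asarray(lod).astype('int64')

The resulting array can be fed to the rois_lod argument added to fluid.layers.roi_pool and fluid.layers.roi_align, in the same way as the rois_lod_data feed in the updated roi_pool docstring example.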