diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec
index f723b1ec2174849c23754254dd4a886f50e46090..46726ab4945b051880494321d0e9336f73f3bd2e 100644
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -303,6 +303,7 @@ paddle.fluid.layers.ssd_loss ArgSpec(args=['location', 'confidence', 'gt_box', '
 paddle.fluid.layers.detection_map ArgSpec(args=['detect_res', 'label', 'class_num', 'background_label', 'overlap_threshold', 'evaluate_difficult', 'has_state', 'input_states', 'out_states', 'ap_version'], varargs=None, keywords=None, defaults=(0, 0.3, True, None, None, None, 'integral'))
 paddle.fluid.layers.rpn_target_assign ArgSpec(args=['loc', 'scores', 'anchor_box', 'gt_box', 'rpn_batch_size_per_im', 'fg_fraction', 'rpn_positive_overlap', 'rpn_negative_overlap'], varargs=None, keywords=None, defaults=(256, 0.25, 0.7, 0.3))
 paddle.fluid.layers.anchor_generator ArgSpec(args=['input', 'anchor_sizes', 'aspect_ratios', 'variance', 'stride', 'offset', 'name'], varargs=None, keywords=None, defaults=(None, None, [0.1, 0.1, 0.2, 0.2], None, 0.5, None))
+paddle.fluid.layers.generate_proposal_labels ArgSpec(args=['rpn_rois', 'gt_classes', 'gt_boxes', 'im_scales', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None))
 paddle.fluid.layers.generate_proposals ArgSpec(args=['scores', 'bbox_deltas', 'im_info', 'anchors', 'variances', 'pre_nms_top_n', 'post_nms_top_n', 'nms_thresh', 'min_size', 'eta', 'name'], varargs=None, keywords=None, defaults=(6000, 1000, 0.5, 0.1, 1.0, None))
 paddle.fluid.layers.iou_similarity ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
 paddle.fluid.layers.box_coder ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
diff --git a/paddle/fluid/operators/detection/CMakeLists.txt b/paddle/fluid/operators/detection/CMakeLists.txt
index 1301c8ae2b145298b18f68f86dadd3c5cbe4271a..f4983c65432991a45f226d97f0fb05b08a30ca89 100644
--- a/paddle/fluid/operators/detection/CMakeLists.txt
+++ b/paddle/fluid/operators/detection/CMakeLists.txt
@@ -29,6 +29,7 @@ target_assign_op.cu)
 detection_library(polygon_box_transform_op SRCS polygon_box_transform_op.cc
 polygon_box_transform_op.cu)
 detection_library(rpn_target_assign_op SRCS rpn_target_assign_op.cc)
+detection_library(generate_proposal_labels_op SRCS generate_proposal_labels_op.cc)
 detection_library(generate_proposals_op SRCS generate_proposals_op.cc)
 #Export local libraries to parent
 set(DETECTION_LIBRARY ${LOCAL_DETECTION_LIBS} PARENT_SCOPE)
diff --git a/paddle/fluid/operators/detection/generate_proposal_labels_op.cc b/paddle/fluid/operators/detection/generate_proposal_labels_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..0571c46f6be99c9a06b7dd2abb310eeda506ecd5
--- /dev/null
+++ b/paddle/fluid/operators/detection/generate_proposal_labels_op.cc
@@ -0,0 +1,515 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <math.h>
+#include <algorithm>
+#include <string>
+#include <vector>
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/gather.h"
+#include "paddle/fluid/operators/math/concat.h"
+#include "paddle/fluid/operators/math/math_function.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+using LoDTensor = framework::LoDTensor;
+const int kBoxDim = 4;
+
+// Copies every element of `to_add` into the buffer of `out`, beginning at
+// element index `offset`. `out` must already hold allocated memory that is
+// large enough; no resizing or bounds checking is done here.
+template <typename T>
+void AppendRois(LoDTensor* out, int64_t offset, Tensor* to_add) {
+  T* dst = out->data<T>() + offset;
+  const T* src = to_add->data<T>();
+  const size_t num_bytes = to_add->numel() * sizeof(T);
+  memcpy(dst, src, num_bytes);
+}
+
+// Operator definition for generate_proposal_labels. It verifies that every
+// input/output is connected, checks the rank of each input, and declares the
+// static output shapes.
+class GenerateProposalLabelsOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  // The number of sampled rois is only known once the kernel has run, so the
+  // leading dimension of every output is declared as -1.
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("RpnRois"),
+                   "Input(RpnRois) shouldn't be null.");
+    PADDLE_ENFORCE(ctx->HasInput("GtClasses"),
+                   "Input(GtClasses) shouldn't be null.");
+    PADDLE_ENFORCE(ctx->HasInput("GtBoxes"),
+                   "Input(GtBoxes) shouldn't be null.");
+    PADDLE_ENFORCE(ctx->HasInput("ImScales"),
+                   "Input(ImScales) shouldn't be null.");
+
+    // The messages below name this operator (they previously referred to
+    // RpnTargetAssignOp, which made failures misleading).
+    PADDLE_ENFORCE(
+        ctx->HasOutput("Rois"),
+        "Output(Rois) of GenerateProposalLabelsOp should not be null");
+    PADDLE_ENFORCE(
+        ctx->HasOutput("LabelsInt32"),
+        "Output(LabelsInt32) of GenerateProposalLabelsOp should not be null");
+    PADDLE_ENFORCE(
+        ctx->HasOutput("BboxTargets"),
+        "Output(BboxTargets) of GenerateProposalLabelsOp should not be null");
+    PADDLE_ENFORCE(ctx->HasOutput("BboxInsideWeights"),
+                   "Output(BboxInsideWeights) of GenerateProposalLabelsOp "
+                   "should not be null");
+    PADDLE_ENFORCE(ctx->HasOutput("BboxOutsideWeights"),
+                   "Output(BboxOutsideWeights) of GenerateProposalLabelsOp "
+                   "should not be null");
+
+    auto rpn_rois_dims = ctx->GetInputDim("RpnRois");
+    auto gt_classes_dims = ctx->GetInputDim("GtClasses");
+    auto gt_boxes_dims = ctx->GetInputDim("GtBoxes");
+    auto im_scales_dims = ctx->GetInputDim("ImScales");
+
+    PADDLE_ENFORCE_EQ(rpn_rois_dims.size(), 2,
+                      "The rank of Input(RpnRois) must be 2.");
+    PADDLE_ENFORCE_EQ(gt_classes_dims.size(), 1,
+                      "The rank of Input(GtClasses) must be 1.");
+    PADDLE_ENFORCE_EQ(gt_boxes_dims.size(), 2,
+                      "The rank of Input(GtBoxes) must be 2.");
+    PADDLE_ENFORCE_EQ(im_scales_dims.size(), 1,
+                      "The rank of Input(ImScales) must be 1.");
+
+    int class_nums = ctx->Attrs().Get<int>("class_nums");
+
+    // Box regression outputs carry one group of 4 values per class.
+    ctx->SetOutputDim("Rois", {-1, 4});
+    ctx->SetOutputDim("LabelsInt32", {-1});
+    ctx->SetOutputDim("BboxTargets", {-1, 4 * class_nums});
+    ctx->SetOutputDim("BboxInsideWeights", {-1, 4 * class_nums});
+    ctx->SetOutputDim("BboxOutsideWeights", {-1, 4 * class_nums});
+  }
+
+ protected:
+  // The kernel is CPU-only; its data type follows Input(RpnRois).
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    auto data_type = framework::GetDataTypeOfVar(ctx.InputVar("RpnRois"));
+    return framework::OpKernelType(data_type, platform::CPUPlace());
+  }
+};
+
+// Stacks `in_tensor_a` on top of `in_tensor_b` (concatenation along axis 0)
+// and writes the result into `out_tensor` through the CPU ConcatFunctor.
+template <typename T>
+void Concat(const platform::CPUDeviceContext& context,
+            const Tensor& in_tensor_a, const Tensor& in_tensor_b,
+            Tensor* out_tensor) {
+  const int concat_axis = 0;
+  std::vector<Tensor> pieces;
+  pieces.reserve(2);
+  pieces.push_back(in_tensor_a);
+  pieces.push_back(in_tensor_b);
+  math::ConcatFunctor<platform::CPUDeviceContext, T> functor;
+  functor(context, pieces, concat_axis, out_tensor);
+}
+
+// Fills `overlaps(i, j)` with the intersection-over-union of row i of
+// `r_boxes` and row j of `c_boxes`. Boxes are read as (x_min, y_min, x_max,
+// y_max) and widths/heights use the inclusive "+ 1" pixel convention.
+// `overlaps` must already be allocated with one row per r-box and one column
+// per c-box.
+template <typename T>
+void BboxOverlaps(const Tensor& r_boxes, const Tensor& c_boxes,
+                  Tensor* overlaps) {
+  auto rows = framework::EigenTensor<T, 2>::From(r_boxes);
+  auto cols = framework::EigenTensor<T, 2>::From(c_boxes);
+  auto iou = framework::EigenTensor<T, 2>::From(*overlaps);
+  const int num_rows = r_boxes.dims()[0];
+  const int num_cols = c_boxes.dims()[0];
+  const T zero = static_cast<T>(0.0);
+  for (int r = 0; r < num_rows; ++r) {
+    const T row_area =
+        (rows(r, 2) - rows(r, 0) + 1) * (rows(r, 3) - rows(r, 1) + 1);
+    for (int c = 0; c < num_cols; ++c) {
+      const T col_area =
+          (cols(c, 2) - cols(c, 0) + 1) * (cols(c, 3) - cols(c, 1) + 1);
+      // Corners of the intersection rectangle.
+      const T left = std::max(rows(r, 0), cols(c, 0));
+      const T top = std::max(rows(r, 1), cols(c, 1));
+      const T right = std::min(rows(r, 2), cols(c, 2));
+      const T bottom = std::min(rows(r, 3), cols(c, 3));
+      // Disjoint boxes yield a non-positive extent, clamped to zero.
+      const T inter_w = std::max(right - left + 1, zero);
+      const T inter_h = std::max(bottom - top + 1, zero);
+      const T inter_area = inter_w * inter_h;
+      iou(r, c) = inter_area / (row_area + col_area - inter_area);
+    }
+  }
+}
+
+// Encodes, for each of the first `box_num` rows, the regression target that
+// maps `ex_boxes(i)` onto `gt_boxes(i)`:
+//   dx = (gt_ctr_x - ex_ctr_x) / ex_w / weights[0]
+//   dy = (gt_ctr_y - ex_ctr_y) / ex_h / weights[1]
+//   dw = log(gt_w / ex_w) / weights[2]
+//   dh = log(gt_h / ex_h) / weights[3]
+// Boxes are (x_min, y_min, x_max, y_max) with inclusive "+ 1" extents.
+// `box_delta` must already be allocated with at least `box_num` rows of 4.
+template <typename T>
+void BoxToDelta(int box_num, const Tensor& ex_boxes, const Tensor& gt_boxes,
+                const std::vector<float>& weights, Tensor* box_delta) {
+  auto ex_boxes_et = framework::EigenTensor<T, 2>::From(ex_boxes);
+  auto gt_boxes_et = framework::EigenTensor<T, 2>::From(gt_boxes);
+  auto box_delta_et = framework::EigenTensor<T, 2>::From(*box_delta);
+  T ex_w, ex_h, ex_ctr_x, ex_ctr_y, gt_w, gt_h, gt_ctr_x, gt_ctr_y;
+  for (int64_t i = 0; i < box_num; ++i) {
+    ex_w = ex_boxes_et(i, 2) - ex_boxes_et(i, 0) + 1;
+    ex_h = ex_boxes_et(i, 3) - ex_boxes_et(i, 1) + 1;
+    ex_ctr_x = ex_boxes_et(i, 0) + 0.5 * ex_w;
+    ex_ctr_y = ex_boxes_et(i, 1) + 0.5 * ex_h;
+
+    gt_w = gt_boxes_et(i, 2) - gt_boxes_et(i, 0) + 1;
+    gt_h = gt_boxes_et(i, 3) - gt_boxes_et(i, 1) + 1;
+    gt_ctr_x = gt_boxes_et(i, 0) + 0.5 * gt_w;
+    gt_ctr_y = gt_boxes_et(i, 1) + 0.5 * gt_h;
+
+    // Center offsets are normalized by the example box size.
+    box_delta_et(i, 0) = (gt_ctr_x - ex_ctr_x) / ex_w / weights[0];
+    box_delta_et(i, 1) = (gt_ctr_y - ex_ctr_y) / ex_h / weights[1];
+    // The log size ratio is already scale free, so it must not be divided by
+    // ex_w / ex_h a second time (doing so shrinks the target for large boxes).
+    box_delta_et(i, 2) = log(gt_w / ex_w) / weights[2];
+    box_delta_et(i, 3) = log(gt_h / ex_h) / weights[3];
+  }
+}
+
+// Splits the rows of the overlap matrix `iou` (one row per proposal, one
+// column per ground-truth box) into foreground and background candidates and
+// randomly keeps at most `batch_size_per_im` of them.
+//
+// A row is foreground when its best overlap is > fg_thresh, and background
+// when the best overlap lies in [bg_thresh_lo, bg_thresh_hi). At most
+// floor(batch_size_per_im * fg_fraction) foreground rows are kept; the rest of
+// the budget goes to background rows.
+//
+// Returns three index lists: {kept foreground rows, kept background rows,
+// matched ground-truth column for each kept foreground row}.
+// `engine` is taken by value, so the caller's generator is not advanced.
+template <typename T>
+std::vector<std::vector<int>> SampleFgBgGt(
+    const platform::CPUDeviceContext& context, Tensor* iou,
+    const int batch_size_per_im, const float fg_fraction, const float fg_thresh,
+    const float bg_thresh_hi, const float bg_thresh_lo,
+    std::minstd_rand engine) {
+  std::vector<int> fg_inds;
+  std::vector<int> bg_inds;
+  std::vector<int> gt_inds;
+  T* proposal_to_gt_overlaps = iou->mutable_data<T>(context.GetPlace());
+  int64_t row = iou->dims()[0];
+  int64_t col = iou->dims()[1];
+  float epsilon = 0.00001;
+
+  // Follow the Faster RCNN's implementation
+  for (int64_t i = 0; i < row; ++i) {
+    const T* v = proposal_to_gt_overlaps + i * col;
+    // With no ground-truth columns there is nothing to dereference; treat the
+    // best overlap as zero so the proposal can only qualify as background.
+    T max_overlap =
+        col > 0 ? *std::max_element(v, v + col) : static_cast<T>(0);
+    if (max_overlap > fg_thresh) {
+      // Record the first ground-truth whose overlap equals the maximum.
+      for (int64_t j = 0; j < col; ++j) {
+        T val = proposal_to_gt_overlaps[i * col + j];
+        auto diff = std::abs(max_overlap - val);
+        if (diff < epsilon) {
+          fg_inds.emplace_back(i);
+          gt_inds.emplace_back(j);
+          break;
+        }
+      }
+    } else {
+      if ((max_overlap >= bg_thresh_lo) && (max_overlap < bg_thresh_hi)) {
+        bg_inds.emplace_back(i);
+      }
+    }
+  }
+
+  // Reservoir Sampling
+  int fg_rois_per_im = std::floor(batch_size_per_im * fg_fraction);
+  int fg_rois_this_image = fg_inds.size();
+  int fg_rois_per_this_image = std::min(fg_rois_per_im, fg_rois_this_image);
+  std::uniform_real_distribution<float> uniform(0, 1);
+  const int64_t fg_size = static_cast<int64_t>(fg_inds.size());
+  for (int64_t i = fg_rois_per_this_image; i < fg_size; ++i) {
+    int rng_ind = std::floor(uniform(engine) * i);
+    if (rng_ind < fg_rois_per_this_image) {
+      // fg_inds and gt_inds are parallel arrays and must be swapped together.
+      std::iter_swap(fg_inds.begin() + rng_ind, fg_inds.begin() + i);
+      std::iter_swap(gt_inds.begin() + rng_ind, gt_inds.begin() + i);
+    }
+  }
+  std::vector<int> new_fg_inds(fg_inds.begin(),
+                               fg_inds.begin() + fg_rois_per_this_image);
+  std::vector<int> new_gt_inds(gt_inds.begin(),
+                               gt_inds.begin() + fg_rois_per_this_image);
+
+  int bg_rois_per_image = batch_size_per_im - fg_rois_per_this_image;
+  int bg_rois_this_image = bg_inds.size();
+  int bg_rois_per_this_image = std::min(bg_rois_per_image, bg_rois_this_image);
+  const int64_t bg_size = static_cast<int64_t>(bg_inds.size());
+  for (int64_t i = bg_rois_per_this_image; i < bg_size; ++i) {
+    int rng_ind = std::floor(uniform(engine) * i);
+    // The reservoir here is the background one, so compare against the
+    // background quota (the foreground quota was used before by mistake,
+    // which left most of the background reservoir unsampled).
+    if (rng_ind < bg_rois_per_this_image) {
+      std::iter_swap(bg_inds.begin() + rng_ind, bg_inds.begin() + i);
+    }
+  }
+  std::vector<int> new_bg_inds(bg_inds.begin(),
+                               bg_inds.begin() + bg_rois_per_this_image);
+  std::vector<std::vector<int>> res;
+  res.emplace_back(new_fg_inds);
+  res.emplace_back(new_bg_inds);
+  res.emplace_back(new_gt_inds);
+  return res;
+}
+
+// Collects the sampled rows into three pre-allocated outputs, foreground rows
+// first and background rows after them:
+//   sampled_boxes  - rows of `boxes` selected by fg_inds then bg_inds
+//   sampled_labels - gt_classes[gt_inds] for foreground rows, 0 for background
+//   sampled_gts    - gt_boxes[gt_inds] for foreground rows; background rows
+//                    get gt_boxes[0] as a placeholder
+// `gt_inds` must have one entry per entry of `fg_inds`.
+template <typename T>
+void GatherBoxesLabels(const platform::CPUDeviceContext& context,
+                       const Tensor& boxes, const Tensor& gt_boxes,
+                       const Tensor& gt_classes,
+                       const std::vector<int>& fg_inds,
+                       const std::vector<int>& bg_inds,
+                       const std::vector<int>& gt_inds, Tensor* sampled_boxes,
+                       Tensor* sampled_labels, Tensor* sampled_gts) {
+  int fg_num = fg_inds.size();
+  int bg_num = bg_inds.size();
+  int gt_num = fg_num + bg_num;
+  Tensor fg_inds_t, bg_inds_t, gt_box_inds_t, gt_label_inds_t;
+  int* fg_inds_data = fg_inds_t.mutable_data<int>({fg_num}, context.GetPlace());
+  int* bg_inds_data = bg_inds_t.mutable_data<int>({bg_num}, context.GetPlace());
+  int* gt_box_inds_data =
+      gt_box_inds_t.mutable_data<int>({gt_num}, context.GetPlace());
+  int* gt_label_inds_data =
+      gt_label_inds_t.mutable_data<int>({fg_num}, context.GetPlace());
+  std::copy(fg_inds.begin(), fg_inds.end(), fg_inds_data);
+  std::copy(bg_inds.begin(), bg_inds.end(), bg_inds_data);
+  std::copy(gt_inds.begin(), gt_inds.end(), gt_box_inds_data);
+  // gt_inds only covers the foreground rows. The background tail of the index
+  // buffer used to be left uninitialized, so the gather below read gt_boxes
+  // at arbitrary (possibly out-of-range) rows. Point those rows at index 0;
+  // their targets are never used because background labels are 0.
+  // NOTE(review): this assumes gt_boxes has at least one row - confirm that
+  // images without ground truth never reach this function.
+  std::fill(gt_box_inds_data + fg_num, gt_box_inds_data + gt_num, 0);
+  std::copy(gt_inds.begin(), gt_inds.end(), gt_label_inds_data);
+
+  Tensor fg_boxes, bg_boxes, fg_labels, bg_labels;
+  fg_boxes.mutable_data<T>({fg_num, kBoxDim}, context.GetPlace());
+  CPUGather<T>(context, boxes, fg_inds_t, &fg_boxes);
+  bg_boxes.mutable_data<T>({bg_num, kBoxDim}, context.GetPlace());
+  CPUGather<T>(context, boxes, bg_inds_t, &bg_boxes);
+  Concat<T>(context, fg_boxes, bg_boxes, sampled_boxes);
+  CPUGather<T>(context, gt_boxes, gt_box_inds_t, sampled_gts);
+  fg_labels.mutable_data<int>({fg_num}, context.GetPlace());
+  CPUGather<int>(context, gt_classes, gt_label_inds_t, &fg_labels);
+  bg_labels.mutable_data<int>({bg_num}, context.GetPlace());
+  // Background rois always receive class 0.
+  math::set_constant(context, &bg_labels, 0);
+  Concat<int>(context, fg_labels, bg_labels, sampled_labels);
+}
+
+// Produces the training samples for a single image.
+//
+// Steps: divide the RPN rois by the image scale, prepend the ground-truth
+// boxes to them as extra candidates, compute candidate/ground-truth overlaps,
+// sample foreground and background candidates, encode box regression targets,
+// and expand those targets to one 4-value slot per class.
+//
+// Returns five tensors in this order: sampled rois (multiplied by the image
+// scale again), labels (int), bbox targets, bbox inside weights and bbox
+// outside weights. The last three have kBoxDim * class_nums columns and are
+// non-zero only in the slot of the roi's own (positive) label.
+// `engine` is taken by value, so the caller's generator is not advanced.
+template <typename T>
+std::vector<Tensor> SampleRoisForOneImage(
+    const platform::CPUDeviceContext& context, Tensor* rpn_rois,
+    Tensor* gt_classes, Tensor* gt_boxes, Tensor* im_scale,
+    const int batch_size_per_im, const float fg_fraction, const float fg_thresh,
+    const float bg_thresh_hi, const float bg_thresh_lo,
+    const std::vector<float>& bbox_reg_weights, const int class_nums,
+    std::minstd_rand engine) {
+  auto im_scale_data = im_scale->data<T>()[0];
+
+  // Undo the image scaling in a private buffer. `rpn_rois` is a slice that
+  // shares memory with the operator's input, so dividing it in place would
+  // silently modify Input(RpnRois) for every later reader of that variable.
+  Tensor unscaled_rois;
+  unscaled_rois.mutable_data<T>(rpn_rois->dims(), context.GetPlace());
+  auto rpn_rois_et = framework::EigenTensor<T, 2>::From(*rpn_rois);
+  auto unscaled_rois_et = framework::EigenTensor<T, 2>::From(unscaled_rois);
+  unscaled_rois_et = rpn_rois_et / im_scale_data;
+
+  // Candidate set: ground-truth boxes first, then the proposals.
+  Tensor boxes;
+  int proposals_num = gt_boxes->dims()[0] + rpn_rois->dims()[0];
+  boxes.mutable_data<T>({proposals_num, kBoxDim}, context.GetPlace());
+  Concat<T>(context, *gt_boxes, unscaled_rois, &boxes);
+
+  // Overlaps
+  Tensor proposal_to_gt_overlaps;
+  proposal_to_gt_overlaps.mutable_data<T>({proposals_num, gt_boxes->dims()[0]},
+                                          context.GetPlace());
+  BboxOverlaps<T>(boxes, *gt_boxes, &proposal_to_gt_overlaps);
+
+  // Generate proposal index
+  std::vector<std::vector<int>> fg_bg_gt = SampleFgBgGt<T>(
+      context, &proposal_to_gt_overlaps, batch_size_per_im, fg_fraction,
+      fg_thresh, bg_thresh_hi, bg_thresh_lo, engine);
+  const std::vector<int>& fg_inds = fg_bg_gt[0];
+  const std::vector<int>& bg_inds = fg_bg_gt[1];
+  const std::vector<int>& gt_inds = fg_bg_gt[2];
+
+  // Gather boxes and labels
+  Tensor sampled_boxes, sampled_labels, sampled_gts;
+  int boxes_num = fg_inds.size() + bg_inds.size();
+  framework::DDim bbox_dim({boxes_num, kBoxDim});
+  sampled_boxes.mutable_data<T>(bbox_dim, context.GetPlace());
+  sampled_labels.mutable_data<int>({boxes_num}, context.GetPlace());
+  sampled_gts.mutable_data<T>(bbox_dim, context.GetPlace());
+  GatherBoxesLabels<T>(context, boxes, *gt_boxes, *gt_classes, fg_inds, bg_inds,
+                       gt_inds, &sampled_boxes, &sampled_labels, &sampled_gts);
+
+  // Compute targets
+  Tensor bbox_targets_single;
+  bbox_targets_single.mutable_data<T>(bbox_dim, context.GetPlace());
+  BoxToDelta<T>(boxes_num, sampled_boxes, sampled_gts, bbox_reg_weights,
+                &bbox_targets_single);
+
+  // Scale rois
+  Tensor sampled_rois;
+  sampled_rois.mutable_data<T>(sampled_boxes.dims(), context.GetPlace());
+  auto sampled_rois_et = framework::EigenTensor<T, 2>::From(sampled_rois);
+  auto sampled_boxes_et = framework::EigenTensor<T, 2>::From(sampled_boxes);
+  sampled_rois_et = sampled_boxes_et * im_scale_data;
+
+  // Expand box targets
+  Tensor bbox_targets, bbox_inside_weights, bbox_outside_weights;
+  framework::DDim bbox_expand_dim({boxes_num, kBoxDim * class_nums});
+  bbox_targets.mutable_data<T>(bbox_expand_dim, context.GetPlace());
+  bbox_inside_weights.mutable_data<T>(bbox_expand_dim, context.GetPlace());
+  bbox_outside_weights.mutable_data<T>(bbox_expand_dim, context.GetPlace());
+  math::set_constant(context, &bbox_targets, 0.0);
+  math::set_constant(context, &bbox_inside_weights, 0.0);
+  math::set_constant(context, &bbox_outside_weights, 0.0);
+
+  auto* bbox_targets_single_data = bbox_targets_single.data<T>();
+  auto* sampled_labels_data = sampled_labels.data<int>();
+  auto* bbox_targets_data = bbox_targets.data<T>();
+  auto* bbox_inside_weights_data = bbox_inside_weights.data<T>();
+  auto* bbox_outside_weights_data = bbox_outside_weights.data<T>();
+  int width = kBoxDim * class_nums;
+  for (int64_t i = 0; i < boxes_num; ++i) {
+    int label = sampled_labels_data[i];
+    // Background rows (label 0) keep all-zero targets and weights.
+    if (label > 0) {
+      // A label outside [0, class_nums) would write past the end of row i.
+      PADDLE_ENFORCE(label < class_nums,
+                     "The class label of a sampled roi must be less than "
+                     "class_nums.");
+      const int64_t dst_idx = i * width + kBoxDim * label;
+      const int64_t src_idx = kBoxDim * i;
+      for (int k = 0; k < kBoxDim; ++k) {
+        bbox_targets_data[dst_idx + k] = bbox_targets_single_data[src_idx + k];
+        bbox_inside_weights_data[dst_idx + k] = 1;
+        bbox_outside_weights_data[dst_idx + k] = 1;
+      }
+    }
+  }
+  std::vector<Tensor> res;
+  res.emplace_back(sampled_rois);
+  res.emplace_back(sampled_labels);
+  res.emplace_back(bbox_targets);
+  res.emplace_back(bbox_inside_weights);
+  res.emplace_back(bbox_outside_weights);
+  return res;
+}
+
+// CPU kernel of generate_proposal_labels. For every image in the batch (as
+// delimited by the last LoD level of RpnRois) it samples rois and builds
+// their labels and box regression targets, then packs the per-image results
+// back to back into the outputs and attaches a one-level LoD describing the
+// per-image ranges.
+template <typename T>
+class GenerateProposalLabelsKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto* rpn_rois = context.Input<LoDTensor>("RpnRois");
+    auto* gt_classes = context.Input<LoDTensor>("GtClasses");
+    auto* gt_boxes = context.Input<LoDTensor>("GtBoxes");
+    auto* im_scales = context.Input<LoDTensor>("ImScales");
+
+    auto* rois = context.Output<LoDTensor>("Rois");
+    auto* labels_int32 = context.Output<LoDTensor>("LabelsInt32");
+    auto* bbox_targets = context.Output<LoDTensor>("BboxTargets");
+    auto* bbox_inside_weights = context.Output<LoDTensor>("BboxInsideWeights");
+    auto* bbox_outside_weights =
+        context.Output<LoDTensor>("BboxOutsideWeights");
+
+    int batch_size_per_im = context.Attr<int>("batch_size_per_im");
+    float fg_fraction = context.Attr<float>("fg_fraction");
+    float fg_thresh = context.Attr<float>("fg_thresh");
+    float bg_thresh_hi = context.Attr<float>("bg_thresh_hi");
+    float bg_thresh_lo = context.Attr<float>("bg_thresh_lo");
+    std::vector<float> bbox_reg_weights =
+        context.Attr<std::vector<float>>("bbox_reg_weights");
+    int class_nums = context.Attr<int>("class_nums");
+
+    PADDLE_ENFORCE_EQ(rpn_rois->lod().size(), 1UL,
+                      "GenerateProposalLabelsOp rpn_rois needs 1 level of LoD");
+    PADDLE_ENFORCE_EQ(
+        gt_classes->lod().size(), 1UL,
+        "GenerateProposalLabelsOp gt_classes needs 1 level of LoD");
+    PADDLE_ENFORCE_EQ(gt_boxes->lod().size(), 1UL,
+                      "GenerateProposalLabelsOp gt_boxes needs 1 level of LoD");
+
+    auto rpn_rois_lod = rpn_rois->lod().back();
+    auto gt_classes_lod = gt_classes->lod().back();
+    auto gt_boxes_lod = gt_boxes->lod().back();
+    // All inputs are sliced with the same image index below, so they must
+    // describe the same number of images.
+    PADDLE_ENFORCE_EQ(gt_classes_lod.size(), rpn_rois_lod.size(),
+                      "GenerateProposalLabelsOp gt_classes and rpn_rois must "
+                      "describe the same number of images");
+    PADDLE_ENFORCE_EQ(gt_boxes_lod.size(), rpn_rois_lod.size(),
+                      "GenerateProposalLabelsOp gt_boxes and rpn_rois must "
+                      "describe the same number of images");
+    int64_t n = static_cast<int64_t>(rpn_rois_lod.size() - 1);
+    PADDLE_ENFORCE(im_scales->dims()[0] >= n,
+                   "GenerateProposalLabelsOp im_scales needs one scale per "
+                   "image");
+
+    // Allocate for the worst case of batch_size_per_im rois per image; the
+    // outputs are shrunk to the real count once sampling is done.
+    rois->mutable_data<T>({n * batch_size_per_im, kBoxDim}, context.GetPlace());
+    labels_int32->mutable_data<int>({n * batch_size_per_im},
+                                    context.GetPlace());
+    bbox_targets->mutable_data<T>({n * batch_size_per_im, kBoxDim * class_nums},
+                                  context.GetPlace());
+    bbox_inside_weights->mutable_data<T>(
+        {n * batch_size_per_im, kBoxDim * class_nums}, context.GetPlace());
+    bbox_outside_weights->mutable_data<T>(
+        {n * batch_size_per_im, kBoxDim * class_nums}, context.GetPlace());
+
+    std::random_device rnd;
+    std::minstd_rand engine;
+    int seed =
+        context.Attr<bool>("fix_seed") ? context.Attr<int>("seed") : rnd();
+    engine.seed(seed);
+
+    framework::LoD lod;
+    std::vector<size_t> lod0(1, 0);
+
+    int64_t num_rois = 0;
+    auto& dev_ctx = context.device_context<platform::CPUDeviceContext>();
+
+    for (int64_t i = 0; i < n; ++i) {
+      Tensor rpn_rois_slice =
+          rpn_rois->Slice(rpn_rois_lod[i], rpn_rois_lod[i + 1]);
+      Tensor gt_classes_slice =
+          gt_classes->Slice(gt_classes_lod[i], gt_classes_lod[i + 1]);
+      Tensor gt_boxes_slice =
+          gt_boxes->Slice(gt_boxes_lod[i], gt_boxes_lod[i + 1]);
+      Tensor im_scales_slice = im_scales->Slice(i, i + 1);
+      // The sampler takes its generator by value, so handing it `engine`
+      // directly would replay the identical random stream for every image.
+      // Draw a fresh seed from the master engine instead; this stays
+      // reproducible when fix_seed is set.
+      std::minstd_rand image_engine(engine());
+      std::vector<Tensor> tensor_output = SampleRoisForOneImage<T>(
+          dev_ctx, &rpn_rois_slice, &gt_classes_slice, &gt_boxes_slice,
+          &im_scales_slice, batch_size_per_im, fg_fraction, fg_thresh,
+          bg_thresh_hi, bg_thresh_lo, bbox_reg_weights, class_nums,
+          image_engine);
+      Tensor& sampled_rois = tensor_output[0];
+      Tensor& sampled_labels_int32 = tensor_output[1];
+      Tensor& sampled_bbox_targets = tensor_output[2];
+      Tensor& sampled_bbox_inside_weights = tensor_output[3];
+      Tensor& sampled_bbox_outside_weights = tensor_output[4];
+
+      // Offsets are in elements: rows written so far times the row width.
+      AppendRois<T>(rois, kBoxDim * num_rois, &sampled_rois);
+      AppendRois<int>(labels_int32, num_rois, &sampled_labels_int32);
+      AppendRois<T>(bbox_targets, kBoxDim * num_rois * class_nums,
+                    &sampled_bbox_targets);
+      AppendRois<T>(bbox_inside_weights, kBoxDim * num_rois * class_nums,
+                    &sampled_bbox_inside_weights);
+      AppendRois<T>(bbox_outside_weights, kBoxDim * num_rois * class_nums,
+                    &sampled_bbox_outside_weights);
+
+      num_rois += sampled_rois.dims()[0];
+      lod0.emplace_back(num_rois);
+    }
+
+    lod.emplace_back(lod0);
+    rois->set_lod(lod);
+    labels_int32->set_lod(lod);
+    bbox_targets->set_lod(lod);
+    bbox_inside_weights->set_lod(lod);
+    bbox_outside_weights->set_lod(lod);
+    // Drop the unused tail of the worst-case allocation.
+    rois->Resize({num_rois, kBoxDim});
+    labels_int32->Resize({num_rois});
+    bbox_targets->Resize({num_rois, kBoxDim * class_nums});
+    bbox_inside_weights->Resize({num_rois, kBoxDim * class_nums});
+    bbox_outside_weights->Resize({num_rois, kBoxDim * class_nums});
+  }
+};
+
+// Declares the proto of generate_proposal_labels: four inputs, five outputs
+// and the sampling attributes read by the kernel.
+class GenerateProposalLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override {
+    // TODO(buxingyuan): Add Document
+    // Inputs. The kernel requires one LoD level on RpnRois, GtClasses and
+    // GtBoxes, and reads one ImScales entry per image.
+    AddInput("RpnRois", "RpnRois.");
+    AddInput("GtClasses", "GtClasses.");
+    AddInput("GtBoxes", "GtBoxes.");
+    AddInput("ImScales", "ImScales.");
+
+    // Outputs. All five share the same leading dimension (number of sampled
+    // rois) and the same LoD.
+    AddOutput("Rois", "Rois.");
+    AddOutput("LabelsInt32", "LabelsInt32.");
+    AddOutput("BboxTargets", "BboxTargets.");
+    AddOutput("BboxInsideWeights", "BboxInsideWeights.");
+    AddOutput("BboxOutsideWeights", "BboxOutsideWeights.");
+
+    // Sampling and target-encoding attributes; none of these has a default.
+    AddAttr<int>("batch_size_per_im", "batch_size_per_im");
+    AddAttr<float>("fg_fraction", "fg_fraction");
+    AddAttr<float>("fg_thresh", "fg_thresh");
+    AddAttr<float>("bg_thresh_hi", "bg_thresh_hi");
+    AddAttr<float>("bg_thresh_lo", "bg_thresh_lo");
+    AddAttr<std::vector<float>>("bbox_reg_weights", "bbox_reg_weights");
+    AddAttr<int>("class_nums", "class_nums");
+    // When fix_seed is true the kernel seeds its random engine with `seed`;
+    // otherwise it draws the seed from std::random_device.
+    AddAttr<bool>("fix_seed", "fix_seed").SetDefault(false);
+    AddAttr<int>("seed", "seed").SetDefault(0);
+
+    AddComment(R"DOC(
+Generate Proposals Labels Operator.
+)DOC");
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+// The operator is registered with an empty grad maker, i.e. it has no
+// backward operator.
+REGISTER_OPERATOR(generate_proposal_labels, ops::GenerateProposalLabelsOp,
+                  ops::GenerateProposalLabelsOpMaker,
+                  paddle::framework::EmptyGradOpMaker);
+// Only CPU kernels are provided, for float and double box data.
+REGISTER_OP_CPU_KERNEL(generate_proposal_labels,
+                       ops::GenerateProposalLabelsKernel<float>,
+                       ops::GenerateProposalLabelsKernel<double>);
diff --git a/paddle/fluid/operators/detection/rpn_target_assign_op.cc b/paddle/fluid/operators/detection/rpn_target_assign_op.cc
index 9a1643d5b35c067ba9064286bab32019fb34fbe8..177ff7cf187bc9daf69889e99ca57ae18766de90 100644
--- a/paddle/fluid/operators/detection/rpn_target_assign_op.cc
+++ b/paddle/fluid/operators/detection/rpn_target_assign_op.cc
@@ -86,7 +86,7 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> {
                          std::minstd_rand engine,
                          std::vector<int>* inds) const {
     std::uniform_real_distribution<float> uniform(0, 1);
-    const int64_t size = static_cast<int64_t>(inds->size());
+    const int64_t size = static_cast<int64_t>(inds->size() - offset);
     if (size > num) {
       for (int64_t i = num; i < size; ++i) {
         int rng_ind = std::floor(uniform(engine) * i);
@@ -126,7 +126,7 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> {
                 neg_threshold, target_label_data, fg_inds, bg_inds);
     // Reservoir Sampling
     ReservoirSampling(fg_num, fg_offset, engine, fg_inds);
-    int bg_num = rpn_batch_size - fg_inds->size();
+    int bg_num = rpn_batch_size - (fg_inds->size() - fg_offset);
     ReservoirSampling(bg_num, bg_offset, engine, bg_inds);
   }
 
diff --git a/paddle/fluid/operators/gather_op.cc b/paddle/fluid/operators/gather_op.cc
index aa3e05b83b23569a4dd9c83294916e289f993abc..089b541a0a61adb5efda6b2e027c913d5808dff0 100644
--- a/paddle/fluid/operators/gather_op.cc
+++ b/paddle/fluid/operators/gather_op.cc
@@ -101,5 +101,8 @@ namespace ops = paddle::operators;
 REGISTER_OPERATOR(gather, ops::GatherOp, ops::GatherOpMaker,
                   paddle::framework::DefaultGradOpDescMaker<true>);
 REGISTER_OPERATOR(gather_grad, ops::GatherGradOp);
-REGISTER_OP_CPU_KERNEL(gather, ops::GatherOpKernel<float>);
-REGISTER_OP_CPU_KERNEL(gather_grad, ops::GatherGradientOpKernel<float>);
+REGISTER_OP_CPU_KERNEL(gather, ops::GatherOpKernel<float>,
+                       ops::GatherOpKernel<int>, ops::GatherOpKernel<double>);
+REGISTER_OP_CPU_KERNEL(gather_grad, ops::GatherGradientOpKernel<float>,
+                       ops::GatherGradientOpKernel<int>,
+                       ops::GatherGradientOpKernel<double>);
diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py
index a5bc1fa8f801066a8281a828eb87dbccb4bb0eff..8bb161495badb3272f8c103f5aced6351abab1f0 100644
--- a/python/paddle/fluid/layers/detection.py
+++ b/python/paddle/fluid/layers/detection.py
@@ -39,6 +39,7 @@ __all__ = [
     'detection_map',
     'rpn_target_assign',
     'anchor_generator',
+    'generate_proposal_labels',
     'generate_proposals',
 ]
 
@@ -1256,6 +1257,64 @@ def anchor_generator(input,
     return anchor, var
 
 
+def generate_proposal_labels(rpn_rois,
+                             gt_classes,
+                             gt_boxes,
+                             im_scales,
+                             batch_size_per_im=256,
+                             fg_fraction=0.25,
+                             fg_thresh=0.25,
+                             bg_thresh_hi=0.5,
+                             bg_thresh_lo=0.0,
+                             bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
+                             class_nums=None):
+    """
+    ** Generate proposal labels Faster-RCNN **
+
+    Appends a `generate_proposal_labels` op that, per image, samples
+    foreground and background rois from the RPN proposals together with the
+    ground-truth boxes, and builds their class labels and box regression
+    targets.
+
+    Args:
+        rpn_rois(Variable): Rank-2 LoDTensor (1 LoD level) of proposals, one
+            box of 4 coordinates per row.
+        gt_classes(Variable): Rank-1 LoDTensor (1 LoD level) holding the
+            class of every ground-truth box.
+        gt_boxes(Variable): Rank-2 LoDTensor (1 LoD level) of ground-truth
+            boxes, 4 coordinates per row.
+        im_scales(Variable): Rank-1 tensor with one scale per image. The
+            proposals are divided by it before matching and the sampled rois
+            are multiplied by it again.
+        batch_size_per_im(int): Maximum number of rois sampled per image.
+        fg_fraction(float): Foreground rois are limited to
+            floor(batch_size_per_im * fg_fraction) per image.
+        fg_thresh(float): A roi is foreground when its best overlap with a
+            ground-truth box is greater than this value.
+        bg_thresh_hi(float): Exclusive upper overlap bound for background.
+        bg_thresh_lo(float): Inclusive lower overlap bound for background.
+        bbox_reg_weights(list of float): Four divisors applied to the
+            encoded box deltas (dx, dy, dw, dh).
+        class_nums(int): Number of classes; the box target outputs have
+            4 * class_nums columns. The default None is forwarded to the op
+            unchanged, so callers are expected to pass a value.
+
+    Returns:
+        tuple: (rois, labels_int32, bbox_targets, bbox_inside_weights,
+        bbox_outside_weights). All five have gradients stopped.
+
+    TODO(buxingyuan): Add Document
+    """
+
+    helper = LayerHelper('generate_proposal_labels', **locals())
+
+    # Box-valued outputs follow the dtype of rpn_rois; labels follow
+    # gt_classes.
+    rois = helper.create_tmp_variable(dtype=rpn_rois.dtype)
+    labels_int32 = helper.create_tmp_variable(dtype=gt_classes.dtype)
+    bbox_targets = helper.create_tmp_variable(dtype=rpn_rois.dtype)
+    bbox_inside_weights = helper.create_tmp_variable(dtype=rpn_rois.dtype)
+    bbox_outside_weights = helper.create_tmp_variable(dtype=rpn_rois.dtype)
+
+    helper.append_op(
+        type="generate_proposal_labels",
+        inputs={
+            'RpnRois': rpn_rois,
+            'GtClasses': gt_classes,
+            'GtBoxes': gt_boxes,
+            'ImScales': im_scales
+        },
+        outputs={
+            'Rois': rois,
+            'LabelsInt32': labels_int32,
+            'BboxTargets': bbox_targets,
+            'BboxInsideWeights': bbox_inside_weights,
+            'BboxOutsideWeights': bbox_outside_weights
+        },
+        attrs={
+            'batch_size_per_im': batch_size_per_im,
+            'fg_fraction': fg_fraction,
+            'fg_thresh': fg_thresh,
+            'bg_thresh_hi': bg_thresh_hi,
+            'bg_thresh_lo': bg_thresh_lo,
+            'bbox_reg_weights': bbox_reg_weights,
+            'class_nums': class_nums
+        })
+
+    # The op only produces training targets, so nothing is back-propagated
+    # through its outputs.
+    rois.stop_gradient = True
+    labels_int32.stop_gradient = True
+    bbox_targets.stop_gradient = True
+    bbox_inside_weights.stop_gradient = True
+    bbox_outside_weights.stop_gradient = True
+
+    return rois, labels_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights
+
+
 def generate_proposals(scores,
                        bbox_deltas,
                        im_info,
diff --git a/python/paddle/fluid/tests/test_detection.py b/python/paddle/fluid/tests/test_detection.py
index b71b440d3c9155e63220f5f2d7e849e8332bfb16..9cbd8b68666f0ea82186c517c10515ebe1b9d2a5 100644
--- a/python/paddle/fluid/tests/test_detection.py
+++ b/python/paddle/fluid/tests/test_detection.py
@@ -146,6 +146,55 @@ class TestAnchorGenerator(unittest.TestCase):
         assert anchor.shape[3] == 4
 
 
+class TestGenerateProposalLabels(unittest.TestCase):
+    def test_generate_proposal_labels(self):
+        # Declare the four level-1 LoD inputs, build the layer and compare
+        # the shapes reported by its five outputs.
+        num_classes = 5
+
+        # (name, shape, dtype) of every input, in creation order.
+        input_specs = [
+            ('rpn_rois', [4, 4], 'float32'),
+            ('gt_classes', [6], 'int32'),
+            ('gt_boxes', [6, 4], 'float32'),
+            ('im_scales', [1], 'float32'),
+        ]
+        # Create the variables in the order listed above.
+        feeds = {}
+        for var_name, var_shape, var_dtype in input_specs:
+            feeds[var_name] = layers.data(
+                name=var_name,
+                shape=var_shape,
+                dtype=var_dtype,
+                lod_level=1,
+                append_batch_size=False)
+
+        outputs = fluid.layers.generate_proposal_labels(
+            rpn_rois=feeds['rpn_rois'],
+            gt_classes=feeds['gt_classes'],
+            gt_boxes=feeds['gt_boxes'],
+            im_scales=feeds['im_scales'],
+            batch_size_per_im=2,
+            fg_fraction=0.5,
+            fg_thresh=0.5,
+            bg_thresh_hi=0.5,
+            bg_thresh_lo=0.0,
+            bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
+            class_nums=num_classes)
+        rois, labels, targets, inside_w, outside_w = outputs
+
+        # Rois are rows of four box coordinates.
+        assert rois.shape[1] == 4
+
+        # Every other output has one row per roi.
+        for per_roi in (labels, targets, inside_w, outside_w):
+            assert rois.shape[0] == per_roi.shape[0]
+
+        # Targets and both weight outputs reserve four columns per class.
+        for per_class in (targets, inside_w, outside_w):
+            assert per_class.shape[1] == 4 * num_classes
+
+
 class TestMultiBoxHead(unittest.TestCase):
     def test_multi_box_head(self):
         data_shape = [3, 224, 224]
diff --git a/python/paddle/fluid/tests/unittests/test_generate_proposal_labels.py b/python/paddle/fluid/tests/unittests/test_generate_proposal_labels.py
new file mode 100644
index 0000000000000000000000000000000000000000..ce766fffbce98a6a2cee4c508d6db85ee0163401
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_generate_proposal_labels.py
@@ -0,0 +1,317 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import numpy as np
+import sys
+import math
+import paddle.fluid as fluid
+from op_test import OpTest
+
+
+def generate_proposal_labels_in_python(
+        rpn_rois, gt_classes, gt_boxes, im_scales, batch_size_per_im,
+        fg_fraction, fg_thresh, bg_thresh_hi, bg_thresh_lo, bbox_reg_weights,
+        class_nums):
+    """Python reference: run `_sample_rois` per image and collect the results.
+
+    The first four arguments are per-image lists of equal length. Returns
+    (rois, labels_int32, bbox_targets, bbox_inside_weights,
+    bbox_outside_weights, lod), each a list with one entry per image;
+    lod[i] is the number of rois sampled for image i.
+    """
+    # Every per-image input must cover the same number of images.
+    assert len(rpn_rois) == len(gt_classes) == len(gt_boxes) == len(
+        im_scales), 'per-image inputs (rois, gt, im_scales) differ in length'
+    blob_keys = ('rois', 'labels_int32', 'bbox_targets',
+                 'bbox_inside_weights', 'bbox_outside_weights')
+    collected = {key: [] for key in blob_keys}
+    lod = []
+    for im_i in range(len(im_scales)):
+        frcn_blobs = _sample_rois(
+            rpn_rois[im_i], gt_classes[im_i], gt_boxes[im_i], im_scales[im_i],
+            batch_size_per_im, fg_fraction, fg_thresh, bg_thresh_hi,
+            bg_thresh_lo, bbox_reg_weights, class_nums)
+        lod.append(frcn_blobs['rois'].shape[0])
+        for key in blob_keys:
+            collected[key].append(frcn_blobs[key])
+
+    return tuple(collected[key] for key in blob_keys) + (lod, )
+
+
+def _sample_rois(rpn_rois, gt_classes, gt_boxes, im_scale, batch_size_per_im,
+                 fg_fraction, fg_thresh, bg_thresh_hi, bg_thresh_lo,
+                 bbox_reg_weights, class_nums):  # Sample fg/bg rois for one image and build their targets.
+    rois_per_image = int(batch_size_per_im)
+    fg_rois_per_im = int(np.round(fg_fraction * rois_per_image))
+
+    # Undo the image scaling on the proposals (presumably gt_boxes are already unscaled - confirm)
+    inv_im_scale = 1. / im_scale
+    rpn_rois = rpn_rois * inv_im_scale
+
+    boxes = np.vstack([gt_boxes, rpn_rois])  # gt boxes come first, so they are candidates too
+    gt_overlaps = np.zeros((boxes.shape[0], class_nums))
+    box_to_gt_ind_map = np.zeros((boxes.shape[0]), dtype=np.int32)
+    if len(gt_boxes) > 0:
+        proposal_to_gt_overlaps = _bbox_overlaps(boxes, gt_boxes)
+
+        overlaps_argmax = proposal_to_gt_overlaps.argmax(axis=1)
+        overlaps_max = proposal_to_gt_overlaps.max(axis=1)
+        # Boxes that have non-zero overlap with at least one gt box
+        overlapped_boxes_ind = np.where(overlaps_max > 0)[0]
+        overlapped_boxes_gt_classes = gt_classes[overlaps_argmax[
+            overlapped_boxes_ind]]
+        gt_overlaps[overlapped_boxes_ind,
+                    overlapped_boxes_gt_classes] = overlaps_max[
+                        overlapped_boxes_ind]
+        box_to_gt_ind_map[overlapped_boxes_ind] = overlaps_argmax[
+            overlapped_boxes_ind]
+
+    max_overlaps = gt_overlaps.max(axis=1)
+    max_classes = gt_overlaps.argmax(axis=1)
+
+    # Foreground: boxes whose best overlap reaches fg_thresh
+    fg_inds = np.where(max_overlaps >= fg_thresh)[0]
+    fg_rois_per_this_image = np.minimum(fg_rois_per_im, fg_inds.shape[0])
+    # Sample foreground if there are too many
+    if fg_inds.shape[0] > fg_rois_per_this_image:
+        fg_inds = np.random.choice(
+            fg_inds, size=fg_rois_per_this_image, replace=False)
+
+    # Background: boxes whose best overlap lies in [bg_thresh_lo, bg_thresh_hi)
+    bg_inds = np.where((max_overlaps < bg_thresh_hi) & (max_overlaps >=
+                                                        bg_thresh_lo))[0]
+    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
+    bg_rois_per_this_image = np.minimum(bg_rois_per_this_image,
+                                        bg_inds.shape[0])
+    # Sample background if there are too many
+    if bg_inds.shape[0] > bg_rois_per_this_image:
+        bg_inds = np.random.choice(
+            bg_inds, size=bg_rois_per_this_image, replace=False)
+
+    keep_inds = np.append(fg_inds, bg_inds)  # foreground indices first, then background
+    sampled_labels = max_classes[keep_inds]
+    sampled_labels[fg_rois_per_this_image:] = 0  # entries after the foreground ones get label 0
+    sampled_boxes = boxes[keep_inds]
+    sampled_gts = gt_boxes[box_to_gt_ind_map[keep_inds]]
+    sampled_gts[fg_rois_per_this_image:, :] = gt_boxes[0]  # NOTE(review): raises IndexError on empty gt_boxes despite the guard above - confirm intent
+
+    bbox_label_targets = _compute_targets(sampled_boxes, sampled_gts,
+                                          sampled_labels, bbox_reg_weights)
+    bbox_targets, bbox_inside_weights = _expand_bbox_targets(bbox_label_targets,
+                                                             class_nums)
+    bbox_outside_weights = np.array(
+        bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype)  # 1 wherever the inside weight is positive
+
+    # Scale the sampled rois back by im_scale
+    sampled_rois = sampled_boxes * im_scale
+
+    # Faster RCNN blobs, keyed as generate_proposal_labels_in_python reads them
+    frcn_blobs = dict(
+        rois=sampled_rois,
+        labels_int32=sampled_labels,
+        bbox_targets=bbox_targets,
+        bbox_inside_weights=bbox_inside_weights,
+        bbox_outside_weights=bbox_outside_weights)
+    return frcn_blobs
+
+
+def _bbox_overlaps(roi_boxes, gt_boxes):
+    """Pairwise IoU of roi_boxes against gt_boxes (x1, y1, x2, y2 rows).
+
+    Widths and heights are measured as `hi - lo + 1`, clamped at zero.
+    Returns an array of shape (len(roi_boxes), len(gt_boxes)).
+    """
+    roi_area = np.maximum(roi_boxes[:, 2] - roi_boxes[:, 0] + 1, 0) * \
+        np.maximum(roi_boxes[:, 3] - roi_boxes[:, 1] + 1, 0)
+    gt_area = np.maximum(gt_boxes[:, 2] - gt_boxes[:, 0] + 1, 0) * \
+        np.maximum(gt_boxes[:, 3] - gt_boxes[:, 1] + 1, 0)
+    num_rois, num_gts = roi_boxes.shape[0], gt_boxes.shape[0]
+    iou_matrix = np.zeros((num_rois, num_gts))
+    for r, roi in enumerate(roi_boxes):
+        for g, gt in enumerate(gt_boxes):
+            # Intersection rectangle; empty intersections clamp to zero.
+            left, top = np.maximum(roi[0], gt[0]), np.maximum(roi[1], gt[1])
+            right, bottom = np.minimum(roi[2], gt[2]), np.minimum(roi[3], gt[3])
+            shared = np.maximum(right - left + 1, 0) * \
+                np.maximum(bottom - top + 1, 0)
+            iou_matrix[r, g] = shared / (roi_area[r] + gt_area[g] - shared)
+    return iou_matrix
+
+
+def _compute_targets(roi_boxes, gt_boxes, labels, bbox_reg_weights):
+    """Return float32 rows of [label, dx, dy, dw, dh] for each roi/gt pair."""
+    assert roi_boxes.shape[0] == gt_boxes.shape[0]
+    assert roi_boxes.shape[1] == 4
+    assert gt_boxes.shape[1] == 4
+
+    # Box deltas; _box_to_delta divides each component by its weight.
+    targets = _box_to_delta(
+        ex_boxes=roi_boxes, gt_boxes=gt_boxes,
+        weights=np.asarray(bbox_reg_weights))
+    return np.hstack([labels[:, np.newaxis], targets]).astype(
+        np.float32, copy=False)
+
+
+def _box_to_delta(ex_boxes, gt_boxes, weights):  # Encode gt_boxes relative to ex_boxes as (dx, dy, dw, dh) rows.
+    ex_w = ex_boxes[:, 2] - ex_boxes[:, 0] + 1
+    ex_h = ex_boxes[:, 3] - ex_boxes[:, 1] + 1
+    ex_ctr_x = ex_boxes[:, 0] + 0.5 * ex_w
+    ex_ctr_y = ex_boxes[:, 1] + 0.5 * ex_h
+    # Same width / height / centre quantities for the gt boxes.
+    gt_w = gt_boxes[:, 2] - gt_boxes[:, 0] + 1
+    gt_h = gt_boxes[:, 3] - gt_boxes[:, 1] + 1
+    gt_ctr_x = gt_boxes[:, 0] + 0.5 * gt_w
+    gt_ctr_y = gt_boxes[:, 1] + 0.5 * gt_h
+    # NOTE(review): dw/dh are also divided by ex_w/ex_h, unlike the usual log(gt / ex) / weight form; presumably this mirrors the operator under test - confirm before changing.
+    dx = (gt_ctr_x - ex_ctr_x) / ex_w / weights[0]
+    dy = (gt_ctr_y - ex_ctr_y) / ex_h / weights[1]
+    dw = (np.log(gt_w / ex_w)) / ex_w / weights[2]
+    dh = (np.log(gt_h / ex_h)) / ex_h / weights[3]
+    # Stack the four delta vectors and transpose to one row per box.
+    targets = np.vstack([dx, dy, dw, dh]).transpose()
+    return targets
+
+
+def _expand_bbox_targets(bbox_targets_input, class_nums):
+    """Scatter [label, dx, dy, dw, dh] rows into per-class target slots.
+
+    Rows with label > 0 get their four deltas written to columns
+    [4 * label, 4 * label + 4) and inside weights of 1.0 there.
+    """
+    labels = bbox_targets_input[:, 0]
+    expanded = np.zeros((labels.shape[0], 4 * class_nums))
+    inside_weights = np.zeros(expanded.shape)
+    for row in np.where(labels > 0)[0]:
+        cols = slice(int(labels[row]) * 4, int(labels[row]) * 4 + 4)
+        expanded[row, cols] = bbox_targets_input[row, 1:]
+        inside_weights[row, cols] = (1.0, 1.0, 1.0, 1.0)
+    return expanded, inside_weights
+
+
+class TestGenerateProposalLabelsOp(OpTest):
+    def set_data(self):  # assemble the op's inputs, attrs and expected outputs
+        self.init_test_params()
+        self.init_test_input()
+        self.init_test_output()
+        self.inputs = {
+            'RpnRois': (self.rpn_rois[0], self.rpn_rois_lod),
+            'GtClasses': (self.gt_classes[0], self.gts_lod),
+            'GtBoxes': (self.gt_boxes[0], self.gts_lod),
+            'ImScales': self.im_scales[0]
+        }
+        self.attrs = {
+            'batch_size_per_im': self.batch_size_per_im,
+            'fg_fraction': self.fg_fraction,
+            'fg_thresh': self.fg_thresh,
+            'bg_thresh_hi': self.bg_thresh_hi,
+            'bg_thresh_lo': self.bg_thresh_lo,
+            'bbox_reg_weights': self.bbox_reg_weights,
+            'class_nums': self.class_nums
+        }
+        self.outputs = {  # self.lod holds the per-image roi counts from the Python reference
+            'Rois': (self.rois[0], [self.lod]),
+            'LabelsInt32': (self.labels_int32[0], [self.lod]),
+            'BboxTargets': (self.bbox_targets[0], [self.lod]),
+            'BboxInsideWeights': (self.bbox_inside_weights[0], [self.lod]),
+            'BboxOutsideWeights': (self.bbox_outside_weights[0], [self.lod]),
+        }
+
+    def test_check_output(self):
+        self.check_output()
+
+    def setUp(self):
+        self.op_type = 'generate_proposal_labels'
+        self.set_data()
+
+    def init_test_params(self):  # attribute values passed to the op and to the reference
+        self.batch_size_per_im = 10
+        self.fg_fraction = 1.0
+        self.fg_thresh = 0.5
+        self.bg_thresh_hi = 0.5
+        self.bg_thresh_lo = 0.0
+        self.bbox_reg_weights = [0.1, 0.1, 0.2, 0.2]
+        self.class_nums = 81
+
+    def init_test_input(self):  # random proposals and ground truth for every image
+        np.random.seed(0)  # fixed seed so the generated inputs are reproducible
+        image_nums = 1  # set_data feeds only index 0 of each per-image list
+        gt_nums = 6  # gt_nums + proposal_nums equals batch_size_per_im in this test
+        proposal_nums = self.batch_size_per_im - gt_nums
+        images_shape = []
+        self.im_scales = []
+        for i in range(image_nums):
+            images_shape.append(np.random.randint(200, size=2))
+            self.im_scales.append(np.ones((1)).astype(np.float32))
+
+        self.rpn_rois, self.rpn_rois_lod = _generate_proposals(images_shape,
+                                                               proposal_nums)
+        ground_truth, self.gts_lod = _generate_groundtruth(
+            images_shape, self.class_nums, gt_nums)
+        self.gt_classes = [gt['gt_classes'] for gt in ground_truth]
+        self.gt_boxes = [gt['boxes'] for gt in ground_truth]
+
+    def init_test_output(self):  # expected outputs come from the Python reference implementation
+        self.rois, self.labels_int32, self.bbox_targets, \
+        self.bbox_inside_weights, self.bbox_outside_weights, \
+        self.lod = generate_proposal_labels_in_python(
+                self.rpn_rois, self.gt_classes, self.gt_boxes, self.im_scales,
+                self.batch_size_per_im, self.fg_fraction,
+                self.fg_thresh, self.bg_thresh_hi, self.bg_thresh_lo,
+                self.bbox_reg_weights, self.class_nums
+            )
+
+
+def _generate_proposals(images_shape, proposal_nums):
+    """Create `proposal_nums` random boxes per image plus a one-level LoD.
+
+    The LoD holds the cumulative proposal count after each image.
+    """
+    rpn_rois, offsets = [], []
+    for image_shape in images_shape:
+        rpn_rois.append(_generate_boxes(image_shape, proposal_nums))
+        offsets.append(len(rpn_rois[-1]) + (offsets[-1] if offsets else 0))
+    return rpn_rois, [offsets]
+
+
+def _generate_groundtruth(images_shape, class_nums, gt_nums):
+    """Create `gt_nums` random labelled boxes per image, plus a one-level
+    LoD holding the cumulative ground-truth count after each image."""
+    ground_truth = []
+    offsets = []
+    for image_shape in images_shape:
+        # Labels start at 1, so class 0 is never drawn.
+        labels = np.random.randint(
+            low=1, high=class_nums, size=gt_nums).astype(np.int32)
+        located = _generate_boxes(image_shape, gt_nums)
+        ground_truth.append(dict(gt_classes=labels, boxes=located))
+        offsets.append(len(labels) + (offsets[-1] if offsets else 0))
+    return ground_truth, [offsets]
+
+
+def _generate_boxes(image_size, box_nums):
+    """Draw `box_nums` random (x1, y1, x2, y2) float32 boxes for an image
+    whose (width, height) is `image_size`; coordinates clip to [0, size - 1]."""
+    img_w, img_h = image_size[0], image_size[1]
+    unit = np.random.rand(box_nums, 4)
+    top_left = unit[:, [0, 1]] * image_size
+    bottom_right = top_left + unit[:, [2, 3]] * (image_size - top_left)
+    boxes = np.hstack([top_left, bottom_right])
+    for cols, limit in (([0, 2], img_w), ([1, 3], img_h)):
+        boxes[:, cols] = np.minimum(limit - 1., np.maximum(0., boxes[:, cols]))
+    return boxes.astype(np.float32)
+
+
+if __name__ == '__main__':  # run the unit tests when this file is executed directly
+    unittest.main()