Refine target_assign_op to unify the classification and regression targets assigning. (#8326)

* Refine target_assign_op to unify the classification and regression targets assignment. * Fix the unit testing. * Fix conflicts.

Refine target_assign_op to unify the classification and regression targets assigning. (#8326)
* Refine target_assign_op to unify the classification and regression targets assignment. * Fix the unit testing. * Fix conflicts.
72bcf72c · qingqing01 · GitHub · 9030a655 · 72bcf72c · 72bcf72c
4 changed files
--- a/paddle/fluid/operators/target_assign_op.cc
+++ b/paddle/fluid/operators/target_assign_op.cc
@@ -22,69 +22,43 @@ class TargetAssignOp : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
-    // checkout inputs
-    PADDLE_ENFORCE(ctx->HasInput("EncodedGTBBox"),
-                   "Input(EncodedGTBBox) of TargetAssignOp should not be null");
-    PADDLE_ENFORCE(ctx->HasInput("GTScoreLabel"),
-                   "Input(GTScoreLabel) of TargetAssignOp should not be null");
+    PADDLE_ENFORCE(ctx->HasInput("X"),
+                   "Input(X) of TargetAssignOp should not be null");
    PADDLE_ENFORCE(ctx->HasInput("MatchIndices"),
                   "Input(MatchIndices) of TargetAssignOp should not be null");
-    PADDLE_ENFORCE(ctx->HasInput("NegIndices"),
-                   "Input(NegIndices) of TargetAssignOp should not be null");
-
-    // checkout outputs
-    PADDLE_ENFORCE(
-        ctx->HasOutput("PredBBoxLabel"),
-        "Output(PredBBoxLabel) of TargetAssignOp should not be null.");
-    PADDLE_ENFORCE(
-        ctx->HasOutput("PredBBoxWeight"),
-        "Output(PredBBoxWeight) of TargetAssignOp should not be null.");
-    PADDLE_ENFORCE(
-        ctx->HasOutput("PredScoreLabel"),
-        "Output(PredScoreLabel) of TargetAssignOp should not be null.");
-    PADDLE_ENFORCE(
-        ctx->HasOutput("PredScoreWeight"),
-        "Output(PredScoreWeight) of TargetAssignOp should not be null.");
-
-    auto blabel_dims = ctx->GetInputDim("EncodedGTBBox");
-    auto slabel_dims = ctx->GetInputDim("GTScoreLabel");
+
+    PADDLE_ENFORCE(ctx->HasOutput("Out"),
+                   "Output(Out) of TargetAssignOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("OutWeight"),
+                   "Output(OutWeight) of TargetAssignOp should not be null.");
+
+    auto in_dims = ctx->GetInputDim("X");
    auto mi_dims = ctx->GetInputDim("MatchIndices");
-    auto neg_dims = ctx->GetInputDim("NegIndices");

-    PADDLE_ENFORCE_EQ(blabel_dims.size(), 3UL,
-                      "The rank of Input(EncodedGTBBox) must be 3.");
-    PADDLE_ENFORCE_EQ(slabel_dims.size(), 2UL,
-                      "The rank of Input(GTScoreLabel) must be 2.");
-    PADDLE_ENFORCE_EQ(mi_dims.size(), 2UL,
+    PADDLE_ENFORCE_EQ(in_dims.size(), 3, "The rank of Input(X) must be 3.");
+    PADDLE_ENFORCE_EQ(mi_dims.size(), 2,
                      "The rank of Input(MatchIndices) must be 2.");
-    PADDLE_ENFORCE_EQ(neg_dims.size(), 2UL,
-                      "The rank of Input(NegIndices) must be 2.");
-
-    PADDLE_ENFORCE_EQ(blabel_dims[0], slabel_dims[0],
-                      "The 1st dimension (means the total number of "
-                      "ground-truth bounding boxes) of Input(EncodedGTBBox) "
-                      "and Input(GTScoreLabel) must be the same.");
-    PADDLE_ENFORCE_EQ(blabel_dims[1], mi_dims[1],
-                      "The 2nd dimension (means the number of priod boxes) "
-                      "of Input(EncodedGTBBox) and "
-                      "Input(MatchIndices) must be the same.");
-    PADDLE_ENFORCE_EQ(blabel_dims[2], 4,
-                      "The 3rd dimension of Input(EncodedGTBBox) must be 4.");
+
+    if (ctx->HasInput("NegIndices")) {  // NegIndices is an optional input
+      auto neg_dims = ctx->GetInputDim("NegIndices");
+      PADDLE_ENFORCE_EQ(neg_dims.size(), 2,
+                        "The rank of Input(NegIndices) must be 2.");
+      PADDLE_ENFORCE_EQ(neg_dims[1], 1,
+                        "The last dimension of Input(NegIndices) must be 1.");
+    }

    auto n = mi_dims[0];
-    auto np = mi_dims[1];
-    ctx->SetOutputDim("PredBBoxLabel", {n, np, 4});
-    ctx->SetOutputDim("PredBBoxWeight", {n, np, 1});
-    ctx->SetOutputDim("PredScoreLabel", {n, np, 1});
-    ctx->SetOutputDim("PredScoreWeight", {n, np, 1});
+    auto m = mi_dims[1];
+    auto k = in_dims[in_dims.size() - 1];  // last dimension of X
+    ctx->SetOutputDim("Out", {n, m, k});
+    ctx->SetOutputDim("OutWeight", {n, m, 1});  // one weight per (n, m) row
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    return framework::OpKernelType(
-        framework::ToDataType(
-            ctx.Input<framework::LoDTensor>("EncodedGTBBox")->type()),
+        framework::ToDataType(ctx.Input<framework::LoDTensor>("X")->type()),
        ctx.device_context());
  }
 };
@@ -93,102 +67,87 @@ class TargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  TargetAssignOpMaker(OpProto* proto, OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("EncodedGTBBox",
-             "(LoDTensor), The encoded ground-truth bounding boxes with shape "
-             "[Ng, Np, 4], where Ng is the total number of ground-truth boxes "
-             "in this mini-batch, Np the number of predictions, 4 is the "
-             "number of coordinate in [xmin, ymin, xmax, ymax] layout.");
-    AddInput("GTScoreLabel",
-             "(LoDTensor, default LoDTensor<int>),  The input ground-truth "
-             "labels with shape [Ng, 1], where the Ng is the same as it in "
-             "the input of EncodedGTBBox.");
+    AddInput("X",
+             "(LoDTensor), This input is a 3D LoDTensor with shape [M, P, K]. "
+             "Some elements in X will be assigned to Out based on the "
+             "MatchIndices and NegIndices.");
    AddInput("MatchIndices",
             "(Tensor, default Tensor<int>), The input matched indices "
-             "with shape [N, Np], where N is the batch size, Np is the same "
-             "as it in the input of EncodedGTBBox. If MatchIndices[i][j] "
-             "is -1, the j-th prior box is not matched to any ground-truh "
-             "box in i-th instance.");
+             "with shape [N, P]. If MatchIndices[i][j] is -1, the j-th entity "
+             "of column is not matched to any entity of row in i-th instance.");
    AddInput("NegIndices",
             "(LoDTensor, default LoDTensor<int>), The input negative example "
-             "indices with shape [Neg, 1], where is the total number of "
-             "negative example indices.");
-    AddAttr<int>("background_label",
-                 "(int, default 0), Label index of background class.")
+             "indices are an optional input with shape [Neg, 1], where Neg is "
+             "the total number of negative example indices.")
+        .AsDispensable();
+    AddAttr<int>("mismatch_value",
+                 "(int, default 0), Fill this value to the "
+                 "mismatched location.")
        .SetDefault(0);
-    AddOutput("PredBBoxLabel",
-              "(Tensor), The output encoded ground-truth labels "
-              "with shape [N, Np, 4], N is the batch size and Np, 4 is the "
-              "same as they in input of EncodedGTBBox. If MatchIndices[i][j] "
-              "is -1, the PredBBoxLabel[i][j][:] is the encoded ground-truth "
-              "box for background_label in i-th instance.");
-    AddOutput("PredBBoxWeight",
-              "(Tensor), The weight for PredBBoxLabel with the shape "
-              "of [N, Np, 1]");
-    AddOutput("PredScoreLabel",
-              "(Tensor, default Tensor<int>), The output score labels for "
-              "each predictions with shape [N, Np, 1]. If MatchIndices[i][j] "
-              "is -1, PredScoreLabel[i][j] = background_label.");
-    AddOutput("PredScoreWeight",
-              "(Tensor), The weight for PredScoreLabel with the shape "
-              "of [N, Np, 1]");
+    AddOutput("Out",
+              "(Tensor), The output is a 3D Tensor with shape [N, P, K], "
+              "N and P are the same as they are in MatchIndices, K is the "
+              "same as it in input of X. If MatchIndices[i][j] "
+              "is -1, the Out[i][j][0 : K] is the mismatch_value.");
+    AddOutput("OutWeight",
+              "(Tensor), The weight for output with the shape of [N, P, 1]");
    AddComment(R"DOC(
-This operator is, for given the encoded boxes between prior boxes and
-ground-truth boxes and ground-truth class labels, to assign classification
-and regression targets to each prior box as well as weights to each
-prior box. The weights is used to specify which prior box would not contribute
-to training loss.
-
-For each instance, the output `PredBBoxLabel`, `PredBBoxWeight`,
-`PredScoreLabel` and `PredScoreWeight` are assigned based on `MatchIndices`.
-Assumed that the row offset for each instance in `EncodedGTBBox` is called lod,
-this operato assigns classification/regression targets by performing the
+This operator, given the target bounding boxes or labels, assigns
+classification and regression targets to each prediction as well as
+weights to each prediction. The weights are used to specify which prediction
+would not contribute to training loss.
+
+For each instance, the output `Out` and `OutWeight` are assigned based on
+`MatchIndices` and `NegIndices`.
+Assumed that the row offset for each instance in `X` is called lod,
+this operator assigns classification/regression targets by performing the
 following steps:

-1. Assigning all outpts based on `MatchIndices`:
+1. Assigning all outputs based on `MatchIndices`:

-If id = MatchIndices[i][j] > 0,
+If id = MatchIndices[i][j] >= 0,

-    PredBBoxLabel[i][j] = EncodedGTBBox[lod[i] + id][j]
-    PredBBoxWeight[i][j] = 1.
-    PredScoreLabel[i][j] = GTScoreLabel[lod[i] + id]
-    PredScoreWeight[i][j] = 1.
+    Out[i][j][0 : K] = X[lod[i] + id][j % P][0 : K]
+    OutWeight[i][j] = 1.

 Otherwise, 

-    PredBBoxLabel[j][j] = [0., 0., 0., 0.]
-    PredBBoxWeight[i][j] = 0.
-    PredScoreLabel[i][j] = background_label
-    PredScoreWeight[i][j] = 0.
+    Out[i][j][0 : K] = {mismatch_value, mismatch_value, ...}
+    OutWeight[i][j] = 0.

-2. Assigning PredScoreWeight based on `NegIndices`:
+2. Assigning OutWeight based on `NegIndices` if `NegIndices` is provided:

-Assumed that the row offset for each instance in `NegIndices` is caleed neg_lod,
-for i-th instance and all ids of NegIndices in this instance:
+Assumed that the row offset for each instance in `NegIndices` is called neg_lod,
+for i-th instance and each `id` of NegIndices in this instance:

-    PredScoreLabel[i][id] = background_label
-    PredScoreWeight[i][id] = 1.0
+    Out[i][id][0 : K] = {mismatch_value, mismatch_value, ...}
+    OutWeight[i][id] = 1.0

    )DOC");
  }
 };

-template <typename T>
-struct NegTargetAssignFunctor<platform::CPUDeviceContext, T> {
+template <typename T, typename WT>
+struct NegTargetAssignFunctor<platform::CPUDeviceContext, T, WT> {
  void operator()(const platform::CPUDeviceContext& ctx, const int* neg_indices,
-                  const size_t* lod, const int num, const int num_prior_box,
-                  const int background_label, int* out_label, T* out_label_wt) {
-    for (int i = 0; i < num; ++i) {
+                  const size_t* lod, const int N, const int M, const int K,
+                  const int mismatch_value, T* out, WT* out_wt) {
+    for (int i = 0; i < N; ++i) {  // i-th instance; lod gives its id range
      for (size_t j = lod[i]; j < lod[i + 1]; ++j) {
        int id = neg_indices[j];
-        out_label[i * num_prior_box + id] = background_label;
-        out_label_wt[i * num_prior_box + id] = static_cast<T>(1.0);
+        int row = i * M + id;  // flat (instance, prediction) row index
+        for (int k = 0; k < K; ++k) {
+          out[row * K + k] = static_cast<T>(mismatch_value);
+        }
+        out_wt[row] = static_cast<WT>(1.0);  // OutWeight is [N, M, 1]
      }
    }
  }
 };

-template struct NegTargetAssignFunctor<platform::CPUDeviceContext, float>;
-template struct NegTargetAssignFunctor<platform::CPUDeviceContext, double>;
+template struct NegTargetAssignFunctor<platform::CPUDeviceContext, int, float>;
+template struct NegTargetAssignFunctor<platform::CPUDeviceContext, float,
+                                       float>;

 }  // namespace operators
 }  // namespace paddle
@@ -198,5 +157,5 @@ REGISTER_OP_WITHOUT_GRADIENT(target_assign, ops::TargetAssignOp,
                             ops::TargetAssignOpMaker);
 REGISTER_OP_CPU_KERNEL(
    target_assign,
-    ops::TargetAssignKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::TargetAssignKernel<paddle::platform::CPUDeviceContext, double>);
+    ops::TargetAssignKernel<paddle::platform::CPUDeviceContext, int, float>,
+    ops::TargetAssignKernel<paddle::platform::CPUDeviceContext, float, float>);
--- a/paddle/fluid/operators/target_assign_op.cu
+++ b/paddle/fluid/operators/target_assign_op.cu
@@ -17,39 +17,41 @@ limitations under the License. */
 namespace paddle {
 namespace operators {

-template <typename T>
+template <typename T, typename WT>
 __global__ void NegTargetAssignKernel(const int* neg_indices, const size_t* lod,
-                                      const int num, const int num_prior_box,
-                                      const int background_label,
-                                      int* out_label, T* out_label_wt) {
+                                      const int N, const int M, const int K,
+                                      const int mismatch_value, T* out,
+                                      WT* out_wt) {
  int bidx = blockIdx.x;
  int st = lod[bidx];
  int ed = lod[bidx + 1];

-  int row_start = bidx * num_prior_box;
+  int row_start = bidx * M;
  for (int i = st + threadIdx.x; i < ed; i += blockDim.x) {
    int id = row_start + neg_indices[i];
-    out_label[id] = background_label;
-    out_label_wt[id] = 1.;
+    for (int k = 0; k < K; ++k) {
+      out[id * K + k] = T(mismatch_value);
+    }
+    out_wt[id] = WT(1.);  // OutWeight is [N, M, 1]: one weight per row
  }
 }

-template <typename T>
-struct NegTargetAssignFunctor<platform::CUDADeviceContext, T> {
+template <typename T, typename WT>
+struct NegTargetAssignFunctor<platform::CUDADeviceContext, T, WT> {
  void operator()(const platform::CUDADeviceContext& ctx,
-                  const int* neg_indices, const size_t* lod, const int num,
-                  const int num_prior_box, const int background_label,
-                  int* out_label, T* out_label_wt) {
+                  const int* neg_indices, const size_t* lod, const int N,
+                  const int M, const int K, const int mismatch_value, T* out,
+                  WT* out_wt) {
    const int block_size = 256;
-    const int grid_size = num;
-    NegTargetAssignKernel<T><<<grid_size, block_size, 0, ctx.stream()>>>(
-        neg_indices, lod, num, num_prior_box, background_label, out_label,
-        out_label_wt);
+    const int grid_size = N;
+    NegTargetAssignKernel<T, WT><<<grid_size, block_size, 0, ctx.stream()>>>(
+        neg_indices, lod, N, M, K, mismatch_value, out, out_wt);
  }
 };

-template struct NegTargetAssignFunctor<platform::CUDADeviceContext, float>;
-template struct NegTargetAssignFunctor<platform::CUDADeviceContext, double>;
+template struct NegTargetAssignFunctor<platform::CUDADeviceContext, int, float>;
+template struct NegTargetAssignFunctor<platform::CUDADeviceContext, float,
+                                       float>;

 }  // namespace operators
 }  // namespace paddle
@@ -57,5 +59,5 @@ template struct NegTargetAssignFunctor<platform::CUDADeviceContext, double>;
 namespace ops = paddle::operators;
 REGISTER_OP_CUDA_KERNEL(
    target_assign,
-    ops::TargetAssignKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::TargetAssignKernel<paddle::platform::CUDADeviceContext, double>);
+    ops::TargetAssignKernel<paddle::platform::CUDADeviceContext, int, float>,
+    ops::TargetAssignKernel<paddle::platform::CUDADeviceContext, float, float>);
--- a/paddle/fluid/operators/target_assign_op.h
+++ b/paddle/fluid/operators/target_assign_op.h
@@ -19,140 +19,113 @@ limitations under the License. */

 namespace paddle {
 namespace operators {
-
-template <typename T>
+template <typename T, typename WT>
 struct TargetAssignFunctor {
-  const T* gt_box_;
-  const int* gt_label_;
+  const T* in_;
  const int* match_indices_;
  const size_t* lod_;
-  const int background_label_;
-  const int64_t num_;
-  const int64_t num_prior_box_;
-
-  T* out_box_;
-  T* out_box_wt_;
-  int* out_label_;
-  T* out_label_wt_;
-
-  TargetAssignFunctor(const T* gt_box, const int* gt_label,
-                      const int* match_indices, const size_t* lod,
-                      const int background_label, const int64_t num,
-                      const int64_t np, T* out_box, T* out_box_wt,
-                      int* out_label, T* out_label_wt)
-      : gt_box_(gt_box),
-        gt_label_(gt_label),
+  const int mismatch_value_;
+  const int64_t N_;
+  const int64_t M_;
+  const int64_t P_;
+  const int64_t K_;
+
+  T* out_;
+  WT* out_wt_;
+
+  TargetAssignFunctor(const T* input, const int* match_indices,
+                      const size_t* lod, const int mismatch_value,
+                      const int64_t N, const int64_t M, const int64_t P,
+                      const int64_t K, T* out, WT* out_wt)
+      : in_(input),
        match_indices_(match_indices),
        lod_(lod),
-        background_label_(background_label),
-        num_(num),
-        num_prior_box_(np),
-        out_box_(out_box),
-        out_box_wt_(out_box_wt),
-        out_label_(out_label),
-        out_label_wt_(out_label_wt) {}
+        mismatch_value_(mismatch_value),
+        N_(N),
+        M_(M),
+        P_(P),
+        K_(K),
+        out_(out),
+        out_wt_(out_wt) {}

  HOSTDEVICE void operator()(size_t i) const {
-    int row = i / num_prior_box_;
-    int col = i - row * num_prior_box_;
+    int h = i / M_;
+    int w = i - h * M_;

-    size_t row_off = lod_[row];
-    int offset = row * num_prior_box_ + col;
+    size_t off = lod_[h];
+    int id = match_indices_[i];

-    int id = match_indices_[offset];
-    T* obox = out_box_ + offset * 4;
-    int* olabel = out_label_ + offset;
-    T* obox_wt = out_box_wt_ + offset;
-    T* olabel_wt = out_label_wt_ + offset;
+    T* out = out_ + i * K_;
+    WT* out_wt = out_wt_ + i;

    if (id > -1) {
-      const T* gtbox = gt_box_ + ((row_off + id) * num_prior_box_ + col) * 4;
-
-      obox[0] = gtbox[0];
-      obox[1] = gtbox[1];
-      obox[2] = gtbox[2];
-      obox[3] = gtbox[3];
-
-      olabel[0] = gt_label_[row_off + id];
-      obox_wt[0] = static_cast<T>(1.);
-      olabel_wt[0] = static_cast<T>(1.);
+      int w_off = w % P_;
+      const T* in = in_ + ((off + id) * P_ + w_off) * K_;
+      for (int64_t k = 0; k < K_; ++k) {
+        out[k] = in[k];
+      }
+      out_wt[0] = static_cast<WT>(1.);
    } else {
-      obox[0] = static_cast<T>(0.);
-      obox[1] = static_cast<T>(0.);
-      obox[2] = static_cast<T>(0.);
-      obox[3] = static_cast<T>(0.);
-
-      olabel[0] = background_label_;
-      obox_wt[0] = static_cast<T>(0.);
-      olabel_wt[0] = static_cast<T>(0.);
+      for (int64_t k = 0; k < K_; ++k) {
+        out[k] = static_cast<T>(mismatch_value_);
+      }
+      out_wt[0] = static_cast<WT>(0.);
    }
  }
 };

-template <typename DeviceContext, typename T>
+template <typename DeviceContext, typename T, typename WT>
 struct NegTargetAssignFunctor {
  void operator()(const platform::DeviceContext& ctx, const int* neg_indices,
-                  const size_t* lod, const int num, const int num_prior_box,
-                  const int background_label, int* out_label,
-                  T* out_label_wt) const;
+                  const size_t* lod, const int N, const int M, const int K,
+                  const int mismatch_value, T* out, WT* out_wt) const;
 };

-template <typename DeviceContext, typename T>
+template <typename DeviceContext, typename T, typename WT>
 class TargetAssignKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* enc_gt_box = ctx.Input<framework::LoDTensor>("EncodedGTBBox");
-    auto* gt_label = ctx.Input<framework::LoDTensor>("GTScoreLabel");
+    auto* x = ctx.Input<framework::LoDTensor>("X");
    auto* match_indices = ctx.Input<framework::Tensor>("MatchIndices");
-    auto* neg_indices = ctx.Input<framework::LoDTensor>("NegIndices");
-
-    auto* out_box = ctx.Output<framework::Tensor>("PredBBoxLabel");
-    auto* out_box_wt = ctx.Output<framework::Tensor>("PredBBoxWeight");
-    auto* out_label = ctx.Output<framework::Tensor>("PredScoreLabel");
-    auto* out_label_wt = ctx.Output<framework::Tensor>("PredScoreWeight");

-    PADDLE_ENFORCE_EQ(enc_gt_box->lod().size(), 1UL);
-    PADDLE_ENFORCE_EQ(gt_label->lod().size(), 1UL);
-    PADDLE_ENFORCE_EQ(neg_indices->lod().size(), 1UL);
+    auto* out = ctx.Output<framework::Tensor>("Out");
+    auto* out_wt = ctx.Output<framework::Tensor>("OutWeight");

-    int background_label = ctx.Attr<int>("background_label");
+    PADDLE_ENFORCE_EQ(x->lod().size(), 1UL);
+    int mismatch_value = ctx.Attr<int>("mismatch_value");

-    const T* box_data = enc_gt_box->data<T>();
-    const int* label_data = gt_label->data<int>();
+    const T* x_data = x->data<T>();
    const int* match_idx_data = match_indices->data<int>();
-    const int* neg_idx_data = neg_indices->data<int>();

-    T* obox_data = out_box->mutable_data<T>(ctx.GetPlace());
-    T* obox_wt_data = out_box_wt->mutable_data<T>(ctx.GetPlace());
-    int* olabel_data = out_label->mutable_data<int>(ctx.GetPlace());
-    T* olabel_wt_data = out_label_wt->mutable_data<T>(ctx.GetPlace());
+    T* out_data = out->mutable_data<T>(ctx.GetPlace());
+    WT* out_wt_data = out_wt->mutable_data<WT>(ctx.GetPlace());

-    int64_t num = match_indices->dims()[0];
-    int64_t num_prior_box = match_indices->dims()[1];
+    int64_t n = match_indices->dims()[0];
+    int64_t m = match_indices->dims()[1];
+    int64_t p = x->dims()[1];
+    int64_t k = x->dims()[2];

-    auto gt_lod = enc_gt_box->lod().back();
-    auto gt_label_lod = gt_label->lod().back();
-    auto neg_lod = neg_indices->lod().back();
-    for (size_t i = 0; i < gt_lod.size(); ++i) {
-      PADDLE_ENFORCE_EQ(gt_lod.data()[i], gt_label_lod.data()[i]);
-    }
-
-    size_t* gt_lod_data = gt_lod.MutableData(ctx.GetPlace());
-    size_t* neg_lod_data = neg_lod.MutableData(ctx.GetPlace());
+    auto x_lod = x->lod().back();
+    size_t* x_lod_data = x_lod.MutableData(ctx.GetPlace());

-    TargetAssignFunctor<T> functor(box_data, label_data, match_idx_data,
-                                   gt_lod_data, background_label, num,
-                                   num_prior_box, obox_data, obox_wt_data,
-                                   olabel_data, olabel_wt_data);
+    TargetAssignFunctor<T, WT> functor(x_data, match_idx_data, x_lod_data,
+                                       mismatch_value, n, m, p, k, out_data,
+                                       out_wt_data);

    auto& device_ctx = ctx.template device_context<DeviceContext>();
-    platform::ForRange<DeviceContext> for_range(device_ctx,
-                                                num * num_prior_box);
+    platform::ForRange<DeviceContext> for_range(device_ctx, n * m);
    for_range(functor);

-    NegTargetAssignFunctor<DeviceContext, T> neg_trg_functor;
-    neg_trg_functor(device_ctx, neg_idx_data, neg_lod_data, num, num_prior_box,
-                    background_label, olabel_data, olabel_wt_data);
+    auto* neg_indices = ctx.Input<framework::LoDTensor>("NegIndices");
+    if (neg_indices) {
+      PADDLE_ENFORCE_EQ(neg_indices->lod().size(), 1UL);
+      const int* neg_idx_data = neg_indices->data<int>();
+      auto neg_lod = neg_indices->lod().back();
+      size_t* neg_lod_data = neg_lod.MutableData(ctx.GetPlace());
+      NegTargetAssignFunctor<DeviceContext, T, WT> neg_trg_functor;
+      neg_trg_functor(device_ctx, neg_idx_data, neg_lod_data, n, m, k,
+                      mismatch_value, out_data, out_wt_data);
+    }
  }
 };


--- a/python/paddle/v2/fluid/tests/test_target_assign_op.py
+++ b/python/paddle/v2/fluid/tests/test_target_assign_op.py
@@ -43,7 +43,7 @@ def gen_match_and_neg_indices(num_prior, gt_lod, neg_lod):


 def target_assign(encoded_box, gt_label, match_indices, neg_indices, gt_lod,
-                  neg_lod, background_label):
+                  neg_lod, mismatch_value):
    batch_size, num_prior = match_indices.shape

    # init target bbox
@@ -52,7 +52,7 @@ def target_assign(encoded_box, gt_label, match_indices, neg_indices, gt_lod,
    trg_box_wt = np.zeros((batch_size, num_prior, 1)).astype('float32')
    # init target label
    trg_label = np.ones((batch_size, num_prior, 1)).astype('int32')
-    trg_label = trg_label * background_label
+    trg_label = trg_label * mismatch_value
    # init weight for target label
    trg_label_wt = np.zeros((batch_size, num_prior, 1)).astype('float32')

@@ -65,53 +65,90 @@ def target_assign(encoded_box, gt_label, match_indices, neg_indices, gt_lod,
        # target bbox
        for v, c in zip(col_val + gt_start, col_ids[0].tolist()):
            trg_box[i][c][:] = encoded_box[v][c][:]
-
        # weight for target bbox
        trg_box_wt[i][col_ids] = 1.0

        trg_label[i][col_ids] = gt_label[col_val + gt_start]
-
        trg_label_wt[i][col_ids] = 1.0
        # set target label weight to 1.0 for the negative samples
-        neg_ids = neg_indices[neg_lod[i]:neg_lod[i + 1]]
-        trg_label_wt[i][neg_ids] = 1.0
+        if neg_indices is not None:
+            neg_ids = neg_indices[neg_lod[i]:neg_lod[i + 1]]
+            trg_label_wt[i][neg_ids] = 1.0

    return trg_box, trg_box_wt, trg_label, trg_label_wt


-class TestTargetAssginOp(OpTest):
+class TestTargetAssginFloatType(OpTest):
    def setUp(self):
        self.op_type = "target_assign"
+        num_prior = 120
+        num_class = 21
+        gt_lod = [0, 5, 11, 23]
+        neg_lod = [0, 4, 7, 13]
+        mismatch_value = 0
+        batch_size = len(gt_lod) - 1
+        num_gt = gt_lod[-1]
+
+        encoded_box = np.random.random((num_gt, num_prior, 4)).astype('float32')
+        gt_label = np.random.randint(
+            num_class, size=(num_gt, 1)).astype('int32')
+
+        match_indices, neg_indices = gen_match_and_neg_indices(num_prior,
+                                                               gt_lod, neg_lod)

+        out, out_wt, _, _ = target_assign(encoded_box, gt_label, match_indices,
+                                          neg_indices, gt_lod, neg_lod,
+                                          mismatch_value)
+
+        # assign regression targets
+        x = encoded_box
+        self.inputs = {
+            'X': (x, [gt_lod]),
+            'MatchIndices': match_indices,
+        }
+        self.attrs = {'mismatch_value': mismatch_value}
+        self.outputs = {
+            'Out': out,
+            'OutWeight': out_wt,
+        }
+
+    def test_check_output(self):
+        self.check_output()
+
+
+class TestTargetAssginIntType(OpTest):
+    def setUp(self):
+        self.op_type = "target_assign"
        num_prior = 120
        num_class = 21
        gt_lod = [0, 5, 11, 23]
        neg_lod = [0, 4, 7, 13]
+        mismatch_value = 0
        batch_size = len(gt_lod) - 1
        num_gt = gt_lod[-1]
-        background_label = 0

        encoded_box = np.random.random((num_gt, num_prior, 4)).astype('float32')
        gt_label = np.random.randint(
            num_class, size=(num_gt, 1)).astype('int32')
+
        match_indices, neg_indices = gen_match_and_neg_indices(num_prior,
                                                               gt_lod, neg_lod)
-        trg_box, trg_box_wt, trg_label, trg_label_wt = target_assign(
-            encoded_box, gt_label, match_indices, neg_indices, gt_lod, neg_lod,
-            background_label)

+        _, _, out, out_wt, = target_assign(encoded_box, gt_label, match_indices,
+                                           neg_indices, gt_lod, neg_lod,
+                                           mismatch_value)
+
+        # assign classification targets
+        x = np.reshape(gt_label, (num_gt, 1, 1))
        self.inputs = {
-            'EncodedGTBBox': (encoded_box, [gt_lod]),
-            'GTScoreLabel': (gt_label, [gt_lod]),
-            'MatchIndices': (match_indices),
+            'X': (x, [gt_lod]),
+            'MatchIndices': match_indices,
            'NegIndices': (neg_indices, [neg_lod]),
        }
-        self.attrs = {'background_label': background_label}
+        self.attrs = {'mismatch_value': mismatch_value}
        self.outputs = {
-            'PredBBoxLabel': (trg_box),
-            'PredBBoxWeight': (trg_box_wt),
-            'PredScoreLabel': (trg_label),
-            'PredScoreWeight': (trg_label_wt),
+            'Out': out,
+            'OutWeight': out_wt,
        }

    def test_check_output(self):