From cd859dc96895e2bc3ce54e3a733960abac8c1499 Mon Sep 17 00:00:00 2001
From: liuqi <liuqi10@xiaomi.com>
Date: Wed, 21 Mar 2018 17:21:56 +0800
Subject: [PATCH] Fix proposal op bug.

---
 mace/core/operator.cc     |   2 -
 mace/kernels/proposal.h   | 194 +++++++++++++++++++++++++++++++++-----
 mace/ops/proposal.cc      |   3 +-
 mace/ops/proposal.h       |  26 +++--
 mace/ops/proposal_test.cc |  43 ++++++---
 5 files changed, 220 insertions(+), 48 deletions(-)
diff --git a/mace/core/operator.cc b/mace/core/operator.cc
index 4c501759..ee15aed6 100644
--- a/mace/core/operator.cc
+++ b/mace/core/operator.cc
@@ -90,7 +90,6 @@ extern void Register_Eltwise(OperatorRegistry *op_registry);
 extern void Register_FullyConnected(OperatorRegistry *op_registry);
 extern void Register_Slice(OperatorRegistry *op_registry);
 extern void Register_Proposal(OperatorRegistry *op_registry);
-extern void Register_PSROIAlign(OperatorRegistry *op_registry);
 
 }  // namespace ops
 
@@ -121,7 +120,6 @@ OperatorRegistry::OperatorRegistry() {
   ops::Register_FullyConnected(this);
   ops::Register_Slice(this);
   ops::Register_Proposal(this);
-  ops::Register_PSROIAlign(this);
 }
 
 }  // namespace mace
diff --git a/mace/kernels/proposal.h b/mace/kernels/proposal.h
index e5f0a87c..53db9c0f 100644
--- a/mace/kernels/proposal.h
+++ b/mace/kernels/proposal.h
@@ -5,9 +5,12 @@
 #ifndef MACE_KERNELS_PROPOSAL_H_
 #define MACE_KERNELS_PROPOSAL_H_
 
+#include <algorithm>
+#include <vector>
+
 #include "mace/core/future.h"
 #include "mace/core/tensor.h"
-#include "mace/core/public/mace.h"
+#include "mace/public/mace.h"
 
 namespace mace {
 namespace kernels {
@@ -22,16 +25,22 @@ static std::vector<float> WHCenters(const std::vector<float> &anchor) {
   return window;
 }
 
-std::vector<std::vector<float>> GenerateAnchors(const std::vector<int> &scales, const std::vector<float> &ratios,
-                     const int base_size = 16) {
-  const std::vector<float> base_anchor = {0, 0, (float)base_size-1, (float)base_size-1};
+std::vector<std::vector<float>> GenerateAnchors(
+    const std::vector<int> &scales,
+    const std::vector<float> &ratios,
+    const int base_size) {
+  const std::vector<float> base_anchor =
+      {0, 0,
+       static_cast<float>(base_size-1),
+       static_cast<float>(base_size-1)};
 
   const size_t scales_size = scales.size();
   const size_t ratios_size = ratios.size();
   // get height, width, centers
   std::vector<float> base_window = WHCenters(base_anchor);
   const float size = base_window[0] * base_window[1];
-  std::vector<std::vector<float>> anchors(scales_size * ratios_size, std::vector<float>(4));
+  std::vector<std::vector<float>> anchors(scales_size * ratios_size,
+                                          std::vector<float>(4));
 
   int idx = 0;
   std::vector<float> tmp_anchor(4);
@@ -56,26 +65,91 @@ std::vector<std::vector<float>> GenerateAnchors(const std::vector<int> &scales,
   return anchors;
 }
 
+// Greedy non-maximum suppression over `num_bboxes` boxes stored as
+// consecutive (x1, y1, x2, y2) quadruples in `bboxes_ptr`. Boxes are visited
+// in index order, so callers are expected to pass them sorted by descending
+// score; `scores_ptr` is kept for interface compatibility and is not read.
+// Returns the indices of the kept boxes in visiting order (empty when
+// `num_bboxes` is 0). A box is dropped when its IoU with an earlier kept box
+// is >= `thresh`.
+// Declared inline because this definition lives in a header that more than
+// one translation unit may include.
+inline std::vector<int> nms(const float *bboxes_ptr,
+                            const float *scores_ptr,
+                            const index_t num_bboxes,
+                            const float thresh) {
+  std::vector<int> keep;
+  std::vector<int> suppressed(num_bboxes, 0);
+
+  // Box areas use the inclusive-pixel convention (+1 on each side length).
+  std::vector<float> areas(num_bboxes, 0);
+  for (index_t i = 0; i < num_bboxes; ++i) {
+    const float *box = bboxes_ptr + (i << 2);
+    areas[i] = (box[2] - box[0] + 1) * (box[3] - box[1] + 1);
+  }
+
+  // Greedy pass: keep the next unsuppressed box, suppress what it overlaps.
+  for (index_t i = 0; i < num_bboxes; ++i) {
+    if (suppressed[i] == 1) continue;
+    keep.push_back(static_cast<int>(i));
+    const float *box = bboxes_ptr + (i << 2);
+    for (index_t j = i + 1; j < num_bboxes; ++j) {
+      if (suppressed[j] == 1) continue;
+
+      const float *other = bboxes_ptr + (j << 2);
+      // Overlap width/height clamp to zero when the boxes are disjoint.
+      const float inter_w = std::max<float>(
+          0.0f, std::min(box[2], other[2]) - std::max(box[0], other[0]) + 1);
+      const float inter_h = std::max<float>(
+          0.0f, std::min(box[3], other[3]) - std::max(box[1], other[1]) + 1);
+      const float inter = inter_w * inter_h;
+      // IoU = inter / union, where union = area_i + area_j - inter.
+      if ((inter / (areas[i] + areas[j] - inter)) >= thresh) {
+        suppressed[j] = 1;
+      }
+    }
+  }
+  return keep;
+}
+
 
 template<DeviceType D, typename T>
 struct ProposalFunctor {
-  ProposalFunctor(const int feat_stride, const std::vector<int> &scales) :
+  ProposalFunctor(const int min_size,
+                  const float nms_thresh,
+                  const int pre_nms_top_n,
+                  const int post_nms_top_n,
+                  const int feat_stride,
+                  const int base_size,
+                  const std::vector<int> &scales,
+                  const std::vector<float> &ratios) :
+      min_size_(min_size),
+      thresh_(nms_thresh),
+      pre_nms_top_n_(pre_nms_top_n),
+      post_nms_top_n_(post_nms_top_n),
       feat_stride_(feat_stride),
-      scales_(scales),
-      anchors_(GenerateAnchors(scales, {0.5, 1, 2})) {}
+      anchors_(GenerateAnchors(scales, ratios, base_size)) {}
 
   void operator()(const Tensor *rpn_cls_prob,
                   const Tensor *rpn_bbox_pred,
-                  const Tensor *im_info,
+                  const Tensor *img_info_tensor,
                   Tensor *output,
                   StatsFuture *future) {
+    MACE_CHECK(rpn_cls_prob->dim(1) == rpn_bbox_pred->dim(1) &&
+        rpn_cls_prob->dim(2) == rpn_bbox_pred->dim(2));
+    MACE_CHECK((rpn_cls_prob->dim(3) / 2 == rpn_bbox_pred->dim(3) / 4) &&
+        (rpn_cls_prob->dim(3) / 2 == anchors_.size()));
+    const float *img_info = img_info_tensor->data<float>();
+    const index_t im_height = img_info[0] - 1;
+    const index_t im_width = img_info[1] - 1;
     const index_t feat_height = rpn_cls_prob->dim(1);
     const index_t feat_width = rpn_cls_prob->dim(2);
     const int anchors_size = anchors_.size();
 
-    // shift anchors
-    std::vector<std::vector<float>> shifted_anchors(anchors_.size() * feat_height * feat_width,
-                                                    std::vector<float>(4));
+    // shift anchors to original input
+    std::vector<std::vector<float>> proposals(
+        anchors_size * feat_height * feat_width,
+        std::vector<float>(4));
     int shift_w, shift_h;
     int sanc_idx = 0;
     for (int h_idx = 0; h_idx < feat_height; ++h_idx) {
@@ -83,38 +157,116 @@ struct ProposalFunctor {
       for (int w_idx = 0; w_idx < feat_width; ++w_idx) {
         shift_w = w_idx * feat_stride_;
         for (int a_idx = 0; a_idx < anchors_size; ++a_idx) {
-          shifted_anchors[sanc_idx][0] = anchors_[a_idx][0] + shift_w;
-          shifted_anchors[sanc_idx][1] = anchors_[a_idx][1] + shift_h;
-          shifted_anchors[sanc_idx][2] = anchors_[a_idx][2] + shift_w;
-          shifted_anchors[sanc_idx][3] = anchors_[a_idx][3] + shift_h;
+          proposals[sanc_idx][0] = anchors_[a_idx][0] + shift_w;
+          proposals[sanc_idx][1] = anchors_[a_idx][1] + shift_h;
+          proposals[sanc_idx][2] = anchors_[a_idx][2] + shift_w;
+          proposals[sanc_idx][3] = anchors_[a_idx][3] + shift_h;
           sanc_idx++;
         }
       }
     }
     // Convert anchors into proposals via bbox transformations
+    // 2. clip predicted boxes to image
+    // 3. remove predicted boxes with either height or width < min_size
+    // (min_size_ scaled by img_info[2]; float, so the product isn't truncated)
+    std::vector<int> keep;
+    const float min_size = min_size_ * img_info[2];
 
-    // clip predicted boxes to image
+    const float *bbox_deltas = rpn_bbox_pred->data<float>();
+    sanc_idx = 0;
+    for (int h_idx = 0; h_idx < feat_height; ++h_idx) {
+      for (int w_idx = 0; w_idx < feat_width; ++w_idx) {
+        for (int a_idx = 0; a_idx < anchors_size; ++a_idx) {
+          float width = proposals[sanc_idx][2] - proposals[sanc_idx][0] + 1;
+          float height = proposals[sanc_idx][3] - proposals[sanc_idx][1] + 1;
+          int delta_offset = sanc_idx * 4;
+          float pred_ctr_x = bbox_deltas[delta_offset + 0] * width +
+              (proposals[sanc_idx][0] + width / 2);
+          float pred_ctr_y = bbox_deltas[delta_offset + 1] * height +
+              (proposals[sanc_idx][1] + height / 2);
+          float pred_w = std::exp(bbox_deltas[delta_offset + 2]) * width;
+          float pred_h = std::exp(bbox_deltas[delta_offset + 3]) * height;
 
-    // remove predicted boxes with either height or width < threshold
+          proposals[sanc_idx][0] = std::max<float>(
+              std::min<float>(pred_ctr_x - pred_w / 2, im_width),
+              0);
+          proposals[sanc_idx][1] = std::max<float>(
+              std::min<float>(pred_ctr_y - pred_h / 2, im_height),
+              0);
+          proposals[sanc_idx][2] = std::max<float>(
+              std::min<float>(pred_ctr_x + pred_w / 2, im_width),
+              0);
+          proposals[sanc_idx][3] = std::max<float>(
+              std::min<float>(pred_ctr_y + pred_h / 2, im_height),
+              0);
+
+          width = proposals[sanc_idx][2] - proposals[sanc_idx][0] + 1;
+          height = proposals[sanc_idx][3] - proposals[sanc_idx][1] + 1;
+          if (width >= min_size && height >= min_size) {
+            keep.push_back(sanc_idx);
+          }
+          sanc_idx++;
+        }
+      }
+    }
 
     // 4. sort all (proposal, score) pairs by score from highest to lowest
     // 5. take top pre_nms_topN (e.g. 6000)
+    auto scores = rpn_cls_prob->data<float>();
+    const int scores_chan = static_cast<int>(rpn_cls_prob->dim(3));
+
+    auto score_idx_func = [&](int idx) -> int {
+      return (idx / anchors_size) * scores_chan +
+          (idx % anchors_size) + anchors_size;
+    };
+    std::sort(keep.begin(), keep.end(), [&](int left, int right) -> bool{
+      return scores[score_idx_func(left)] >
+          scores[score_idx_func(right)];
+    });
+
+    int size = std::min<int>(pre_nms_top_n_, keep.size());
+    std::vector<float> nms_scores(size, 0);
+    std::vector<float> nms_proposals((size << 2), 0);
+    for (int i = 0; i < size; ++i) {
+      nms_scores[i] = scores[score_idx_func(keep[i])];
+      nms_proposals[i << 2] = proposals[keep[i]][0];
+      nms_proposals[(i << 2) + 1] = proposals[keep[i]][1];
+      nms_proposals[(i << 2) + 2] = proposals[keep[i]][2];
+      nms_proposals[(i << 2) + 3] = proposals[keep[i]][3];
+    }
 
     /* 6. apply nms (e.g. threshold = 0.7)
        7. take after_nms_topN (e.g. 300)
        8. return the top proposals (-> RoIs top) */
+    auto nms_result = nms(nms_proposals.data(), nms_scores.data(),
+                          nms_scores.size(), thresh_);
 
     // Output rois blob
     // Our RPN implementation only supports a single input image, so all
     // batch inds are 0
+    size = std::min<int>(post_nms_top_n_, nms_result.size());
+    output->Resize({1, 1, size, 5});
+    auto output_ptr = output->mutable_data<float>();
+    for (int i = 0; i < size; ++i) {
+      const int out_idx = i * 5;
+      const int nms_idx = nms_result[i] * 4;
+      output_ptr[out_idx] = 0;
+      output_ptr[out_idx + 1] = nms_proposals[nms_idx];
+      output_ptr[out_idx + 2] = nms_proposals[nms_idx + 1];
+      output_ptr[out_idx + 3] = nms_proposals[nms_idx + 2];
+      output_ptr[out_idx + 4] = nms_proposals[nms_idx + 3];
+    }
   }
 
+  const int min_size_;
+  const float thresh_;
+  const int pre_nms_top_n_;
+  const int post_nms_top_n_;
   const int feat_stride_;
-  const std::vector<int> scales_;
   std::vector<std::vector<float>> anchors_;
 };
 
-}  //  namepsace kernels
-}  //  namespace mace
+}  // namespace kernels
+}  // namespace mace
 
 #endif  //  MACE_KERNELS_PROPOSAL_H_
diff --git a/mace/ops/proposal.cc b/mace/ops/proposal.cc
index 7da97a24..853a4e5b 100644
--- a/mace/ops/proposal.cc
+++ b/mace/ops/proposal.cc
@@ -13,8 +13,7 @@ void Register_Proposal(OperatorRegistry *op_registry) {
                                      .TypeConstraint<float>("T")
                                      .Build(),
                     ProposalOp<DeviceType::CPU, float>);
-
 }
 
 }  // namespace ops
-}  //  namespace mace
+}  // namespace mace
diff --git a/mace/ops/proposal.h b/mace/ops/proposal.h
index 646abbf1..06dcc8a1 100644
--- a/mace/ops/proposal.h
+++ b/mace/ops/proposal.h
@@ -2,30 +2,37 @@
 // Copyright (c) 2017 XiaoMi All rights reserved.
 //
 
-#ifndef MACE_SOFTMAX_H_
-#define MACE_SOFTMAX_H_
+#ifndef MACE_OPS_PROPOSAL_H_
+#define MACE_OPS_PROPOSAL_H_
 
 #include "mace/core/operator.h"
 #include "mace/kernels/proposal.h"
 
 namespace mace {
+namespace ops {
 
 template <DeviceType D, class T>
 class ProposalOp : public Operator<D, T> {
  public:
   ProposalOp(const OperatorDef &operator_def, Workspace *ws)
       : Operator<D, T>(operator_def, ws),
-        functor_(OperatorBase::GetSingleArgument<int>("feat_stride", 1),
-                 OperatorBase::GetRepeatedArgument<int>("scales")) {}
+        functor_(OperatorBase::GetSingleArgument<int>("min_size", 0),
+                 OperatorBase::GetSingleArgument<float>("nms_thresh", 0),
+                 OperatorBase::GetSingleArgument<int>("pre_nms_top_n", 0),
+                 OperatorBase::GetSingleArgument<int>("post_nms_top_n", 0),
+                 OperatorBase::GetSingleArgument<int>("feat_stride", 0),
+                 OperatorBase::GetSingleArgument<int>("base_size", 16),
+                 OperatorBase::GetRepeatedArgument<int>("scales"),
+                 OperatorBase::GetRepeatedArgument<float>("ratios")) {}
 
   bool Run(StatsFuture *future) override {
     const Tensor *rpn_cls_prob = this->Input(RPN_CLS_PROB);
     const Tensor *rpn_bbox_pred = this->Input(RPN_BBOX_PRED);
-    const Tensor *im_info = this->Input(IM_INFO);
+    const Tensor *img_info = this->Input(IMG_INFO);
 
     Tensor *output = this->Output(ROIS);
 
-    functor_(rpn_cls_prob, rpn_bbox_pred, im_info, output, future);
+    functor_(rpn_cls_prob, rpn_bbox_pred, img_info, output, future);
     return true;
   }
 
@@ -33,10 +40,11 @@ class ProposalOp : public Operator<D, T> {
   kernels::ProposalFunctor<D, T> functor_;
 
  protected:
-  OP_INPUT_TAGS(RPN_CLS_PROB, RPN_BBOX_PRED, IM_INFO);
+  OP_INPUT_TAGS(RPN_CLS_PROB, RPN_BBOX_PRED, IMG_INFO);
   OP_OUTPUT_TAGS(ROIS);
 };
 
-}  //  namespace mace
+}  // namespace ops
+}  // namespace mace
 
-#endif  //  MACE_SOFTMAX_H_
+#endif  //  MACE_OPS_PROPOSAL_H_
diff --git a/mace/ops/proposal_test.cc b/mace/ops/proposal_test.cc
index 26ad31cf..94203deb 100644
--- a/mace/ops/proposal_test.cc
+++ b/mace/ops/proposal_test.cc
@@ -6,41 +6,56 @@
 #include "mace/ops/ops_test_util.h"
 
 namespace mace {
+namespace ops {
+namespace test {
 
 class ProposalOpTest : public OpsTestBase {};
 
-void TestSimple() {
+TEST_F(ProposalOpTest, CPUSimple) {
+  const int img_height = 256;
+  const int img_width = 256;
+  const int height = 3;
+  const int width = 4;
+
   OpsTestNet net;
 
   OpDefBuilder("Proposal", "ProposalTest")
       .Input("RpnCLSProb")
       .Input("RpnBBoxPred")
-      .Input("IMInfo")
+      .Input("ImgInfo")
+      .AddIntArg("min_size", 16)
+      .AddFloatArg("nms_thresh", 0.7)
+      .AddIntArg("pre_nms_top_n", 12000)
+      .AddIntArg("post_nms_top_n", 2000)
       .AddIntArg("feat_stride", 16)
-      .AddIntsArg("scales", {2, 4, 8, 16, 32})
+      .AddIntArg("base_size", 16)
+      .AddIntsArg("scales", {8, 16, 32})
+      .AddFloatsArg("ratios", {0.5, 1, 2})
       .Output("Output")
       .Finalize(net.NewOperatorDef());
 
+  // Fill with 0, 1, 2, ...; size_t matches scores.size() and avoids a
+  // signed/unsigned comparison in the loop condition.
+  std::vector<float> scores(height * width * 18);
+  for (size_t i = 0; i < scores.size(); ++i) scores[i] = static_cast<float>(i);
+
   // Add input data
   net.AddInputFromArray<DeviceType::CPU, float>(
-      "RpnCLSProb", {2, 2, 2, 2},
-      {-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0});
+      "RpnCLSProb", {1, height, width, 18}, scores);
+  net.AddRepeatedInput<DeviceType::CPU, float>(
+      "RpnBBoxPred", {1, height, width, 4 * 9}, 1);
   net.AddInputFromArray<DeviceType::CPU, float>(
-      "RpnBBoxPred", {2, 2, 2, 2},
-      {-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0});
-  net.AddInputFromArray<DeviceType::CPU, float>(
-      "IMInfo", {2, 2},
-      {1, 1, 1, 1});
+      "ImgInfo", {1, 1, 1, 3}, {img_height, img_width, 2});
 
   // Run
   net.RunOp();
 
-  auto expected = CreateTensor<float>(
-      {2, 2, 2, 2}, {0, 7, 0, 6, 0, 5, 0, 4, 0, 3, 0, 2, 0, 1, 0, 0});
+  auto expected_tensor = CreateTensor<float>({1, 1, 1, 5}, {0, 0, 0, 255, 255});
 
+  ExpectTensorNear<float>(*expected_tensor, *net.GetTensor("Output"), 1e-5);
 }
 
-TEST_F(ProposalOpTest, CPUSimple) { TestSimple(); }
-
 
+}  // namespace test
+}  // namespace ops
 }  // namespace mace
-- 
GitLab