Unverified commit 0947171b, authored by cnn, committed by GitHub

[dev] s2anet fix export and deploy (#2919)

* s2anet fix export and deploy

* remove redundant code

* fix cpp for s2anet deploy

* fix bug of get_categories

* rename poly_to_rbox to poly2rbox

* add some comment for function
Parent 912833f2
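For context, a minimal sketch (not part of this commit) of the two detection layouts that the deploy code below learns to tell apart: horizontal boxes come back as 6 columns, rotated (S2ANet) boxes as 10 columns. The helper name and sample values are illustrative only.

```python
import numpy as np

def print_detections(np_boxes):
    # Horizontal boxes: [class_id, score, xmin, ymin, xmax, ymax]            -> 6 columns
    # Rotated boxes:    [class_id, score, x1, y1, x2, y2, x3, y3, x4, y4]    -> 10 columns
    is_rbox = np_boxes.shape[-1] == 10
    for dt in np_boxes:
        clsid, score, coords = int(dt[0]), dt[1], dt[2:]
        kind = 'rbox' if is_rbox else 'bbox'
        print('class_id:{:d} confidence:{:.4f} {}:{}'.format(
            clsid, score, kind, coords.tolist()))

# example: one horizontal box and one rotated box
print_detections(np.array([[0, 0.98, 10, 20, 110, 220]], dtype='float32'))
print_detections(np.array(
    [[1, 0.87, 10, 10, 100, 20, 95, 60, 5, 50]], dtype='float32'))
```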
@@ -9,7 +9,7 @@ TrainReader:
     - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
     - Permute: {}
   batch_transforms:
-    - RboxPadBatch: {pad_to_stride: 32, pad_gt: true}
+    - PadBatch: {pad_to_stride: 32}
   batch_size: 1
   shuffle: true
   drop_last: true
@@ -22,7 +22,7 @@ EvalReader:
     - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
     - Permute: {}
   batch_transforms:
-    - RboxPadBatch: {pad_to_stride: 32, pad_gt: false}
+    - PadBatch: {pad_to_stride: 32}
   batch_size: 1
   shuffle: false
   drop_last: false
@@ -36,7 +36,7 @@ TestReader:
     - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
     - Permute: {}
   batch_transforms:
-    - RboxPadBatch: {pad_to_stride: 32, pad_gt: false}
+    - PadBatch: {pad_to_stride: 32}
   batch_size: 1
   shuffle: false
   drop_last: false
@@ -27,7 +27,7 @@ parent_path = osp.abspath(osp.join(__file__, *(['..'] * 3)))
 if parent_path not in sys.path:
     sys.path.append(parent_path)
-from ppdet.modeling.bbox_utils import poly_to_rbox
+from ppdet.modeling.bbox_utils import poly2rbox
 from ppdet.utils.logger import setup_logger
 logger = setup_logger(__name__)
@@ -118,7 +118,7 @@ def dota_2_coco(image_dir,
             # rbox or bbox
             if is_obb:
                 polys = [single_obj_poly]
-                rboxs = poly_to_rbox(polys)
+                rboxs = poly2rbox(polys)
                 rbox = rboxs[0].tolist()
                 single_obj['bbox'] = rbox
                 single_obj['area'] = rbox[2] * rbox[3]
......
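A hedged usage sketch for the renamed helper (it assumes a PaddleDetection checkout where `ppdet.modeling.bbox_utils.poly2rbox` exists, as after this commit); the rectangle values are made up.

```python
from ppdet.modeling.bbox_utils import poly2rbox

# One axis-aligned 100x50 rectangle given as 4 corner points (x0,y0,...,x3,y3).
polys = [[0.0, 0.0, 100.0, 0.0, 100.0, 50.0, 0.0, 50.0]]
rboxs = poly2rbox(polys)      # -> array of [x_ctr, y_ctr, w, h, angle]
rbox = rboxs[0].tolist()
print(rbox)                   # roughly [50.0, 25.0, 100.0, 50.0, 0.0]
area = rbox[2] * rbox[3]      # same area formula as the converter above
```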
@@ -51,7 +51,8 @@ std::vector<int> GenerateColorMap(int num_class);
 cv::Mat VisualizeResult(const cv::Mat& img,
                         const std::vector<ObjectResult>& results,
                         const std::vector<std::string>& lable_list,
-                        const std::vector<int>& colormap);
+                        const std::vector<int>& colormap,
+                        const bool is_rbox);
 class ObjectDetector {
@@ -120,7 +121,8 @@ class ObjectDetector {
   // Postprocess result
   void Postprocess(
       const cv::Mat& raw_mat,
-      std::vector<ObjectResult>* result);
+      std::vector<ObjectResult>* result,
+      bool is_rbox);
   std::shared_ptr<Predictor> predictor_;
   Preprocessor preprocessor_;
......
@@ -195,23 +195,42 @@ void PredictVideo(const std::string& video_path,
   // Capture all frames and do inference
   cv::Mat frame;
   int frame_id = 0;
+  bool is_rbox = false;
   while (capture.read(frame)) {
     if (frame.empty()) {
       break;
     }
     det->Predict(frame, 0.5, 0, 1, &result, &det_times);
-    cv::Mat out_im = PaddleDetection::VisualizeResult(
-        frame, result, labels, colormap);
     for (const auto& item : result) {
-      printf("In frame id %d, we detect: class=%d confidence=%.2f rect=[%d %d %d %d]\n",
-          frame_id,
-          item.class_id,
-          item.confidence,
-          item.rect[0],
-          item.rect[1],
-          item.rect[2],
-          item.rect[3]);
+      if (item.rect.size() > 6) {
+        is_rbox = true;
+        printf("class=%d confidence=%.4f rect=[%d %d %d %d %d %d %d %d]\n",
+               item.class_id,
+               item.confidence,
+               item.rect[0],
+               item.rect[1],
+               item.rect[2],
+               item.rect[3],
+               item.rect[4],
+               item.rect[5],
+               item.rect[6],
+               item.rect[7]);
+      } else {
+        printf("class=%d confidence=%.4f rect=[%d %d %d %d]\n",
+               item.class_id,
+               item.confidence,
+               item.rect[0],
+               item.rect[1],
+               item.rect[2],
+               item.rect[3]);
+      }
     }
+    cv::Mat out_im = PaddleDetection::VisualizeResult(
+        frame, result, labels, colormap, is_rbox);
     video_out.write(out_im);
     frame_id += 1;
   }
@@ -231,24 +250,41 @@ void PredictImage(const std::vector<std::string> all_img_list,
   // Store all detected result
   std::vector<PaddleDetection::ObjectResult> result;
   std::vector<double> det_times;
+  bool is_rbox = false;
   if (run_benchmark) {
     det->Predict(im, threshold, 10, 10, &result, &det_times);
   } else {
     det->Predict(im, 0.5, 0, 1, &result, &det_times);
     for (const auto& item : result) {
-      printf("class=%d confidence=%.4f rect=[%d %d %d %d]\n",
-          item.class_id,
-          item.confidence,
-          item.rect[0],
-          item.rect[1],
-          item.rect[2],
-          item.rect[3]);
+      if (item.rect.size() > 6) {
+        is_rbox = true;
+        printf("class=%d confidence=%.4f rect=[%d %d %d %d %d %d %d %d]\n",
+               item.class_id,
+               item.confidence,
+               item.rect[0],
+               item.rect[1],
+               item.rect[2],
+               item.rect[3],
+               item.rect[4],
+               item.rect[5],
+               item.rect[6],
+               item.rect[7]);
+      } else {
+        printf("class=%d confidence=%.4f rect=[%d %d %d %d]\n",
+               item.class_id,
+               item.confidence,
+               item.rect[0],
+               item.rect[1],
+               item.rect[2],
+               item.rect[3]);
+      }
     }
     // Visualization result
     auto labels = det->GetLabelList();
     auto colormap = PaddleDetection::GenerateColorMap(labels.size());
     cv::Mat vis_img = PaddleDetection::VisualizeResult(
-        im, result, labels, colormap);
+        im, result, labels, colormap, is_rbox);
     std::vector<int> compression_params;
     compression_params.push_back(CV_IMWRITE_JPEG_QUALITY);
     compression_params.push_back(95);
......
@@ -94,13 +94,10 @@ void ObjectDetector::LoadModel(const std::string& model_dir,
 cv::Mat VisualizeResult(const cv::Mat& img,
                         const std::vector<ObjectResult>& results,
                         const std::vector<std::string>& lable_list,
-                        const std::vector<int>& colormap) {
+                        const std::vector<int>& colormap,
+                        const bool is_rbox=false) {
   cv::Mat vis_img = img.clone();
   for (int i = 0; i < results.size(); ++i) {
-    int w = results[i].rect[1] - results[i].rect[0];
-    int h = results[i].rect[3] - results[i].rect[2];
-    cv::Rect roi = cv::Rect(results[i].rect[0], results[i].rect[2], w, h);
     // Configure color and text size
     std::ostringstream oss;
     oss << std::setiosflags(std::ios::fixed) << std::setprecision(4);
@@ -120,17 +117,37 @@ cv::Mat VisualizeResult(const cv::Mat& img,
                 thickness,
                 nullptr);
     cv::Point origin;
-    origin.x = roi.x;
-    origin.y = roi.y;
+
+    if (is_rbox) {
+      // Draw object, text, and background
+      for (int k = 0; k < 4; k++) {
+        cv::Point pt1 = cv::Point(results[i].rect[(k * 2) % 8],
+                                  results[i].rect[(k * 2 + 1) % 8]);
+        cv::Point pt2 = cv::Point(results[i].rect[(k * 2 + 2) % 8],
+                                  results[i].rect[(k * 2 + 3) % 8]);
+        cv::line(vis_img, pt1, pt2, roi_color, 2);
+      }
+    } else {
+      int w = results[i].rect[1] - results[i].rect[0];
+      int h = results[i].rect[3] - results[i].rect[2];
+      cv::Rect roi = cv::Rect(results[i].rect[0], results[i].rect[2], w, h);
+      // Draw roi object, text, and background
+      cv::rectangle(vis_img, roi, roi_color, 2);
+    }
+
+    origin.x = results[i].rect[0];
+    origin.y = results[i].rect[1];
     // Configure text background
     cv::Rect text_back = cv::Rect(results[i].rect[0],
                                   results[i].rect[2] - text_size.height,
                                   text_size.width,
                                   text_size.height);
-    // Draw roi object, text, and background
-    cv::rectangle(vis_img, roi, roi_color, 2);
+    // Draw text, and background
     cv::rectangle(vis_img, text_back, roi_color, -1);
     cv::putText(vis_img,
                 text,
@@ -152,7 +169,8 @@ void ObjectDetector::Preprocess(const cv::Mat& ori_im) {
 void ObjectDetector::Postprocess(
     const cv::Mat& raw_mat,
-    std::vector<ObjectResult>* result) {
+    std::vector<ObjectResult>* result,
+    bool is_rbox=false) {
   result->clear();
   int rh = 1;
   int rw = 1;
@@ -161,24 +179,52 @@ void ObjectDetector::Postprocess(
     rw = raw_mat.cols;
   }
-  int total_size = output_data_.size() / 6;
-  for (int j = 0; j < total_size; ++j) {
-    // Class id
-    int class_id = static_cast<int>(round(output_data_[0 + j * 6]));
-    // Confidence score
-    float score = output_data_[1 + j * 6];
-    int xmin = (output_data_[2 + j * 6] * rw);
-    int ymin = (output_data_[3 + j * 6] * rh);
-    int xmax = (output_data_[4 + j * 6] * rw);
-    int ymax = (output_data_[5 + j * 6] * rh);
-    int wd = xmax - xmin;
-    int hd = ymax - ymin;
-    if (score > threshold_ && class_id > -1) {
-      ObjectResult result_item;
-      result_item.rect = {xmin, xmax, ymin, ymax};
-      result_item.class_id = class_id;
-      result_item.confidence = score;
-      result->push_back(result_item);
+  if (is_rbox) {
+    int total_size = output_data_.size() / 10;
+    for (int j = 0; j < total_size; ++j) {
+      // Class id
+      int class_id = static_cast<int>(round(output_data_[0 + j * 10]));
+      // Confidence score
+      float score = output_data_[1 + j * 10];
+      int x1 = (output_data_[2 + j * 10] * rw);
+      int y1 = (output_data_[3 + j * 10] * rh);
+      int x2 = (output_data_[4 + j * 10] * rw);
+      int y2 = (output_data_[5 + j * 10] * rh);
+      int x3 = (output_data_[6 + j * 10] * rw);
+      int y3 = (output_data_[7 + j * 10] * rh);
+      int x4 = (output_data_[8 + j * 10] * rw);
+      int y4 = (output_data_[9 + j * 10] * rh);
+      if (score > threshold_ && class_id > -1) {
+        ObjectResult result_item;
+        result_item.rect = {x1, y1, x2, y2, x3, y3, x4, y4};
+        result_item.class_id = class_id;
+        result_item.confidence = score;
+        result->push_back(result_item);
+      }
+    }
+  } else {
+    int total_size = output_data_.size() / 6;
+    for (int j = 0; j < total_size; ++j) {
+      // Class id
+      int class_id = static_cast<int>(round(output_data_[0 + j * 6]));
+      // Confidence score
+      float score = output_data_[1 + j * 6];
+      int xmin = (output_data_[2 + j * 6] * rw);
+      int ymin = (output_data_[3 + j * 6] * rh);
+      int xmax = (output_data_[4 + j * 6] * rw);
+      int ymax = (output_data_[5 + j * 6] * rh);
+      int wd = xmax - xmin;
+      int hd = ymax - ymin;
+      if (score > threshold_ && class_id > -1) {
+        ObjectResult result_item;
+        result_item.rect = {xmin, xmax, ymin, ymax};
+        result_item.class_id = class_id;
+        result_item.confidence = score;
+        result->push_back(result_item);
+      }
     }
   }
 }
@@ -231,6 +277,7 @@ void ObjectDetector::Predict(const cv::Mat& im,
     out_tensor->CopyToCpu(output_data_.data());
   }
+  bool is_rbox = false;
   auto inference_start = std::chrono::steady_clock::now();
   for (int i = 0; i < repeats; i++)
   {
@@ -244,6 +291,7 @@ void ObjectDetector::Predict(const cv::Mat& im,
     for (int j = 0; j < output_shape.size(); ++j) {
       output_size *= output_shape[j];
     }
+    is_rbox = output_shape[output_shape.size() - 1] % 10 == 0;
     if (output_size < 6) {
       std::cerr << "[WARNING] No object detected." << std::endl;
@@ -254,7 +302,7 @@ void ObjectDetector::Predict(const cv::Mat& im,
   auto inference_end = std::chrono::steady_clock::now();
   auto postprocess_start = std::chrono::steady_clock::now();
   // Postprocessing result
-  Postprocess(im, result);
+  Postprocess(im, result, is_rbox);
   auto postprocess_end = std::chrono::steady_clock::now();
   std::chrono::duration<float> preprocess_diff = preprocess_end - preprocess_start;
@@ -263,6 +311,7 @@ void ObjectDetector::Predict(const cv::Mat& im,
   times->push_back(double(inference_diff.count() / repeats * 1000));
   std::chrono::duration<float> postprocess_diff = postprocess_end - postprocess_start;
+  times->push_back(double(postprocess_diff.count() * 1000));
 }
 std::vector<int> GenerateColorMap(int num_class) {
......
@@ -37,6 +37,7 @@ SUPPORT_MODELS = {
     'FCOS',
     'SOLOv2',
     'TTFNet',
+    'S2ANet',
 }
......
@@ -130,22 +130,29 @@ def draw_box(im, np_boxes, labels, threshold=0.5):
     for dt in np_boxes:
         clsid, bbox, score = int(dt[0]), dt[2:], dt[1]
-        xmin, ymin, xmax, ymax = bbox
-        print('class_id:{:d}, confidence:{:.4f}, left_top:[{:.2f},{:.2f}],'
-              'right_bottom:[{:.2f},{:.2f}]'.format(
-                  int(clsid), score, xmin, ymin, xmax, ymax))
-        w = xmax - xmin
-        h = ymax - ymin
         if clsid not in clsid2color:
             clsid2color[clsid] = color_list[clsid]
         color = tuple(clsid2color[clsid])
-        # draw bbox
-        draw.line(
-            [(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin),
-             (xmin, ymin)],
-            width=draw_thickness,
-            fill=color)
+        if len(bbox) == 4:
+            xmin, ymin, xmax, ymax = bbox
+            print('class_id:{:d}, confidence:{:.4f}, left_top:[{:.2f},{:.2f}],'
+                  'right_bottom:[{:.2f},{:.2f}]'.format(
+                      int(clsid), score, xmin, ymin, xmax, ymax))
+            # draw bbox
+            draw.line(
+                [(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin),
+                 (xmin, ymin)],
+                width=draw_thickness,
+                fill=color)
+        elif len(bbox) == 8:
+            x1, y1, x2, y2, x3, y3, x4, y4 = bbox
+            draw.line(
+                [(x1, y1), (x2, y2), (x3, y3), (x4, y4), (x1, y1)],
+                width=2,
+                fill=color)
+            xmin = min(x1, x2, x3, x4)
+            ymin = min(y1, y2, y3, y4)
         # draw label
         text = "{} {:.4f}".format(labels[clsid], score)
......
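A hedged, standalone sketch of the rotated-box drawing path added above: given 8 corner coordinates, the polygon is closed by repeating the first point. The image size and box values are made up for illustration (requires Pillow).

```python
from PIL import Image, ImageDraw

im = Image.new('RGB', (200, 200), (255, 255, 255))
draw = ImageDraw.Draw(im)

bbox = [30, 40, 150, 20, 170, 120, 50, 140]   # x1, y1, ..., x4, y4 of one rotated box
x1, y1, x2, y2, x3, y3, x4, y4 = bbox
draw.line(
    [(x1, y1), (x2, y2), (x3, y3), (x4, y4), (x1, y1)],
    width=2,
    fill=(255, 0, 0))
im.save('rbox_demo.jpg')
```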
@@ -33,7 +33,7 @@ logger = setup_logger(__name__)
 __all__ = [
     'PadBatch', 'BatchRandomResize', 'Gt2YoloTarget', 'Gt2FCOSTarget',
-    'Gt2TTFTarget', 'Gt2Solov2Target', 'RboxPadBatch'
+    'Gt2TTFTarget', 'Gt2Solov2Target'
 ]
@@ -87,6 +87,12 @@ class PadBatch(BaseOperator):
                 padding_segm[:, :im_h, :im_w] = gt_segm
                 data['gt_segm'] = padding_segm
+
+            if 'gt_rbox2poly' in data and data['gt_rbox2poly'] is not None:
+                # poly to rbox
+                polys = data['gt_rbox2poly']
+                rbox = bbox_utils.poly2rbox(polys)
+                data['gt_rbox'] = rbox
         return samples
@@ -739,111 +745,3 @@ class Gt2Solov2Target(BaseOperator):
             data['grid_order{}'.format(idx)] = gt_grid_order
         return samples
-
-@register_op
-class RboxPadBatch(BaseOperator):
-    """
-    Pad a batch of samples so they can be divisible by a stride.
-    The layout of each image should be 'CHW'. And convert poly to rbox.
-    Args:
-        pad_to_stride (int): If `pad_to_stride > 0`, pad zeros to ensure
-            height and width is divisible by `pad_to_stride`.
-    """
-
-    def __init__(self, pad_to_stride=0, pad_gt=False):
-        super(RboxPadBatch, self).__init__()
-        self.pad_to_stride = pad_to_stride
-        self.pad_gt = pad_gt
-
-    def __call__(self, samples, context=None):
-        """
-        Args:
-            samples (list): a batch of sample, each is dict.
-        """
-        coarsest_stride = self.pad_to_stride
-        max_shape = np.array([data['image'].shape for data in samples]).max(
-            axis=0)
-        if coarsest_stride > 0:
-            max_shape[1] = int(
-                np.ceil(max_shape[1] / coarsest_stride) * coarsest_stride)
-            max_shape[2] = int(
-                np.ceil(max_shape[2] / coarsest_stride) * coarsest_stride)
-        for data in samples:
-            im = data['image']
-            im_c, im_h, im_w = im.shape[:]
-            padding_im = np.zeros(
-                (im_c, max_shape[1], max_shape[2]), dtype=np.float32)
-            padding_im[:, :im_h, :im_w] = im
-            data['image'] = padding_im
-            if 'semantic' in data and data['semantic'] is not None:
-                semantic = data['semantic']
-                padding_sem = np.zeros(
-                    (1, max_shape[1], max_shape[2]), dtype=np.float32)
-                padding_sem[:, :im_h, :im_w] = semantic
-                data['semantic'] = padding_sem
-            if 'gt_segm' in data and data['gt_segm'] is not None:
-                gt_segm = data['gt_segm']
-                padding_segm = np.zeros(
-                    (gt_segm.shape[0], max_shape[1], max_shape[2]),
-                    dtype=np.uint8)
-                padding_segm[:, :im_h, :im_w] = gt_segm
-                data['gt_segm'] = padding_segm
-        if self.pad_gt:
-            gt_num = []
-            if 'gt_poly' in data and data['gt_poly'] is not None and len(data[
-                    'gt_poly']) > 0:
-                pad_mask = True
-            else:
-                pad_mask = False
-
-            if pad_mask:
-                poly_num = []
-                poly_part_num = []
-                point_num = []
-            for data in samples:
-                gt_num.append(data['gt_bbox'].shape[0])
-                if pad_mask:
-                    poly_num.append(len(data['gt_poly']))
-                    for poly in data['gt_poly']:
-                        poly_part_num.append(int(len(poly)))
-                        for p_p in poly:
-                            point_num.append(int(len(p_p) / 2))
-            gt_num_max = max(gt_num)
-
-            for i, sample in enumerate(samples):
-                assert 'gt_rbox' in sample
-                assert 'gt_rbox2poly' in sample
-                gt_box_data = -np.ones([gt_num_max, 4], dtype=np.float32)
-                gt_class_data = -np.ones([gt_num_max], dtype=np.int32)
-                is_crowd_data = np.ones([gt_num_max], dtype=np.int32)
-
-                if pad_mask:
-                    poly_num_max = max(poly_num)
-                    poly_part_num_max = max(poly_part_num)
-                    point_num_max = max(point_num)
-                    gt_masks_data = -np.ones(
-                        [poly_num_max, poly_part_num_max, point_num_max, 2],
-                        dtype=np.float32)
-
-                gt_num = sample['gt_bbox'].shape[0]
-                gt_box_data[0:gt_num, :] = sample['gt_bbox']
-                gt_class_data[0:gt_num] = np.squeeze(sample['gt_class'])
-                is_crowd_data[0:gt_num] = np.squeeze(sample['is_crowd'])
-
-                if pad_mask:
-                    for j, poly in enumerate(sample['gt_poly']):
-                        for k, p_p in enumerate(poly):
-                            pp_np = np.array(p_p).reshape(-1, 2)
-                            gt_masks_data[j, k, :pp_np.shape[0], :] = pp_np
-                    sample['gt_poly'] = gt_masks_data
-
-                sample['gt_bbox'] = gt_box_data
-                sample['gt_class'] = gt_class_data
-                sample['is_crowd'] = is_crowd_data
-                # ploy to rbox
-                polys = sample['gt_rbox2poly']
-                rbox = bbox_utils.poly_to_rbox(polys)
-                sample['gt_rbox'] = rbox
-
-        return samples
@@ -2007,7 +2007,7 @@ class Rbox2Poly(BaseOperator):
         x2 = x_ctr + width / 2.0
         y2 = y_ctr + height / 2.0
         sample['gt_bbox'] = np.stack([x1, y1, x2, y2], axis=1)
-        polys = bbox_utils.rbox2poly(rrects)
+        polys = bbox_utils.rbox2poly_np(rrects)
         sample['gt_rbox2poly'] = polys
         return sample
......
@@ -376,7 +376,8 @@ class Trainer(object):
         imid2path = self.dataset.get_imid2path()
         anno_file = self.dataset.get_anno()
-        clsid2catid, catid2name = get_categories(self.cfg.metric, anno_file)
+        clsid2catid, catid2name = get_categories(
+            self.cfg.metric, anno_file=anno_file)
         # Run Infer
         self.status['mode'] = 'test'
......
@@ -263,146 +263,7 @@ def bbox_iou(box1, box2, giou=False, diou=False, ciou=False, eps=1e-9):
     return iou
-
-def rect2rbox(bboxes):
-    """
-    :param bboxes: shape (n, 4) (xmin, ymin, xmax, ymax)
-    :return: dbboxes: shape (n, 5) (x_ctr, y_ctr, w, h, angle)
-    """
-    bboxes = bboxes.reshape(-1, 4)
-    num_boxes = bboxes.shape[0]
-    x_ctr = (bboxes[:, 2] + bboxes[:, 0]) / 2.0
-    y_ctr = (bboxes[:, 3] + bboxes[:, 1]) / 2.0
-    edges1 = np.abs(bboxes[:, 2] - bboxes[:, 0])
-    edges2 = np.abs(bboxes[:, 3] - bboxes[:, 1])
-    angles = np.zeros([num_boxes], dtype=bboxes.dtype)
-    inds = edges1 < edges2
-    rboxes = np.stack((x_ctr, y_ctr, edges1, edges2, angles), axis=1)
-    rboxes[inds, 2] = edges2[inds]
-    rboxes[inds, 3] = edges1[inds]
-    rboxes[inds, 4] = np.pi / 2.0
-    return rboxes
-
-def delta2rbox(Rrois,
-               deltas,
-               means=[0, 0, 0, 0, 0],
-               stds=[1, 1, 1, 1, 1],
-               wh_ratio_clip=1e-6):
-    """
-    :param Rrois: (cx, cy, w, h, theta)
-    :param deltas: (dx, dy, dw, dh, dtheta)
-    :param means:
-    :param stds:
-    :param wh_ratio_clip:
-    :return:
-    """
-    deltas = paddle.reshape(deltas, [-1, deltas.shape[-1]])
-    denorm_deltas = deltas * stds + means
-    dx = denorm_deltas[:, 0]
-    dy = denorm_deltas[:, 1]
-    dw = denorm_deltas[:, 2]
-    dh = denorm_deltas[:, 3]
-    dangle = denorm_deltas[:, 4]
-    max_ratio = np.abs(np.log(wh_ratio_clip))
-    dw = paddle.clip(dw, min=-max_ratio, max=max_ratio)
-    dh = paddle.clip(dh, min=-max_ratio, max=max_ratio)
-    Rroi_x = Rrois[:, 0]
-    Rroi_y = Rrois[:, 1]
-    Rroi_w = Rrois[:, 2]
-    Rroi_h = Rrois[:, 3]
-    Rroi_angle = Rrois[:, 4]
-    gx = dx * Rroi_w * paddle.cos(Rroi_angle) - dy * Rroi_h * paddle.sin(
-        Rroi_angle) + Rroi_x
-    gy = dx * Rroi_w * paddle.sin(Rroi_angle) + dy * Rroi_h * paddle.cos(
-        Rroi_angle) + Rroi_y
-    gw = Rroi_w * dw.exp()
-    gh = Rroi_h * dh.exp()
-    ga = np.pi * dangle + Rroi_angle
-    ga = (ga + np.pi / 4) % np.pi - np.pi / 4
-    ga = paddle.to_tensor(ga)
-    gw = paddle.to_tensor(gw, dtype='float32')
-    gh = paddle.to_tensor(gh, dtype='float32')
-    bboxes = paddle.stack([gx, gy, gw, gh, ga], axis=-1)
-    return bboxes
-
-def rbox2delta(proposals, gt, means=[0, 0, 0, 0, 0], stds=[1, 1, 1, 1, 1]):
-    """
-    Args:
-        proposals:
-        gt:
-        means: 1x5
-        stds: 1x5
-    Returns:
-    """
-    proposals = proposals.astype(np.float64)
-    PI = np.pi
-    gt_widths = gt[..., 2]
-    gt_heights = gt[..., 3]
-    gt_angle = gt[..., 4]
-    proposals_widths = proposals[..., 2]
-    proposals_heights = proposals[..., 3]
-    proposals_angle = proposals[..., 4]
-    coord = gt[..., 0:2] - proposals[..., 0:2]
-    dx = (np.cos(proposals[..., 4]) * coord[..., 0] + np.sin(proposals[..., 4])
-          * coord[..., 1]) / proposals_widths
-    dy = (-np.sin(proposals[..., 4]) * coord[..., 0] + np.cos(proposals[..., 4])
-          * coord[..., 1]) / proposals_heights
-    dw = np.log(gt_widths / proposals_widths)
-    dh = np.log(gt_heights / proposals_heights)
-    da = (gt_angle - proposals_angle)
-    da = (da + PI / 4) % PI - PI / 4
-    da /= PI
-    deltas = np.stack([dx, dy, dw, dh, da], axis=-1)
-    means = np.array(means, dtype=deltas.dtype)
-    stds = np.array(stds, dtype=deltas.dtype)
-    deltas = (deltas - means) / stds
-    deltas = deltas.astype(np.float32)
-    return deltas
-
-def bbox_decode(bbox_preds,
-                anchors,
-                means=[0, 0, 0, 0, 0],
-                stds=[1, 1, 1, 1, 1]):
-    """decode bbox from deltas
-    Args:
-        bbox_preds: [N,H,W,5]
-        anchors: [H*W,5]
-    return:
-        bboxes: [N,H,W,5]
-    """
-    num_imgs, H, W, _ = bbox_preds.shape
-    bboxes_list = []
-    for img_id in range(num_imgs):
-        bbox_pred = bbox_preds[img_id]
-        # bbox_pred.shape=[5,H,W]
-        bbox_delta = bbox_pred
-        bboxes = delta2rbox(
-            anchors, bbox_delta, means, stds, wh_ratio_clip=1e-6)
-        bboxes = paddle.reshape(bboxes, [H, W, 5])
-        bboxes_list.append(bboxes)
-    return paddle.stack(bboxes_list, axis=0)
-
-def poly_to_rbox(polys):
+def poly2rbox(polys):
     """
     poly:[x0,y0,x1,y1,x2,y2,x3,y3]
     to
@@ -479,37 +340,16 @@ def get_best_begin_point_single(coordinate):
     return np.array(combinate[force_flag]).reshape(8)
-
-def rbox2poly_single(rrect):
-    """
-    rrect:[x_ctr,y_ctr,w,h,angle]
-    to
-    poly:[x0,y0,x1,y1,x2,y2,x3,y3]
-    """
-    x_ctr, y_ctr, width, height, angle = rrect[:5]
-    tl_x, tl_y, br_x, br_y = -width / 2, -height / 2, width / 2, height / 2
-    # rect 2x4
-    rect = np.array([[tl_x, br_x, br_x, tl_x], [tl_y, tl_y, br_y, br_y]])
-    R = np.array([[np.cos(angle), -np.sin(angle)],
-                  [np.sin(angle), np.cos(angle)]])
-    # poly
-    poly = R.dot(rect)
-    x0, x1, x2, x3 = poly[0, :4] + x_ctr
-    y0, y1, y2, y3 = poly[1, :4] + y_ctr
-    poly = np.array([x0, y0, x1, y1, x2, y2, x3, y3], dtype=np.float32)
-    poly = get_best_begin_point_single(poly)
-    return poly
-
-def rbox2poly(rrects):
+def rbox2poly_np(rrects):
     """
     rrect:[x_ctr,y_ctr,w,h,angle]
     to
     poly:[x0,y0,x1,y1,x2,y2,x3,y3]
     """
     polys = []
-    rrects = rrects.numpy()
     for i in range(rrects.shape[0]):
         rrect = rrects[i]
-        # x_ctr, y_ctr, width, height, angle = rrect[:5]
         x_ctr = rrect[0]
         y_ctr = rrect[1]
         width = rrect[2]
@@ -529,13 +369,13 @@ def rbox2poly(rrects):
     return polys
-
-def pd_rbox2poly(rrects):
+def rbox2poly(rrects):
     """
     rrect:[x_ctr,y_ctr,w,h,angle]
     to
     poly:[x0,y0,x1,y1,x2,y2,x3,y3]
     """
-    N = rrects.shape[0]
+    N = paddle.shape(rrects)[0]
     x_ctr = rrects[:, 0]
     y_ctr = rrects[:, 1]
@@ -561,14 +401,10 @@ def pd_rbox2poly(rrects):
     polys = paddle.transpose(polys, [2, 1, 0])
     polys = paddle.reshape(polys, [-1, N])
     polys = paddle.transpose(polys, [1, 0])
-    polys[:, 0] += x_ctr
-    polys[:, 2] += x_ctr
-    polys[:, 4] += x_ctr
-    polys[:, 6] += x_ctr
-    polys[:, 1] += y_ctr
-    polys[:, 3] += y_ctr
-    polys[:, 5] += y_ctr
-    polys[:, 7] += y_ctr
+
+    tmp = paddle.stack(
+        [x_ctr, y_ctr, x_ctr, y_ctr, x_ctr, y_ctr, x_ctr, y_ctr], axis=1)
+    polys = polys + tmp
     return polys
......
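For reference, a standalone numpy check of the rbox-to-polygon math kept in `rbox2poly_np` (the same rotation-matrix construction as the code above); the box values are illustrative.

```python
import numpy as np

x_ctr, y_ctr, width, height, angle = 50.0, 25.0, 100.0, 50.0, np.pi / 6
tl_x, tl_y, br_x, br_y = -width / 2, -height / 2, width / 2, height / 2
rect = np.array([[tl_x, br_x, br_x, tl_x], [tl_y, tl_y, br_y, br_y]])
R = np.array([[np.cos(angle), -np.sin(angle)],
              [np.sin(angle), np.cos(angle)]])
poly = R.dot(rect)
xs = poly[0, :] + x_ctr
ys = poly[1, :] + y_ctr
print(np.stack([xs, ys], axis=1))   # the 4 corner points of the rotated box
```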
This diff is collapsed.
@@ -17,7 +17,7 @@ import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
 from ppdet.core.workspace import register
-from ppdet.modeling.bbox_utils import nonempty_bbox, rbox2poly, pd_rbox2poly
+from ppdet.modeling.bbox_utils import nonempty_bbox, rbox2poly
 try:
     from collections.abc import Sequence
 except Exception:
@@ -214,7 +214,7 @@ class FCOSPostProcess(object):
 @register
-class S2ANetBBoxPostProcess(object):
+class S2ANetBBoxPostProcess(nn.Layer):
     __shared__ = ['num_classes']
     __inject__ = ['nms']
@@ -225,41 +225,43 @@ class S2ANetBBoxPostProcess(object):
         self.min_bbox_size = min_bbox_size
         self.nms = nms
         self.origin_shape_list = []
+        self.fake_pred_cls_score_bbox = paddle.to_tensor(
+            np.array(
+                [[-1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
+                dtype='float32'))
+        self.fake_bbox_num = paddle.to_tensor(np.array([1], dtype='int32'))

-    def __call__(self, pred_scores, pred_bboxes):
+    def forward(self, pred_scores, pred_bboxes):
         """
         pred_scores : [N, M]  score
         pred_bboxes : [N, 5]  xc, yc, w, h, a
         im_shape : [N, 2]  im_shape
         scale_factor : [N, 2]  scale_factor
         """
-        pred_ploys = pd_rbox2poly(pred_bboxes)
-        pred_ploys = paddle.reshape(
-            pred_ploys, [1, pred_ploys.shape[0], pred_ploys.shape[1]])
+        pred_ploys0 = rbox2poly(pred_bboxes)
+        pred_ploys = paddle.unsqueeze(pred_ploys0, axis=0)

-        pred_scores = paddle.to_tensor(pred_scores)
         # pred_scores [NA, 16] --> [16, NA]
-        pred_scores = paddle.transpose(pred_scores, [1, 0])
-        pred_scores = paddle.reshape(
-            pred_scores, [1, pred_scores.shape[0], pred_scores.shape[1]])
+        pred_scores0 = paddle.transpose(pred_scores, [1, 0])
+        pred_scores = paddle.unsqueeze(pred_scores0, axis=0)
         pred_cls_score_bbox, bbox_num, _ = self.nms(pred_ploys, pred_scores,
                                                     self.num_classes)
         # Prevent empty bbox_pred from decode or NMS.
         # Bboxes and score before NMS may be empty due to the score threshold.
-        if pred_cls_score_bbox.shape[0] == 0:
-            pred_cls_score_bbox = paddle.to_tensor(
-                np.array(
-                    [[-1, 0.0, 0.0, 0.0, 0.0, 0.0]], dtype='float32'))
-            bbox_num = paddle.to_tensor(np.array([1], dtype='int32'))
+        if pred_cls_score_bbox.shape[0] <= 0 or pred_cls_score_bbox.shape[
+                1] <= 1:
+            pred_cls_score_bbox = self.fake_pred_cls_score_bbox
+            bbox_num = self.fake_bbox_num
+
+        pred_cls_score_bbox = paddle.reshape(pred_cls_score_bbox, [-1, 10])
+        assert pred_cls_score_bbox.shape[1] == 10
         return pred_cls_score_bbox, bbox_num

     def get_pred(self, bboxes, bbox_num, im_shape, scale_factor):
         """
         Rescale, clip and filter the bbox from the output of NMS to
         get final prediction.
         Args:
             bboxes(Tensor): bboxes [N, 10]
             bbox_num(Tensor): bbox_num
@@ -270,6 +272,7 @@ class S2ANetBBoxPostProcess(object):
             including labels, scores and bboxes. The size of
             bboxes are corresponding to the original image.
         """
+        assert bboxes.shape[1] == 10
         origin_shape = paddle.floor(im_shape / scale_factor + 0.5)
         origin_shape_list = []
@@ -292,7 +295,7 @@ class S2ANetBBoxPostProcess(object):
         # bboxes: [N, 10], label, score, bbox
         pred_label_score = bboxes[:, 0:2]
-        pred_bbox = bboxes[:, 2:10:1]
+        pred_bbox = bboxes[:, 2:]
         # rescale bbox to original image
         scaled_bbox = pred_bbox / scale_factor_list
......
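A hedged, plain-numpy sketch of the "fake bbox" fallback introduced above: when NMS returns an empty or degenerate result, one placeholder row (label -1, zero scores and coordinates) is substituted, presumably so the exported graph always produces a well-formed [N, 10] output. The function name here is hypothetical.

```python
import numpy as np

fake_pred_cls_score_bbox = np.array(
    [[-1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]], dtype='float32')

def pad_empty(pred_cls_score_bbox):
    # Mirror the shape checks used in S2ANetBBoxPostProcess.forward.
    if pred_cls_score_bbox.shape[0] <= 0 or pred_cls_score_bbox.shape[1] <= 1:
        return fake_pred_cls_score_bbox, np.array([1], dtype='int32')
    return pred_cls_score_bbox, np.array(
        [pred_cls_score_bbox.shape[0]], dtype='int32')

boxes, num = pad_empty(np.zeros((0, 10), dtype='float32'))
print(boxes.shape, num)    # (1, 10) [1]
```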
@@ -16,7 +16,6 @@ import paddle
 from ppdet.core.workspace import register, serializable
 from .target import rpn_anchor_target, generate_proposal_target, generate_mask_target, libra_generate_proposal_target
-from ppdet.modeling import bbox_utils
 import numpy as np
@@ -283,13 +282,58 @@ class RBoxAssigner(object):
         """
         if anchors.ndim == 3:
-            anchors = anchors.reshape(-1, anchor.shape[-1])
+            anchors = anchors.reshape(-1, anchors.shape[-1])
         assert anchors.ndim == 2

         anchor_num = anchors.shape[0]
         anchor_valid = np.ones((anchor_num), np.uint8)
         anchor_inds = np.arange(anchor_num)
         return anchor_inds

+    def rbox2delta(self,
+                   proposals,
+                   gt,
+                   means=[0, 0, 0, 0, 0],
+                   stds=[1, 1, 1, 1, 1]):
+        """
+        Args:
+            proposals: tensor [N, 5]
+            gt: gt [N, 5]
+            means: means [5]
+            stds: stds [5]
+        Returns:
+        """
+        proposals = proposals.astype(np.float64)
+        PI = np.pi
+        gt_widths = gt[..., 2]
+        gt_heights = gt[..., 3]
+        gt_angle = gt[..., 4]
+
+        proposals_widths = proposals[..., 2]
+        proposals_heights = proposals[..., 3]
+        proposals_angle = proposals[..., 4]
+
+        coord = gt[..., 0:2] - proposals[..., 0:2]
+        dx = (np.cos(proposals[..., 4]) * coord[..., 0] +
+              np.sin(proposals[..., 4]) * coord[..., 1]) / proposals_widths
+        dy = (-np.sin(proposals[..., 4]) * coord[..., 0] +
+              np.cos(proposals[..., 4]) * coord[..., 1]) / proposals_heights
+        dw = np.log(gt_widths / proposals_widths)
+        dh = np.log(gt_heights / proposals_heights)
+        da = (gt_angle - proposals_angle)
+
+        da = (da + PI / 4) % PI - PI / 4
+        da /= PI
+
+        deltas = np.stack([dx, dy, dw, dh, da], axis=-1)
+        means = np.array(means, dtype=deltas.dtype)
+        stds = np.array(stds, dtype=deltas.dtype)
+        deltas = (deltas - means) / stds
+        deltas = deltas.astype(np.float32)
+        return deltas
+
     def assign_anchor(self,
                       anchors,
                       gt_bboxes,
@@ -405,8 +449,8 @@ class RBoxAssigner(object):
         #print('ancho target pos_inds', pos_inds, len(pos_inds))
         pos_sampled_gt_boxes = gt_bboxes[anchor_gt_bbox_inds[pos_inds]]
         if len(pos_inds) > 0:
-            pos_bbox_targets = bbox_utils.rbox2delta(pos_sampled_anchors,
-                                                     pos_sampled_gt_boxes)
+            pos_bbox_targets = self.rbox2delta(pos_sampled_anchors,
+                                               pos_sampled_gt_boxes)
             bbox_targets[pos_inds, :] = pos_bbox_targets
             bbox_weights[pos_inds, :] = 1.0
......