refactor s2anet (#6604)

* refactor s2anet to support batch_size > 1 * fix problem of inference * support batch_size > 1 for training * fix empty results * fix dota eval * fix configs of s2anet_head * modify s2anet_spine_1x to 73 mAP

refactor s2anet (#6604)
* refactor s2anet to support batch_size > 1 * fix problem of inference * support batch_size > 1 for training * fix empty results * fix dota eval * fix configs of s2anet_head * modify s2anet_spine_1x to 73 mAP
b4727677 · wangxinxin08 · GitHub · 42a4d707 · b4727677 · b4727677
21 changed files
--- a/configs/datasets/dota.yml
+++ b/configs/datasets/dota.yml
@@ -13,6 +13,7 @@ EvalDataset:
    image_dir: trainval_split/images
    anno_path: trainval_split/s2anet_trainval_paddle_coco.json
    dataset_dir: dataset/DOTA_1024_s2anet/
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_rbox']
 TestDataset:
  !ImageFolder

--- a/configs/datasets/spine_coco.yml
+++ b/configs/datasets/spine_coco.yml
@@ -13,6 +13,7 @@ EvalDataset:
    image_dir: images
    anno_path: annotations/valid.json
    dataset_dir: dataset/spine_coco
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_rbox']
 TestDataset:
  !ImageFolder

--- a/configs/dota/_base_/s2anet.yml
+++ b/configs/dota/_base_/s2anet.yml
@@ -7,8 +7,7 @@ weights: output/s2anet_r50_fpn_1x_dota/model_final.pdparams
 S2ANet:
  backbone: ResNet
  neck: FPN
-  s2anet_head: S2ANetHead
+  head: S2ANetHead
-  s2anet_bbox_post_process: S2ANetBBoxPostProcess
 ResNet:
  depth: 50
@@ -33,23 +32,21 @@ S2ANetHead:
  stacked_convs: 2
  feat_in: 256
  feat_out: 256
-  num_classes: 15
  align_conv_type: 'AlignConv'  # AlignConv Conv
  align_conv_size: 3
  use_sigmoid_cls: True
+  reg_loss_weight: [1.0, 1.0, 1.0, 1.0, 1.1]
-RBoxAssigner:
+  cls_loss_weight: [1.1, 1.05]
-  pos_iou_thr: 0.5
-  neg_iou_thr: 0.4
-  min_iou_thr: 0.0
-  ignore_iof_thr: -2
-S2ANetBBoxPostProcess:
  nms_pre: 2000
-  min_bbox_size: 0.0
  nms:
    name: MultiClassNMS
    keep_top_k: -1
    score_threshold: 0.05
    nms_threshold: 0.1
    normalized: False
+RBoxAssigner:
+  pos_iou_thr: 0.5
+  neg_iou_thr: 0.4
+  min_iou_thr: 0.0
+  ignore_iof_thr: -2
--- a/configs/dota/_base_/s2anet_reader.yml
+++ b/configs/dota/_base_/s2anet_reader.yml
-worker_num: 0
+worker_num: 4
 TrainReader:
  sample_transforms:
-  - Decode: {}
+    - Decode: {}
-  - Rbox2Poly: {}
+    - Rbox2Poly: {}
-  # Resize can process rbox
+    - RandomRFlip: {}
-  - Resize: {target_size: [1024, 1024], interp: 2, keep_ratio: False}
+    - RResize: {target_size: [1024, 1024], keep_ratio: True, interp: 2}
-  - RandomFlip: {prob: 0.5}
+    - Poly2RBox: {rbox_type: 'le135'}
-  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
-  - Permute: {}
  batch_transforms:
-  - PadBatch: {pad_to_stride: 32}
+    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
-  batch_size: 1
+    - Permute: {}
+    - PadRGT: {}
+    - PadBatch: {pad_to_stride: 32}
+  batch_size: 2
  shuffle: true
  drop_last: true
 EvalReader:
  sample_transforms:
-  - Decode: {}
+    - Decode: {}
-  - Resize: {interp: 2, target_size: [1024, 1024], keep_ratio: True}
+    - RResize: {target_size: [1024, 1024], keep_ratio: True, interp: 2}
-  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
-  - Permute: {}
+    - Permute: {}
  batch_transforms:
-  - PadBatch: {pad_to_stride: 32}
+    - PadBatch: {pad_to_stride: 32}
-  batch_size: 1
+  batch_size: 2
  shuffle: false
  drop_last: false
+  collate_batch: false
 TestReader:
  sample_transforms:
-  - Decode: {}
+    - Decode: {}
-  - Resize: {interp: 2, target_size: [1024, 1024], keep_ratio: True}
+    - Resize: {interp: 2, target_size: [1024, 1024], keep_ratio: True}
-  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+    - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
-  - Permute: {}
+    - Permute: {}
  batch_transforms:
-  - PadBatch: {pad_to_stride: 32}
+    - PadBatch: {pad_to_stride: 32}
  batch_size: 1
  shuffle: false
  drop_last: false
--- a/configs/dota/s2anet_1x_spine.yml
+++ b/configs/dota/s2anet_1x_spine.yml
@@ -7,23 +7,19 @@ _BASE_: [
 ]
 weights: output/s2anet_1x_spine/model_final
+pretrain_weights: https://paddledet.bj.bcebos.com/models/s2anet_alignconv_2x_dota.pdparams
 # for 8 card
 LearningRate:
  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [7, 10]
+  - !LinearWarmup
+    start_factor: 0.3333333333333333
+    epochs: 5
 S2ANetHead:
-  anchor_strides: [8, 16, 32, 64, 128]
-  anchor_scales: [4]
-  anchor_ratios: [1.0]
-  anchor_assign: RBoxAssigner
-  stacked_convs: 2
-  feat_in: 256
-  feat_out: 256
-  num_classes: 9
-  align_conv_type: 'AlignConv'  # AlignConv Conv
-  align_conv_size: 3
-  use_sigmoid_cls: True
  reg_loss_weight: [1.0, 1.0, 1.0, 1.0, 1.05]
  cls_loss_weight: [1.05, 1.0]
-  reg_loss_type: 'l1'
--- a/configs/dota/s2anet_alignconv_2x_dota.yml
+++ b/configs/dota/s2anet_alignconv_2x_dota.yml
@@ -8,19 +8,3 @@ _BASE_: [
 pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
 weights: output/s2anet_alignconv_2x_dota/model_final
-S2ANetHead:
-  anchor_strides: [8, 16, 32, 64, 128]
-  anchor_scales: [4]
-  anchor_ratios: [1.0]
-  anchor_assign: RBoxAssigner
-  stacked_convs: 2
-  feat_in: 256
-  feat_out: 256
-  num_classes: 15
-  align_conv_type: 'AlignConv'  # AlignConv Conv
-  align_conv_size: 3
-  use_sigmoid_cls: True
-  reg_loss_weight: [1.0, 1.0, 1.0, 1.0, 1.1]
-  cls_loss_weight: [1.1, 1.05]
-  reg_loss_type: 'l1'
--- a/configs/dota/s2anet_conv_2x_dota.yml
+++ b/configs/dota/s2anet_conv_2x_dota.yml
@@ -16,16 +16,4 @@ ResNet:
  num_stages: 4
 S2ANetHead:
-  anchor_strides: [8, 16, 32, 64, 128]
+  align_conv_type: 'Conv'
-  anchor_scales: [4]
-  anchor_ratios: [1.0]
-  anchor_assign: RBoxAssigner
-  stacked_convs: 2
-  feat_in: 256
-  feat_out: 256
-  num_classes: 15
-  align_conv_type: 'Conv'  # AlignConv Conv
-  align_conv_size: 3
-  use_sigmoid_cls: True
-  reg_loss_weight: [1.0, 1.0, 1.0, 1.0, 1.1]
-  cls_loss_weight: [1.1, 1.05]
--- a/ppdet/data/source/coco.py
+++ b/ppdet/data/source/coco.py
@@ -180,7 +180,6 @@ class COCODataSet(DetDataset):
                gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
                if is_rbox_anno:
                    gt_rbox = np.zeros((num_bbox, 5), dtype=np.float32)
-                gt_theta = np.zeros((num_bbox, 1), dtype=np.int32)
                gt_class = np.zeros((num_bbox, 1), dtype=np.int32)
                is_crowd = np.zeros((num_bbox, 1), dtype=np.int32)
                gt_poly = [None] * num_bbox

--- a/ppdet/data/transform/__init__.py
+++ b/ppdet/data/transform/__init__.py
@@ -16,11 +16,13 @@ from . import operators
 from . import batch_operators
 from . import keypoint_operators
 from . import mot_operators
+from . import rotated_operators
 from .operators import *
 from .batch_operators import *
 from .keypoint_operators import *
 from .mot_operators import *
+from .rotated_operators import *
 __all__ = []
 __all__ += registered_ops

--- a/ppdet/data/transform/batch_operators.py
+++ b/ppdet/data/transform/batch_operators.py
@@ -48,6 +48,7 @@ __all__ = [
    'Gt2GFLTarget',
    'Gt2CenterNetTarget',
    'PadGT',
+    'PadRGT',
 ]
@@ -109,12 +110,6 @@ class PadBatch(BaseOperator):
                padding_segm[:, :im_h, :im_w] = gt_segm
                data['gt_segm'] = padding_segm
-            if 'gt_rbox2poly' in data and data['gt_rbox2poly'] is not None:
-                # ploy to rbox
-                polys = data['gt_rbox2poly']
-                rbox = bbox_utils.poly2rbox(polys)
-                data['gt_rbox'] = rbox
        return samples
@@ -981,12 +976,6 @@ class PadMaskBatch(BaseOperator):
                padding_mask[:im_h, :im_w] = 1.
                data['pad_mask'] = padding_mask
-            if 'gt_rbox2poly' in data and data['gt_rbox2poly'] is not None:
-                # ploy to rbox
-                polys = data['gt_rbox2poly']
-                rbox = bbox_utils.poly2rbox(polys)
-                data['gt_rbox'] = rbox
        return samples
@@ -1122,3 +1111,57 @@ class PadGT(BaseOperator):
                    pad_diff[:num_gt] = sample['difficult']
                sample['difficult'] = pad_diff
        return samples
+@register_op
+class PadRGT(BaseOperator):
+    """
+    Zero-pad the ground-truth arrays of every sample in a batch to one length.
+    The common length is the largest `gt_bbox` count found in the batch.
+    `gt_class` and `gt_bbox` are always padded; `gt_score`, `is_crowd`,
+    `difficult`, `gt_poly` and `gt_rbox` are padded only when present.
+    Args:
+        return_gt_mask (bool): If true, also store `pad_gt_mask`,
+                                1 means bbox, 0 means padding.
+    """
+    def __init__(self, return_gt_mask=True):
+        super(PadRGT, self).__init__()
+        self.return_gt_mask = return_gt_mask
+    def pad_field(self, sample, field, num_gt):
+        # field is (key, padded shape, dtype); keys missing from the sample
+        # are left untouched
+        key, padded_shape, padded_dtype = field
+        if key not in sample:
+            return
+        padded = np.zeros(padded_shape, dtype=padded_dtype)
+        if num_gt > 0:
+            padded[:num_gt] = sample[key]
+        sample[key] = padded
+    def __call__(self, samples, context=None):
+        num_max_boxes = max([len(item['gt_bbox']) for item in samples])
+        # (key, trailing dimension, dtype) of the optional per-box fields
+        optional_fields = (
+            ('gt_score', 1, np.float32),
+            ('is_crowd', 1, np.int32),
+            ('difficult', 1, np.int32),
+            ('gt_poly', 8, np.float32),
+            ('gt_rbox', 5, np.float32), )
+        for sample in samples:
+            if self.return_gt_mask:
+                sample['pad_gt_mask'] = np.zeros(
+                    (num_max_boxes, 1), dtype=np.float32)
+            # nothing to pad when no sample in the batch has a box
+            if num_max_boxes == 0:
+                continue
+            num_gt = len(sample['gt_bbox'])
+            padded_cls = np.zeros((num_max_boxes, 1), dtype=np.int32)
+            padded_box = np.zeros((num_max_boxes, 4), dtype=np.float32)
+            if num_gt > 0:
+                padded_cls[:num_gt] = sample['gt_class']
+                padded_box[:num_gt] = sample['gt_bbox']
+            sample['gt_class'] = padded_cls
+            sample['gt_bbox'] = padded_box
+            # mark the rows that hold real boxes
+            if 'pad_gt_mask' in sample:
+                sample['pad_gt_mask'][:num_gt] = 1
+            for key, last_dim, field_dtype in optional_fields:
+                self.pad_field(
+                    sample, [key, (num_max_boxes, last_dim), field_dtype],
+                    num_gt)
+        return samples
--- a/ppdet/data/transform/op_helper.py
+++ b/ppdet/data/transform/op_helper.py
@@ -492,3 +492,72 @@ def get_border(border, size):
    while size - border // i <= border // i:
        i *= 2
    return border // i
+def norm_angle(angle, range=(-np.pi / 4, np.pi)):
+    """Wrap an angle into a half-open periodic interval.
+    Args:
+        angle (float | np.ndarray): angle(s) in radians.
+        range (sequence): ``(start, period)``. The second entry is the length
+            of the interval, not its upper bound, so the default maps angles
+            into [-pi / 4, 3 * pi / 4). The default is an immutable tuple so
+            it cannot be altered between calls.
+    Returns:
+        ``angle`` shifted by a multiple of ``range[1]`` so that it lies in
+        ``[range[0], range[0] + range[1])``.
+    """
+    start, period = range[0], range[1]
+    return (angle - start) % period + start
+def poly2rbox_le135(poly):
+    """Turn a 4-point polygon into a long-edge rotated box.
+    The angle follows the longer of the two measured edges and is then
+    passed through ``norm_angle`` with its default range.
+    Args:
+        poly: [x1, y1, x2, y2, x3, y3, x4, y4]
+    Returns:
+        rbox: (cx, cy, w, h, angle), w being the longer measured edge
+    """
+    coords = np.array(poly[:8], dtype=np.float32)
+    p1, p2, p3, p4 = [(coords[k], coords[k + 1]) for k in (0, 2, 4, 6)]
+    # lengths of the edges p1-p2 and p2-p3
+    dx12, dy12 = p1[0] - p2[0], p1[1] - p2[1]
+    dx23, dy23 = p2[0] - p3[0], p2[1] - p3[1]
+    len12 = np.sqrt(dx12 * dx12 + dy12 * dy12)
+    len23 = np.sqrt(dx23 * dx23 + dy23 * dy23)
+    width, height = max(len12, len23), min(len12, len23)
+    rbox_angle = 0
+    if len12 > len23:
+        # p1 -> p2 is the long edge
+        rbox_angle = np.arctan2(float(p2[1] - p1[1]), float(p2[0] - p1[0]))
+    elif len23 >= len12:
+        # otherwise take the direction p1 -> p4
+        rbox_angle = np.arctan2(float(p4[1] - p1[1]), float(p4[0] - p1[0]))
+    rbox_angle = norm_angle(rbox_angle)
+    # centre is the midpoint of the p1-p3 diagonal
+    x_ctr = float(p1[0] + p3[0]) / 2
+    y_ctr = float(p1[1] + p3[1]) / 2
+    return x_ctr, y_ctr, width, height, rbox_angle
+def poly2rbox_oc(poly):
+    """Convert a polygon to a rotated box using cv2.minAreaRect.
+    Per the inline comments below, the angle is brought into [0, 90)
+    degrees and then converted to radians, i.e. [0, pi / 2).
+    Args:
+        poly: [x1, y1, x2, y2, x3, y3, x4, y4]
+    Returns:
+        rbox: [cx, cy, w, h, angle]
+    """
+    points = np.array(poly, dtype=np.float32).reshape((-1, 2))
+    (cx, cy), (w, h), angle = cv2.minAreaRect(points)
+    # using the new OpenCV Rotated BBox definition since 4.5.1
+    # if angle < 0, opencv is older than 4.5.1, angle is in [-90, 0)
+    if angle < 0:
+        angle += 90
+        w, h = h, w
+    # convert angle to [0, 90)
+    if angle == -0.0:
+        angle = 0.0
+    if angle == 90.0:
+        # 90 degrees is folded onto 0 by swapping the two side lengths
+        angle = 0.0
+        w, h = h, w
+    # degrees -> radians
+    angle = angle / 180 * np.pi
+    return cx, cy, w, h, angle
--- a/ppdet/data/transform/operators.py
+++ b/ppdet/data/transform/operators.py
@@ -41,7 +41,6 @@ import threading
 MUTEX = threading.Lock()
 from ppdet.core.workspace import serializable
-from ppdet.modeling import bbox_utils
 from ..reader import Compose
 from .op_helper import (satisfy_sample_constraint, filter_and_process,
@@ -657,18 +656,6 @@ class RandomFlip(BaseOperator):
        bbox[:, 2] = width - oldx1
        return bbox
-    def apply_rbox(self, bbox, width):
-        oldx1 = bbox[:, 0].copy()
-        oldx2 = bbox[:, 2].copy()
-        oldx3 = bbox[:, 4].copy()
-        oldx4 = bbox[:, 6].copy()
-        bbox[:, 0] = width - oldx1
-        bbox[:, 2] = width - oldx2
-        bbox[:, 4] = width - oldx3
-        bbox[:, 6] = width - oldx4
-        bbox = [bbox_utils.get_best_begin_point_single(e) for e in bbox]
-        return bbox
    def apply(self, sample, context=None):
        """Filp the image and bounding box.
        Operators:
@@ -700,10 +687,6 @@ class RandomFlip(BaseOperator):
            if 'gt_segm' in sample and sample['gt_segm'].any():
                sample['gt_segm'] = sample['gt_segm'][:, :, ::-1]
-            if 'gt_rbox2poly' in sample and sample['gt_rbox2poly'].any():
-                sample['gt_rbox2poly'] = self.apply_rbox(sample['gt_rbox2poly'],
-                                                         width)
            sample['flipped'] = True
            sample['image'] = im
        return sample
@@ -841,16 +824,6 @@ class Resize(BaseOperator):
                                                [im_scale_x, im_scale_y],
                                                [resize_w, resize_h])
-        # apply rbox
-        if 'gt_rbox2poly' in sample:
-            if np.array(sample['gt_rbox2poly']).shape[1] != 8:
-                logger.warning(
-                    "gt_rbox2poly's length shoule be 8, but actually is {}".
-                    format(len(sample['gt_rbox2poly'])))
-            sample['gt_rbox2poly'] = self.apply_bbox(sample['gt_rbox2poly'],
-                                                     [im_scale_x, im_scale_y],
-                                                     [resize_w, resize_h])
        # apply polygon
        if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
            sample['gt_poly'] = self.apply_segm(sample['gt_poly'], im_shape[:2],
@@ -2111,33 +2084,6 @@ class Poly2Mask(BaseOperator):
        return sample
-@register_op
-class Rbox2Poly(BaseOperator):
-    """
-    Convert rbbox format to poly format.
-    """
-    def __init__(self):
-        super(Rbox2Poly, self).__init__()
-    def apply(self, sample, context=None):
-        assert 'gt_rbox' in sample
-        assert sample['gt_rbox'].shape[1] == 5
-        rrects = sample['gt_rbox']
-        x_ctr = rrects[:, 0]
-        y_ctr = rrects[:, 1]
-        width = rrects[:, 2]
-        height = rrects[:, 3]
-        x1 = x_ctr - width / 2.0
-        y1 = y_ctr - height / 2.0
-        x2 = x_ctr + width / 2.0
-        y2 = y_ctr + height / 2.0
-        sample['gt_bbox'] = np.stack([x1, y1, x2, y2], axis=1)
-        polys = bbox_utils.rbox2poly_np(rrects)
-        sample['gt_rbox2poly'] = polys
-        return sample
 @register_op
 class AugmentHSV(BaseOperator):
    """ 
@@ -2456,16 +2402,6 @@ class RandomResizeCrop(BaseOperator):
                                                [im_scale_x, im_scale_y],
                                                [resize_w, resize_h])
-        # apply rbox
-        if 'gt_rbox2poly' in sample:
-            if np.array(sample['gt_rbox2poly']).shape[1] != 8:
-                logger.warn(
-                    "gt_rbox2poly's length shoule be 8, but actually is {}".
-                    format(len(sample['gt_rbox2poly'])))
-            sample['gt_rbox2poly'] = self.apply_bbox(sample['gt_rbox2poly'],
-                                                     [im_scale_x, im_scale_y],
-                                                     [resize_w, resize_h])
        # apply polygon
        if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
            sample['gt_poly'] = self.apply_segm(sample['gt_poly'], im_shape[:2],

--- a/ppdet/data/transform/rotated_operators.py
+++ b/ppdet/data/transform/rotated_operators.py
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import division
+try:
+    from collections.abc import Sequence
+except Exception:
+    from collections import Sequence
+from numbers import Number, Integral
+import cv2
+import numpy as np
+import math
+import copy
+from .operators import register_op, BaseOperator
+from .op_helper import poly2rbox_le135, poly2rbox_oc
+from ppdet.modeling import bbox_utils
+from ppdet.utils.logger import setup_logger
+logger = setup_logger(__name__)
+@register_op
+class RRotate(BaseOperator):
+    """ Rotate Image, Polygon, Box
+    The image is warped with an affine rotation about its centre and the
+    same matrix is applied to `gt_poly`; `gt_bbox` is then rebuilt as the
+    clipped axis-aligned bounds of the rotated polygons.
+    Args:
+        scale (float): rotate scale
+        angle (float): rotate angle, negated before being handed to
+            cv2.getRotationMatrix2D
+        fill_value (int, tuple): fill color, used as the warp border value
+        auto_bound (bool): if True the output keeps the input size and the
+            rotation is rebuilt with a recomputed scale; if False the output
+            canvas grows to the rotated extent
+    """
+    def __init__(self, scale=1.0, angle=0., fill_value=0., auto_bound=True):
+        super(RRotate, self).__init__()
+        self.scale = scale
+        self.angle = angle
+        self.fill_value = fill_value
+        self.auto_bound = auto_bound
+    def get_rotated_matrix(self, angle, scale, h, w):
+        """ build the 2x3 affine matrix and the output size
+        Returns:
+            (matrix, h, w): h and w are the size the image should be warped to
+        """
+        center = ((w - 1) * 0.5, (h - 1) * 0.5)
+        matrix = cv2.getRotationMatrix2D(center, -angle, scale)
+        # calculate the new size
+        cos = np.abs(matrix[0, 0])
+        sin = np.abs(matrix[0, 1])
+        new_w = h * sin + w * cos
+        new_h = h * cos + w * sin
+        # calculate offset
+        n_w = int(np.round(new_w))
+        n_h = int(np.round(new_h))
+        if self.auto_bound:
+            # rebuild the rotation with `ratio` in place of `scale`;
+            # h and w are returned unchanged in this branch
+            ratio = min(w / n_w, h / n_h)
+            matrix = cv2.getRotationMatrix2D(center, -angle, ratio)
+        else:
+            # shift the translation by half the size growth and report the
+            # enlarged canvas size
+            matrix[0, 2] += (new_w - w) * 0.5
+            matrix[1, 2] += (new_h - h) * 0.5
+            w = n_w
+            h = n_h
+        return matrix, h, w
+    def get_rect_from_pts(self, pts, h, w):
+        """ get minimum rectangle of points
+        Each row of `pts` holds interleaved x, y values; the result has one
+        [min_x, min_y, max_x, max_y] row per input row, clipped to
+        [0, w] x [0, h].
+        """
+        assert pts.shape[-1] % 2 == 0, 'the dim of input [pts] is not correct'
+        min_x, min_y = np.min(pts[:, 0::2], axis=1), np.min(pts[:, 1::2],
+                                                            axis=1)
+        max_x, max_y = np.max(pts[:, 0::2], axis=1), np.max(pts[:, 1::2],
+                                                            axis=1)
+        min_x, min_y = np.clip(min_x, 0, w), np.clip(min_y, 0, h)
+        max_x, max_y = np.clip(max_x, 0, w), np.clip(max_y, 0, h)
+        boxes = np.stack([min_x, min_y, max_x, max_y], axis=-1)
+        return boxes
+    def apply_image(self, image, matrix, h, w):
+        # warp to an output of size (w, h), filling uncovered pixels with
+        # fill_value
+        return cv2.warpAffine(
+            image, matrix, (w, h), borderValue=self.fill_value)
+    def apply_pts(self, pts, matrix, h, w):
+        """ apply the affine matrix to rows of interleaved x, y points
+        h and w are accepted but not used here.
+        """
+        assert pts.shape[-1] % 2 == 0, 'the dim of input [pts] is not correct'
+        # n is number of samples and m is two times the number of points due to (x, y)
+        _, m = pts.shape
+        # transpose points
+        pts_ = pts.reshape(-1, 2).T
+        # pad 1 to convert the points to homogeneous coordinates
+        padding = np.ones((1, pts_.shape[1]), pts.dtype)
+        rotated_pts = np.matmul(matrix, np.concatenate((pts_, padding), axis=0))
+        # back to one row per input row with m values each
+        return rotated_pts[:2, :].T.reshape(-1, m)
+    def apply(self, sample, context=None):
+        """ rotate `image`, `gt_poly` and rebuild `gt_bbox`
+        `gt_poly` is read unconditionally, so the key must be present.
+        """
+        image = sample['image']
+        h, w = image.shape[:2]
+        matrix, h, w = self.get_rotated_matrix(self.angle, self.scale, h, w)
+        sample['image'] = self.apply_image(image, matrix, h, w)
+        polys = sample['gt_poly']
+        # TODO: segment or keypoint to be processed
+        if len(polys) > 0:
+            pts = self.apply_pts(polys, matrix, h, w)
+            sample['gt_poly'] = pts
+            sample['gt_bbox'] = self.get_rect_from_pts(pts, h, w)
+        return sample
+@register_op
+class RandomRRotate(BaseOperator):
+    """ Randomly rotate a sample by delegating to RRotate
+    Args:
+        scale (float, tuple, list): rotate scale
+        scale_mode (str): how `scale` is read, [range, value, None]
+        angle (float, tuple, list): rotate angle
+        angle_mode (str): how `angle` is read, [range, value, None]
+        fill_value (float, tuple, list): fill value
+        rotate_prob (float): probability of rotation
+        auto_bound (bool): whether auto bound or not
+    For both modes: None uses the value as given, 'range' draws uniformly
+    between the two entries, 'value' picks one entry at random.
+    """
+    def __init__(self,
+                 scale=1.0,
+                 scale_mode=None,
+                 angle=0.,
+                 angle_mode=None,
+                 fill_value=0.,
+                 rotate_prob=1.0,
+                 auto_bound=True):
+        super(RandomRRotate, self).__init__()
+        self.scale, self.scale_mode = scale, scale_mode
+        self.angle, self.angle_mode = angle, angle_mode
+        self.fill_value = fill_value
+        self.rotate_prob = rotate_prob
+        self.auto_bound = auto_bound
+    def get_angle(self, angle, angle_mode):
+        assert angle_mode in ('range', 'value') or not angle_mode, \
+            'angle mode should be in [range, value, None]'
+        if not angle_mode:
+            return angle
+        if angle_mode == 'value':
+            return np.random.choice(angle)
+        if angle_mode == 'range':
+            lower, upper = angle
+            return np.random.rand() * (upper - lower) + lower
+    def get_scale(self, scale, scale_mode):
+        assert scale_mode in ('range', 'value') or not scale_mode, \
+            'scale mode should be in [range, value, None]'
+        if not scale_mode:
+            return scale
+        if scale_mode == 'value':
+            return np.random.choice(scale)
+        if scale_mode == 'range':
+            lower, upper = scale
+            return np.random.rand() * (upper - lower) + lower
+    def apply(self, sample, context=None):
+        # one draw decides whether to rotate at all
+        skip = np.random.rand() > self.rotate_prob
+        if skip:
+            return sample
+        # angle is drawn before scale
+        picked_angle = self.get_angle(self.angle, self.angle_mode)
+        picked_scale = self.get_scale(self.scale, self.scale_mode)
+        rotate_op = RRotate(
+            scale=picked_scale,
+            angle=picked_angle,
+            fill_value=self.fill_value,
+            auto_bound=self.auto_bound)
+        return rotate_op(sample)
+@register_op
+class Poly2RBox(BaseOperator):
+    """ Polygon to Rotated Box
+    Converts every polygon in `gt_poly` to a rotated box, drops boxes the
+    filter rejects, and keeps the other per-box fields aligned.
+    Args:
+        filter_threshold (int, float): threshold to filter annotations
+        filter_mode (str): filter mode, ['area', 'edge']; any other value
+            (including None) disables filtering
+        rbox_type (str): rbox type, ['le135', 'oc']; any value other than
+            'le135' selects the 'oc' conversion
+    """
+    def __init__(self, filter_threshold=4, filter_mode=None, rbox_type='le135'):
+        super(Poly2RBox, self).__init__()
+        def _should_drop(size):
+            return self.filter(size, filter_threshold, filter_mode)
+        self.filter_fn = _should_drop
+        if rbox_type == 'le135':
+            self.rbox_fn = poly2rbox_le135
+        else:
+            self.rbox_fn = poly2rbox_oc
+    def filter(self, size, threshold, mode):
+        # True means the box is too small and must be discarded
+        if mode == 'area':
+            too_small = size[0] * size[1] < threshold
+        elif mode == 'edge':
+            too_small = min(size) < threshold
+        else:
+            too_small = False
+        return True if too_small else False
+    def get_rbox(self, polys):
+        kept_ids, rbox_rows, bbox_rows = [], [], []
+        for idx, poly in enumerate(polys):
+            cx, cy, w, h, angle = self.rbox_fn(poly)
+            if not self.filter_fn((w, h)):
+                rbox_rows.append(
+                    np.array(
+                        [cx, cy, w, h, angle], dtype=np.float32))
+                kept_ids.append(idx)
+                # axis-aligned bounds straight from the polygon coordinates
+                xs, ys = poly[0::2], poly[1::2]
+                bbox_rows.append(
+                    np.array(
+                        [min(xs), min(ys), max(xs), max(ys)],
+                        dtype=np.float32))
+        if len(kept_ids) != 0:
+            rboxes = np.stack(rbox_rows)
+            bboxes = np.stack(bbox_rows)
+        else:
+            rboxes = np.zeros((0, 5), dtype=np.float32)
+            bboxes = np.zeros((0, 4), dtype=np.float32)
+        return rboxes, bboxes, kept_ids
+    def apply(self, sample, context=None):
+        rboxes, bboxes, valid_ids = self.get_rbox(sample['gt_poly'])
+        sample['gt_rbox'] = rboxes
+        sample['gt_bbox'] = bboxes
+        # keep only the surviving rows of the other per-box fields
+        per_box_keys = ('gt_class', 'gt_score', 'gt_poly', 'is_crowd',
+                        'difficult')
+        for key in per_box_keys:
+            if key in sample:
+                sample[key] = sample[key][valid_ids]
+        return sample
+@register_op
+class Poly2Array(BaseOperator):
+    """ convert gt_poly to np.array for rotated bboxes
+    The result is a float32 array reshaped to (-1, 8), i.e. eight
+    coordinates per row. Samples without `gt_poly` pass through unchanged.
+    """
+    def __init__(self):
+        super(Poly2Array, self).__init__()
+    def apply(self, sample, context=None):
+        if 'gt_poly' in sample:
+            polys = np.array(
+                sample['gt_poly'], dtype=np.float32).reshape((-1, 8))
+            # log the shape the message promises, at debug level, instead of
+            # dumping every polygon of every sample at info level
+            logger.debug('gt_poly shape: {}'.format(polys.shape))
+            sample['gt_poly'] = polys
+        return sample
+@register_op
+class RResize(BaseOperator):
+    def __init__(self, target_size, keep_ratio, interp=cv2.INTER_LINEAR):
+        """
+        Resize the image and scale `gt_bbox` / `gt_poly` with it.
+        If keep_ratio is True, one scale factor is used for both axes, chosen
+        so the short side does not exceed min(target_size) and the long side
+        does not exceed max(target_size).
+        If keep_ratio is False, resize the image to target size(h, w).
+        Args:
+            target_size (int|list): image target size
+            keep_ratio (bool): whether keep_ratio or not (required, no default)
+            interp (int): the interpolation method
+        """
+        super(RResize, self).__init__()
+        self.keep_ratio = keep_ratio
+        self.interp = interp
+        if not isinstance(target_size, (Integral, Sequence)):
+            raise TypeError(
+                "Type of target_size is invalid. Must be Integer or List or Tuple, now is {}".
+                format(type(target_size)))
+        if isinstance(target_size, Integral):
+            target_size = [target_size, target_size]
+        self.target_size = target_size
+    def apply_image(self, image, scale):
+        # scale is (x factor, y factor); the output size follows from them
+        im_scale_x, im_scale_y = scale
+        return cv2.resize(
+            image,
+            None,
+            None,
+            fx=im_scale_x,
+            fy=im_scale_y,
+            interpolation=self.interp)
+    def apply_pts(self, pts, scale, size):
+        # pts rows hold interleaved x, y values; they are scaled in place and
+        # clipped to [0, resize_w] x [0, resize_h]
+        im_scale_x, im_scale_y = scale
+        resize_w, resize_h = size
+        pts[:, 0::2] *= im_scale_x
+        pts[:, 1::2] *= im_scale_y
+        pts[:, 0::2] = np.clip(pts[:, 0::2], 0, resize_w)
+        pts[:, 1::2] = np.clip(pts[:, 1::2], 0, resize_h)
+        return pts
+    def apply(self, sample, context=None):
+        """ Resize the image numpy.
+        Writes `image` (float32), `im_shape` and `scale_factor` (both ordered
+        h, w) and rescales `gt_bbox` / `gt_poly` when they are non-empty.
+        Raises:
+            TypeError: if the image is not a numpy array
+            ImageError: if the image is not 3-dimensional
+        """
+        im = sample['image']
+        if not isinstance(im, np.ndarray):
+            raise TypeError("{}: image type is not numpy.".format(self))
+        if len(im.shape) != 3:
+            # ImageError is not among this module's imports, so the original
+            # raise would fail with NameError; fetch it on the error path only.
+            # NOTE(review): assumes ImageError lives in .operators - confirm
+            from .operators import ImageError
+            raise ImageError('{}: image is not 3-dimensional.'.format(self))
+        # apply image
+        im_shape = im.shape
+        if self.keep_ratio:
+            im_size_min = np.min(im_shape[0:2])
+            im_size_max = np.max(im_shape[0:2])
+            target_size_min = np.min(self.target_size)
+            target_size_max = np.max(self.target_size)
+            # the smaller ratio keeps both sides within their limits
+            im_scale = min(target_size_min / im_size_min,
+                           target_size_max / im_size_max)
+            resize_h = im_scale * float(im_shape[0])
+            resize_w = im_scale * float(im_shape[1])
+            im_scale_x = im_scale
+            im_scale_y = im_scale
+        else:
+            resize_h, resize_w = self.target_size
+            im_scale_y = resize_h / im_shape[0]
+            im_scale_x = resize_w / im_shape[1]
+        im = self.apply_image(sample['image'], [im_scale_x, im_scale_y])
+        sample['image'] = im.astype(np.float32)
+        sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
+        # compose with a scale factor left by an earlier resize, if any
+        if 'scale_factor' in sample:
+            scale_factor = sample['scale_factor']
+            sample['scale_factor'] = np.asarray(
+                [scale_factor[0] * im_scale_y, scale_factor[1] * im_scale_x],
+                dtype=np.float32)
+        else:
+            sample['scale_factor'] = np.asarray(
+                [im_scale_y, im_scale_x], dtype=np.float32)
+        # apply bbox
+        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
+            sample['gt_bbox'] = self.apply_pts(sample['gt_bbox'],
+                                               [im_scale_x, im_scale_y],
+                                               [resize_w, resize_h])
+        # apply polygon
+        if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
+            sample['gt_poly'] = self.apply_pts(sample['gt_poly'],
+                                               [im_scale_x, im_scale_y],
+                                               [resize_w, resize_h])
+        return sample
+@register_op
+class RandomRFlip(BaseOperator):
+    def __init__(self, prob=0.5):
+        """
+        Randomly mirror the image left-right together with `gt_bbox` and
+        `gt_poly`.
+        Args:
+            prob (float): the probability of flipping image
+        Raises:
+            TypeError: if prob is not a real number
+        """
+        super(RandomRFlip, self).__init__()
+        self.prob = prob
+        # integers such as 0 or 1 are valid probabilities too; bool stays
+        # rejected as before
+        if isinstance(self.prob, bool) or not isinstance(self.prob,
+                                                         (int, float)):
+            raise TypeError("{}: input type is invalid.".format(self))
+    def apply_image(self, image):
+        # reverse the column axis
+        return image[:, ::-1, :]
+    def apply_pts(self, pts, width):
+        # mirror every x value (even columns) in place: x -> width - x - 1
+        oldx = pts[:, 0::2].copy()
+        pts[:, 0::2] = width - oldx - 1
+        return pts
+    def apply(self, sample, context=None):
+        """Flip the image and bounding box.
+        Operators:
+            1. Flip the image numpy.
+            2. Mirror the x coordinates of `gt_bbox` and restore x1 <= x2.
+            3. Mirror the x coordinates of `gt_poly`.
+        Output:
+            sample: the image, bounding box and polygon part
+                    in sample are flipped; `flipped` is set to True.
+        """
+        if np.random.uniform(0, 1) < self.prob:
+            im = sample['image']
+            width = im.shape[1]
+            im = self.apply_image(im)
+            if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
+                bbox = self.apply_pts(sample['gt_bbox'], width)
+                if bbox.shape[-1] == 4:
+                    # mirroring turns the old left edge into the right one;
+                    # swap the x columns so boxes stay [x1, y1, x2, y2]
+                    bbox[:, [0, 2]] = bbox[:, [2, 0]]
+                sample['gt_bbox'] = bbox
+            if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
+                sample['gt_poly'] = self.apply_pts(sample['gt_poly'], width)
+            sample['flipped'] = True
+            sample['image'] = im
+        return sample
+@register_op
+class VisibleRBox(BaseOperator):
+    """
+    In debug mode, draw the `gt_poly` outlines and class ids on the image and
+    save it as a JPEG in `output_dir`.
+    (Currently only supported when not cropping and flipping image.)
+    NOTE(review): this class uses `os`, `Image` and `ImageDraw`, none of which
+    appear in the imports shown for this module - add them before use.
+    """
+    def __init__(self, output_dir='debug'):
+        super(VisibleRBox, self).__init__()
+        self.output_dir = output_dir
+        # create the output directory on construction if it is missing
+        if not os.path.isdir(output_dir):
+            os.makedirs(output_dir)
+    def apply(self, sample, context=None):
+        image = Image.fromarray(sample['image'].astype(np.uint8))
+        # file name is the zero-padded first entry of im_id
+        out_file_name = '{:012d}.jpg'.format(sample['im_id'][0])
+        # width / height are only used to rescale keypoints below
+        width = sample['w']
+        height = sample['h']
+        # gt_poly = sample['gt_rbox']
+        gt_poly = sample['gt_poly']
+        gt_class = sample['gt_class']
+        draw = ImageDraw.Draw(image)
+        for i in range(gt_poly.shape[0]):
+            x1, y1, x2, y2, x3, y3, x4, y4 = gt_poly[i]
+            # closed outline through the four vertices
+            draw.line(
+                [(x1, y1), (x2, y2), (x3, y3), (x4, y4), (x1, y1)],
+                width=2,
+                fill='green')
+            # draw label
+            xmin = min(x1, x2, x3, x4)
+            ymin = min(y1, y2, y3, y4)
+            text = str(gt_class[i][0])
+            # NOTE(review): textsize is believed to be gone from recent
+            # Pillow releases - confirm against the pinned version
+            tw, th = draw.textsize(text)
+            draw.rectangle(
+                [(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill='green')
+            draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255))
+        if 'gt_keypoint' in sample.keys():
+            gt_keypoint = sample['gt_keypoint']
+            # NOTE(review): is_normalized is never assigned in this class;
+            # unless BaseOperator provides it this raises AttributeError
+            if self.is_normalized:
+                # odd columns are scaled by height, even columns by width
+                for i in range(gt_keypoint.shape[1]):
+                    if i % 2:
+                        gt_keypoint[:, i] = gt_keypoint[:, i] * height
+                    else:
+                        gt_keypoint[:, i] = gt_keypoint[:, i] * width
+            for i in range(gt_keypoint.shape[0]):
+                keypoint = gt_keypoint[i]
+                for j in range(int(keypoint.shape[0] / 2)):
+                    # NOTE(review): .astype on the result of round() presumably
+                    # needs a numpy scalar back - verify with the numpy in use
+                    x1 = round(keypoint[2 * j]).astype(np.int32)
+                    y1 = round(keypoint[2 * j + 1]).astype(np.int32)
+                    draw.ellipse(
+                        (x1, y1, x1 + 5, y1 + 5), fill='green', outline='green')
+        save_path = os.path.join(self.output_dir, out_file_name)
+        image.save(save_path, quality=95)
+        return sample
+@register_op
+class Rbox2Poly(BaseOperator):
+    """
+    Derive `gt_poly` and `gt_bbox` from `gt_rbox`.
+    `gt_bbox` is built from the centre, width and height columns only;
+    `gt_poly` comes from bbox_utils.rbox2poly_np.
+    """
+    def __init__(self):
+        super(Rbox2Poly, self).__init__()
+    def apply(self, sample, context=None):
+        assert 'gt_rbox' in sample
+        assert sample['gt_rbox'].shape[1] == 5
+        rboxes = sample['gt_rbox']
+        cx, cy = rboxes[:, 0], rboxes[:, 1]
+        half_w = rboxes[:, 2] / 2.0
+        half_h = rboxes[:, 3] / 2.0
+        # [x1, y1, x2, y2] around the centre
+        corners = [cx - half_w, cy - half_h, cx + half_w, cy + half_h]
+        sample['gt_bbox'] = np.stack(corners, axis=1)
+        sample['gt_poly'] = bbox_utils.rbox2poly_np(rboxes)
+        return sample
--- a/ppdet/metrics/map_utils.py
+++ b/ppdet/metrics/map_utils.py
@@ -138,8 +138,7 @@ def calc_rbox_iou(pred, gt_rbox):
 def prune_zero_padding(gt_box, gt_label, difficult=None):
    valid_cnt = 0
    for i in range(len(gt_box)):
-        if gt_box[i, 0] == 0 and gt_box[i, 1] == 0 and \
+        if (gt_box[i] == 0).all():
-                gt_box[i, 2] == 0 and gt_box[i, 3] == 0:
            break
        valid_cnt += 1
    return (gt_box[:valid_cnt], gt_label[:valid_cnt], difficult[:valid_cnt]
@@ -331,8 +330,9 @@ class DetectionMAP(object):
            num_columns = min(6, len(results_per_category) * 2)
            results_flatten = list(itertools.chain(*results_per_category))
            headers = ['category', 'AP'] * (num_columns // 2)
-            results_2d = itertools.zip_longest(
+            results_2d = itertools.zip_longest(* [
-                *[results_flatten[i::num_columns] for i in range(num_columns)])
+                results_flatten[i::num_columns] for i in range(num_columns)
+            ])
            table_data = [headers]
            table_data += [result for result in results_2d]
            table = AsciiTable(table_data)

--- a/ppdet/metrics/metrics.py
+++ b/ppdet/metrics/metrics.py
@@ -347,22 +347,12 @@ class WiderFaceMetric(Metric):
 class RBoxMetric(Metric):
    def __init__(self, anno_file, **kwargs):
-        assert os.path.isfile(anno_file), \
-                "anno_file {} not a file".format(anno_file)
-        assert os.path.exists(anno_file), "anno_file {} not exists".format(
-            anno_file)
        self.anno_file = anno_file
-        self.gt_anno = json.load(open(self.anno_file))
+        self.clsid2catid, self.catid2name = get_categories('COCO', anno_file)
-        cats = self.gt_anno['categories']
+        self.catid2clsid = {v: k for k, v in self.clsid2catid.items()}
-        self.clsid2catid = {i: cat['id'] for i, cat in enumerate(cats)}
-        self.catid2clsid = {cat['id']: i for i, cat in enumerate(cats)}
-        self.catid2name = {cat['id']: cat['name'] for cat in cats}
        self.classwise = kwargs.get('classwise', False)
        self.output_eval = kwargs.get('output_eval', None)
-        # TODO: bias should be unified
-        self.bias = kwargs.get('bias', 0)
        self.save_prediction_only = kwargs.get('save_prediction_only', False)
-        self.iou_type = kwargs.get('IouType', 'bbox')
        self.overlap_thresh = kwargs.get('overlap_thresh', 0.5)
        self.map_type = kwargs.get('map_type', '11point')
        self.evaluate_difficult = kwargs.get('evaluate_difficult', False)
@@ -379,7 +369,7 @@ class RBoxMetric(Metric):
        self.reset()
    def reset(self):
-        self.result_bbox = []
+        self.results = []
        self.detection_map.reset()
    def update(self, inputs, outputs):
@@ -389,35 +379,45 @@ class RBoxMetric(Metric):
            outs[k] = v.numpy() if isinstance(v, paddle.Tensor) else v
        im_id = inputs['im_id']
-        outs['im_id'] = im_id.numpy() if isinstance(im_id,
+        im_id = im_id.numpy() if isinstance(im_id, paddle.Tensor) else im_id
-                                                    paddle.Tensor) else im_id
+        outs['im_id'] = im_id
-        infer_results = get_infer_results(
+        infer_results = get_infer_results(outs, self.clsid2catid)
-            outs, self.clsid2catid, bias=self.bias)
+        infer_results = infer_results['bbox'] if 'bbox' in infer_results else []
-        self.result_bbox += infer_results[
+        self.results += infer_results
-            'bbox'] if 'bbox' in infer_results else []
+        if self.save_prediction_only:
-        bbox = [b['bbox'] for b in self.result_bbox]
+            return
-        score = [b['score'] for b in self.result_bbox]
-        label = [b['category_id'] for b in self.result_bbox]
+        gt_boxes = inputs['gt_rbox']
-        label = [self.catid2clsid[e] for e in label]
+        gt_labels = inputs['gt_class']
-        gt_box = [
+        for i in range(len(gt_boxes)):
-            e['bbox'] for e in self.gt_anno['annotations']
+            gt_box = gt_boxes[i].numpy() if isinstance(
-            if e['image_id'] == outs['im_id']
+                gt_boxes[i], paddle.Tensor) else gt_boxes[i]
-        ]
+            gt_label = gt_labels[i].numpy() if isinstance(
-        gt_label = [
+                gt_labels[i], paddle.Tensor) else gt_labels[i]
-            e['category_id'] for e in self.gt_anno['annotations']
+            gt_box, gt_label, _ = prune_zero_padding(gt_box, gt_label)
-            if e['image_id'] == outs['im_id']
+            bbox = [
-        ]
+                res['bbox'] for res in infer_results
-        gt_label = [self.catid2clsid[e] for e in gt_label]
+                if int(res['image_id']) == int(im_id[i])
-        self.detection_map.update(bbox, score, label, gt_box, gt_label)
+            ]
+            score = [
+                res['score'] for res in infer_results
+                if int(res['image_id']) == int(im_id[i])
+            ]
+            label = [
+                self.catid2clsid[int(res['category_id'])]
+                for res in infer_results
+                if int(res['image_id']) == int(im_id[i])
+            ]
+            self.detection_map.update(bbox, score, label, gt_box, gt_label)
    def accumulate(self):
-        if len(self.result_bbox) > 0:
+        if len(self.results) > 0:
            output = "bbox.json"
            if self.output_eval:
                output = os.path.join(self.output_eval, output)
            with open(output, 'w') as f:
-                json.dump(self.result_bbox, f)
+                json.dump(self.results, f)
                logger.info('The bbox result is saved to bbox.json.')
            if self.save_prediction_only:

--- a/ppdet/modeling/architectures/s2anet.py
+++ b/ppdet/modeling/architectures/s2anet.py
@@ -26,26 +26,21 @@ __all__ = ['S2ANet']
 @register
 class S2ANet(BaseArch):
    __category__ = 'architecture'
-    __inject__ = [
+    __inject__ = ['head']
-        's2anet_head',
-        's2anet_bbox_post_process',
-    ]
-    def __init__(self, backbone, neck, s2anet_head, s2anet_bbox_post_process):
+    def __init__(self, backbone, neck, head):
        """
        S2ANet, see https://arxiv.org/pdf/2008.09397.pdf
        Args:
            backbone (object): backbone instance
            neck (object): `FPN` instance
-            s2anet_head (object): `S2ANetHead` instance
+            head (object): `Head` instance
-            s2anet_bbox_post_process (object): `S2ANetBBoxPostProcess` instance
        """
        super(S2ANet, self).__init__()
        self.backbone = backbone
        self.neck = neck
-        self.s2anet_head = s2anet_head
+        self.s2anet_head = head
-        self.s2anet_bbox_post_process = s2anet_bbox_post_process
    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
@@ -55,42 +50,28 @@ class S2ANet(BaseArch):
        out_shape = neck and neck.out_shape or backbone.out_shape
        kwargs = {'input_shape': out_shape}
-        s2anet_head = create(cfg['s2anet_head'], **kwargs)
+        head = create(cfg['head'], **kwargs)
-        s2anet_bbox_post_process = create(cfg['s2anet_bbox_post_process'],
-                                          **kwargs)
-        return {
+        return {'backbone': backbone, 'neck': neck, "head": head}
-            'backbone': backbone,
-            'neck': neck,
-            "s2anet_head": s2anet_head,
-            "s2anet_bbox_post_process": s2anet_bbox_post_process,
-        }
    def _forward(self):
        body_feats = self.backbone(self.inputs)
        if self.neck is not None:
            body_feats = self.neck(body_feats)
-        self.s2anet_head(body_feats)
        if self.training:
-            loss = self.s2anet_head.get_loss(self.inputs)
+            loss = self.s2anet_head(body_feats, self.inputs)
-            total_loss = paddle.add_n(list(loss.values()))
-            loss.update({'loss': total_loss})
            return loss
        else:
-            im_shape = self.inputs['im_shape']
+            head_outs = self.s2anet_head(body_feats)
-            scale_factor = self.inputs['scale_factor']
-            nms_pre = self.s2anet_bbox_post_process.nms_pre
-            pred_scores, pred_bboxes = self.s2anet_head.get_prediction(nms_pre)
            # post_process
-            pred_bboxes, bbox_num = self.s2anet_bbox_post_process(pred_scores,
+            bboxes, bbox_num = self.s2anet_head.get_bboxes(head_outs)
-                                                                  pred_bboxes)
            # rescale the prediction back to origin image
-            pred_bboxes = self.s2anet_bbox_post_process.get_pred(
+            im_shape = self.inputs['im_shape']
-                pred_bboxes, bbox_num, im_shape, scale_factor)
+            scale_factor = self.inputs['scale_factor']
+            bboxes = self.s2anet_head.get_pred(bboxes, bbox_num, im_shape,
+                                               scale_factor)
            # output
-            output = {'bbox': pred_bboxes, 'bbox_num': bbox_num}
+            output = {'bbox': bboxes, 'bbox_num': bbox_num}
            return output
    def get_loss(self, ):

--- a/ppdet/modeling/heads/s2anet_head.py
+++ b/ppdet/modeling/heads/s2anet_head.py
--- a/ppdet/modeling/layers.py
+++ b/ppdet/modeling/layers.py
@@ -39,6 +39,81 @@ def _to_list(l):
    return [l]
+class AlignConv(nn.Layer):
+    """
+    Deformable convolution whose sampling offsets are computed from rotated
+    anchors (see `get_offset`) rather than predicted by a separate branch.
+    The convolution output is passed through ReLU.
+    """
+    def __init__(self, in_channels, out_channels, kernel_size=3, groups=1):
+        super(AlignConv, self).__init__()
+        self.kernel_size = kernel_size
+        self.align_conv = paddle.vision.ops.DeformConv2D(
+            in_channels,
+            out_channels,
+            kernel_size=self.kernel_size,
+            padding=(self.kernel_size - 1) // 2,
+            groups=groups,
+            weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
+            bias_attr=None)
+    @paddle.no_grad()
+    def get_offset(self, anchors, featmap_size, stride):
+        """
+        Compute, for every feature-map location, the displacement of each
+        kernel tap from its regular grid position to the matching sampling
+        point on the anchor at that location.
+        Args:
+            anchors: [B, L, 5] xc,yc,w,h,angle; L must equal feat_h * feat_w
+                for the final reshape to succeed
+            featmap_size: (feat_h, feat_w)
+            stride: stride of the feature level, e.g. 8
+        Returns:
+            offset: tensor of shape
+                [B, kernel_size * kernel_size * 2, feat_h, feat_w], with the
+                (y, x) displacement of each kernel tap interleaved along the
+                channel axis
+        """
+        batch = anchors.shape[0]
+        dtype = anchors.dtype
+        feat_h, feat_w = featmap_size
+        pad = (self.kernel_size - 1) // 2
+        # relative tap coordinates of the kernel, e.g. -1, 0, 1 for a 3x3
+        idx = paddle.arange(-pad, pad + 1, dtype=dtype)
+        yy, xx = paddle.meshgrid(idx, idx)
+        xx = paddle.reshape(xx, [-1])
+        yy = paddle.reshape(yy, [-1])
+        # get sampling locations of default conv
+        xc = paddle.arange(0, feat_w, dtype=dtype)
+        yc = paddle.arange(0, feat_h, dtype=dtype)
+        yc, xc = paddle.meshgrid(yc, xc)
+        xc = paddle.reshape(xc, [-1, 1])
+        yc = paddle.reshape(yc, [-1, 1])
+        x_conv = xc + xx
+        y_conv = yc + yy
+        # get sampling locations of anchors (in feature-map units)
+        x_ctr, y_ctr, w, h, a = paddle.split(anchors, 5, axis=-1)
+        x_ctr = x_ctr / stride
+        y_ctr = y_ctr / stride
+        w_s = w / stride
+        h_s = h / stride
+        cos, sin = paddle.cos(a), paddle.sin(a)
+        dw, dh = w_s / self.kernel_size, h_s / self.kernel_size
+        x, y = dw * xx, dh * yy
+        # rotate the scaled tap grid by the anchor angle
+        xr = cos * x - sin * y
+        yr = sin * x + cos * y
+        x_anchor, y_anchor = xr + x_ctr, yr + y_ctr
+        # get offset field
+        offset_x = x_anchor - x_conv
+        offset_y = y_anchor - y_conv
+        offset = paddle.stack([offset_y, offset_x], axis=-1)
+        offset = offset.reshape(
+            [batch, feat_h, feat_w, self.kernel_size * self.kernel_size * 2])
+        offset = offset.transpose([0, 3, 1, 2])
+        return offset
+    def forward(self, x, refine_anchors, featmap_size, stride):
+        """
+        Apply the anchor-aligned deformable convolution followed by ReLU.
+        Args:
+            x: input feature map passed to the deformable convolution
+            refine_anchors: anchors forwarded to `get_offset`
+            featmap_size: (feat_h, feat_w)
+            stride: stride of the feature level
+        Returns:
+            ReLU-activated output of the deformable convolution
+        """
+        offset = self.get_offset(refine_anchors, featmap_size, stride)
+        if self.training:
+            # keep the offsets out of the backward graph during training
+            x = F.relu(self.align_conv(x, offset.detach()))
+        else:
+            x = F.relu(self.align_conv(x, offset))
+        return x
 class DeformableConvV2(nn.Layer):
    def __init__(self,
                 in_channels,

--- a/ppdet/modeling/post_process.py
+++ b/ppdet/modeling/post_process.py
@@ -27,8 +27,8 @@ except Exception:
 __all__ = [
    'BBoxPostProcess', 'MaskPostProcess', 'FCOSPostProcess',
-    'S2ANetBBoxPostProcess', 'JDEBBoxPostProcess', 'CenterNetPostProcess',
+    'JDEBBoxPostProcess', 'CenterNetPostProcess', 'DETRBBoxPostProcess',
-    'DETRBBoxPostProcess', 'SparsePostProcess'
+    'SparsePostProcess'
 ]
@@ -294,109 +294,6 @@ class FCOSPostProcess(object):
        return bbox_pred, bbox_num
-@register
-class S2ANetBBoxPostProcess(nn.Layer):
-    __shared__ = ['num_classes']
-    __inject__ = ['nms']
-    def __init__(self, num_classes=15, nms_pre=2000, min_bbox_size=0, nms=None):
-        super(S2ANetBBoxPostProcess, self).__init__()
-        self.num_classes = num_classes
-        self.nms_pre = nms_pre
-        self.min_bbox_size = min_bbox_size
-        self.nms = nms
-        self.origin_shape_list = []
-        self.fake_pred_cls_score_bbox = paddle.to_tensor(
-            np.array(
-                [[-1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
-                dtype='float32'))
-        self.fake_bbox_num = paddle.to_tensor(np.array([1], dtype='int32'))
-    def forward(self, pred_scores, pred_bboxes):
-        """
-        pred_scores : [N, M]  score
-        pred_bboxes : [N, 5]  xc, yc, w, h, a
-        im_shape : [N, 2]  im_shape
-        scale_factor : [N, 2]  scale_factor
-        """
-        pred_ploys0 = rbox2poly(pred_bboxes)
-        pred_ploys = paddle.unsqueeze(pred_ploys0, axis=0)
-        # pred_scores [NA, 16] --> [16, NA]
-        pred_scores0 = paddle.transpose(pred_scores, [1, 0])
-        pred_scores = paddle.unsqueeze(pred_scores0, axis=0)
-        pred_cls_score_bbox, bbox_num, _ = self.nms(pred_ploys, pred_scores,
-                                                    self.num_classes)
-        # Prevent empty bbox_pred from decode or NMS.
-        # Bboxes and score before NMS may be empty due to the score threshold.
-        if pred_cls_score_bbox.shape[0] <= 0 or pred_cls_score_bbox.shape[
-                1] <= 1:
-            pred_cls_score_bbox = self.fake_pred_cls_score_bbox
-            bbox_num = self.fake_bbox_num
-        pred_cls_score_bbox = paddle.reshape(pred_cls_score_bbox, [-1, 10])
-        return pred_cls_score_bbox, bbox_num
-    def get_pred(self, bboxes, bbox_num, im_shape, scale_factor):
-        """
-        Rescale, clip and filter the bbox from the output of NMS to
-        get final prediction.
-        Args:
-            bboxes(Tensor): bboxes [N, 10]
-            bbox_num(Tensor): bbox_num
-            im_shape(Tensor): [1 2]
-            scale_factor(Tensor): [1 2]
-        Returns:
-            bbox_pred(Tensor): The output is the prediction with shape [N, 8]
-                               including labels, scores and bboxes. The size of
-                               bboxes are corresponding to the original image.
-        """
-        origin_shape = paddle.floor(im_shape / scale_factor + 0.5)
-        origin_shape_list = []
-        scale_factor_list = []
-        # scale_factor: scale_y, scale_x
-        for i in range(bbox_num.shape[0]):
-            expand_shape = paddle.expand(origin_shape[i:i + 1, :],
-                                         [bbox_num[i], 2])
-            scale_y, scale_x = scale_factor[i][0], scale_factor[i][1]
-            scale = paddle.concat([
-                scale_x, scale_y, scale_x, scale_y, scale_x, scale_y, scale_x,
-                scale_y
-            ])
-            expand_scale = paddle.expand(scale, [bbox_num[i], 8])
-            origin_shape_list.append(expand_shape)
-            scale_factor_list.append(expand_scale)
-        origin_shape_list = paddle.concat(origin_shape_list)
-        scale_factor_list = paddle.concat(scale_factor_list)
-        # bboxes: [N, 10], label, score, bbox
-        pred_label_score = bboxes[:, 0:2]
-        pred_bbox = bboxes[:, 2:]
-        # rescale bbox to original image
-        pred_bbox = pred_bbox.reshape([-1, 8])
-        scaled_bbox = pred_bbox / scale_factor_list
-        origin_h = origin_shape_list[:, 0]
-        origin_w = origin_shape_list[:, 1]
-        bboxes = scaled_bbox
-        zeros = paddle.zeros_like(origin_h)
-        x1 = paddle.maximum(paddle.minimum(bboxes[:, 0], origin_w - 1), zeros)
-        y1 = paddle.maximum(paddle.minimum(bboxes[:, 1], origin_h - 1), zeros)
-        x2 = paddle.maximum(paddle.minimum(bboxes[:, 2], origin_w - 1), zeros)
-        y2 = paddle.maximum(paddle.minimum(bboxes[:, 3], origin_h - 1), zeros)
-        x3 = paddle.maximum(paddle.minimum(bboxes[:, 4], origin_w - 1), zeros)
-        y3 = paddle.maximum(paddle.minimum(bboxes[:, 5], origin_h - 1), zeros)
-        x4 = paddle.maximum(paddle.minimum(bboxes[:, 6], origin_w - 1), zeros)
-        y4 = paddle.maximum(paddle.minimum(bboxes[:, 7], origin_h - 1), zeros)
-        pred_bbox = paddle.stack([x1, y1, x2, y2, x3, y3, x4, y4], axis=-1)
-        pred_result = paddle.concat([pred_label_score, pred_bbox], axis=1)
-        return pred_result
 @register
 class JDEBBoxPostProcess(nn.Layer):
    __shared__ = ['num_classes']

--- a/ppdet/modeling/proposal_generator/anchor_generator.py
+++ b/ppdet/modeling/proposal_generator/anchor_generator.py
@@ -19,10 +19,11 @@ import math
 import paddle
 import paddle.nn as nn
+import numpy as np
 from ppdet.core.workspace import register
-__all__ = ['AnchorGenerator', 'RetinaAnchorGenerator']
+__all__ = ['AnchorGenerator', 'RetinaAnchorGenerator', 'S2ANetAnchorGenerator']
 @register
@@ -153,3 +154,113 @@ class RetinaAnchorGenerator(AnchorGenerator):
            strides=strides,
            variance=variance,
            offset=offset)
+@register
+class S2ANetAnchorGenerator(nn.Layer):
+    """
+    AnchorGenerator by paddle
+    Builds base anchors from `base_size`, `scales` and `ratios`, shifts them
+    over a feature-map grid and returns them in
+    (x_ctr, y_ctr, w, h, angle) form.
+    """
+    def __init__(self, base_size, scales, ratios, scale_major=True, ctr=None):
+        super(S2ANetAnchorGenerator, self).__init__()
+        self.base_size = base_size
+        self.scales = paddle.to_tensor(scales)
+        self.ratios = paddle.to_tensor(ratios)
+        self.scale_major = scale_major
+        self.ctr = ctr
+        # base anchors are computed once at construction time
+        self.base_anchors = self.gen_base_anchors()
+    @property
+    def num_base_anchors(self):
+        # number of rows of the base-anchor tensor
+        return self.base_anchors.shape[0]
+    def gen_base_anchors(self):
+        """
+        Build the base anchors as (xmin, ymin, xmax, ymax) rows centred on
+        `self.ctr`, or on the centre of a `base_size` square when `ctr` is
+        None. The result is rounded.
+        """
+        w = self.base_size
+        h = self.base_size
+        if self.ctr is None:
+            x_ctr = 0.5 * (w - 1)
+            y_ctr = 0.5 * (h - 1)
+        else:
+            x_ctr, y_ctr = self.ctr
+        h_ratios = paddle.sqrt(self.ratios)
+        w_ratios = 1 / h_ratios
+        # NOTE(review): both branches multiply the same three factors
+        # elementwise and differ only in multiplication order, so
+        # `scale_major` does not appear to change the anchor layout - confirm.
+        if self.scale_major:
+            ws = (w * w_ratios[:] * self.scales[:]).reshape([-1])
+            hs = (h * h_ratios[:] * self.scales[:]).reshape([-1])
+        else:
+            ws = (w * self.scales[:] * w_ratios[:]).reshape([-1])
+            hs = (h * self.scales[:] * h_ratios[:]).reshape([-1])
+        base_anchors = paddle.stack(
+            [
+                x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1),
+                x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1)
+            ],
+            axis=-1)
+        base_anchors = paddle.round(base_anchors)
+        return base_anchors
+    def _meshgrid(self, x, y, row_major=True):
+        """
+        Return the flattened meshgrid of `x` and `y`, as (xx, yy) when
+        `row_major` is True and (yy, xx) otherwise.
+        """
+        yy, xx = paddle.meshgrid(y, x)
+        yy = yy.reshape([-1])
+        xx = xx.reshape([-1])
+        if row_major:
+            return xx, yy
+        else:
+            return yy, xx
+    def forward(self, featmap_size, stride=16):
+        """
+        Generate anchors for one feature level.
+        Args:
+            featmap_size: (feat_h, feat_w)
+            stride: multiplier applied to the grid indices to obtain shifts
+        Returns:
+            tensor of shape [feat_h * feat_w, 5] holding
+            (x_ctr, y_ctr, w, h, angle) rows
+        """
+        # featmap_size*stride project it to original area
+        feat_h = featmap_size[0]
+        feat_w = featmap_size[1]
+        shift_x = paddle.arange(0, feat_w, 1, 'int32') * stride
+        shift_y = paddle.arange(0, feat_h, 1, 'int32') * stride
+        shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)
+        shifts = paddle.stack([shift_xx, shift_yy, shift_xx, shift_yy], axis=-1)
+        # NOTE(review): the addition and the reshape to [feat_h * feat_w, 4]
+        # look like they only work with a single base anchor per location -
+        # confirm against the configs that use this generator.
+        all_anchors = self.base_anchors[:, :] + shifts[:, :]
+        all_anchors = all_anchors.cast(paddle.float32).reshape(
+            [feat_h * feat_w, 4])
+        all_anchors = self.rect2rbox(all_anchors)
+        return all_anchors
+    def valid_flags(self, featmap_size, valid_size):
+        """
+        Build a flat int32 flag tensor that is 1 for grid positions inside
+        the top-left `valid_size` region of the feature map and 0 elsewhere,
+        expanded by `num_base_anchors`.
+        Args:
+            featmap_size: (feat_h, feat_w)
+            valid_size: (valid_h, valid_w), each no larger than the
+                corresponding feature-map dimension
+        """
+        feat_h, feat_w = featmap_size
+        valid_h, valid_w = valid_size
+        assert valid_h <= feat_h and valid_w <= feat_w
+        valid_x = paddle.zeros([feat_w], dtype='int32')
+        valid_y = paddle.zeros([feat_h], dtype='int32')
+        valid_x[:valid_w] = 1
+        valid_y[:valid_h] = 1
+        valid_xx, valid_yy = self._meshgrid(valid_x, valid_y)
+        # a position is valid only when both its column and its row are valid
+        valid = valid_xx & valid_yy
+        valid = paddle.reshape(valid, [-1, 1])
+        valid = paddle.expand(valid, [-1, self.num_base_anchors]).reshape([-1])
+        return valid
+    def rect2rbox(self, bboxes):
+        """
+        Convert axis-aligned boxes to rotated-box form. The longer edge is
+        stored as w and the shorter as h; the angle is pi / 2 when the x
+        extent is smaller than the y extent and 0 otherwise.
+        :param bboxes: shape (L, 4) (xmin, ymin, xmax, ymax)
+        :return: dbboxes: shape (L, 5) (x_ctr, y_ctr, w, h, angle)
+        """
+        x1, y1, x2, y2 = paddle.split(bboxes, 4, axis=-1)
+        x_ctr = (x1 + x2) / 2.0
+        y_ctr = (y1 + y2) / 2.0
+        edges1 = paddle.abs(x2 - x1)
+        edges2 = paddle.abs(y2 - y1)
+        rbox_w = paddle.maximum(edges1, edges2)
+        rbox_h = paddle.minimum(edges1, edges2)
+        # set angle
+        inds = edges1 < edges2
+        inds = paddle.cast(inds, paddle.float32)
+        rboxes_angle = inds * np.pi / 2.0
+        rboxes = paddle.concat(
+            (x_ctr, y_ctr, rbox_w, rbox_h, rboxes_angle), axis=-1)
+        return rboxes
--- a/ppdet/modeling/proposal_generator/target_layer.py
+++ b/ppdet/modeling/proposal_generator/target_layer.py
@@ -365,21 +365,11 @@ class RBoxAssigner(object):
    def assign_anchor(self,
                      anchors,
                      gt_bboxes,
-                      gt_lables,
+                      gt_labels,
                      pos_iou_thr,
                      neg_iou_thr,
                      min_iou_thr=0.0,
                      ignore_iof_thr=-2):
-        """
-        Args:
-            anchors:
-            gt_bboxes:[M, 5] rc,yc,w,h,angle
-            gt_lables:
-        Returns:
-        """
        assert anchors.shape[1] == 4 or anchors.shape[1] == 5
        assert gt_bboxes.shape[1] == 4 or gt_bboxes.shape[1] == 5
        anchors_xc_yc = anchors
@@ -428,12 +418,12 @@ class RBoxAssigner(object):
        # (4) assign max_iou as pos_ids >=0
        anchor_gt_bbox_iou_inds = anchor_gt_bbox_inds[gt_bbox_anchor_iou_inds]
        # gt_bbox_anchor_iou_inds = np.logical_and(gt_bbox_anchor_iou_inds, anchor_gt_bbox_iou >= min_iou_thr)
-        labels[gt_bbox_anchor_iou_inds] = gt_lables[anchor_gt_bbox_iou_inds]
+        labels[gt_bbox_anchor_iou_inds] = gt_labels[anchor_gt_bbox_iou_inds]
        # (5) assign >= pos_iou_thr as pos_ids
        iou_pos_iou_thr_ids = anchor_gt_bbox_iou >= pos_iou_thr
        iou_pos_iou_thr_ids_box_inds = anchor_gt_bbox_inds[iou_pos_iou_thr_ids]
-        labels[iou_pos_iou_thr_ids] = gt_lables[iou_pos_iou_thr_ids_box_inds]
+        labels[iou_pos_iou_thr_ids] = gt_labels[iou_pos_iou_thr_ids_box_inds]
        return anchor_gt_bbox_inds, anchor_gt_bbox_iou, labels
    def __call__(self, anchors, gt_bboxes, gt_labels, is_crowd):