diff --git a/configs/dota/_base_/s2anet.yml b/configs/dota/_base_/s2anet.yml
index f4e4974d91fd70f7772f4ac29272f33a1bea0279..8d6a8c477a1062c69a0a78d638b3b7305560c4e5 100644
--- a/configs/dota/_base_/s2anet.yml
+++ b/configs/dota/_base_/s2anet.yml
@@ -36,6 +36,8 @@ S2ANetHead:
   align_conv_type: 'Conv'  # AlignConv Conv
   align_conv_size: 3
   use_sigmoid_cls: True
+  reg_loss_weight: [ 1.0, 1.0, 1.0, 1.0, 1.1 ]
+  cls_loss_weight: [ 1.1, 1.05 ]
 
 RBoxAssigner:
   pos_iou_thr: 0.5
@@ -52,4 +54,3 @@ S2ANetBBoxPostProcess:
     score_threshold: 0.05
     nms_threshold: 0.1
     normalized: False
-    #background_label: -1
diff --git a/configs/dota/s2anet_1x_dota.yml b/configs/dota/s2anet_1x_dota.yml
index d480c1c8669402727d16cfb1c3fbdd0d1d7464af..37959a0e60c0b5ce1b32527142f1fed9f384ae01 100644
--- a/configs/dota/s2anet_1x_dota.yml
+++ b/configs/dota/s2anet_1x_dota.yml
@@ -1,4 +1,4 @@
-_BASE_: [
+_BASE_: [
   '../datasets/dota.yml',
   '../runtime.yml',
   '_base_/s2anet_optimizer_1x.yml',
@@ -6,3 +6,18 @@ _BASE_: [
   '_base_/s2anet_reader.yml',
 ]
 weights: output/s2anet_1x_dota/model_final
+
+S2ANetHead:
+  anchor_strides: [8, 16, 32, 64, 128]
+  anchor_scales: [4]
+  anchor_ratios: [1.0]
+  anchor_assign: RBoxAssigner
+  stacked_convs: 2
+  feat_in: 256
+  feat_out: 256
+  num_classes: 15
+  align_conv_type: 'AlignConv'  # AlignConv Conv
+  align_conv_size: 3
+  use_sigmoid_cls: True
+  reg_loss_weight: [1.0, 1.0, 1.0, 1.0, 1.1]
+  cls_loss_weight: [1.1, 1.05]
diff --git a/configs/dota/s2anet_conv_1x_dota.yml b/configs/dota/s2anet_conv_1x_dota.yml
index 60931b13185be22b0a5c17bbb056c86260bb0d49..2a192ecf96cab529c8b83ac0b54b255b86432ece 100644
--- a/configs/dota/s2anet_conv_1x_dota.yml
+++ b/configs/dota/s2anet_conv_1x_dota.yml
@@ -19,3 +19,5 @@ S2ANetHead:
   align_conv_type: 'Conv'  # AlignConv Conv
   align_conv_size: 3
   use_sigmoid_cls: True
+  reg_loss_weight: [1.0, 1.0, 1.0, 1.0, 1.1]
+  cls_loss_weight: [1.1, 1.05]
diff --git a/ppdet/modeling/bbox_utils.py b/ppdet/modeling/bbox_utils.py
index 26d7ae49960cf7433ece4f276c4fc2060237d91e..fb5dda6ed0d9ac9f294d70b78859e0a9a9dbaa14 100644
--- a/ppdet/modeling/bbox_utils.py
+++ b/ppdet/modeling/bbox_utils.py
@@ -267,6 +267,150 @@ def bbox_iou(box1, box2, giou=False, diou=False, ciou=False, eps=1e-9):
         return iou
 
 
+def rect2rbox(bboxes):
+    """Convert axis-aligned boxes to rotated boxes using numpy operations.
+    :param bboxes: shape (n, 4) (xmin, ymin, xmax, ymax)
+    :return: dbboxes: shape (n, 5) (x_ctr, y_ctr, w, h, angle)
+    """
+    bboxes = bboxes.reshape(-1, 4)
+    num_boxes = bboxes.shape[0]
+    # box centers and the absolute extents along x (edges1) and y (edges2)
+    x_ctr = (bboxes[:, 2] + bboxes[:, 0]) / 2.0
+    y_ctr = (bboxes[:, 3] + bboxes[:, 1]) / 2.0
+    edges1 = np.abs(bboxes[:, 2] - bboxes[:, 0])
+    edges2 = np.abs(bboxes[:, 3] - bboxes[:, 1])
+    angles = np.zeros([num_boxes], dtype=bboxes.dtype)
+    # boxes that are taller than wide get w/h swapped and angle = pi / 2
+    inds = edges1 < edges2
+    # every other box keeps (edges1, edges2) as (w, h) with angle 0
+    rboxes = np.stack((x_ctr, y_ctr, edges1, edges2, angles), axis=1)
+    rboxes[inds, 2] = edges2[inds]
+    rboxes[inds, 3] = edges1[inds]
+    rboxes[inds, 4] = np.pi / 2.0
+    return rboxes
+
+
+def delta2rbox(rrois,
+               deltas,
+               means=[0, 0, 0, 0, 0],
+               stds=[1, 1, 1, 1, 1],
+               wh_ratio_clip=1e-6):
+    """Decode regression deltas against rotated anchors into rotated boxes.
+    :param rrois: (cx, cy, w, h, theta)
+    :param deltas: (dx, dy, dw, dh, dtheta); flattened to (-1, last_dim)
+    :param means: per-component offsets added back after scaling by stds
+    :param stds: per-component scales the deltas are multiplied by
+    :param wh_ratio_clip: dw / dh are clipped to +/- abs(log(wh_ratio_clip))
+    :return: (gx, gy, gw, gh, ga) stacked on the last axis; ga is wrapped
+    """
+    means = paddle.to_tensor(means)
+    stds = paddle.to_tensor(stds)
+    deltas = paddle.reshape(deltas, [-1, deltas.shape[-1]])
+    denorm_deltas = deltas * stds + means
+    # split the de-normalised deltas into their five components
+    dx = denorm_deltas[:, 0]
+    dy = denorm_deltas[:, 1]
+    dw = denorm_deltas[:, 2]
+    dh = denorm_deltas[:, 3]
+    dangle = denorm_deltas[:, 4]
+    # limit the log-scale deltas before they are exponentiated below
+    max_ratio = np.abs(np.log(wh_ratio_clip))
+    dw = paddle.clip(dw, min=-max_ratio, max=max_ratio)
+    dh = paddle.clip(dh, min=-max_ratio, max=max_ratio)
+
+    rroi_x = rrois[:, 0]
+    rroi_y = rrois[:, 1]
+    rroi_w = rrois[:, 2]
+    rroi_h = rrois[:, 3]
+    rroi_angle = rrois[:, 4]
+    # rotate the size-scaled offsets by the anchor angle, then add its center
+    gx = dx * rroi_w * paddle.cos(rroi_angle) - dy * rroi_h * paddle.sin(
+        rroi_angle) + rroi_x
+    gy = dx * rroi_w * paddle.sin(rroi_angle) + dy * rroi_h * paddle.cos(
+        rroi_angle) + rroi_y
+    gw = rroi_w * dw.exp()
+    gh = rroi_h * dh.exp()
+    ga = np.pi * dangle + rroi_angle
+    ga = (ga + np.pi / 4) % np.pi - np.pi / 4
+    ga = paddle.to_tensor(ga)
+    # NOTE(review): explicit float32 conversion; presumably rrois can be float64
+    gw = paddle.to_tensor(gw, dtype='float32')
+    gh = paddle.to_tensor(gh, dtype='float32')
+    bboxes = paddle.stack([gx, gy, gw, gh, ga], axis=-1)
+    return bboxes
+
+
+def rbox2delta(proposals, gt, means=[0, 0, 0, 0, 0], stds=[1, 1, 1, 1, 1]):
+    """Encode gt rotated boxes as regression deltas relative to proposals.
+
+    Args:
+        proposals: boxes indexed on the last axis as (cx, cy, w, h, angle)
+        gt: target boxes with the same last-axis layout as proposals
+        means: 1x5, subtracted from the raw deltas
+        stds: 1x5, the mean-shifted deltas are divided by these
+
+    Returns:
+        float32 array of (dx, dy, dw, dh, da) stacked on the last axis
+    """
+    proposals = proposals.astype(np.float64)
+
+    PI = np.pi
+
+    gt_widths = gt[..., 2]
+    gt_heights = gt[..., 3]
+    gt_angle = gt[..., 4]
+
+    proposals_widths = proposals[..., 2]
+    proposals_heights = proposals[..., 3]
+    proposals_angle = proposals[..., 4]
+    # center offset rotated by the proposal angle and divided by its w / h
+    coord = gt[..., 0:2] - proposals[..., 0:2]
+    dx = (np.cos(proposals[..., 4]) * coord[..., 0] + np.sin(proposals[..., 4])
+          * coord[..., 1]) / proposals_widths
+    dy = (-np.sin(proposals[..., 4]) * coord[..., 0] + np.cos(proposals[..., 4])
+          * coord[..., 1]) / proposals_heights
+    dw = np.log(gt_widths / proposals_widths)
+    dh = np.log(gt_heights / proposals_heights)
+    da = (gt_angle - proposals_angle)
+    # wrap the angle difference with period pi, then express it in units of pi
+    da = (da + PI / 4) % PI - PI / 4
+    da /= PI
+
+    deltas = np.stack([dx, dy, dw, dh, da], axis=-1)
+    means = np.array(means, dtype=deltas.dtype)
+    stds = np.array(stds, dtype=deltas.dtype)
+    deltas = (deltas - means) / stds
+    deltas = deltas.astype(np.float32)
+    return deltas
+
+
+def bbox_decode(bbox_preds,
+                anchors,
+                means=[0, 0, 0, 0, 0],
+                stds=[1, 1, 1, 1, 1]):
+    """Decode per-image rotated-box deltas against one shared anchor set.
+    Args:
+        bbox_preds: [N,H,W,5] deltas; means/stds are forwarded to delta2rbox
+        anchors: [H*W,5], reused unchanged for every image in the batch
+    return:
+        bboxes: [N,H,W,5]
+    """
+    means = paddle.to_tensor(means)
+    stds = paddle.to_tensor(stds)
+    # anchors are identical for every image: convert once, outside the loop
+    anchors = paddle.to_tensor(anchors)
+    num_imgs, H, W, _ = bbox_preds.shape
+    bboxes_list = []
+    for img_id in range(num_imgs):
+        # deltas of a single image, shape [H, W, 5]
+        bbox_delta = bbox_preds[img_id]
+        bboxes = delta2rbox(
+            anchors, bbox_delta, means, stds, wh_ratio_clip=1e-6)
+        bboxes = paddle.reshape(bboxes, [H, W, 5])
+        bboxes_list.append(bboxes)
+    return paddle.stack(bboxes_list, axis=0)
+
+
 def poly2rbox(polys):
     """
     poly:[x0,y0,x1,y1,x2,y2,x3,y3]
diff --git a/ppdet/modeling/heads/s2anet_head.py b/ppdet/modeling/heads/s2anet_head.py
index ae3659d91a75c6b687e49693e4aaaf7f39bebe12..56be7d5b587d4dda17383d1b96f9a9f3fbd3fa1e 100644
--- a/ppdet/modeling/heads/s2anet_head.py
+++ b/ppdet/modeling/heads/s2anet_head.py
@@ -17,21 +17,26 @@ import paddle.nn as nn
 import paddle.nn.functional as F
 from paddle.nn.initializer import Normal, Constant
 from ppdet.core.workspace import register
+from ppdet.modeling import ops
 from ppdet.modeling import bbox_utils
 from ppdet.modeling.proposal_generator.target_layer import RBoxAssigner
 import numpy as np
 
 
-class S2ANetAnchorGenerator(nn.Layer):
+class S2ANetAnchorGenerator(object):
     """
-    AnchorGenerator by paddle
+    S2ANetAnchorGenerator by np
     """
 
-    def __init__(self, base_size, scales, ratios, scale_major=True, ctr=None):
-        super(S2ANetAnchorGenerator, self).__init__()
+    def __init__(self,
+                 base_size=8,
+                 scales=1.0,
+                 ratios=1.0,
+                 scale_major=True,
+                 ctr=None):
         self.base_size = base_size
-        self.scales = paddle.to_tensor(scales)
-        self.ratios = paddle.to_tensor(ratios)
+        self.scales = scales
+        self.ratios = ratios
         self.scale_major = scale_major
         self.ctr = ctr
         self.base_anchors = self.gen_base_anchors()
@@ -49,7 +54,7 @@ class S2ANetAnchorGenerator(nn.Layer):
         else:
             x_ctr, y_ctr = self.ctr
 
-        h_ratios = paddle.sqrt(self.ratios)
+        h_ratios = np.sqrt(self.ratios)
         w_ratios = 1 / h_ratios
         if self.scale_major:
             ws = (w * w_ratios[:] * self.scales[:]).reshape([-1])
@@ -58,51 +63,53 @@ class S2ANetAnchorGenerator(nn.Layer):
             ws = (w * self.scales[:] * w_ratios[:]).reshape([-1])
             hs = (h * self.scales[:] * h_ratios[:]).reshape([-1])
 
-        base_anchors = paddle.stack(
+        # yapf: disable
+        base_anchors = np.stack(
             [
                 x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1),
                 x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1)
             ],
             axis=-1)
-        base_anchors = paddle.round(base_anchors)
+        base_anchors = np.round(base_anchors)
+        # yapf: enable
+
         return base_anchors
 
     def _meshgrid(self, x, y, row_major=True):
-        yy, xx = paddle.meshgrid(x, y)
-        yy = yy.reshape([-1])
-        xx = xx.reshape([-1])
+        xx, yy = np.meshgrid(x, y)
+        xx = xx.reshape(-1)
+        yy = yy.reshape(-1)
         if row_major:
             return xx, yy
         else:
             return yy, xx
 
-    def forward(self, featmap_size, stride=16):
+    def grid_anchors(self, featmap_size, stride=16):
         # featmap_size*stride project it to original area
         base_anchors = self.base_anchors
-
-        feat_h = featmap_size[0]
-        feat_w = featmap_size[1]
-        shift_x = paddle.arange(0, feat_w, 1, 'int32') * stride
-        shift_y = paddle.arange(0, feat_h, 1, 'int32') * stride
+        feat_h, feat_w = featmap_size
+        shift_x = np.arange(0, feat_w, 1, 'int32') * stride
+        shift_y = np.arange(0, feat_h, 1, 'int32') * stride
         shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)
-        shifts = paddle.stack([shift_xx, shift_yy, shift_xx, shift_yy], axis=-1)
+        shifts = np.stack([shift_xx, shift_yy, shift_xx, shift_yy], axis=-1)
 
-        all_anchors = base_anchors[:, :] + shifts[:, :]
-        all_anchors = all_anchors.reshape([feat_h * feat_w, 4])
+        all_anchors = base_anchors[None, :, :] + shifts[:, None, :]
         return all_anchors
 
     def valid_flags(self, featmap_size, valid_size):
         feat_h, feat_w = featmap_size
         valid_h, valid_w = valid_size
         assert valid_h <= feat_h and valid_w <= feat_w
-        valid_x = paddle.zeros([feat_w], dtype='uint8')
-        valid_y = paddle.zeros([feat_h], dtype='uint8')
+        valid_x = np.zeros([feat_w], dtype='uint8')
+        valid_y = np.zeros([feat_h], dtype='uint8')
         valid_x[:valid_w] = 1
         valid_y[:valid_h] = 1
         valid_xx, valid_yy = self._meshgrid(valid_x, valid_y)
         valid = valid_xx & valid_yy
-        valid = valid[:, None].expand(
-            [valid.size(0), self.num_base_anchors]).reshape([-1])
+        valid = valid.reshape([-1])
+
+        # valid = valid[:, None].expand(
+        #    [valid.size(0), self.num_base_anchors]).reshape([-1])
         return valid
 
 
@@ -225,8 +232,8 @@ class S2ANetHead(nn.Layer):
                  anchor_strides=[8, 16, 32, 64, 128],
                  anchor_scales=[4],
                  anchor_ratios=[1.0],
-                 target_means=0.0,
-                 target_stds=1.0,
+                 target_means=(.0, .0, .0, .0, .0),
+                 target_stds=(1.0, 1.0, 1.0, 1.0, 1.0),
                  align_conv_type='AlignConv',
                  align_conv_size=3,
                  use_sigmoid_cls=True,
@@ -263,8 +270,6 @@ class S2ANetHead(nn.Layer):
             self.anchor_generators.append(
                 S2ANetAnchorGenerator(anchor_base, anchor_scales,
                                       anchor_ratios))
-        self.anchor_generators = paddle.nn.LayerList(self.anchor_generators)
-        self.add_sublayer('s2anet_anchor_gen', self.anchor_generators)
 
         self.fam_cls_convs = nn.Sequential()
         self.fam_reg_convs = nn.Sequential()
@@ -399,9 +404,9 @@ class S2ANetHead(nn.Layer):
             weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
             bias_attr=ParamAttr(initializer=Constant(0)))
 
-        self.featmap_size_list = []
-        self.init_anchors_list = []
-        self.rbox_anchors_list = []
+        self.base_anchors = dict()
+        self.featmap_sizes = dict()
+        self.base_anchors = dict()
         self.refine_anchor_list = []
 
     def forward(self, feats):
@@ -411,27 +416,13 @@ class S2ANetHead(nn.Layer):
         odm_reg_branch_list = []
         odm_cls_branch_list = []
 
-        fam_reg1_branch_list = []
-
-        self.featmap_size_list = []
-        self.init_anchors_list = []
-        self.rbox_anchors_list = []
+        self.featmap_sizes = dict()
+        self.base_anchors = dict()
         self.refine_anchor_list = []
 
         for i, feat in enumerate(feats):
-            # prepare anchor
-            featmap_size = paddle.shape(feat)[-2:]
-            self.featmap_size_list.append(featmap_size)
-            init_anchors = self.anchor_generators[i](featmap_size,
-                                                     self.anchor_strides[i])
-            init_anchors = paddle.reshape(
-                init_anchors, [featmap_size[0] * featmap_size[1], 4])
-            self.init_anchors_list.append(init_anchors)
-
-            rbox_anchors = self.rect2rbox(init_anchors)
-            self.rbox_anchors_list.append(rbox_anchors)
-
             fam_cls_feat = self.fam_cls_convs(feat)
+
             fam_cls = self.fam_cls(fam_cls_feat)
             # [N, CLS, H, W] --> [N, H, W, CLS]
             fam_cls = fam_cls.transpose([0, 2, 3, 1])
@@ -447,13 +438,21 @@ class S2ANetHead(nn.Layer):
             fam_reg_reshape = paddle.reshape(fam_reg, [fam_reg.shape[0], -1, 5])
             fam_reg_branch_list.append(fam_reg_reshape)
 
-            # refine anchors
-            fam_reg1 = fam_reg.clone()
-            fam_reg1.stop_gradient = True
-            rbox_anchors.stop_gradient = True
-            fam_reg1_branch_list.append(fam_reg1)
-            refine_anchor = self.bbox_decode(
-                fam_reg1, rbox_anchors, self.target_stds, self.target_means)
+            # prepare anchor
+            featmap_size = feat.shape[-2:]
+            self.featmap_sizes[i] = featmap_size
+            init_anchors = self.anchor_generators[i].grid_anchors(
+                featmap_size, self.anchor_strides[i])
+
+            init_anchors = bbox_utils.rect2rbox(init_anchors)
+            self.base_anchors[(i, featmap_size[0])] = init_anchors
+
+            #fam_reg1 = fam_reg
+            #fam_reg1.stop_gradient = True
+            refine_anchor = bbox_utils.bbox_decode(
+                fam_reg.detach(), init_anchors, self.target_means,
+                self.target_stds)
+
             self.refine_anchor_list.append(refine_anchor)
 
             if self.align_conv_type == 'AlignConv':
@@ -493,87 +492,6 @@ class S2ANetHead(nn.Layer):
                                 odm_cls_branch_list, odm_reg_branch_list)
         return self.s2anet_head_out
 
-    def rect2rbox(self, bboxes):
-        """
-        :param bboxes: shape (n, 4) (xmin, ymin, xmax, ymax)
-        :return: dbboxes: shape (n, 5) (x_ctr, y_ctr, w, h, angle)
-        """
-        num_boxes = paddle.shape(bboxes)[0]
-        x_ctr = (bboxes[:, 2] + bboxes[:, 0]) / 2.0
-        y_ctr = (bboxes[:, 3] + bboxes[:, 1]) / 2.0
-        edges1 = paddle.abs(bboxes[:, 2] - bboxes[:, 0])
-        edges2 = paddle.abs(bboxes[:, 3] - bboxes[:, 1])
-
-        rbox_w = paddle.maximum(edges1, edges2)
-        rbox_h = paddle.minimum(edges1, edges2)
-
-        # set angle
-        inds = edges1 < edges2
-        inds = paddle.cast(inds, 'int32')
-        inds1 = inds * paddle.arange(0, num_boxes)
-        rboxes_angle = inds1 * np.pi / 2.0
-
-        rboxes = paddle.stack(
-            (x_ctr, y_ctr, rbox_w, rbox_h, rboxes_angle), axis=1)
-        return rboxes
-
-    # deltas to rbox
-    def delta2rbox(self, rrois, deltas, means, stds, wh_ratio_clip=1e-6):
-        """
-        :param rrois: (cx, cy, w, h, theta)
-        :param deltas: (dx, dy, dw, dh, dtheta)
-        :param means: means of anchor
-        :param stds: stds of anchor
-        :param wh_ratio_clip: clip threshold of wh_ratio
-        :return:
-        """
-        deltas = paddle.reshape(deltas, [-1, 5])
-        rrois = paddle.reshape(rrois, [-1, 5])
-        pd_means = paddle.ones(shape=[5]) * means
-        pd_stds = paddle.ones(shape=[5]) * stds
-        denorm_deltas = deltas * pd_stds + pd_means
-
-        dx = denorm_deltas[:, 0]
-        dy = denorm_deltas[:, 1]
-        dw = denorm_deltas[:, 2]
-        dh = denorm_deltas[:, 3]
-        dangle = denorm_deltas[:, 4]
-        max_ratio = np.abs(np.log(wh_ratio_clip))
-        dw = paddle.clip(dw, min=-max_ratio, max=max_ratio)
-        dh = paddle.clip(dh, min=-max_ratio, max=max_ratio)
-
-        rroi_x = rrois[:, 0]
-        rroi_y = rrois[:, 1]
-        rroi_w = rrois[:, 2]
-        rroi_h = rrois[:, 3]
-        rroi_angle = rrois[:, 4]
-
-        gx = dx * rroi_w * paddle.cos(rroi_angle) - dy * rroi_h * paddle.sin(
-            rroi_angle) + rroi_x
-        gy = dx * rroi_w * paddle.sin(rroi_angle) + dy * rroi_h * paddle.cos(
-            rroi_angle) + rroi_y
-        gw = rroi_w * dw.exp()
-        gh = rroi_h * dh.exp()
-        ga = np.pi * dangle + rroi_angle
-        ga = (ga + np.pi / 4) % np.pi - np.pi / 4
-        bboxes = paddle.stack([gx, gy, gw, gh, ga], axis=-1)
-        return bboxes
-
-    def bbox_decode(self, bbox_preds, anchors, stds, means, wh_ratio_clip=1e-6):
-        """decode bbox from deltas
-        Args:
-            bbox_preds: bbox_preds, shape=[N,H,W,5]
-            anchors: anchors, shape=[H,W,5]
-        return:
-            bboxes: return decoded bboxes, shape=[N*H*W,5]
-        """
-
-        num_imgs, H, W, _ = bbox_preds.shape
-        bbox_delta = paddle.reshape(bbox_preds, [-1, 5])
-        bboxes = self.delta2rbox(anchors, bbox_delta, means, stds,
-                                 wh_ratio_clip)
-        return bboxes
-
     def get_prediction(self, nms_pre):
         refine_anchors = self.refine_anchor_list
         fam_cls_branch_list, fam_reg_branch_list, odm_cls_branch_list, odm_reg_branch_list = self.s2anet_head_out
@@ -584,7 +502,6 @@ class S2ANetHead(nn.Layer):
             nms_pre,
             cls_out_channels=self.cls_out_channels,
             use_sigmoid_cls=self.use_sigmoid_cls)
-
         return pred_scores, pred_bboxes
 
     def smooth_l1_loss(self, pred, label, delta=1.0 / 9.0):
@@ -603,125 +520,170 @@ class S2ANetHead(nn.Layer):
         return loss
 
     def get_fam_loss(self, fam_target, s2anet_head_out):
-        (feat_labels, feat_label_weights, feat_bbox_targets, feat_bbox_weights,
-         pos_inds, neg_inds) = fam_target
-        fam_cls_score, fam_bbox_pred = s2anet_head_out
-
-        # step1:  sample count
+        (labels, label_weights, bbox_targets, bbox_weights, pos_inds,
+         neg_inds) = fam_target
+        fam_cls_branch_list, fam_reg_branch_list, odm_cls_branch_list, odm_reg_branch_list = s2anet_head_out
+
+        fam_cls_losses = []
+        fam_bbox_losses = []
+        st_idx = 0
+        featmap_sizes = [self.featmap_sizes[e] for e in self.featmap_sizes]
         num_total_samples = len(pos_inds) + len(
             neg_inds) if self.sampling else len(pos_inds)
         num_total_samples = max(1, num_total_samples)
 
-        # step2: calc cls loss
-        feat_labels = feat_labels.reshape(-1)
-        feat_label_weights = feat_label_weights.reshape(-1)
-        fam_cls_score = paddle.squeeze(fam_cls_score, axis=0)
-        fam_cls_score1 = fam_cls_score
-
-        feat_labels = paddle.to_tensor(feat_labels)
-        feat_labels_one_hot = F.one_hot(feat_labels, self.cls_out_channels + 1)
-        feat_labels_one_hot = feat_labels_one_hot[:, 1:]
-        feat_labels_one_hot.stop_gradient = True
-
-        num_total_samples = paddle.to_tensor(
-            num_total_samples, dtype='float32', stop_gradient=True)
-
-        fam_cls = F.sigmoid_focal_loss(
-            fam_cls_score1,
-            feat_labels_one_hot,
-            normalizer=num_total_samples,
-            reduction='none')
-
-        feat_label_weights = feat_label_weights.reshape(
-            feat_label_weights.shape[0], 1)
-        feat_label_weights = np.repeat(
-            feat_label_weights, self.cls_out_channels, axis=1)
-        feat_label_weights = paddle.to_tensor(
-            feat_label_weights, stop_gradient=True)
-
-        fam_cls = fam_cls * feat_label_weights
-        fam_cls_total = paddle.sum(fam_cls)
-
-        # step3: regression loss
-        feat_bbox_targets = paddle.to_tensor(
-            feat_bbox_targets, dtype='float32', stop_gradient=True)
-        feat_bbox_targets = paddle.reshape(feat_bbox_targets, [-1, 5])
-        fam_bbox_pred = paddle.squeeze(fam_bbox_pred, axis=0)
-        fam_bbox_pred = paddle.reshape(fam_bbox_pred, [-1, 5])
-        fam_bbox = self.smooth_l1_loss(fam_bbox_pred, feat_bbox_targets)
-        loss_weight = paddle.to_tensor(
-            self.reg_loss_weight, dtype='float32', stop_gradient=True)
-        fam_bbox = paddle.multiply(fam_bbox, loss_weight)
-        feat_bbox_weights = paddle.to_tensor(
-            feat_bbox_weights, stop_gradient=True)
-        fam_bbox = fam_bbox * feat_bbox_weights
-        fam_bbox_total = paddle.sum(fam_bbox) / num_total_samples
-
+        for idx, feat_size in enumerate(featmap_sizes):
+            feat_anchor_num = feat_size[0] * feat_size[1]
+
+            # step1:  get data
+            feat_labels = labels[st_idx:st_idx + feat_anchor_num]
+            feat_label_weights = label_weights[st_idx:st_idx + feat_anchor_num]
+
+            feat_bbox_targets = bbox_targets[st_idx:st_idx + feat_anchor_num, :]
+            feat_bbox_weights = bbox_weights[st_idx:st_idx + feat_anchor_num, :]
+            st_idx += feat_anchor_num
+
+            # step2: calc cls loss
+            feat_labels = feat_labels.reshape(-1)
+            feat_label_weights = feat_label_weights.reshape(-1)
+
+            fam_cls_score = fam_cls_branch_list[idx]
+            fam_cls_score = paddle.squeeze(fam_cls_score, axis=0)
+            fam_cls_score1 = fam_cls_score
+
+            feat_labels = paddle.to_tensor(feat_labels)
+            feat_labels_one_hot = paddle.nn.functional.one_hot(
+                feat_labels, self.cls_out_channels + 1)
+            feat_labels_one_hot = feat_labels_one_hot[:, 1:]
+            feat_labels_one_hot.stop_gradient = True
+
+            num_total_samples = paddle.to_tensor(
+                num_total_samples, dtype='float32', stop_gradient=True)
+
+            fam_cls = F.sigmoid_focal_loss(
+                fam_cls_score1,
+                feat_labels_one_hot,
+                normalizer=num_total_samples,
+                reduction='none')
+
+            feat_label_weights = feat_label_weights.reshape(
+                feat_label_weights.shape[0], 1)
+            feat_label_weights = np.repeat(
+                feat_label_weights, self.cls_out_channels, axis=1)
+            feat_label_weights = paddle.to_tensor(
+                feat_label_weights, stop_gradient=True)
+
+            fam_cls = fam_cls * feat_label_weights
+            fam_cls_total = paddle.sum(fam_cls)
+            fam_cls_losses.append(fam_cls_total)
+
+            # step3: regression loss
+            fam_bbox_pred = fam_reg_branch_list[idx]
+            feat_bbox_targets = paddle.to_tensor(
+                feat_bbox_targets, dtype='float32', stop_gradient=True)
+            feat_bbox_targets = paddle.reshape(feat_bbox_targets, [-1, 5])
+
+            fam_bbox_pred = fam_reg_branch_list[idx]
+            fam_bbox_pred = paddle.squeeze(fam_bbox_pred, axis=0)
+            fam_bbox_pred = paddle.reshape(fam_bbox_pred, [-1, 5])
+            fam_bbox = self.smooth_l1_loss(fam_bbox_pred, feat_bbox_targets)
+            loss_weight = paddle.to_tensor(
+                self.reg_loss_weight, dtype='float32', stop_gradient=True)
+            fam_bbox = paddle.multiply(fam_bbox, loss_weight)
+            feat_bbox_weights = paddle.to_tensor(
+                feat_bbox_weights, stop_gradient=True)
+            fam_bbox = fam_bbox * feat_bbox_weights
+            fam_bbox_total = paddle.sum(fam_bbox) / num_total_samples
+
+            fam_bbox_losses.append(fam_bbox_total)
+
+        fam_cls_loss = paddle.add_n(fam_cls_losses)
         fam_cls_loss_weight = paddle.to_tensor(
             self.cls_loss_weight[0], dtype='float32', stop_gradient=True)
-        fam_cls_loss = fam_cls_total * fam_cls_loss_weight
-        fam_reg_loss = paddle.add_n(fam_bbox_total)
+        fam_cls_loss = fam_cls_loss * fam_cls_loss_weight
+        fam_reg_loss = paddle.add_n(fam_bbox_losses)
         return fam_cls_loss, fam_reg_loss
 
     def get_odm_loss(self, odm_target, s2anet_head_out):
-        (feat_labels, feat_label_weights, feat_bbox_targets, feat_bbox_weights,
-         pos_inds, neg_inds) = odm_target
-        odm_cls_score, odm_bbox_pred = s2anet_head_out
-
-        # step1:  sample count
+        (labels, label_weights, bbox_targets, bbox_weights, pos_inds,
+         neg_inds) = odm_target
+        fam_cls_branch_list, fam_reg_branch_list, odm_cls_branch_list, odm_reg_branch_list = s2anet_head_out
+
+        odm_cls_losses = []
+        odm_bbox_losses = []
+        st_idx = 0
+        featmap_sizes = [self.featmap_sizes[e] for e in self.featmap_sizes]
         num_total_samples = len(pos_inds) + len(
             neg_inds) if self.sampling else len(pos_inds)
         num_total_samples = max(1, num_total_samples)
-
-        # step2: calc cls loss
-        feat_labels = feat_labels.reshape(-1)
-        feat_label_weights = feat_label_weights.reshape(-1)
-        odm_cls_score = paddle.squeeze(odm_cls_score, axis=0)
-        odm_cls_score1 = odm_cls_score
-
-        feat_labels = paddle.to_tensor(feat_labels)
-        feat_labels_one_hot = F.one_hot(feat_labels, self.cls_out_channels + 1)
-        feat_labels_one_hot = feat_labels_one_hot[:, 1:]
-        feat_labels_one_hot.stop_gradient = True
-
-        num_total_samples = paddle.to_tensor(
-            num_total_samples, dtype='float32', stop_gradient=True)
-
-        odm_cls = F.sigmoid_focal_loss(
-            odm_cls_score1,
-            feat_labels_one_hot,
-            normalizer=num_total_samples,
-            reduction='none')
-
-        feat_label_weights = feat_label_weights.reshape(
-            feat_label_weights.shape[0], 1)
-        feat_label_weights = np.repeat(
-            feat_label_weights, self.cls_out_channels, axis=1)
-        feat_label_weights = paddle.to_tensor(
-            feat_label_weights, stop_gradient=True)
-
-        odm_cls = odm_cls * feat_label_weights
-        odm_cls_total = paddle.sum(odm_cls)
-
-        # step3: regression loss
-        feat_bbox_targets = paddle.to_tensor(
-            feat_bbox_targets, dtype='float32', stop_gradient=True)
-        feat_bbox_targets = paddle.reshape(feat_bbox_targets, [-1, 5])
-        odm_bbox_pred = paddle.squeeze(odm_bbox_pred, axis=0)
-        odm_bbox_pred = paddle.reshape(odm_bbox_pred, [-1, 5])
-        odm_bbox = self.smooth_l1_loss(odm_bbox_pred, feat_bbox_targets)
-        loss_weight = paddle.to_tensor(
-            self.reg_loss_weight, dtype='float32', stop_gradient=True)
-        odm_bbox = paddle.multiply(odm_bbox, loss_weight)
-        feat_bbox_weights = paddle.to_tensor(
-            feat_bbox_weights, stop_gradient=True)
-        odm_bbox = odm_bbox * feat_bbox_weights
-        odm_bbox_total = paddle.sum(odm_bbox) / num_total_samples
-
+        for idx, feat_size in enumerate(featmap_sizes):
+            feat_anchor_num = feat_size[0] * feat_size[1]
+
+            # step1:  get data
+            feat_labels = labels[st_idx:st_idx + feat_anchor_num]
+            feat_label_weights = label_weights[st_idx:st_idx + feat_anchor_num]
+
+            feat_bbox_targets = bbox_targets[st_idx:st_idx + feat_anchor_num, :]
+            feat_bbox_weights = bbox_weights[st_idx:st_idx + feat_anchor_num, :]
+            st_idx += feat_anchor_num
+
+            # step2: calc cls loss
+            feat_labels = feat_labels.reshape(-1)
+            feat_label_weights = feat_label_weights.reshape(-1)
+
+            odm_cls_score = odm_cls_branch_list[idx]
+            odm_cls_score = paddle.squeeze(odm_cls_score, axis=0)
+            odm_cls_score1 = odm_cls_score
+
+            feat_labels = paddle.to_tensor(feat_labels)
+            feat_labels_one_hot = paddle.nn.functional.one_hot(
+                feat_labels, self.cls_out_channels + 1)
+            feat_labels_one_hot = feat_labels_one_hot[:, 1:]
+            feat_labels_one_hot.stop_gradient = True
+
+            num_total_samples = paddle.to_tensor(
+                num_total_samples, dtype='float32', stop_gradient=True)
+            odm_cls = F.sigmoid_focal_loss(
+                odm_cls_score1,
+                feat_labels_one_hot,
+                normalizer=num_total_samples,
+                reduction='none')
+
+            feat_label_weights = feat_label_weights.reshape(
+                feat_label_weights.shape[0], 1)
+            feat_label_weights = np.repeat(
+                feat_label_weights, self.cls_out_channels, axis=1)
+            feat_label_weights = paddle.to_tensor(feat_label_weights)
+            feat_label_weights.stop_gradient = True
+
+            odm_cls = odm_cls * feat_label_weights
+            odm_cls_total = paddle.sum(odm_cls)
+            odm_cls_losses.append(odm_cls_total)
+
+            # # step3: regression loss
+            feat_bbox_targets = paddle.to_tensor(
+                feat_bbox_targets, dtype='float32')
+            feat_bbox_targets = paddle.reshape(feat_bbox_targets, [-1, 5])
+            feat_bbox_targets.stop_gradient = True
+
+            odm_bbox_pred = odm_reg_branch_list[idx]
+            odm_bbox_pred = paddle.squeeze(odm_bbox_pred, axis=0)
+            odm_bbox_pred = paddle.reshape(odm_bbox_pred, [-1, 5])
+            odm_bbox = self.smooth_l1_loss(odm_bbox_pred, feat_bbox_targets)
+            loss_weight = paddle.to_tensor(
+                self.reg_loss_weight, dtype='float32', stop_gradient=True)
+            odm_bbox = paddle.multiply(odm_bbox, loss_weight)
+            feat_bbox_weights = paddle.to_tensor(
+                feat_bbox_weights, stop_gradient=True)
+            odm_bbox = odm_bbox * feat_bbox_weights
+            odm_bbox_total = paddle.sum(odm_bbox) / num_total_samples
+            odm_bbox_losses.append(odm_bbox_total)
+
+        odm_cls_loss = paddle.add_n(odm_cls_losses)
         odm_cls_loss_weight = paddle.to_tensor(
-            self.cls_loss_weight[0], dtype='float32', stop_gradient=True)
-        odm_cls_loss = odm_cls_total * odm_cls_loss_weight
-        odm_reg_loss = paddle.add_n(odm_bbox_total)
+            self.cls_loss_weight[1], dtype='float32', stop_gradient=True)
+        odm_cls_loss = odm_cls_loss * odm_cls_loss_weight
+        odm_reg_loss = paddle.add_n(odm_bbox_losses)
         return odm_cls_loss, odm_reg_loss
 
     def get_loss(self, inputs):
@@ -743,38 +705,46 @@ class S2ANetHead(nn.Layer):
             is_crowd = inputs['is_crowd'][im_id].numpy()
             gt_labels = gt_labels + 1
 
+            # featmap_sizes
+            featmap_sizes = [self.featmap_sizes[e] for e in self.featmap_sizes]
+            anchors_list, valid_flag_list = self.get_init_anchors(featmap_sizes,
+                                                                  np_im_shape)
+            anchors_list_all = []
+            for ii, anchor in enumerate(anchors_list):
+                anchor = anchor.reshape(-1, 4)
+                anchor = bbox_utils.rect2rbox(anchor)
+                anchors_list_all.extend(anchor)
+            anchors_list_all = np.array(anchors_list_all)
+
+            # get im_feat
+            fam_cls_feats_list = [e[im_id] for e in self.s2anet_head_out[0]]
+            fam_reg_feats_list = [e[im_id] for e in self.s2anet_head_out[1]]
+            odm_cls_feats_list = [e[im_id] for e in self.s2anet_head_out[2]]
+            odm_reg_feats_list = [e[im_id] for e in self.s2anet_head_out[3]]
+            im_s2anet_head_out = (fam_cls_feats_list, fam_reg_feats_list,
+                                  odm_cls_feats_list, odm_reg_feats_list)
+
             # FAM
-            for idx, rbox_anchors in enumerate(self.rbox_anchors_list):
-                rbox_anchors = rbox_anchors.numpy()
-                rbox_anchors = rbox_anchors.reshape(-1, 5)
-                im_fam_target = self.anchor_assign(rbox_anchors, gt_bboxes,
-                                                   gt_labels, is_crowd)
-                # feat
-                fam_cls_feat = self.s2anet_head_out[0][idx][im_id]
-                fam_reg_feat = self.s2anet_head_out[1][idx][im_id]
-
-                im_s2anet_fam_feat = (fam_cls_feat, fam_reg_feat)
+            im_fam_target = self.anchor_assign(anchors_list_all, gt_bboxes,
+                                               gt_labels, is_crowd)
+            if im_fam_target is not None:
                 im_fam_cls_loss, im_fam_reg_loss = self.get_fam_loss(
-                    im_fam_target, im_s2anet_fam_feat)
+                    im_fam_target, im_s2anet_head_out)
                 fam_cls_loss_lst.append(im_fam_cls_loss)
                 fam_reg_loss_lst.append(im_fam_reg_loss)
 
             # ODM
-            for idx, refine_anchors in enumerate(self.refine_anchor_list):
-                refine_anchors = refine_anchors.numpy()
-                refine_anchors = refine_anchors.reshape(-1, 5)
-                im_odm_target = self.anchor_assign(refine_anchors, gt_bboxes,
-                                                   gt_labels, is_crowd)
-
-                odm_cls_feat = self.s2anet_head_out[2][idx][im_id]
-                odm_reg_feat = self.s2anet_head_out[3][idx][im_id]
+            refine_anchors_list, valid_flag_list = self.get_refine_anchors(
+                featmap_sizes, image_shape=np_im_shape)
+            refine_anchors_list = np.array(refine_anchors_list)
+            im_odm_target = self.anchor_assign(refine_anchors_list, gt_bboxes,
+                                               gt_labels, is_crowd)
 
-                im_s2anet_odm_feat = (odm_cls_feat, odm_reg_feat)
+            if im_odm_target is not None:
                 im_odm_cls_loss, im_odm_reg_loss = self.get_odm_loss(
-                    im_odm_target, im_s2anet_odm_feat)
+                    im_odm_target, im_s2anet_head_out)
                 odm_cls_loss_lst.append(im_odm_cls_loss)
                 odm_reg_loss_lst.append(im_odm_reg_loss)
-
         fam_cls_loss = paddle.add_n(fam_cls_loss_lst)
         fam_reg_loss = paddle.add_n(fam_reg_loss_lst)
         odm_cls_loss = paddle.add_n(odm_cls_loss_lst)
@@ -786,6 +756,65 @@ class S2ANetHead(nn.Layer):
             'odm_reg_loss': odm_reg_loss
         }
 
+    def get_init_anchors(self, featmap_sizes, image_shape):
+        """Build the initial anchors and the valid flags of every level.
+
+        Args:
+            featmap_sizes (list[tuple]): (feat_h, feat_w) of each level.
+            image_shape (tuple): (h, w) pair of the image.
+        Returns:
+            tuple: per-level anchors, per-level valid flags
+        """
+        # feature map sizes are the same for all images, so a single anchor
+        # grid per level is enough
+        anchor_list = [
+            self.anchor_generators[level].grid_anchors(
+                fmap_size, self.anchor_strides[level])
+            for level, fmap_size in enumerate(featmap_sizes)
+        ]
+
+        # valid flags follow the image size, clipped to the feature map size
+        valid_flag_list = []
+        for level, fmap_size in enumerate(featmap_sizes):
+            stride = self.anchor_strides[level]
+            feat_h, feat_w = fmap_size
+            h, w = image_shape
+            valid_size = (
+                min(int(np.ceil(h / stride)), feat_h),
+                min(int(np.ceil(w / stride)), feat_w),
+            )
+            generator = self.anchor_generators[level]
+            valid_flag_list.append(
+                generator.valid_flags((feat_h, feat_w), valid_size))
+
+        return anchor_list, valid_flag_list
+
+    def get_refine_anchors(self, featmap_sizes, image_shape):
+        """Flatten the refined anchors and compute valid flags per level.
+
+        Args:
+            featmap_sizes (list[tuple]): (feat_h, feat_w) of each level.
+            image_shape (tuple): (h, w) pair of the image.
+        Returns:
+            tuple: refined anchor rows of all levels, per-level valid flags
+        """
+        refine_anchors_list = []
+        for i in range(len(featmap_sizes)):
+            rows = paddle.squeeze(self.refine_anchor_list[i], axis=0).numpy()
+            refine_anchors_list.extend(rows.reshape(-1, rows.shape[-1]))
+
+        # one set of valid flags per level: keep the whole body in the loop
+        valid_flag_list = []
+        h, w = image_shape
+        for i in range(len(featmap_sizes)):
+            anchor_stride = self.anchor_strides[i]
+            feat_h, feat_w = featmap_sizes[i]
+            valid_feat_h = min(int(np.ceil(h / anchor_stride)), feat_h)
+            valid_feat_w = min(int(np.ceil(w / anchor_stride)), feat_w)
+            valid_flag_list.append(self.anchor_generators[i].valid_flags(
+                (feat_h, feat_w), (valid_feat_h, valid_feat_w)))
+        return refine_anchors_list, valid_flag_list
+
     def get_bboxes(self, cls_score_list, bbox_pred_list, mlvl_anchors, nms_pre,
                    cls_out_channels, use_sigmoid_cls):
         assert len(cls_score_list) == len(bbox_pred_list) == len(mlvl_anchors)
@@ -819,8 +848,10 @@ class S2ANetHead(nn.Layer):
                 bbox_pred = paddle.gather(bbox_pred, topk_inds)
                 scores = paddle.gather(scores, topk_inds)
 
-            bboxes = self.delta2rbox(anchors, bbox_pred, self.target_means,
-                                     self.target_stds)
+            target_means = (.0, .0, .0, .0, .0)
+            target_stds = (1.0, 1.0, 1.0, 1.0, 1.0)
+            bboxes = bbox_utils.delta2rbox(anchors, bbox_pred, target_means,
+                                           target_stds)
             mlvl_bboxes.append(bboxes)
             mlvl_scores.append(scores)
 
diff --git a/ppdet/modeling/proposal_generator/target_layer.py b/ppdet/modeling/proposal_generator/target_layer.py
index f4576be4ad8d42e6aff25148383d3bd590b7c07e..cc9880a446db6556b68b0f61728975dafaaf6131 100644
--- a/ppdet/modeling/proposal_generator/target_layer.py
+++ b/ppdet/modeling/proposal_generator/target_layer.py
@@ -296,7 +296,7 @@ class RBoxAssigner(object):
             anchors = anchors.reshape(-1, anchors.shape[-1])
         assert anchors.ndim == 2
         anchor_num = anchors.shape[0]
-        anchor_valid = np.ones((anchor_num), np.uint8)
+        anchor_valid = np.ones((anchor_num), np.int32)
         anchor_inds = np.arange(anchor_num)
         return anchor_inds
 
@@ -371,9 +371,8 @@ class RBoxAssigner(object):
         # calc rbox iou
         anchors_xc_yc = anchors_xc_yc.astype(np.float32)
         gt_bboxes_xc_yc = gt_bboxes_xc_yc.astype(np.float32)
-        anchors_xc_yc = paddle.to_tensor(anchors_xc_yc, place=paddle.CPUPlace())
-        gt_bboxes_xc_yc = paddle.to_tensor(
-            gt_bboxes_xc_yc, place=paddle.CPUPlace())
+        anchors_xc_yc = paddle.to_tensor(anchors_xc_yc)
+        gt_bboxes_xc_yc = paddle.to_tensor(gt_bboxes_xc_yc)
 
         try:
             from rbox_iou_ops import rbox_iou
@@ -433,8 +432,7 @@ class RBoxAssigner(object):
         ignore_iof_thr = self.ignore_iof_thr
 
         anchor_num = anchors.shape[0]
-        anchors_inds = self.anchor_valid(anchors)
-        anchors = anchors[anchors_inds]
+
         gt_bboxes = gt_bboxes
         is_crowd_slice = is_crowd
         not_crowd_inds = np.where(is_crowd_slice == 0)