diff --git a/ppdet/modeling/heads/pico_head.py b/ppdet/modeling/heads/pico_head.py
index b26048f52a88e1002bf02bd3961f028b14edaf62..dbdc1cfc6abcc6dd13a1e53eee162c5f3a001b8d 100644
--- a/ppdet/modeling/heads/pico_head.py
+++ b/ppdet/modeling/heads/pico_head.py
@@ -460,18 +460,15 @@ class PicoHeadV2(GFLHead):
                         act=self.act,
                         use_act_in_out=False))
 
-    def forward(self, fpn_feats, deploy=False):
+    def forward(self, fpn_feats, export_post_process=True):
         assert len(fpn_feats) == len(
             self.fpn_stride
         ), "The size of fpn_feats is not equal to size of fpn_stride"
-        anchors, _, num_anchors_list, stride_tensor_list = generate_anchors_for_grid_cell(
-            fpn_feats, self.fpn_stride, self.grid_cell_scale, self.cell_offset)
-        anchors_split = paddle.split(anchors, num_anchors_list)
 
         cls_score_list, reg_list, box_list = [], [], []
-        for i, fpn_feat, anchor, stride, align_cls in zip(
-                range(len(self.fpn_stride)), fpn_feats, anchors_split,
-                self.fpn_stride, self.cls_align):
+        for i, fpn_feat, stride, align_cls in zip(
+                range(len(self.fpn_stride)), fpn_feats, self.fpn_stride,
+                self.cls_align):
             b, _, h, w = get_static_shape(fpn_feat)
             # task decomposition
             conv_cls_feat, se_feat = self.conv_feat(fpn_feat, i)
@@ -485,22 +482,35 @@ class PicoHeadV2(GFLHead):
             else:
                 cls_score = F.sigmoid(cls_logit)
 
-            anchor_centers = bbox_center(anchor).unsqueeze(0) / stride
-            anchor_centers = anchor_centers.reshape([1, h, w, 2])
-
-            pred_distances = self.distribution_project(
-                reg_pred.transpose([0, 2, 3, 1])).reshape([b, h, w, 4])
-            reg_bbox = batch_distance2bbox(
-                anchor_centers, pred_distances, max_shapes=None)
-            if not self.training:
+            if not export_post_process and not self.training:
+                # NOTE: the deploy path currently supports only batch size 1
                 cls_score_list.append(
-                    cls_score.transpose([0, 2, 3, 1]).reshape(
-                        [b, -1, self.cls_out_channels]))
-                box_list.append(reg_bbox.reshape([b, -1, 4]) * stride)
+                    cls_score.reshape([1, self.cls_out_channels, -1]).transpose(
+                        [0, 2, 1]))
+                box_list.append(
+                    reg_pred.reshape([1, (self.reg_max + 1) * 4, -1]).transpose(
+                        [0, 2, 1]))
             else:
-                cls_score_list.append(cls_score.flatten(2).transpose([0, 2, 1]))
-                reg_list.append(reg_pred.flatten(2).transpose([0, 2, 1]))
-                box_list.append(reg_bbox.reshape([b, -1, 4]))
+                cls_score_out = cls_score.transpose([0, 2, 3, 1])
+                bbox_pred = reg_pred.transpose([0, 2, 3, 1])
+                b, cell_h, cell_w, _ = paddle.shape(cls_score_out)
+                y, x = self.get_single_level_center_point(
+                    [cell_h, cell_w], stride, cell_offset=self.cell_offset)
+                center_points = paddle.stack([x, y], axis=-1)
+                cls_score_out = cls_score_out.reshape(
+                    [b, -1, self.cls_out_channels])
+                bbox_pred = self.distribution_project(bbox_pred) * stride
+                bbox_pred = bbox_pred.reshape([b, cell_h * cell_w, 4])
+                bbox_pred = batch_distance2bbox(
+                    center_points, bbox_pred, max_shapes=None)
+                if not self.training:
+                    cls_score_list.append(cls_score_out)
+                    box_list.append(bbox_pred)
+                else:
+                    cls_score_list.append(
+                        cls_score.flatten(2).transpose([0, 2, 1]))
+                    reg_list.append(reg_pred.flatten(2).transpose([0, 2, 1]))
+                    box_list.append(bbox_pred / stride)
 
         if not self.training:
             return cls_score_list, box_list
@@ -508,16 +518,19 @@ class PicoHeadV2(GFLHead):
             cls_score_list = paddle.concat(cls_score_list, axis=1)
             box_list = paddle.concat(box_list, axis=1)
             reg_list = paddle.concat(reg_list, axis=1)
-            return cls_score_list, reg_list, box_list, anchors, num_anchors_list, stride_tensor_list
+            return cls_score_list, reg_list, box_list, fpn_feats
 
     def get_loss(self, head_outs, gt_meta):
-        pred_scores, pred_regs, pred_bboxes, anchors, num_anchors_list, stride_tensor_list = head_outs
+        pred_scores, pred_regs, pred_bboxes, fpn_feats = head_outs
         gt_labels = gt_meta['gt_class']
         gt_bboxes = gt_meta['gt_bbox']
         gt_scores = gt_meta['gt_score'] if 'gt_score' in gt_meta else None
         num_imgs = gt_meta['im_id'].shape[0]
         pad_gt_mask = gt_meta['pad_gt_mask']
 
+        anchors, _, num_anchors_list, stride_tensor_list = generate_anchors_for_grid_cell(
+            fpn_feats, self.fpn_stride, self.grid_cell_scale, self.cell_offset)
+
         centers = bbox_center(anchors)
 
         # label assignment