From 01d57c6a634ff98971d027f3aafac9e68edc8e06 Mon Sep 17 00:00:00 2001
From: Guanghua Yu <742925032@qq.com>
Date: Fri, 5 Feb 2021 14:33:17 +0800
Subject: [PATCH] fix RCNN dygraph to static (#2184)

* Fix dygraph-to-static conversion for RCNN models
---
 dygraph/deploy/python/infer.py                | 50 +++--------
 dygraph/deploy/python/visualize.py            | 84 +++++--------------
 dygraph/ppdet/engine/export_utils.py          |  5 +-
 dygraph/ppdet/metrics/coco_utils.py           | 15 +---
 .../modeling/architectures/faster_rcnn.py     | 10 +--
 dygraph/ppdet/modeling/architectures/fcos.py  | 10 +--
 .../ppdet/modeling/architectures/mask_rcnn.py | 11 +--
 .../ppdet/modeling/architectures/ttfnet.py    |  7 +-
 dygraph/ppdet/modeling/architectures/yolo.py  | 10 +--
 dygraph/ppdet/modeling/bbox_utils.py          | 13 ++-
 dygraph/ppdet/modeling/heads/bbox_head.py     |  3 +-
 dygraph/ppdet/modeling/heads/mask_head.py     |  7 +-
 dygraph/ppdet/modeling/layers.py              | 10 +--
 dygraph/ppdet/modeling/post_process.py        | 28 +++++--
 .../proposal_generator/anchor_generator.py    | 23 +++--
 .../modeling/proposal_generator/rpn_head.py   | 16 ++--
 dygraph/ppdet/py_op/post_process.py           | 41 +++++----
 17 files changed, 125 insertions(+), 218 deletions(-)

diff --git a/dygraph/deploy/python/infer.py b/dygraph/deploy/python/infer.py
index 41ddadf1b..53e7693f5 100644
--- a/dygraph/deploy/python/infer.py
+++ b/dygraph/deploy/python/infer.py
@@ -84,16 +84,8 @@ class Detector(object):
             np_boxes[:, 3] *= w
             np_boxes[:, 4] *= h
             np_boxes[:, 5] *= w
-        expect_boxes = (np_boxes[:, 1] > threshold) & (np_boxes[:, 0] > -1)
-        np_boxes = np_boxes[expect_boxes, :]
-        for box in np_boxes:
-            print('class_id:{:d}, confidence:{:.4f},'
-                  'left_top:[{:.2f},{:.2f}],'
-                  ' right_bottom:[{:.2f},{:.2f}]'.format(
-                      int(box[0]), box[1], box[2], box[3], box[4], box[5]))
         results['boxes'] = np_boxes
         if np_masks is not None:
-            np_masks = np_masks[expect_boxes, :, :, :]
             results['masks'] = np_masks
         return results
 
@@ -111,7 +103,7 @@ class Detector(object):
             results (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of box,
                             matix element:[class, score, x_min, y_min, x_max, y_max]
                             MaskRCNN's results include 'masks': np.ndarray:
-                            shape:[N, class_num, mask_resolution, mask_resolution]
+                            shape: [N, im_h, im_w]
         '''
         inputs = self.preprocess(image)
         np_boxes, np_masks = None, None
@@ -125,7 +117,7 @@ class Detector(object):
             output_names = self.predictor.get_output_names()
             boxes_tensor = self.predictor.get_output_handle(output_names[0])
             np_boxes = boxes_tensor.copy_to_cpu()
-            if self.pred_config.mask_resolution is not None:
+            if self.pred_config.mask:
                 masks_tensor = self.predictor.get_output_handle(output_names[2])
                 np_masks = masks_tensor.copy_to_cpu()
 
@@ -135,14 +127,7 @@ class Detector(object):
             output_names = self.predictor.get_output_names()
             boxes_tensor = self.predictor.get_output_handle(output_names[0])
             np_boxes = boxes_tensor.copy_to_cpu()
-            score_tensor = self.predictor.get_output_handle(output_names[3])
-            np_score = score_tensor.copy_to_cpu()
-            label_tensor = self.predictor.get_output_handle(output_names[2])
-            np_label = label_tensor.copy_to_cpu()
-            np_boxes = np.concatenate(
-                [np_label[:, np.newaxis], np_score[:, np.newaxis], np_boxes],
-                axis=-1)
-            if self.pred_config.mask_resolution is not None:
+            if self.pred_config.mask:
                 masks_tensor = self.predictor.get_output_handle(output_names[2])
                 np_masks = masks_tensor.copy_to_cpu()
         t2 = time.time()
@@ -196,10 +181,9 @@ class DetectorSOLOv2(Detector):
             image (str/np.ndarray): path of image/ np.ndarray read by cv2
             threshold (float): threshold of predicted box' score
         Returns:
-            results (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of box,
-                            matix element:[class, score, x_min, y_min, x_max, y_max]
-                            MaskRCNN's results include 'masks': np.ndarray:
-                            shape:[N, class_num, mask_resolution, mask_resolution]
+            results (dict): 'segm': np.ndarray,shape:[N, im_h, im_w]
+                            'cate_label': label of segm, shape:[N]
+                            'cate_score': confidence score of segm, shape:[N]
         '''
         inputs = self.preprocess(image)
         np_label, np_score, np_segms = None, None, None
@@ -273,9 +257,9 @@ class PredictConfig():
         self.preprocess_infos = yml_conf['Preprocess']
         self.min_subgraph_size = yml_conf['min_subgraph_size']
         self.labels = yml_conf['label_list']
-        self.mask_resolution = None
-        if 'mask_resolution' in yml_conf:
-            self.mask_resolution = yml_conf['mask_resolution']
+        self.mask = False
+        if 'mask' in yml_conf:
+            self.mask = yml_conf['mask']
         self.input_shape = yml_conf['image_shape']
         self.print_config()
 
@@ -355,19 +339,9 @@ def load_predictor(model_dir,
     return predictor
 
 
-def visualize(image_file,
-              results,
-              labels,
-              mask_resolution=14,
-              output_dir='output/',
-              threshold=0.5):
+def visualize(image_file, results, labels, output_dir='output/', threshold=0.5):
     # visualize the predict result
-    im = visualize_box_mask(
-        image_file,
-        results,
-        labels,
-        mask_resolution=mask_resolution,
-        threshold=threshold)
+    im = visualize_box_mask(image_file, results, labels, threshold=threshold)
     img_name = os.path.split(image_file)[-1]
     if not os.path.exists(output_dir):
         os.makedirs(output_dir)
@@ -397,7 +371,6 @@ def predict_image(detector):
             FLAGS.image_file,
             results,
             detector.pred_config.labels,
-            mask_resolution=detector.pred_config.mask_resolution,
             output_dir=FLAGS.output_dir,
             threshold=FLAGS.threshold)
 
@@ -431,7 +404,6 @@ def predict_video(detector, camera_id):
             frame,
             results,
             detector.pred_config.labels,
-            mask_resolution=detector.pred_config.mask_resolution,
             threshold=FLAGS.threshold)
         im = np.array(im)
         writer.write(im)
diff --git a/dygraph/deploy/python/visualize.py b/dygraph/deploy/python/visualize.py
index 1c136be4d..fefba9773 100644
--- a/dygraph/deploy/python/visualize.py
+++ b/dygraph/deploy/python/visualize.py
@@ -21,16 +21,15 @@ from PIL import Image, ImageDraw
 from scipy import ndimage
 
 
-def visualize_box_mask(im, results, labels, mask_resolution=14, threshold=0.5):
+def visualize_box_mask(im, results, labels, threshold=0.5):
     """
     Args:
         im (str/np.ndarray): path of image/np.ndarray read by cv2
         results (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of box,
                         matix element:[class, score, x_min, y_min, x_max, y_max]
                         MaskRCNN's results include 'masks': np.ndarray:
-                        shape:[N, class_num, mask_resolution, mask_resolution]
+                        shape:[N, im_h, im_w]
         labels (list): labels:['class1', ..., 'classn']
-        mask_resolution (int): shape of a mask is:[mask_resolution, mask_resolution]
         threshold (float): Threshold of score.
     Returns:
         im (PIL.Image.Image): visualized image
@@ -41,13 +40,9 @@ def visualize_box_mask(im, results, labels, mask_resolution=14, threshold=0.5):
         im = Image.fromarray(im)
     if 'masks' in results and 'boxes' in results:
         im = draw_mask(
-            im,
-            results['boxes'],
-            results['masks'],
-            labels,
-            resolution=mask_resolution)
+            im, results['boxes'], results['masks'], labels, threshold=threshold)
     if 'boxes' in results:
-        im = draw_box(im, results['boxes'], labels)
+        im = draw_box(im, results['boxes'], labels, threshold=threshold)
     if 'segm' in results:
         im = draw_segm(
             im,
@@ -80,91 +75,49 @@ def get_color_map_list(num_classes):
     return color_map
 
 
-def expand_boxes(boxes, scale=0.0):
-    """
-    Args:
-        boxes (np.ndarray): shape:[N,4], N:number of box,
-                            matix element:[x_min, y_min, x_max, y_max]
-        scale (float): scale of boxes
-    Returns:
-        boxes_exp (np.ndarray): expanded boxes
-    """
-    w_half = (boxes[:, 2] - boxes[:, 0]) * .5
-    h_half = (boxes[:, 3] - boxes[:, 1]) * .5
-    x_c = (boxes[:, 2] + boxes[:, 0]) * .5
-    y_c = (boxes[:, 3] + boxes[:, 1]) * .5
-    w_half *= scale
-    h_half *= scale
-    boxes_exp = np.zeros(boxes.shape)
-    boxes_exp[:, 0] = x_c - w_half
-    boxes_exp[:, 2] = x_c + w_half
-    boxes_exp[:, 1] = y_c - h_half
-    boxes_exp[:, 3] = y_c + h_half
-    return boxes_exp
-
-
-def draw_mask(im, np_boxes, np_masks, labels, resolution=14, threshold=0.5):
+def draw_mask(im, np_boxes, np_masks, labels, threshold=0.5):
     """
     Args:
         im (PIL.Image.Image): PIL image
         np_boxes (np.ndarray): shape:[N,6], N: number of box,
-                               matix element:[class, score, x_min, y_min, x_max, y_max]
-        np_masks (np.ndarray): shape:[N, class_num, resolution, resolution]
+            matrix element:[class, score, x_min, y_min, x_max, y_max]
+        np_masks (np.ndarray): shape:[N, im_h, im_w]
         labels (list): labels:['class1', ..., 'classn']
-        resolution (int): shape of a mask is:[resolution, resolution]
         threshold (float): threshold of mask
     Returns:
         im (PIL.Image.Image): visualized image
     """
     color_list = get_color_map_list(len(labels))
-    scale = (resolution + 2.0) / resolution
-    im_w, im_h = im.size
     w_ratio = 0.4
     alpha = 0.7
     im = np.array(im).astype('float32')
-    rects = np_boxes[:, 2:]
-    expand_rects = expand_boxes(rects, scale)
-    expand_rects = expand_rects.astype(np.int32)
-    clsid_scores = np_boxes[:, 0:2]
-    padded_mask = np.zeros((resolution + 2, resolution + 2), dtype=np.float32)
     clsid2color = {}
-    for idx in range(len(np_boxes)):
-        clsid, score = clsid_scores[idx].tolist()
-        clsid = int(clsid)
-        xmin, ymin, xmax, ymax = expand_rects[idx].tolist()
-        w = xmax - xmin + 1
-        h = ymax - ymin + 1
-        w = np.maximum(w, 1)
-        h = np.maximum(h, 1)
-        padded_mask[1:-1, 1:-1] = np_masks[idx, int(clsid), :, :]
-        resized_mask = cv2.resize(padded_mask, (w, h))
-        resized_mask = np.array(resized_mask > threshold, dtype=np.uint8)
-        x0 = min(max(xmin, 0), im_w)
-        x1 = min(max(xmax + 1, 0), im_w)
-        y0 = min(max(ymin, 0), im_h)
-        y1 = min(max(ymax + 1, 0), im_h)
-        im_mask = np.zeros((im_h, im_w), dtype=np.uint8)
-        im_mask[y0:y1, x0:x1] = resized_mask[(y0 - ymin):(y1 - ymin), (
-            x0 - xmin):(x1 - xmin)]
+    expect_boxes = (np_boxes[:, 1] > threshold) & (np_boxes[:, 0] > -1)
+    np_boxes = np_boxes[expect_boxes, :]
+    np_masks = np_masks[expect_boxes, :, :]
+    for i in range(len(np_masks)):
+        clsid, score = int(np_boxes[i][0]), np_boxes[i][1]
+        mask = np_masks[i]
         if clsid not in clsid2color:
             clsid2color[clsid] = color_list[clsid]
         color_mask = clsid2color[clsid]
         for c in range(3):
             color_mask[c] = color_mask[c] * (1 - w_ratio) + w_ratio * 255
-        idx = np.nonzero(im_mask)
+        idx = np.nonzero(mask)
         color_mask = np.array(color_mask)
         im[idx[0], idx[1], :] *= 1.0 - alpha
         im[idx[0], idx[1], :] += alpha * color_mask
     return Image.fromarray(im.astype('uint8'))
 
 
-def draw_box(im, np_boxes, labels):
+def draw_box(im, np_boxes, labels, threshold=0.5):
     """
     Args:
         im (PIL.Image.Image): PIL image
         np_boxes (np.ndarray): shape:[N,6], N: number of box,
                                matix element:[class, score, x_min, y_min, x_max, y_max]
         labels (list): labels:['class1', ..., 'classn']
+        threshold (float): boxes with score <= threshold are not drawn
     Returns:
         im (PIL.Image.Image): visualized image
     """
@@ -172,10 +125,15 @@ def draw_box(im, np_boxes, labels):
     draw = ImageDraw.Draw(im)
     clsid2color = {}
     color_list = get_color_map_list(len(labels))
+    expect_boxes = (np_boxes[:, 1] > threshold) & (np_boxes[:, 0] > -1)
+    np_boxes = np_boxes[expect_boxes, :]
 
     for dt in np_boxes:
         clsid, bbox, score = int(dt[0]), dt[2:], dt[1]
         xmin, ymin, xmax, ymax = bbox
+        print('class_id:{:d}, confidence:{:.4f}, left_top:[{:.2f},{:.2f}],'
+              'right_bottom:[{:.2f},{:.2f}]'.format(
+                  int(clsid), score, xmin, ymin, xmax, ymax))
         w = xmax - xmin
         h = ymax - ymin
         if clsid not in clsid2color:
diff --git a/dygraph/ppdet/engine/export_utils.py b/dygraph/ppdet/engine/export_utils.py
index 85cd8d003..140d61964 100644
--- a/dygraph/ppdet/engine/export_utils.py
+++ b/dygraph/ppdet/engine/export_utils.py
@@ -98,9 +98,8 @@ def _dump_infer_config(config, path, image_shape, model):
             'Architecture: {} is not supported for exporting model now'.format(
                 infer_arch))
         os._exit(0)
-    if 'mask_post_process' in model.__dict__ and model.__dict__[
-            'mask_post_process']:
-        infer_cfg['mask_resolution'] = model.mask_post_process.mask_resolution
+    if 'Mask' in infer_arch:
+        infer_cfg['mask'] = True
     infer_cfg['Preprocess'], infer_cfg[
         'label_list'], image_shape = _parse_reader(
             config['TestReader'], config['TestDataset'], config['metric'],
diff --git a/dygraph/ppdet/metrics/coco_utils.py b/dygraph/ppdet/metrics/coco_utils.py
index 40929d0ad..6a14fd6fa 100644
--- a/dygraph/ppdet/metrics/coco_utils.py
+++ b/dygraph/ppdet/metrics/coco_utils.py
@@ -30,7 +30,7 @@ def get_infer_results(outs, catid, bias=0):
     The output format is dictionary containing bbox or mask result.
 
     For example, bbox result is a list and each element contains
-    image_id, category_id, bbox and score. 
+    image_id, category_id, bbox and score.
     """
     if outs is None or len(outs) == 0:
         raise ValueError(
@@ -42,19 +42,12 @@ def get_infer_results(outs, catid, bias=0):
     infer_res = {}
     if 'bbox' in outs:
         infer_res['bbox'] = get_det_res(
-            outs['bbox'],
-            outs['score'],
-            outs['label'],
-            outs['bbox_num'],
-            im_id,
-            catid,
-            bias=bias)
+            outs['bbox'], outs['bbox_num'], im_id, catid, bias=bias)
 
     if 'mask' in outs:
         # mask post process
-        infer_res['mask'] = get_seg_res(outs['mask'], outs['score'],
-                                        outs['label'], outs['bbox_num'], im_id,
-                                        catid)
+        infer_res['mask'] = get_seg_res(outs['mask'], outs['bbox'],
+                                        outs['bbox_num'], im_id, catid)
 
     if 'segm' in outs:
         infer_res['segm'] = get_solov2_segm_res(outs, im_id, catid)
diff --git a/dygraph/ppdet/modeling/architectures/faster_rcnn.py b/dygraph/ppdet/modeling/architectures/faster_rcnn.py
index e9a4af87b..b7cd9308f 100644
--- a/dygraph/ppdet/modeling/architectures/faster_rcnn.py
+++ b/dygraph/ppdet/modeling/architectures/faster_rcnn.py
@@ -99,13 +99,5 @@ class FasterRCNN(BaseArch):
 
     def get_pred(self):
         bbox_pred, bbox_num = self._forward()
-        label = bbox_pred[:, 0]
-        score = bbox_pred[:, 1]
-        bbox = bbox_pred[:, 2:]
-        output = {
-            'bbox': bbox,
-            'score': score,
-            'label': label,
-            'bbox_num': bbox_num
-        }
+        output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
         return output
diff --git a/dygraph/ppdet/modeling/architectures/fcos.py b/dygraph/ppdet/modeling/architectures/fcos.py
index c8db8c0be..6f71c8992 100644
--- a/dygraph/ppdet/modeling/architectures/fcos.py
+++ b/dygraph/ppdet/modeling/architectures/fcos.py
@@ -91,13 +91,5 @@ class FCOS(BaseArch):
 
     def get_pred(self):
         bboxes, bbox_num = self._forward()
-        label = bboxes[:, 0]
-        score = bboxes[:, 1]
-        bbox = bboxes[:, 2:]
-        output = {
-            'bbox': bbox,
-            'score': score,
-            'label': label,
-            'bbox_num': bbox_num
-        }
+        output = {'bbox': bboxes, 'bbox_num': bbox_num}
         return output
diff --git a/dygraph/ppdet/modeling/architectures/mask_rcnn.py b/dygraph/ppdet/modeling/architectures/mask_rcnn.py
index 05b05c137..3b5618655 100644
--- a/dygraph/ppdet/modeling/architectures/mask_rcnn.py
+++ b/dygraph/ppdet/modeling/architectures/mask_rcnn.py
@@ -124,14 +124,5 @@ class MaskRCNN(BaseArch):
 
     def get_pred(self):
         bbox_pred, bbox_num, mask_pred = self._forward()
-        label = bbox_pred[:, 0]
-        score = bbox_pred[:, 1]
-        bbox = bbox_pred[:, 2:]
-        output = {
-            'label': label,
-            'score': score,
-            'bbox': bbox,
-            'bbox_num': bbox_num,
-            'mask': mask_pred,
-        }
+        output = {'bbox': bbox_pred, 'bbox_num': bbox_num, 'mask': mask_pred}
         return output
diff --git a/dygraph/ppdet/modeling/architectures/ttfnet.py b/dygraph/ppdet/modeling/architectures/ttfnet.py
index 181aa7f0d..c3eb61c87 100644
--- a/dygraph/ppdet/modeling/architectures/ttfnet.py
+++ b/dygraph/ppdet/modeling/architectures/ttfnet.py
@@ -91,13 +91,8 @@ class TTFNet(BaseArch):
 
     def get_pred(self):
         bbox_pred, bbox_num = self._forward()
-        label = bbox_pred[:, 0]
-        score = bbox_pred[:, 1]
-        bbox = bbox_pred[:, 2:]
         output = {
-            "bbox": bbox,
-            'score': score,
-            'label': label,
+            "bbox": bbox_pred,
             "bbox_num": bbox_num,
         }
         return output
diff --git a/dygraph/ppdet/modeling/architectures/yolo.py b/dygraph/ppdet/modeling/architectures/yolo.py
index 42391bb98..a9d2a6e76 100644
--- a/dygraph/ppdet/modeling/architectures/yolo.py
+++ b/dygraph/ppdet/modeling/architectures/yolo.py
@@ -61,13 +61,5 @@ class YOLOv3(BaseArch):
 
     def get_pred(self):
         bbox_pred, bbox_num = self._forward()
-        label = bbox_pred[:, 0]
-        score = bbox_pred[:, 1]
-        bbox = bbox_pred[:, 2:]
-        output = {
-            'bbox': bbox,
-            'score': score,
-            'label': label,
-            'bbox_num': bbox_num
-        }
+        output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
         return output
diff --git a/dygraph/ppdet/modeling/bbox_utils.py b/dygraph/ppdet/modeling/bbox_utils.py
index 51f963a60..505cea8e5 100644
--- a/dygraph/ppdet/modeling/bbox_utils.py
+++ b/dygraph/ppdet/modeling/bbox_utils.py
@@ -39,8 +39,6 @@ def bbox2delta(src_boxes, tgt_boxes, weights):
 
 def delta2bbox(deltas, boxes, weights):
     clip_scale = math.log(1000.0 / 16)
-    if boxes.shape[0] == 0:
-        return paddle.zeros((0, deltas.shape[1]), dtype='float32')
 
     widths = boxes[:, 2] - boxes[:, 0]
     heights = boxes[:, 3] - boxes[:, 1]
@@ -61,12 +59,13 @@ def delta2bbox(deltas, boxes, weights):
     pred_w = paddle.exp(dw) * widths.unsqueeze(1)
     pred_h = paddle.exp(dh) * heights.unsqueeze(1)
 
-    pred_boxes = paddle.zeros_like(deltas)
+    pred_boxes = []
+    pred_boxes.append(pred_ctr_x - 0.5 * pred_w)
+    pred_boxes.append(pred_ctr_y - 0.5 * pred_h)
+    pred_boxes.append(pred_ctr_x + 0.5 * pred_w)
+    pred_boxes.append(pred_ctr_y + 0.5 * pred_h)
+    pred_boxes = paddle.stack(pred_boxes, axis=-1)
 
-    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w
-    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h
-    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w
-    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h
     return pred_boxes
 
 
diff --git a/dygraph/ppdet/modeling/heads/bbox_head.py b/dygraph/ppdet/modeling/heads/bbox_head.py
index a2af5f303..f897d799d 100644
--- a/dygraph/ppdet/modeling/heads/bbox_head.py
+++ b/dygraph/ppdet/modeling/heads/bbox_head.py
@@ -141,8 +141,7 @@ class BBoxHead(nn.Layer):
 
         rois_feat = self.roi_extractor(body_feats, rois, rois_num)
         bbox_feat = self.head(rois_feat)
-        #if self.with_pool:
-        if len(bbox_feat.shape) > 2 and bbox_feat.shape[-1] > 1:
+        if self.with_pool:
             feat = F.adaptive_avg_pool2d(bbox_feat, output_size=1)
             feat = paddle.squeeze(feat, axis=[2, 3])
         else:
diff --git a/dygraph/ppdet/modeling/heads/mask_head.py b/dygraph/ppdet/modeling/heads/mask_head.py
index 3ccf23ba8..9a5243d95 100644
--- a/dygraph/ppdet/modeling/heads/mask_head.py
+++ b/dygraph/ppdet/modeling/heads/mask_head.py
@@ -182,11 +182,12 @@ class MaskHead(nn.Layer):
                 mask_out = F.sigmoid(mask_logit)
             else:
                 num_masks = mask_logit.shape[0]
-                pred_masks = paddle.split(mask_logit, num_masks)
                 mask_out = []
                 # TODO: need to optimize gather
-                for i, pred_mask in enumerate(pred_masks):
-                    mask = paddle.gather(pred_mask, labels[i], axis=1)
+                for i in range(mask_logit.shape[0]):
+                    pred_masks = paddle.unsqueeze(
+                        mask_logit[i, :, :, :], axis=0)
+                    mask = paddle.gather(pred_masks, labels[i], axis=1)
                     mask_out.append(mask)
                 mask_out = F.sigmoid(paddle.concat(mask_out))
         return mask_out
diff --git a/dygraph/ppdet/modeling/layers.py b/dygraph/ppdet/modeling/layers.py
index 2b96e2c98..a499d528d 100644
--- a/dygraph/ppdet/modeling/layers.py
+++ b/dygraph/ppdet/modeling/layers.py
@@ -316,14 +316,12 @@ class RCNNBox(object):
 
         # [N, C*4]
         bbox = paddle.concat(roi)
-        bbox = delta2bbox(bbox_pred, bbox, self.prior_box_var)
+        if bbox.shape[0] == 0:
+            bbox = paddle.zeros([0, bbox_pred.shape[1]], dtype='float32')
+        else:
+            bbox = delta2bbox(bbox_pred, bbox, self.prior_box_var)
         scores = cls_prob[:, :-1]
 
-        # [N*C, 4]
-
-        bbox_num_class = bbox.shape[1] // 4
-        bbox = paddle.reshape(bbox, [-1, bbox_num_class, 4])
-
         origin_h = paddle.unsqueeze(origin_shape[:, 0], axis=1)
         origin_w = paddle.unsqueeze(origin_shape[:, 1], axis=1)
         zeros = paddle.zeros_like(origin_h)
diff --git a/dygraph/ppdet/modeling/post_process.py b/dygraph/ppdet/modeling/post_process.py
index 4cfc6beae..232c0d859 100644
--- a/dygraph/ppdet/modeling/post_process.py
+++ b/dygraph/ppdet/modeling/post_process.py
@@ -54,8 +54,6 @@ class BBoxPostProcess(object):
                                including labels, scores and bboxes. The size of
                                bboxes are corresponding to the original image.
         """
-        if bboxes.shape[0] == 0:
-            return paddle.zeros(shape=[1, 6])
 
         origin_shape = paddle.floor(im_shape / scale_factor + 0.5)
 
@@ -65,9 +63,12 @@ class BBoxPostProcess(object):
         for i in range(bbox_num.shape[0]):
             expand_shape = paddle.expand(origin_shape[i:i + 1, :],
                                          [bbox_num[i], 2])
-            scale_y, scale_x = scale_factor[i]
+            scale_y, scale_x = scale_factor[i][0], scale_factor[i][1]
             scale = paddle.concat([scale_x, scale_y, scale_x, scale_y])
             expand_scale = paddle.expand(scale, [bbox_num[i], 4])
+            # TODO: paddle.expand is mis-translated by dygraph-to-static;
+            # reshape to the same target shape as a workaround.
+            expand_scale = paddle.reshape(expand_scale, [bbox_num[i], 4])
             origin_shape_list.append(expand_shape)
             scale_factor_list.append(expand_scale)
 
@@ -121,6 +122,10 @@ class MaskPostProcess(object):
 
         gx = paddle.expand(img_x, [N, img_y.shape[1], img_x.shape[2]])
         gy = paddle.expand(img_y, [N, img_y.shape[1], img_x.shape[2]])
+        # TODO: paddle.expand is mis-translated by dygraph-to-static;
+        # reshape to the same target shape as a workaround.
+        gx = paddle.reshape(gx, [N, img_y.shape[1], img_x.shape[2]])
+        gy = paddle.reshape(gy, [N, img_y.shape[1], img_x.shape[2]])
         grid = paddle.stack([gx, gy], axis=3)
         img_masks = F.grid_sample(masks, grid, align_corners=False)
         return img_masks[:, 0]
@@ -129,19 +134,24 @@ class MaskPostProcess(object):
         """
         Paste the mask prediction to the original image.
         """
-        assert bboxes.shape[0] > 0, 'There is no detection output'
-
         num_mask = mask_out.shape[0]
-        # TODO: support bs > 1
+        origin_shape = paddle.cast(origin_shape, 'int32')
+        # TODO: support batch size > 1 and a bool dtype for the mask output
         pred_result = paddle.zeros(
-            [num_mask, origin_shape[0][0], origin_shape[0][1]], dtype='bool')
+            [num_mask, origin_shape[0][0], origin_shape[0][1]], dtype='int32')
+        if bboxes.shape[0] == 0:
+            return pred_result
+
         # TODO: optimize chunk paste
+        pred_result = []
         for i in range(bboxes.shape[0]):
-            im_h, im_w = origin_shape[i]
+            im_h, im_w = origin_shape[i][0], origin_shape[i][1]
             pred_mask = self.paste_mask(mask_out[i], bboxes[i:i + 1, 2:], im_h,
                                         im_w)
             pred_mask = pred_mask >= self.binary_thresh
-            pred_result[i] = pred_mask
+            pred_mask = paddle.cast(pred_mask, 'int32')
+            pred_result.append(pred_mask)
+        pred_result = paddle.concat(pred_result)
         return pred_result
 
 
diff --git a/dygraph/ppdet/modeling/proposal_generator/anchor_generator.py b/dygraph/ppdet/modeling/proposal_generator/anchor_generator.py
index dcb5bf0c7..1ca0319d3 100644
--- a/dygraph/ppdet/modeling/proposal_generator/anchor_generator.py
+++ b/dygraph/ppdet/modeling/proposal_generator/anchor_generator.py
@@ -24,7 +24,7 @@ from .. import ops
 
 
 @register
-class AnchorGenerator(object):
+class AnchorGenerator(nn.Layer):
     def __init__(self,
                  anchor_sizes=[32, 64, 128, 256, 512],
                  aspect_ratios=[0.5, 1.0, 2.0],
@@ -64,17 +64,21 @@ class AnchorGenerator(object):
             self.generate_cell_anchors(s, a)
             for s, a in zip(sizes, aspect_ratios)
         ]
+        [
+            self.register_buffer(
+                t.name, t, persistable=False) for t in cell_anchors
+        ]
         return cell_anchors
 
     def _create_grid_offsets(self, size, stride, offset):
-        grid_height, grid_width = size
+        grid_height, grid_width = size[0], size[1]
         shifts_x = paddle.arange(
             offset * stride, grid_width * stride, step=stride, dtype='float32')
         shifts_y = paddle.arange(
             offset * stride, grid_height * stride, step=stride, dtype='float32')
         shift_y, shift_x = paddle.meshgrid(shifts_y, shifts_x)
-        shift_x = shift_x.reshape([-1])
-        shift_y = shift_y.reshape([-1])
+        shift_x = paddle.reshape(shift_x, [-1])
+        shift_y = paddle.reshape(shift_y, [-1])
         return shift_x, shift_y
 
     def _grid_anchors(self, grid_sizes):
@@ -84,14 +88,15 @@ class AnchorGenerator(object):
             shift_x, shift_y = self._create_grid_offsets(size, stride,
                                                          self.offset)
             shifts = paddle.stack((shift_x, shift_y, shift_x, shift_y), axis=1)
+            shifts = paddle.reshape(shifts, [-1, 1, 4])
+            base_anchors = paddle.reshape(base_anchors, [1, -1, 4])
 
-            anchors.append((shifts.reshape([-1, 1, 4]) + base_anchors.reshape(
-                [1, -1, 4])).reshape([-1, 4]))
+            anchors.append(paddle.reshape(shifts + base_anchors, [-1, 4]))
 
         return anchors
 
-    def __call__(self, input):
-        grid_sizes = [feature_map.shape[-2:] for feature_map in input]
+    def forward(self, input):
+        grid_sizes = [paddle.shape(feature_map)[-2:] for feature_map in input]
         anchors_over_all_feature_maps = self._grid_anchors(grid_sizes)
         return anchors_over_all_feature_maps
 
@@ -105,4 +110,4 @@ class AnchorGenerator(object):
                 ratios and 5 sizes, the number of anchors is 15.
                 For FPN models, `num_anchors` on every feature map is the same.
         """
-        return self.cell_anchors[0].shape[0]
+        return len(self.cell_anchors[0])
diff --git a/dygraph/ppdet/modeling/proposal_generator/rpn_head.py b/dygraph/ppdet/modeling/proposal_generator/rpn_head.py
index c7a615cfb..be399be9e 100644
--- a/dygraph/ppdet/modeling/proposal_generator/rpn_head.py
+++ b/dygraph/ppdet/modeling/proposal_generator/rpn_head.py
@@ -108,7 +108,14 @@ class RPNHead(nn.Layer):
 
         anchors = self.anchor_generator(rpn_feats)
 
-        rois, rois_num = self._gen_proposal(scores, deltas, anchors, inputs)
+        # TODO: Fix batch_size > 1 when testing.
+        if self.training:
+            batch_size = im_shape.shape[0]
+        else:
+            batch_size = 1
+
+        rois, rois_num = self._gen_proposal(scores, deltas, anchors, inputs,
+                                            batch_size)
 
         if self.training:
             loss = self.get_loss(scores, deltas, anchors, inputs)
@@ -116,16 +123,15 @@ class RPNHead(nn.Layer):
         else:
             return rois, rois_num, None
 
-    def _gen_proposal(self, scores, bbox_deltas, anchors, inputs):
+    def _gen_proposal(self, scores, bbox_deltas, anchors, inputs, batch_size):
         """
-        scores (list[Tensor]): Multi-level scores prediction 
+        scores (list[Tensor]): Multi-level scores prediction
         bbox_deltas (list[Tensor]): Multi-level deltas prediction
-        anchors (list[Tensor]): Multi-level anchors 
+        anchors (list[Tensor]): Multi-level anchors
         inputs (dict): ground truth info
         """
         prop_gen = self.train_proposal if self.training else self.test_proposal
         im_shape = inputs['im_shape']
-        batch_size = im_shape.shape[0]
         rpn_rois_list = [[] for i in range(batch_size)]
         rpn_prob_list = [[] for i in range(batch_size)]
         rpn_rois_num_list = [[] for i in range(batch_size)]
diff --git a/dygraph/ppdet/py_op/post_process.py b/dygraph/ppdet/py_op/post_process.py
index e7c5d9dbf..41d366787 100755
--- a/dygraph/ppdet/py_op/post_process.py
+++ b/dygraph/ppdet/py_op/post_process.py
@@ -1,29 +1,35 @@
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import six
 import os
 import numpy as np
 import cv2
 
 
-def get_det_res(bboxes,
-                scores,
-                labels,
-                bbox_nums,
-                image_id,
-                label_to_cat_id_map,
-                bias=0):
+def get_det_res(bboxes, bbox_nums, image_id, label_to_cat_id_map, bias=0):
     det_res = []
     k = 0
     for i in range(len(bbox_nums)):
         cur_image_id = int(image_id[i][0])
         det_nums = bbox_nums[i]
         for j in range(det_nums):
-            box = bboxes[k]
-            score = float(scores[k])
-            label = int(labels[k])
-            if label < 0: continue
+            dt = bboxes[k]
             k = k + 1
-            xmin, ymin, xmax, ymax = box.tolist()
-            category_id = label_to_cat_id_map[label]
+            num_id, score, xmin, ymin, xmax, ymax = dt.tolist()
+            if int(num_id) < 0:
+                continue
+            category_id = label_to_cat_id_map[int(num_id)]
             w = xmax - xmin + bias
             h = ymax - ymin + bias
             bbox = [xmin, ymin, w, h]
@@ -37,8 +43,7 @@ def get_det_res(bboxes,
     return det_res
 
 
-def get_seg_res(masks, scores, labels, mask_nums, image_id,
-                label_to_cat_id_map):
+def get_seg_res(masks, bboxes, mask_nums, image_id, label_to_cat_id_map):
     import pycocotools.mask as mask_util
     seg_res = []
     k = 0
@@ -46,9 +51,9 @@ def get_seg_res(masks, scores, labels, mask_nums, image_id,
         cur_image_id = int(image_id[i][0])
         det_nums = mask_nums[i]
         for j in range(det_nums):
-            mask = masks[k]
-            score = float(scores[k])
-            label = int(labels[k])
+            mask = masks[k].astype(np.uint8)
+            score = float(bboxes[k][1])
+            label = int(bboxes[k][0])
             k = k + 1
             cat_id = label_to_cat_id_map[label]
             rle = mask_util.encode(
-- 
GitLab