diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/architectures/cascade_mask_rcnn.py b/PaddleCV/PaddleDetection/ppdet/modeling/architectures/cascade_mask_rcnn.py
index b5fb9017b84a62ab1da7f05518d622903cbce53d..ede3ffdfa4944f636c1204d3010278dede28990c 100644
--- a/PaddleCV/PaddleDetection/ppdet/modeling/architectures/cascade_mask_rcnn.py
+++ b/PaddleCV/PaddleDetection/ppdet/modeling/architectures/cascade_mask_rcnn.py
@@ -53,6 +53,7 @@ class CascadeMaskRCNN(object):
                  bbox_assigner='CascadeBBoxAssigner',
                  mask_assigner='MaskAssigner',
                  mask_head='MaskHead',
+                 rpn_only=False,
                  fpn='FPN'):
         super(CascadeMaskRCNN, self).__init__()
         assert fpn is not None, "cascade RCNN requires FPN"
@@ -64,6 +65,7 @@ class CascadeMaskRCNN(object):
         self.bbox_head = bbox_head
         self.mask_assigner = mask_assigner
         self.mask_head = mask_head
+        self.rpn_only = rpn_only
         # Cascade local cfg
         self.cls_agnostic_bbox_reg = 2
         (brw0, brw1, brw2) = self.bbox_assigner.bbox_reg_weights
@@ -191,8 +193,9 @@ class CascadeMaskRCNN(object):
                 roi_feat = self.roi_extractor(body_feats, rois, spatial_scale)
 
             bbox_pred = self.bbox_head.get_prediction(
-                im_info, roi_feat_list, rcnn_pred_list, proposal_list,
-                self.cascade_bbox_reg_weights, self.cls_agnostic_bbox_reg)
+                im_info, feed_vars['im_shape'], roi_feat_list, rcnn_pred_list,
+                proposal_list, self.cascade_bbox_reg_weights,
+                self.cls_agnostic_bbox_reg)
 
             bbox_pred = bbox_pred['bbox']
 
@@ -204,7 +207,11 @@ class CascadeMaskRCNN(object):
             cond = fluid.layers.less_than(x=bbox_size, y=size)
 
             mask_pred = fluid.layers.create_global_var(
-                shape=[1], value=0.0, dtype='float32', persistable=False)
+                shape=[1],
+                value=0.0,
+                dtype='float32',
+                persistable=False,
+                name='mask_pred')
 
             with fluid.layers.control_flow.Switch() as switch:
                 with switch.case(cond):
diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/architectures/cascade_rcnn.py b/PaddleCV/PaddleDetection/ppdet/modeling/architectures/cascade_rcnn.py
index 42b940b3d487a5a25847c7ee2fd02e8aec8b1ac9..133281e4f13fa16bebd5031fd4241c1bb94f5124 100644
--- a/PaddleCV/PaddleDetection/ppdet/modeling/architectures/cascade_rcnn.py
+++ b/PaddleCV/PaddleDetection/ppdet/modeling/architectures/cascade_rcnn.py
@@ -72,7 +72,19 @@ class CascadeRCNN(object):
 
     def build(self, feed_vars, mode='train'):
         im = feed_vars['image']
+        # Fail fast if the feed dict lacks a variable the chosen mode needs.
+        assert mode in ['train', 'test'], \
+            "only 'train' and 'test' mode is supported"
+        if mode == 'train':
+            # No 'gt_mask' here: no mask head is visible in this architecture
+            required_fields = ['gt_label', 'gt_box', 'is_crowd', 'im_info']
+        else:
+            required_fields = ['im_shape', 'im_info']
+        for var in required_fields:
+            assert var in feed_vars, \
+                "{} has no {} field".format(feed_vars, var)
         im_info = feed_vars['im_info']
+
         if mode == 'train':
             gt_box = feed_vars['gt_box']
             is_crowd = feed_vars['is_crowd']
@@ -92,7 +104,8 @@ class CascadeRCNN(object):
             rpn_loss = self.rpn_head.get_loss(im_info, gt_box, is_crowd)
         else:
             if self.rpn_only:
-                im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3])
+                im_scale = fluid.layers.slice(
+                    im_info, [1], starts=[2], ends=[3])
                 im_scale = fluid.layers.sequence_expand(im_scale, rois)
                 rois = rois / im_scale
                 return {'proposal': rois}
@@ -143,8 +156,9 @@ class CascadeRCNN(object):
             return loss
         else:
             pred = self.bbox_head.get_prediction(
-                im_info, roi_feat_list, rcnn_pred_list, proposal_list,
-                self.cascade_bbox_reg_weights, self.cls_agnostic_bbox_reg)
+                im_info, feed_vars['im_shape'], roi_feat_list, rcnn_pred_list,
+                proposal_list, self.cascade_bbox_reg_weights,
+                self.cls_agnostic_bbox_reg)
             return pred
 
     def _decode_box(self, proposals, bbox_pred, curr_stage):
diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/bbox_head.py b/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/bbox_head.py
index 7bdb64b989ceea5da0668f7a49f45cdd24a9a830..ce2fb63d756b378cba026a41ce329cc310cbcffc 100644
--- a/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/bbox_head.py
+++ b/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/bbox_head.py
@@ -274,11 +274,14 @@ class BBoxHead(object):
         Get prediction bounding box in test stage.
 
         Args:
+            roi_feat (Variable): RoI feature from RoIExtractor.
             rois (Variable): Output of generate_proposals in rpn head.
             im_info (Variable): A 2-D LoDTensor with shape [B, 3]. B is the
                 number of input images, each element consists of im_height,
                 im_width, im_scale.
-            cls_score (Variable), bbox_pred(Variable): Output of get_output.
+            im_shape (Variable): Actual shape of the original images, given
+                with shape [B, 3]. B is the number of images; each element
+                consists of original_height, original_width, 1.
 
         Returns:
             pred_result(Variable): Prediction result with shape [N, 6]. Each
diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/cascade_head.py b/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/cascade_head.py
index 58936b3d4a07ab018d7b7268f92970d4e436ed35..7c4a3b8c7387c35c0605651eda4aa0e51c06dbf5 100644
--- a/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/cascade_head.py
+++ b/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/cascade_head.py
@@ -139,6 +139,7 @@ class CascadeBBoxHead(object):
 
     def get_prediction(self,
                        im_info,
+                       im_shape,
                        roi_feat_list,
                        rcnn_pred_list,
                        proposal_list,
@@ -151,6 +152,9 @@ class CascadeBBoxHead(object):
             im_info (Variable): A 2-D LoDTensor with shape [B, 3]. B is the
                 number of input images, each element consists
                 of im_height, im_width, im_scale.
+            im_shape (Variable): Actual shape of the original images, given
+                with shape [B, 3]. B is the number of images; each element
+                consists of original_height, original_width, 1.
             rois_feat_list (List): RoI feature from RoIExtractor.
             rcnn_pred_list (Variable): Cascade rcnn's head's output
                 including bbox_pred and cls_score
@@ -197,7 +201,8 @@ class CascadeBBoxHead(object):
             # only use fg box delta to decode box
             bbox_pred_new = fluid.layers.slice(
                 bbox_pred_new, axes=[1], starts=[1], ends=[2])
-            bbox_pred_new = fluid.layers.expand(bbox_pred_new, [1, self.num_classes, 1])
+            bbox_pred_new = fluid.layers.expand(bbox_pred_new,
+                                                [1, self.num_classes, 1])
         decoded_box = fluid.layers.box_coder(
             prior_box=proposals_boxes,
             prior_box_var=bbox_reg_w,
@@ -206,8 +211,7 @@ class CascadeBBoxHead(object):
             box_normalized=False,
             axis=1)
 
-        # TODO: notice detectron use img.shape
-        box_out = fluid.layers.box_clip(input=decoded_box, im_info=im_info)
+        box_out = fluid.layers.box_clip(input=decoded_box, im_info=im_shape)
 
         pred_result = self.nms(bboxes=box_out, scores=boxes_cls_prob_mean)
         return {"bbox": pred_result}