From 01002e40f93ed8d59c2e96cb9f4d7d32bbd87f99 Mon Sep 17 00:00:00 2001
From: jm12138 <2286040843@qq.com>
Date: Fri, 16 Sep 2022 14:27:14 +0800
Subject: [PATCH] update faster_rcnn_resnet50_fpn_coco2017 (#1948)

* update faster_rcnn_resnet50_fpn_coco2017

* update unittest faster_rcnn_resnet50_fpn_coco2017

* update unittest

* update unittest

* update gpu config

* update

* add clean func

* update save inference model

Co-authored-by: chenjian <chenjian26@baidu.com>
---
 .../README.md                                 |  17 +-
 .../README_en.md                              |  17 +-
 .../bbox_assigner.py                          |  20 -
 .../bbox_head.py                              | 270 ---------
 .../data_feed.py                              |   3 -
 .../faster_rcnn_resnet50_fpn_coco2017/fpn.py  | 296 ----------
 .../module.py                                 | 302 ++--------
 .../name_adapter.py                           |  61 --
 .../nonlocal_helper.py                        | 154 -----
 .../processor.py                              |   8 +-
 .../resnet.py                                 | 447 ---------------
 .../roi_extractor.py                          |  76 ---
 .../rpn_head.py                               | 533 ------------------
 .../faster_rcnn_resnet50_fpn_coco2017/test.py | 108 ++++
 14 files changed, 162 insertions(+), 2150 deletions(-)
 delete mode 100644 modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_assigner.py
 delete mode 100644 modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_head.py
 delete mode 100644 modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/fpn.py
 delete mode 100644 modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/name_adapter.py
 delete mode 100644 modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/nonlocal_helper.py
 delete mode 100644 modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/resnet.py
 delete mode 100644 modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/roi_extractor.py
 delete mode 100644 modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/rpn_head.py
 create mode 100644 modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/test.py
diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README.md b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README.md
index 9d003b80..ef5324a7 100644
--- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README.md
+++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README.md
@@ -102,19 +102,13 @@
 
 
   - ```python
-    def save_inference_model(dirname,
-                             model_filename=None,
-                             params_filename=None,
-                             combined=True)
+    def save_inference_model(dirname)
     ```
     - 将模型保存到指定路径。
 
     - **参数**
 
-      - dirname: 存在模型的目录名称； <br/>
-      - model\_filename: 模型文件名称，默认为\_\_model\_\_； <br/>
-      - params\_filename: 参数文件名称，默认为\_\_params\_\_(仅当`combined`为True时生效)；<br/>
-      - combined: 是否将参数保存到统一的一个文件中。
+      - dirname: 模型保存路径 <br/>
 
 
 ## 四、服务部署
@@ -167,6 +161,11 @@
 * 1.0.1
 
   修复numpy数据读取问题
+
+* 1.1.0
+
+  移除 fluid api
+
   - ```shell
-    $ hub install faster_rcnn_resnet50_fpn_coco2017==1.0.1
+    $ hub install faster_rcnn_resnet50_fpn_coco2017==1.1.0
     ```
diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README_en.md b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README_en.md
index d90beb64..bf4c7274 100644
--- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README_en.md
+++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README_en.md
@@ -101,19 +101,13 @@
 
 
   - ```python
-    def save_inference_model(dirname,
-                             model_filename=None,
-                             params_filename=None,
-                             combined=True)
+    def save_inference_model(dirname)
     ```
     - Save model to specific path
 
     - **Parameters**
 
-      - dirname: output dir for saving model
-      - model\_filename: filename for saving model
-      - params\_filename: filename for saving parameters
-      - combined: whether save parameters into one file
+      - dirname: model save path
 
 
 ## IV.Server Deployment
@@ -166,6 +160,11 @@
 * 1.0.1
 
   Fix the problem of reading numpy
+
+* 1.1.0
+
+  Remove fluid api
+
   - ```shell
-    $ hub install faster_rcnn_resnet50_fpn_coco2017==1.0.1
+    $ hub install faster_rcnn_resnet50_fpn_coco2017==1.1.0
     ```
diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_assigner.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_assigner.py
deleted file mode 100644
index d033382c..00000000
--- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_assigner.py
+++ /dev/null
@@ -1,20 +0,0 @@
-class BBoxAssigner(object):
-    # __op__ = fluid.layers.generate_proposal_labels
-    def __init__(self,
-                 batch_size_per_im=512,
-                 fg_fraction=.25,
-                 fg_thresh=.5,
-                 bg_thresh_hi=.5,
-                 bg_thresh_lo=0.,
-                 bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
-                 class_nums=81,
-                 shuffle_before_sample=True):
-        super(BBoxAssigner, self).__init__()
-        self.batch_size_per_im = batch_size_per_im
-        self.fg_fraction = fg_fraction
-        self.fg_thresh = fg_thresh
-        self.bg_thresh_hi = bg_thresh_hi
-        self.bg_thresh_lo = bg_thresh_lo
-        self.bbox_reg_weights = bbox_reg_weights
-        self.class_nums = class_nums
-        self.use_random = shuffle_before_sample
diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_head.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_head.py
deleted file mode 100644
index 8080ed22..00000000
--- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_head.py
+++ /dev/null
@@ -1,270 +0,0 @@
-# coding=utf-8
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from collections import OrderedDict
-
-from paddle import fluid
-from paddle.fluid.param_attr import ParamAttr
-from paddle.fluid.initializer import Normal, Xavier
-from paddle.fluid.regularizer import L2Decay
-from paddle.fluid.initializer import MSRA
-
-
-class MultiClassNMS(object):
-    # __op__ = fluid.layers.multiclass_nms
-    def __init__(self,
-                 score_threshold=.05,
-                 nms_top_k=-1,
-                 keep_top_k=100,
-                 nms_threshold=.5,
-                 normalized=False,
-                 nms_eta=1.0,
-                 background_label=0):
-        super(MultiClassNMS, self).__init__()
-        self.score_threshold = score_threshold
-        self.nms_top_k = nms_top_k
-        self.keep_top_k = keep_top_k
-        self.nms_threshold = nms_threshold
-        self.normalized = normalized
-        self.nms_eta = nms_eta
-        self.background_label = background_label
-
-
-class SmoothL1Loss(object):
-    '''
-    Smooth L1 loss
-    Args:
-        sigma (float): hyper param in smooth l1 loss
-    '''
-
-    def __init__(self, sigma=1.0):
-        super(SmoothL1Loss, self).__init__()
-        self.sigma = sigma
-
-    def __call__(self, x, y, inside_weight=None, outside_weight=None):
-        return fluid.layers.smooth_l1(
-            x,
-            y,
-            inside_weight=inside_weight,
-            outside_weight=outside_weight,
-            sigma=self.sigma)
-
-
-class BoxCoder(object):
-    def __init__(self,
-                 prior_box_var=[0.1, 0.1, 0.2, 0.2],
-                 code_type='decode_center_size',
-                 box_normalized=False,
-                 axis=1):
-        super(BoxCoder, self).__init__()
-        self.prior_box_var = prior_box_var
-        self.code_type = code_type
-        self.box_normalized = box_normalized
-        self.axis = axis
-
-
-class TwoFCHead(object):
-    """
-    RCNN head with two Fully Connected layers
-
-    Args:
-        mlp_dim (int): num of filters for the fc layers
-    """
-
-    def __init__(self, mlp_dim=1024):
-        super(TwoFCHead, self).__init__()
-        self.mlp_dim = mlp_dim
-
-    def __call__(self, roi_feat):
-        fan = roi_feat.shape[1] * roi_feat.shape[2] * roi_feat.shape[3]
-
-        fc6 = fluid.layers.fc(
-            input=roi_feat,
-            size=self.mlp_dim,
-            act='relu',
-            name='fc6',
-            param_attr=ParamAttr(name='fc6_w', initializer=Xavier(fan_out=fan)),
-            bias_attr=ParamAttr(
-                name='fc6_b', learning_rate=2., regularizer=L2Decay(0.)))
-        head_feat = fluid.layers.fc(
-            input=fc6,
-            size=self.mlp_dim,
-            act='relu',
-            name='fc7',
-            param_attr=ParamAttr(name='fc7_w', initializer=Xavier()),
-            bias_attr=ParamAttr(
-                name='fc7_b', learning_rate=2., regularizer=L2Decay(0.)))
-
-        return head_feat
-
-
-class BBoxHead(object):
-    """
-    RCNN bbox head
-
-    Args:
-        head (object): the head module instance, e.g., `ResNetC5`, `TwoFCHead`
-        box_coder (object): `BoxCoder` instance
-        nms (object): `MultiClassNMS` instance
-        num_classes: number of output classes
-    """
-    __inject__ = ['head', 'box_coder', 'nms', 'bbox_loss']
-    __shared__ = ['num_classes']
-
-    def __init__(self,
-                 head,
-                 box_coder=BoxCoder(),
-                 nms=MultiClassNMS(),
-                 bbox_loss=SmoothL1Loss(),
-                 num_classes=81):
-        super(BBoxHead, self).__init__()
-        self.head = head
-        self.num_classes = num_classes
-        self.box_coder = box_coder
-        self.nms = nms
-        self.bbox_loss = bbox_loss
-        self.head_feat = None
-
-    def get_head_feat(self, input=None):
-        """
-        Get the bbox head feature map.
-        """
-
-        if input is not None:
-            feat = self.head(input)
-            if isinstance(feat, OrderedDict):
-                feat = list(feat.values())[0]
-            self.head_feat = feat
-        return self.head_feat
-
-    def _get_output(self, roi_feat):
-        """
-        Get bbox head output.
-
-        Args:
-            roi_feat (Variable): RoI feature from RoIExtractor.
-
-        Returns:
-            cls_score(Variable): Output of rpn head with shape of
-                [N, num_anchors, H, W].
-            bbox_pred(Variable): Output of rpn head with shape of
-                [N, num_anchors * 4, H, W].
-        """
-        head_feat = self.get_head_feat(roi_feat)
-        # when ResNetC5 output a single feature map
-        if not isinstance(self.head, TwoFCHead):
-            head_feat = fluid.layers.pool2d(
-                head_feat, pool_type='avg', global_pooling=True)
-        cls_score = fluid.layers.fc(
-            input=head_feat,
-            size=self.num_classes,
-            act=None,
-            name='cls_score',
-            param_attr=ParamAttr(
-                name='cls_score_w', initializer=Normal(loc=0.0, scale=0.01)),
-            bias_attr=ParamAttr(
-                name='cls_score_b', learning_rate=2., regularizer=L2Decay(0.)))
-        bbox_pred = fluid.layers.fc(
-            input=head_feat,
-            size=4 * self.num_classes,
-            act=None,
-            name='bbox_pred',
-            param_attr=ParamAttr(
-                name='bbox_pred_w', initializer=Normal(loc=0.0, scale=0.001)),
-            bias_attr=ParamAttr(
-                name='bbox_pred_b', learning_rate=2., regularizer=L2Decay(0.)))
-        return cls_score, bbox_pred
-
-    def get_loss(self, roi_feat, labels_int32, bbox_targets,
-                 bbox_inside_weights, bbox_outside_weights):
-        """
-        Get bbox_head loss.
-
-        Args:
-            roi_feat (Variable): RoI feature from RoIExtractor.
-            labels_int32(Variable): Class label of a RoI with shape [P, 1].
-                P is the number of RoI.
-            bbox_targets(Variable): Box label of a RoI with shape
-                [P, 4 * class_nums].
-            bbox_inside_weights(Variable): Indicates whether a box should
-                contribute to loss. Same shape as bbox_targets.
-            bbox_outside_weights(Variable): Indicates whether a box should
-                contribute to loss. Same shape as bbox_targets.
-
-        Return:
-            Type: Dict
-                loss_cls(Variable): bbox_head loss.
-                loss_bbox(Variable): bbox_head loss.
-        """
-
-        cls_score, bbox_pred = self._get_output(roi_feat)
-
-        labels_int64 = fluid.layers.cast(x=labels_int32, dtype='int64')
-        labels_int64.stop_gradient = True
-        loss_cls = fluid.layers.softmax_with_cross_entropy(
-            logits=cls_score, label=labels_int64, numeric_stable_mode=True)
-        loss_cls = fluid.layers.reduce_mean(loss_cls)
-        loss_bbox = self.bbox_loss(
-            x=bbox_pred,
-            y=bbox_targets,
-            inside_weight=bbox_inside_weights,
-            outside_weight=bbox_outside_weights)
-        loss_bbox = fluid.layers.reduce_mean(loss_bbox)
-        return {'loss_cls': loss_cls, 'loss_bbox': loss_bbox}
-
-    def get_prediction(self,
-                       roi_feat,
-                       rois,
-                       im_info,
-                       im_shape,
-                       return_box_score=False):
-        """
-        Get prediction bounding box in test stage.
-
-        Args:
-            roi_feat (Variable): RoI feature from RoIExtractor.
-            rois (Variable): Output of generate_proposals in rpn head.
-            im_info (Variable): A 2-D LoDTensor with shape [B, 3]. B is the
-                number of input images, each element consists of im_height,
-                im_width, im_scale.
-            im_shape (Variable): Actual shape of original image with shape
-                [B, 3]. B is the number of images, each element consists of
-                original_height, original_width, 1
-
-        Returns:
-            pred_result(Variable): Prediction result with shape [N, 6]. Each
-                row has 6 values: [label, confidence, xmin, ymin, xmax, ymax].
-                N is the total number of prediction.
-        """
-        cls_score, bbox_pred = self._get_output(roi_feat)
-
-        im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3])
-        im_scale = fluid.layers.sequence_expand(im_scale, rois)
-        boxes = rois / im_scale
-        cls_prob = fluid.layers.softmax(cls_score, use_cudnn=False)
-        bbox_pred = fluid.layers.reshape(bbox_pred, (-1, self.num_classes, 4))
-        # self.box_coder
-        decoded_box = fluid.layers.box_coder(
-            prior_box=boxes,
-            target_box=bbox_pred,
-            prior_box_var=self.box_coder.prior_box_var,
-            code_type=self.box_coder.code_type,
-            box_normalized=self.box_coder.box_normalized,
-            axis=self.box_coder.axis)
-        cliped_box = fluid.layers.box_clip(input=decoded_box, im_info=im_shape)
-        if return_box_score:
-            return {'bbox': cliped_box, 'score': cls_prob}
-        # self.nms
-        pred_result = fluid.layers.multiclass_nms(
-            bboxes=cliped_box,
-            scores=cls_prob,
-            score_threshold=self.nms.score_threshold,
-            nms_top_k=self.nms.nms_top_k,
-            keep_top_k=self.nms.keep_top_k,
-            nms_threshold=self.nms.nms_threshold,
-            normalized=self.nms.normalized,
-            nms_eta=self.nms.nms_eta,
-            background_label=self.nms.background_label)
-        return pred_result
diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/data_feed.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/data_feed.py
index b38501e5..c9e52d54 100644
--- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/data_feed.py
+++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/data_feed.py
@@ -4,12 +4,9 @@ from __future__ import print_function
 from __future__ import division
 
 import os
-from collections import OrderedDict
 
 import cv2
 import numpy as np
-from PIL import Image, ImageEnhance
-from paddle import fluid
 
 __all__ = ['test_reader']
 
diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/fpn.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/fpn.py
deleted file mode 100644
index bd19c712..00000000
--- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/fpn.py
+++ /dev/null
@@ -1,296 +0,0 @@
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import copy
-from collections import OrderedDict
-
-from paddle import fluid
-from paddle.fluid.param_attr import ParamAttr
-from paddle.fluid.initializer import Xavier
-from paddle.fluid.regularizer import L2Decay
-
-__all__ = ['ConvNorm', 'FPN']
-
-
-def ConvNorm(input,
-             num_filters,
-             filter_size,
-             stride=1,
-             groups=1,
-             norm_decay=0.,
-             norm_type='affine_channel',
-             norm_groups=32,
-             dilation=1,
-             lr_scale=1,
-             freeze_norm=False,
-             act=None,
-             norm_name=None,
-             initializer=None,
-             name=None):
-    fan = num_filters
-    conv = fluid.layers.conv2d(
-        input=input,
-        num_filters=num_filters,
-        filter_size=filter_size,
-        stride=stride,
-        padding=((filter_size - 1) // 2) * dilation,
-        dilation=dilation,
-        groups=groups,
-        act=None,
-        param_attr=ParamAttr(
-            name=name + "_weights",
-            initializer=initializer,
-            learning_rate=lr_scale),
-        bias_attr=False,
-        name=name + '.conv2d.output.1')
-
-    norm_lr = 0. if freeze_norm else 1.
-    pattr = ParamAttr(
-        name=norm_name + '_scale',
-        learning_rate=norm_lr * lr_scale,
-        regularizer=L2Decay(norm_decay))
-    battr = ParamAttr(
-        name=norm_name + '_offset',
-        learning_rate=norm_lr * lr_scale,
-        regularizer=L2Decay(norm_decay))
-
-    if norm_type in ['bn', 'sync_bn']:
-        global_stats = True if freeze_norm else False
-        out = fluid.layers.batch_norm(
-            input=conv,
-            act=act,
-            name=norm_name + '.output.1',
-            param_attr=pattr,
-            bias_attr=battr,
-            moving_mean_name=norm_name + '_mean',
-            moving_variance_name=norm_name + '_variance',
-            use_global_stats=global_stats)
-        scale = fluid.framework._get_var(pattr.name)
-        bias = fluid.framework._get_var(battr.name)
-    elif norm_type == 'gn':
-        out = fluid.layers.group_norm(
-            input=conv,
-            act=act,
-            name=norm_name + '.output.1',
-            groups=norm_groups,
-            param_attr=pattr,
-            bias_attr=battr)
-        scale = fluid.framework._get_var(pattr.name)
-        bias = fluid.framework._get_var(battr.name)
-    elif norm_type == 'affine_channel':
-        scale = fluid.layers.create_parameter(
-            shape=[conv.shape[1]],
-            dtype=conv.dtype,
-            attr=pattr,
-            default_initializer=fluid.initializer.Constant(1.))
-        bias = fluid.layers.create_parameter(
-            shape=[conv.shape[1]],
-            dtype=conv.dtype,
-            attr=battr,
-            default_initializer=fluid.initializer.Constant(0.))
-        out = fluid.layers.affine_channel(
-            x=conv, scale=scale, bias=bias, act=act)
-    if freeze_norm:
-        scale.stop_gradient = True
-        bias.stop_gradient = True
-    return out
-
-
-class FPN(object):
-    """
-    Feature Pyramid Network, see https://arxiv.org/abs/1612.03144
-
-    Args:
-        num_chan (int): number of feature channels
-        min_level (int): lowest level of the backbone feature map to use
-        max_level (int): highest level of the backbone feature map to use
-        spatial_scale (list): feature map scaling factor
-        has_extra_convs (bool): whether has extral convolutions in higher levels
-        norm_type (str|None): normalization type, 'bn'/'sync_bn'/'affine_channel'
-    """
-    __shared__ = ['norm_type', 'freeze_norm']
-
-    def __init__(self,
-                 num_chan=256,
-                 min_level=2,
-                 max_level=6,
-                 spatial_scale=[1. / 32., 1. / 16., 1. / 8., 1. / 4.],
-                 has_extra_convs=False,
-                 norm_type=None,
-                 freeze_norm=False):
-        self.freeze_norm = freeze_norm
-        self.num_chan = num_chan
-        self.min_level = min_level
-        self.max_level = max_level
-        self.spatial_scale = spatial_scale
-        self.has_extra_convs = has_extra_convs
-        self.norm_type = norm_type
-
-    def _add_topdown_lateral(self, body_name, body_input, upper_output):
-        lateral_name = 'fpn_inner_' + body_name + '_lateral'
-        topdown_name = 'fpn_topdown_' + body_name
-        fan = body_input.shape[1]
-        if self.norm_type:
-            initializer = Xavier(fan_out=fan)
-            lateral = ConvNorm(
-                body_input,
-                self.num_chan,
-                1,
-                initializer=initializer,
-                norm_type=self.norm_type,
-                freeze_norm=self.freeze_norm,
-                name=lateral_name,
-                norm_name=lateral_name)
-        else:
-            lateral = fluid.layers.conv2d(
-                body_input,
-                self.num_chan,
-                1,
-                param_attr=ParamAttr(
-                    name=lateral_name + "_w", initializer=Xavier(fan_out=fan)),
-                bias_attr=ParamAttr(
-                    name=lateral_name + "_b",
-                    learning_rate=2.,
-                    regularizer=L2Decay(0.)),
-                name=lateral_name)
-        topdown = fluid.layers.resize_nearest(
-            upper_output, scale=2., name=topdown_name)
-        return lateral + topdown
-
-    def get_output(self, body_dict):
-        """
-        Add FPN onto backbone.
-
-        Args:
-            body_dict(OrderedDict): Dictionary of variables and each element is the
-                output of backbone.
-
-        Return:
-            fpn_dict(OrderedDict): A dictionary represents the output of FPN with
-                their name.
-            spatial_scale(list): A list of multiplicative spatial scale factor.
-        """
-        spatial_scale = copy.deepcopy(self.spatial_scale)
-        body_name_list = list(body_dict.keys())[::-1]
-        num_backbone_stages = len(body_name_list)
-        self.fpn_inner_output = [[] for _ in range(num_backbone_stages)]
-        fpn_inner_name = 'fpn_inner_' + body_name_list[0]
-        body_input = body_dict[body_name_list[0]]
-        fan = body_input.shape[1]
-        if self.norm_type:
-            initializer = Xavier(fan_out=fan)
-            self.fpn_inner_output[0] = ConvNorm(
-                body_input,
-                self.num_chan,
-                1,
-                initializer=initializer,
-                norm_type=self.norm_type,
-                freeze_norm=self.freeze_norm,
-                name=fpn_inner_name,
-                norm_name=fpn_inner_name)
-        else:
-            self.fpn_inner_output[0] = fluid.layers.conv2d(
-                body_input,
-                self.num_chan,
-                1,
-                param_attr=ParamAttr(
-                    name=fpn_inner_name + "_w",
-                    initializer=Xavier(fan_out=fan)),
-                bias_attr=ParamAttr(
-                    name=fpn_inner_name + "_b",
-                    learning_rate=2.,
-                    regularizer=L2Decay(0.)),
-                name=fpn_inner_name)
-        for i in range(1, num_backbone_stages):
-            body_name = body_name_list[i]
-            body_input = body_dict[body_name]
-            top_output = self.fpn_inner_output[i - 1]
-            fpn_inner_single = self._add_topdown_lateral(
-                body_name, body_input, top_output)
-            self.fpn_inner_output[i] = fpn_inner_single
-        fpn_dict = {}
-        fpn_name_list = []
-        for i in range(num_backbone_stages):
-            fpn_name = 'fpn_' + body_name_list[i]
-            fan = self.fpn_inner_output[i].shape[1] * 3 * 3
-            if self.norm_type:
-                initializer = Xavier(fan_out=fan)
-                fpn_output = ConvNorm(
-                    self.fpn_inner_output[i],
-                    self.num_chan,
-                    3,
-                    initializer=initializer,
-                    norm_type=self.norm_type,
-                    freeze_norm=self.freeze_norm,
-                    name=fpn_name,
-                    norm_name=fpn_name)
-            else:
-                fpn_output = fluid.layers.conv2d(
-                    self.fpn_inner_output[i],
-                    self.num_chan,
-                    filter_size=3,
-                    padding=1,
-                    param_attr=ParamAttr(
-                        name=fpn_name + "_w", initializer=Xavier(fan_out=fan)),
-                    bias_attr=ParamAttr(
-                        name=fpn_name + "_b",
-                        learning_rate=2.,
-                        regularizer=L2Decay(0.)),
-                    name=fpn_name)
-            fpn_dict[fpn_name] = fpn_output
-            fpn_name_list.append(fpn_name)
-        if not self.has_extra_convs and self.max_level - self.min_level == len(
-                spatial_scale):
-            body_top_name = fpn_name_list[0]
-            body_top_extension = fluid.layers.pool2d(
-                fpn_dict[body_top_name],
-                1,
-                'max',
-                pool_stride=2,
-                name=body_top_name + '_subsampled_2x')
-            fpn_dict[body_top_name + '_subsampled_2x'] = body_top_extension
-            fpn_name_list.insert(0, body_top_name + '_subsampled_2x')
-            spatial_scale.insert(0, spatial_scale[0] * 0.5)
-        # Coarser FPN levels introduced for RetinaNet
-        highest_backbone_level = self.min_level + len(spatial_scale) - 1
-        if self.has_extra_convs and self.max_level > highest_backbone_level:
-            fpn_blob = body_dict[body_name_list[0]]
-            for i in range(highest_backbone_level + 1, self.max_level + 1):
-                fpn_blob_in = fpn_blob
-                fpn_name = 'fpn_' + str(i)
-                if i > highest_backbone_level + 1:
-                    fpn_blob_in = fluid.layers.relu(fpn_blob)
-                fan = fpn_blob_in.shape[1] * 3 * 3
-                fpn_blob = fluid.layers.conv2d(
-                    input=fpn_blob_in,
-                    num_filters=self.num_chan,
-                    filter_size=3,
-                    stride=2,
-                    padding=1,
-                    param_attr=ParamAttr(
-                        name=fpn_name + "_w", initializer=Xavier(fan_out=fan)),
-                    bias_attr=ParamAttr(
-                        name=fpn_name + "_b",
-                        learning_rate=2.,
-                        regularizer=L2Decay(0.)),
-                    name=fpn_name)
-                fpn_dict[fpn_name] = fpn_blob
-                fpn_name_list.insert(0, fpn_name)
-                spatial_scale.insert(0, spatial_scale[0] * 0.5)
-        res_dict = OrderedDict([(k, fpn_dict[k]) for k in fpn_name_list])
-        return res_dict, spatial_scale
diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/module.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/module.py
index b8dd5afa..65049189 100644
--- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/module.py
+++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/module.py
@@ -6,41 +6,32 @@ from __future__ import print_function
 import os
 import ast
 import argparse
-from collections import OrderedDict
-from functools import partial
 from math import ceil
 
+import paddle
 import numpy as np
-import paddle.fluid as fluid
-import paddlehub as hub
+import paddle.jit
+import paddle.static
 from paddlehub.module.module import moduleinfo, runnable, serving
-from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
-from paddlehub.io.parser import txt_parser
-from paddlehub.common.paddle_helper import add_vars_prefix
-
-from faster_rcnn_resnet50_fpn_coco2017.processor import load_label_info, postprocess, base64_to_cv2
-from faster_rcnn_resnet50_fpn_coco2017.data_feed import test_reader, padding_minibatch
-from faster_rcnn_resnet50_fpn_coco2017.fpn import FPN
-from faster_rcnn_resnet50_fpn_coco2017.resnet import ResNet
-from faster_rcnn_resnet50_fpn_coco2017.rpn_head import AnchorGenerator, RPNTargetAssign, GenerateProposals, FPNRPNHead
-from faster_rcnn_resnet50_fpn_coco2017.bbox_head import MultiClassNMS, BBoxHead, TwoFCHead
-from faster_rcnn_resnet50_fpn_coco2017.bbox_assigner import BBoxAssigner
-from faster_rcnn_resnet50_fpn_coco2017.roi_extractor import FPNRoIAlign
+from paddle.inference import Config, create_predictor
+from paddlehub.utils.parser import txt_parser
+from .processor import load_label_info, postprocess, base64_to_cv2
+from .data_feed import test_reader, padding_minibatch
 
 
 @moduleinfo(
     name="faster_rcnn_resnet50_fpn_coco2017",
-    version="1.0.1",
+    version="1.1.0",
     type="cv/object_detection",
     summary=
     "Baidu's Faster-RCNN model for object detection, whose backbone is ResNet50, processed with Feature Pyramid Networks",
     author="paddlepaddle",
     author_email="paddle-dev@baidu.com")
-class FasterRCNNResNet50RPN(hub.Module):
-    def _initialize(self):
+class FasterRCNNResNet50RPN:
+    def __init__(self):
         # default pretrained model, Faster-RCNN with backbone ResNet50, shape of input tensor is [3, 800, 1333]
         self.default_pretrained_model_path = os.path.join(
-            self.directory, "faster_rcnn_resnet50_fpn_model")
+            self.directory, "faster_rcnn_resnet50_fpn_model", "model")
         self.label_names = load_label_info(
             os.path.join(self.directory, "label_file.txt"))
         self._set_config()
@@ -49,10 +40,12 @@ class FasterRCNNResNet50RPN(hub.Module):
         """
         predictor config setting
         """
-        cpu_config = AnalysisConfig(self.default_pretrained_model_path)
+        model = self.default_pretrained_model_path+'.pdmodel'
+        params = self.default_pretrained_model_path+'.pdiparams'
+        cpu_config = Config(model, params)
         cpu_config.disable_glog_info()
         cpu_config.disable_gpu()
-        self.cpu_predictor = create_paddle_predictor(cpu_config)
+        self.cpu_predictor = create_predictor(cpu_config)
 
         try:
             _places = os.environ["CUDA_VISIBLE_DEVICES"]
@@ -61,245 +54,14 @@ class FasterRCNNResNet50RPN(hub.Module):
         except:
             use_gpu = False
         if use_gpu:
-            gpu_config = AnalysisConfig(self.default_pretrained_model_path)
+            gpu_config = Config(model, params)
             gpu_config.disable_glog_info()
             gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0)
-            self.gpu_predictor = create_paddle_predictor(gpu_config)
-
-    def context(self,
-                num_classes=81,
-                trainable=True,
-                pretrained=True,
-                phase='train'):
-        """
-        Distill the Head Features, so as to perform transfer learning.
-
-        Args:
-            num_classes (int): number of categories
-            trainable (bool): whether to set parameters trainable.
-            pretrained (bool): whether to load default pretrained model.
-            phase (str): optional choices are 'train' and 'predict'.
-
-        Returns:
-             inputs (dict): the input variables.
-             outputs (dict): the output variables.
-             context_prog (Program): the program to execute transfer learning.
-        """
-        context_prog = fluid.Program()
-        startup_program = fluid.Program()
-        with fluid.program_guard(context_prog, startup_program):
-            with fluid.unique_name.guard():
-                image = fluid.layers.data(
-                    name='image', shape=[-1, 3, -1, -1], dtype='float32')
-                # backbone
-                backbone = ResNet(
-                    norm_type='affine_channel',
-                    depth=50,
-                    feature_maps=[2, 3, 4, 5],
-                    freeze_at=2)
-                body_feats = backbone(image)
-                # fpn
-                fpn = FPN(
-                    max_level=6,
-                    min_level=2,
-                    num_chan=256,
-                    spatial_scale=[0.03125, 0.0625, 0.125, 0.25])
-                var_prefix = '@HUB_{}@'.format(self.name)
-                im_info = fluid.layers.data(
-                    name='im_info', shape=[3], dtype='float32', lod_level=0)
-                im_shape = fluid.layers.data(
-                    name='im_shape', shape=[3], dtype='float32', lod_level=0)
-                body_feat_names = list(body_feats.keys())
-                body_feats, spatial_scale = fpn.get_output(body_feats)
-                # rpn_head: RPNHead
-                rpn_head = self.rpn_head()
-                rois = rpn_head.get_proposals(body_feats, im_info, mode=phase)
-                # train
-                if phase == 'train':
-                    gt_bbox = fluid.layers.data(
-                        name='gt_bbox', shape=[4], dtype='float32', lod_level=1)
-                    is_crowd = fluid.layers.data(
-                        name='is_crowd', shape=[1], dtype='int32', lod_level=1)
-                    gt_class = fluid.layers.data(
-                        name='gt_class', shape=[1], dtype='int32', lod_level=1)
-                    rpn_loss = rpn_head.get_loss(im_info, gt_bbox, is_crowd)
-                    # bbox_assigner: BBoxAssigner
-                    bbox_assigner = self.bbox_assigner(num_classes)
-                    outs = fluid.layers.generate_proposal_labels(
-                        rpn_rois=rois,
-                        gt_classes=gt_class,
-                        is_crowd=is_crowd,
-                        gt_boxes=gt_bbox,
-                        im_info=im_info,
-                        batch_size_per_im=bbox_assigner.batch_size_per_im,
-                        fg_fraction=bbox_assigner.fg_fraction,
-                        fg_thresh=bbox_assigner.fg_thresh,
-                        bg_thresh_hi=bbox_assigner.bg_thresh_hi,
-                        bg_thresh_lo=bbox_assigner.bg_thresh_lo,
-                        bbox_reg_weights=bbox_assigner.bbox_reg_weights,
-                        class_nums=bbox_assigner.class_nums,
-                        use_random=bbox_assigner.use_random)
-                    rois = outs[0]
-
-                roi_extractor = self.roi_extractor()
-                roi_feat = roi_extractor(
-                    head_inputs=body_feats,
-                    rois=rois,
-                    spatial_scale=spatial_scale)
-                # head_feat
-                bbox_head = self.bbox_head(num_classes)
-                head_feat = bbox_head.head(roi_feat)
-                if isinstance(head_feat, OrderedDict):
-                    head_feat = list(head_feat.values())[0]
-                if phase == 'train':
-                    inputs = {
-                        'image': var_prefix + image.name,
-                        'im_info': var_prefix + im_info.name,
-                        'im_shape': var_prefix + im_shape.name,
-                        'gt_class': var_prefix + gt_class.name,
-                        'gt_bbox': var_prefix + gt_bbox.name,
-                        'is_crowd': var_prefix + is_crowd.name
-                    }
-                    outputs = {
-                        'head_features':
-                        var_prefix + head_feat.name,
-                        'rpn_cls_loss':
-                        var_prefix + rpn_loss['rpn_cls_loss'].name,
-                        'rpn_reg_loss':
-                        var_prefix + rpn_loss['rpn_reg_loss'].name,
-                        'generate_proposal_labels':
-                        [var_prefix + var.name for var in outs]
-                    }
-                elif phase == 'predict':
-                    pred = bbox_head.get_prediction(roi_feat, rois, im_info,
-                                                    im_shape)
-                    inputs = {
-                        'image': var_prefix + image.name,
-                        'im_info': var_prefix + im_info.name,
-                        'im_shape': var_prefix + im_shape.name
-                    }
-                    outputs = {
-                        'head_features': var_prefix + head_feat.name,
-                        'rois': var_prefix + rois.name,
-                        'bbox_out': var_prefix + pred.name
-                    }
-                add_vars_prefix(context_prog, var_prefix)
-                add_vars_prefix(startup_program, var_prefix)
-
-                global_vars = context_prog.global_block().vars
-                inputs = {
-                    key: global_vars[value]
-                    for key, value in inputs.items()
-                }
-                outputs = {
-                    key: global_vars[value] if not isinstance(value, list) else
-                    [global_vars[var] for var in value]
-                    for key, value in outputs.items()
-                }
-
-                for param in context_prog.global_block().iter_parameters():
-                    param.trainable = trainable
-
-                place = fluid.CPUPlace()
-                exe = fluid.Executor(place)
-                exe.run(startup_program)
-                if pretrained:
-
-                    def _if_exist(var):
-                        if num_classes != 81:
-                            if 'bbox_pred' in var.name or 'cls_score' in var.name:
-                                return False
-                        return os.path.exists(
-                            os.path.join(self.default_pretrained_model_path,
-                                         var.name))
-
-                    fluid.io.load_vars(
-                        exe,
-                        self.default_pretrained_model_path,
-                        predicate=_if_exist)
-                return inputs, outputs, context_prog
-
-    def rpn_head(self):
-        return FPNRPNHead(
-            anchor_generator=AnchorGenerator(
-                anchor_sizes=[32, 64, 128, 256, 512],
-                aspect_ratios=[0.5, 1.0, 2.0],
-                stride=[16.0, 16.0],
-                variance=[1.0, 1.0, 1.0, 1.0]),
-            rpn_target_assign=RPNTargetAssign(
-                rpn_batch_size_per_im=256,
-                rpn_fg_fraction=0.5,
-                rpn_negative_overlap=0.3,
-                rpn_positive_overlap=0.7,
-                rpn_straddle_thresh=0.0),
-            train_proposal=GenerateProposals(
-                min_size=0.0,
-                nms_thresh=0.7,
-                post_nms_top_n=2000,
-                pre_nms_top_n=2000),
-            test_proposal=GenerateProposals(
-                min_size=0.0,
-                nms_thresh=0.7,
-                post_nms_top_n=1000,
-                pre_nms_top_n=1000),
-            anchor_start_size=32,
-            num_chan=256,
-            min_level=2,
-            max_level=6)
-
-    def roi_extractor(self):
-        return FPNRoIAlign(
-            canconical_level=4,
-            canonical_size=224,
-            max_level=5,
-            min_level=2,
-            box_resolution=7,
-            sampling_ratio=2)
-
-    def bbox_head(self, num_classes):
-        return BBoxHead(
-            head=TwoFCHead(mlp_dim=1024),
-            nms=MultiClassNMS(
-                keep_top_k=100, nms_threshold=0.5, score_threshold=0.05),
-            num_classes=num_classes)
-
-    def bbox_assigner(self, num_classes):
-        return BBoxAssigner(
-            batch_size_per_im=512,
-            bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
-            bg_thresh_hi=0.5,
-            bg_thresh_lo=0.0,
-            fg_fraction=0.25,
-            fg_thresh=0.5,
-            class_nums=num_classes)
-
-    def save_inference_model(self,
-                             dirname,
-                             model_filename=None,
-                             params_filename=None,
-                             combined=True):
-        if combined:
-            model_filename = "__model__" if not model_filename else model_filename
-            params_filename = "__params__" if not params_filename else params_filename
-        place = fluid.CPUPlace()
-        exe = fluid.Executor(place)
-
-        program, feeded_var_names, target_vars = fluid.io.load_inference_model(
-            dirname=self.default_pretrained_model_path, executor=exe)
-
-        fluid.io.save_inference_model(
-            dirname=dirname,
-            main_program=program,
-            executor=exe,
-            feeded_var_names=feeded_var_names,
-            target_vars=target_vars,
-            model_filename=model_filename,
-            params_filename=params_filename)
+            self.gpu_predictor = create_predictor(gpu_config)
 
     def object_detection(self,
                          paths=None,
                          images=None,
-                         data=None,
                          use_gpu=False,
                          batch_size=1,
                          output_dir='detection_result',
@@ -337,8 +99,6 @@ class FasterRCNNResNet50RPN(hub.Module):
                 )
 
         paths = paths if paths else list()
-        if data and 'image' in data:
-            paths += data['image']
 
         all_images = list()
         for yield_data in test_reader(paths, images):
@@ -360,29 +120,33 @@ class FasterRCNNResNet50RPN(hub.Module):
 
             padding_image, padding_info, padding_shape = padding_minibatch(
                 batch_data, coarsest_stride=32, use_padded_im_info=True)
-            padding_image_tensor = PaddleTensor(padding_image.copy())
-            padding_info_tensor = PaddleTensor(padding_info.copy())
-            padding_shape_tensor = PaddleTensor(padding_shape.copy())
             feed_list = [
-                padding_image_tensor, padding_info_tensor, padding_shape_tensor
+                padding_image, padding_info, padding_shape
             ]
 
-            if use_gpu:
-                data_out = self.gpu_predictor.run(feed_list)
-            else:
-                data_out = self.cpu_predictor.run(feed_list)
+            predictor = self.gpu_predictor if use_gpu else self.cpu_predictor
+
+            input_names = predictor.get_input_names()
+
+            for i, input_name in enumerate(input_names):
+                data = np.asarray(feed_list[i], dtype=np.float32)
+                handle = predictor.get_input_handle(input_name)
+                handle.copy_from_cpu(data)
+
+            predictor.run()
+            output_names = predictor.get_output_names()
+            output_handle = predictor.get_output_handle(output_names[0])
 
             output = postprocess(
                 paths=paths,
                 images=images,
-                data_out=data_out,
+                data_out=output_handle,
                 score_thresh=score_thresh,
                 label_names=self.label_names,
                 output_dir=output_dir,
                 handle_id=handle_id,
                 visualization=visualization)
             res += output
-
         return res
 
     def add_module_config_arg(self):
diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/name_adapter.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/name_adapter.py
deleted file mode 100644
index bebf8bde..00000000
--- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/name_adapter.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# coding=utf-8
-
-
-class NameAdapter(object):
-    """Fix the backbones variable names for pretrained weight"""
-
-    def __init__(self, model):
-        super(NameAdapter, self).__init__()
-        self.model = model
-
-    @property
-    def model_type(self):
-        return getattr(self.model, '_model_type', '')
-
-    @property
-    def variant(self):
-        return getattr(self.model, 'variant', '')
-
-    def fix_conv_norm_name(self, name):
-        if name == "conv1":
-            bn_name = "bn_" + name
-        else:
-            bn_name = "bn" + name[3:]
-        # the naming rule is same as pretrained weight
-        if self.model_type == 'SEResNeXt':
-            bn_name = name + "_bn"
-        return bn_name
-
-    def fix_shortcut_name(self, name):
-        if self.model_type == 'SEResNeXt':
-            name = 'conv' + name + '_prj'
-        return name
-
-    def fix_bottleneck_name(self, name):
-        if self.model_type == 'SEResNeXt':
-            conv_name1 = 'conv' + name + '_x1'
-            conv_name2 = 'conv' + name + '_x2'
-            conv_name3 = 'conv' + name + '_x3'
-            shortcut_name = name
-        else:
-            conv_name1 = name + "_branch2a"
-            conv_name2 = name + "_branch2b"
-            conv_name3 = name + "_branch2c"
-            shortcut_name = name + "_branch1"
-        return conv_name1, conv_name2, conv_name3, shortcut_name
-
-    def fix_layer_warp_name(self, stage_num, count, i):
-        name = 'res' + str(stage_num)
-        if count > 10 and stage_num == 4:
-            if i == 0:
-                conv_name = name + "a"
-            else:
-                conv_name = name + "b" + str(i)
-        else:
-            conv_name = name + chr(ord("a") + i)
-        if self.model_type == 'SEResNeXt':
-            conv_name = str(stage_num + 2) + '_' + str(i + 1)
-        return conv_name
-
-    def fix_c1_stage_name(self):
-        return "res_conv1" if self.model_type == 'ResNeXt' else "conv1"
diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/nonlocal_helper.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/nonlocal_helper.py
deleted file mode 100644
index 599b8dfa..00000000
--- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/nonlocal_helper.py
+++ /dev/null
@@ -1,154 +0,0 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-from __future__ import unicode_literals
-
-import paddle.fluid as fluid
-from paddle.fluid import ParamAttr
-
-nonlocal_params = {
-    "use_zero_init_conv": False,
-    "conv_init_std": 0.01,
-    "no_bias": True,
-    "use_maxpool": False,
-    "use_softmax": True,
-    "use_bn": False,
-    "use_scale": True,  # vital for the model prformance!!!
-    "use_affine": False,
-    "bn_momentum": 0.9,
-    "bn_epsilon": 1.0000001e-5,
-    "bn_init_gamma": 0.9,
-    "weight_decay_bn": 1.e-4,
-}
-
-
-def space_nonlocal(input, dim_in, dim_out, prefix, dim_inner,
-                   max_pool_stride=2):
-    cur = input
-    theta = fluid.layers.conv2d(input = cur, num_filters = dim_inner, \
-                                filter_size = [1, 1], stride = [1, 1], \
-                                padding = [0, 0], \
-                                param_attr=ParamAttr(name = prefix + '_theta' + "_w", \
-                                    initializer = fluid.initializer.Normal(loc = 0.0,
-                                    scale = nonlocal_params["conv_init_std"])), \
-                                bias_attr = ParamAttr(name = prefix + '_theta' + "_b", \
-                                    initializer = fluid.initializer.Constant(value = 0.)) \
-                                        if not nonlocal_params["no_bias"] else False, \
-                                name = prefix + '_theta')
-    theta_shape = theta.shape
-    theta_shape_op = fluid.layers.shape(theta)
-    theta_shape_op.stop_gradient = True
-
-    if nonlocal_params["use_maxpool"]:
-        max_pool = fluid.layers.pool2d(input = cur, \
-                                        pool_size = [max_pool_stride, max_pool_stride], \
-                                        pool_type = 'max', \
-                                        pool_stride = [max_pool_stride, max_pool_stride], \
-                                        pool_padding = [0, 0], \
-                                        name = prefix + '_pool')
-    else:
-        max_pool = cur
-
-    phi = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \
-                             filter_size = [1, 1], stride = [1, 1], \
-                             padding = [0, 0], \
-                             param_attr = ParamAttr(name = prefix + '_phi' + "_w", \
-                                 initializer = fluid.initializer.Normal(loc = 0.0,
-                                 scale = nonlocal_params["conv_init_std"])), \
-                             bias_attr = ParamAttr(name = prefix + '_phi' + "_b", \
-                                 initializer = fluid.initializer.Constant(value = 0.)) \
-                                      if (nonlocal_params["no_bias"] == 0) else False, \
-                             name = prefix + '_phi')
-    phi_shape = phi.shape
-
-    g = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \
-                 filter_size = [1, 1], stride = [1, 1], \
-                 padding = [0, 0], \
-                 param_attr = ParamAttr(name = prefix + '_g' + "_w", \
-                     initializer = fluid.initializer.Normal(loc = 0.0, scale = nonlocal_params["conv_init_std"])), \
-                 bias_attr = ParamAttr(name = prefix + '_g' + "_b", \
-                     initializer = fluid.initializer.Constant(value = 0.)) if (nonlocal_params["no_bias"] == 0) else False, \
-                 name = prefix + '_g')
-    g_shape = g.shape
-    # we have to use explicit batch size (to support arbitrary spacetime size)
-    # e.g. (8, 1024, 4, 14, 14) => (8, 1024, 784)
-    theta = fluid.layers.reshape(theta, shape=(0, 0, -1))
-    theta = fluid.layers.transpose(theta, [0, 2, 1])
-    phi = fluid.layers.reshape(phi, [0, 0, -1])
-    theta_phi = fluid.layers.matmul(theta, phi, name=prefix + '_affinity')
-    g = fluid.layers.reshape(g, [0, 0, -1])
-
-    if nonlocal_params["use_softmax"]:
-        if nonlocal_params["use_scale"]:
-            theta_phi_sc = fluid.layers.scale(theta_phi, scale=dim_inner**-.5)
-        else:
-            theta_phi_sc = theta_phi
-        p = fluid.layers.softmax(
-            theta_phi_sc, name=prefix + '_affinity' + '_prob')
-    else:
-        # not clear about what is doing in xlw's code
-        p = None  # not implemented
-        raise "Not implemented when not use softmax"
-
-    # note g's axis[2] corresponds to p's axis[2]
-    # e.g. g(8, 1024, 784_2) * p(8, 784_1, 784_2) => (8, 1024, 784_1)
-    p = fluid.layers.transpose(p, [0, 2, 1])
-    t = fluid.layers.matmul(g, p, name=prefix + '_y')
-
-    # reshape back
-    # e.g. (8, 1024, 784) => (8, 1024, 4, 14, 14)
-    t_shape = t.shape
-    t_re = fluid.layers.reshape(
-        t, shape=list(theta_shape), actual_shape=theta_shape_op)
-    blob_out = t_re
-    blob_out = fluid.layers.conv2d(input = blob_out, num_filters = dim_out, \
-                                  filter_size = [1, 1], stride = [1, 1], padding = [0, 0], \
-                                  param_attr = ParamAttr(name = prefix + '_out' + "_w", \
-                                      initializer = fluid.initializer.Constant(value = 0.) \
-                                        if nonlocal_params["use_zero_init_conv"] \
-                                        else fluid.initializer.Normal(loc = 0.0,
-                                            scale = nonlocal_params["conv_init_std"])), \
-                                  bias_attr = ParamAttr(name = prefix + '_out' + "_b", \
-                                          initializer = fluid.initializer.Constant(value = 0.)) \
-                                           if (nonlocal_params["no_bias"] == 0) else False, \
-                                  name = prefix + '_out')
-    blob_out_shape = blob_out.shape
-
-    if nonlocal_params["use_bn"]:
-        bn_name = prefix + "_bn"
-        blob_out = fluid.layers.batch_norm(blob_out, \
-                      # is_test = test_mode, \
-                      momentum = nonlocal_params["bn_momentum"], \
-                      epsilon = nonlocal_params["bn_epsilon"], \
-                      name = bn_name, \
-                      param_attr = ParamAttr(name = bn_name + "_s", \
-                      initializer = fluid.initializer.Constant(value = nonlocal_params["bn_init_gamma"]), \
-                      regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \
-                      bias_attr = ParamAttr(name = bn_name + "_b", \
-                      regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \
-                      moving_mean_name = bn_name + "_rm", \
-                      moving_variance_name = bn_name + "_riv") # add bn
-
-    if nonlocal_params["use_affine"]:
-        affine_scale = fluid.layers.create_parameter(\
-                       shape=[blob_out_shape[1]], dtype = blob_out.dtype, \
-                       attr=ParamAttr(name=prefix + '_affine' + '_s'), \
-                       default_initializer = fluid.initializer.Constant(value = 1.))
-        affine_bias = fluid.layers.create_parameter(\
-                      shape=[blob_out_shape[1]], dtype = blob_out.dtype, \
-                      attr=ParamAttr(name=prefix + '_affine' + '_b'), \
-                      default_initializer = fluid.initializer.Constant(value = 0.))
-        blob_out = fluid.layers.affine_channel(blob_out, scale = affine_scale, \
-                      bias = affine_bias, name = prefix + '_affine')   # add affine
-
-    return blob_out
-
-
-def add_space_nonlocal(input, dim_in, dim_out, prefix, dim_inner):
-    '''
-    add_space_nonlocal:
-        Non-local Neural Networks: see https://arxiv.org/abs/1711.07971
-    '''
-    conv = space_nonlocal(input, dim_in, dim_out, prefix, dim_inner)
-    output = fluid.layers.elementwise_add(input, conv, name=prefix + '_sum')
-    return output
diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/processor.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/processor.py
index 2b3e1ce9..f1524564 100644
--- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/processor.py
+++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/processor.py
@@ -12,7 +12,6 @@ __all__ = [
     'postprocess',
 ]
 
-
 def base64_to_cv2(b64str):
     data = base64.b64decode(b64str.encode('utf8'))
     data = np.fromstring(data, np.uint8)
@@ -107,7 +106,7 @@ def postprocess(paths,
                 handle_id,
                 visualization=True):
     """
-    postprocess the lod_tensor produced by fluid.Executor.run
+    postprocess the output tensor handle fetched from the paddle.inference predictor
 
     Args:
         paths (list[str]): the path of images.
@@ -130,9 +129,8 @@ def postprocess(paths,
                 confidence (float): The confidence of detection result.
             save_path (str): The path to save output images.
     """
-    lod_tensor = data_out[0]
-    lod = lod_tensor.lod[0]
-    results = lod_tensor.as_ndarray()
+    lod = data_out.lod()[0]
+    results = data_out.copy_to_cpu()
 
     check_dir(output_dir)
 
diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/resnet.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/resnet.py
deleted file mode 100644
index 4bd6fb61..00000000
--- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/resnet.py
+++ /dev/null
@@ -1,447 +0,0 @@
-# coding=utf-8
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-from collections import OrderedDict
-from numbers import Integral
-
-from paddle import fluid
-from paddle.fluid.param_attr import ParamAttr
-from paddle.fluid.framework import Variable
-from paddle.fluid.regularizer import L2Decay
-from paddle.fluid.initializer import Constant
-
-from .nonlocal_helper import add_space_nonlocal
-from .name_adapter import NameAdapter
-
-__all__ = ['ResNet', 'ResNetC5']
-
-
-class ResNet(object):
-    """
-    Residual Network, see https://arxiv.org/abs/1512.03385
-    Args:
-        depth (int): ResNet depth, should be 34, 50.
-        freeze_at (int): freeze the backbone at which stage
-        norm_type (str): normalization type, 'bn'/'sync_bn'/'affine_channel'
-        freeze_norm (bool): freeze normalization layers
-        norm_decay (float): weight decay for normalization layer weights
-        variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently
-        feature_maps (list): index of stages whose feature maps are returned
-        dcn_v2_stages (list): index of stages who select deformable conv v2
-        nonlocal_stages (list): index of stages who select nonlocal networks
-    """
-    __shared__ = ['norm_type', 'freeze_norm', 'weight_prefix_name']
-
-    def __init__(self,
-                 depth=50,
-                 freeze_at=0,
-                 norm_type='sync_bn',
-                 freeze_norm=False,
-                 norm_decay=0.,
-                 variant='b',
-                 feature_maps=[3, 4, 5],
-                 dcn_v2_stages=[],
-                 weight_prefix_name='',
-                 nonlocal_stages=[],
-                 get_prediction=False,
-                 class_dim=1000):
-        super(ResNet, self).__init__()
-
-        if isinstance(feature_maps, Integral):
-            feature_maps = [feature_maps]
-
-        assert depth in [34, 50], \
-            "depth {} not in [34, 50]"
-        assert variant in ['a', 'b', 'c', 'd'], "invalid ResNet variant"
-        assert 0 <= freeze_at <= 4, "freeze_at should be 0, 1, 2, 3 or 4"
-        assert len(feature_maps) > 0, "need one or more feature maps"
-        assert norm_type in ['bn', 'sync_bn', 'affine_channel']
-        assert not (len(nonlocal_stages)>0 and depth<50), \
-                    "non-local is not supported for resnet18 or resnet34"
-
-        self.depth = depth
-        self.freeze_at = freeze_at
-        self.norm_type = norm_type
-        self.norm_decay = norm_decay
-        self.freeze_norm = freeze_norm
-        self.variant = variant
-        self._model_type = 'ResNet'
-        self.feature_maps = feature_maps
-        self.dcn_v2_stages = dcn_v2_stages
-        self.depth_cfg = {
-            34: ([3, 4, 6, 3], self.basicblock),
-            50: ([3, 4, 6, 3], self.bottleneck),
-        }
-        self.stage_filters = [64, 128, 256, 512]
-        self._c1_out_chan_num = 64
-        self.na = NameAdapter(self)
-        self.prefix_name = weight_prefix_name
-
-        self.nonlocal_stages = nonlocal_stages
-        self.nonlocal_mod_cfg = {
-            50: 2,
-            101: 5,
-            152: 8,
-            200: 12,
-        }
-        self.get_prediction = get_prediction
-        self.class_dim = class_dim
-
-    def _conv_offset(self,
-                     input,
-                     filter_size,
-                     stride,
-                     padding,
-                     act=None,
-                     name=None):
-        out_channel = filter_size * filter_size * 3
-        out = fluid.layers.conv2d(
-            input,
-            num_filters=out_channel,
-            filter_size=filter_size,
-            stride=stride,
-            padding=padding,
-            param_attr=ParamAttr(initializer=Constant(0.0), name=name + ".w_0"),
-            bias_attr=ParamAttr(initializer=Constant(0.0), name=name + ".b_0"),
-            act=act,
-            name=name)
-        return out
-
-    def _conv_norm(self,
-                   input,
-                   num_filters,
-                   filter_size,
-                   stride=1,
-                   groups=1,
-                   act=None,
-                   name=None,
-                   dcn_v2=False):
-        _name = self.prefix_name + name if self.prefix_name != '' else name
-        if not dcn_v2:
-            conv = fluid.layers.conv2d(
-                input=input,
-                num_filters=num_filters,
-                filter_size=filter_size,
-                stride=stride,
-                padding=(filter_size - 1) // 2,
-                groups=groups,
-                act=None,
-                param_attr=ParamAttr(name=_name + "_weights"),
-                bias_attr=False,
-                name=_name + '.conv2d.output.1')
-        else:
-            # select deformable conv"
-            offset_mask = self._conv_offset(
-                input=input,
-                filter_size=filter_size,
-                stride=stride,
-                padding=(filter_size - 1) // 2,
-                act=None,
-                name=_name + "_conv_offset")
-            offset_channel = filter_size**2 * 2
-            mask_channel = filter_size**2
-            offset, mask = fluid.layers.split(
-                input=offset_mask,
-                num_or_sections=[offset_channel, mask_channel],
-                dim=1)
-            mask = fluid.layers.sigmoid(mask)
-            conv = fluid.layers.deformable_conv(
-                input=input,
-                offset=offset,
-                mask=mask,
-                num_filters=num_filters,
-                filter_size=filter_size,
-                stride=stride,
-                padding=(filter_size - 1) // 2,
-                groups=groups,
-                deformable_groups=1,
-                im2col_step=1,
-                param_attr=ParamAttr(name=_name + "_weights"),
-                bias_attr=False,
-                name=_name + ".conv2d.output.1")
-
-        bn_name = self.na.fix_conv_norm_name(name)
-        bn_name = self.prefix_name + bn_name if self.prefix_name != '' else bn_name
-
-        norm_lr = 0. if self.freeze_norm else 1.
-        norm_decay = self.norm_decay
-        pattr = ParamAttr(
-            name=bn_name + '_scale',
-            learning_rate=norm_lr,
-            regularizer=L2Decay(norm_decay))
-        battr = ParamAttr(
-            name=bn_name + '_offset',
-            learning_rate=norm_lr,
-            regularizer=L2Decay(norm_decay))
-
-        if self.norm_type in ['bn', 'sync_bn']:
-            global_stats = True if self.freeze_norm else False
-            out = fluid.layers.batch_norm(
-                input=conv,
-                act=act,
-                name=bn_name + '.output.1',
-                param_attr=pattr,
-                bias_attr=battr,
-                moving_mean_name=bn_name + '_mean',
-                moving_variance_name=bn_name + '_variance',
-                use_global_stats=global_stats)
-            scale = fluid.framework._get_var(pattr.name)
-            bias = fluid.framework._get_var(battr.name)
-        elif self.norm_type == 'affine_channel':
-            scale = fluid.layers.create_parameter(
-                shape=[conv.shape[1]],
-                dtype=conv.dtype,
-                attr=pattr,
-                default_initializer=fluid.initializer.Constant(1.))
-            bias = fluid.layers.create_parameter(
-                shape=[conv.shape[1]],
-                dtype=conv.dtype,
-                attr=battr,
-                default_initializer=fluid.initializer.Constant(0.))
-            out = fluid.layers.affine_channel(
-                x=conv, scale=scale, bias=bias, act=act)
-        if self.freeze_norm:
-            scale.stop_gradient = True
-            bias.stop_gradient = True
-        return out
-
-    def _shortcut(self, input, ch_out, stride, is_first, name):
-        max_pooling_in_short_cut = self.variant == 'd'
-        ch_in = input.shape[1]
-        # the naming rule is same as pretrained weight
-        name = self.na.fix_shortcut_name(name)
-        std_senet = getattr(self, 'std_senet', False)
-        if ch_in != ch_out or stride != 1 or (self.depth < 50 and is_first):
-            if std_senet:
-                if is_first:
-                    return self._conv_norm(input, ch_out, 1, stride, name=name)
-                else:
-                    return self._conv_norm(input, ch_out, 3, stride, name=name)
-            if max_pooling_in_short_cut and not is_first:
-                input = fluid.layers.pool2d(
-                    input=input,
-                    pool_size=2,
-                    pool_stride=2,
-                    pool_padding=0,
-                    ceil_mode=True,
-                    pool_type='avg')
-                return self._conv_norm(input, ch_out, 1, 1, name=name)
-            return self._conv_norm(input, ch_out, 1, stride, name=name)
-        else:
-            return input
-
-    def bottleneck(self,
-                   input,
-                   num_filters,
-                   stride,
-                   is_first,
-                   name,
-                   dcn_v2=False):
-        if self.variant == 'a':
-            stride1, stride2 = stride, 1
-        else:
-            stride1, stride2 = 1, stride
-
-        # ResNeXt
-        groups = getattr(self, 'groups', 1)
-        group_width = getattr(self, 'group_width', -1)
-        if groups == 1:
-            expand = 4
-        elif (groups * group_width) == 256:
-            expand = 1
-        else:  # FIXME hard code for now, handles 32x4d, 64x4d and 32x8d
-            num_filters = num_filters // 2
-            expand = 2
-
-        conv_name1, conv_name2, conv_name3, \
-            shortcut_name = self.na.fix_bottleneck_name(name)
-        std_senet = getattr(self, 'std_senet', False)
-        if std_senet:
-            conv_def = [[
-                int(num_filters / 2), 1, stride1, 'relu', 1, conv_name1
-            ], [num_filters, 3, stride2, 'relu', groups, conv_name2],
-                        [num_filters * expand, 1, 1, None, 1, conv_name3]]
-        else:
-            conv_def = [[num_filters, 1, stride1, 'relu', 1, conv_name1],
-                        [num_filters, 3, stride2, 'relu', groups, conv_name2],
-                        [num_filters * expand, 1, 1, None, 1, conv_name3]]
-
-        residual = input
-        for i, (c, k, s, act, g, _name) in enumerate(conv_def):
-            residual = self._conv_norm(
-                input=residual,
-                num_filters=c,
-                filter_size=k,
-                stride=s,
-                act=act,
-                groups=g,
-                name=_name,
-                dcn_v2=(i == 1 and dcn_v2))
-        short = self._shortcut(
-            input,
-            num_filters * expand,
-            stride,
-            is_first=is_first,
-            name=shortcut_name)
-        # Squeeze-and-Excitation
-        if callable(getattr(self, '_squeeze_excitation', None)):
-            residual = self._squeeze_excitation(
-                input=residual, num_channels=num_filters, name='fc' + name)
-        return fluid.layers.elementwise_add(
-            x=short, y=residual, act='relu', name=name + ".add.output.5")
-
-    def basicblock(self,
-                   input,
-                   num_filters,
-                   stride,
-                   is_first,
-                   name,
-                   dcn_v2=False):
-        assert dcn_v2 is False, "Not implemented yet."
-        conv0 = self._conv_norm(
-            input=input,
-            num_filters=num_filters,
-            filter_size=3,
-            act='relu',
-            stride=stride,
-            name=name + "_branch2a")
-        conv1 = self._conv_norm(
-            input=conv0,
-            num_filters=num_filters,
-            filter_size=3,
-            act=None,
-            name=name + "_branch2b")
-        short = self._shortcut(
-            input, num_filters, stride, is_first, name=name + "_branch1")
-        return fluid.layers.elementwise_add(x=short, y=conv1, act='relu')
-
-    def layer_warp(self, input, stage_num):
-        """
-        Args:
-            input (Variable): input variable.
-            stage_num (int): the stage number, should be 2, 3, 4, 5
-
-        Returns:
-            The last variable in endpoint-th stage.
-        """
-        assert stage_num in [2, 3, 4, 5]
-
-        stages, block_func = self.depth_cfg[self.depth]
-        count = stages[stage_num - 2]
-
-        ch_out = self.stage_filters[stage_num - 2]
-        is_first = False if stage_num != 2 else True
-        dcn_v2 = True if stage_num in self.dcn_v2_stages else False
-
-        nonlocal_mod = 1000
-        if stage_num in self.nonlocal_stages:
-            nonlocal_mod = self.nonlocal_mod_cfg[
-                self.depth] if stage_num == 4 else 2
-
-        # Make the layer name and parameter name consistent
-        # with ImageNet pre-trained model
-        conv = input
-        for i in range(count):
-            conv_name = self.na.fix_layer_warp_name(stage_num, count, i)
-            if self.depth < 50:
-                is_first = True if i == 0 and stage_num == 2 else False
-            conv = block_func(
-                input=conv,
-                num_filters=ch_out,
-                stride=2 if i == 0 and stage_num != 2 else 1,
-                is_first=is_first,
-                name=conv_name,
-                dcn_v2=dcn_v2)
-
-            # add non local model
-            dim_in = conv.shape[1]
-            nonlocal_name = "nonlocal_conv{}".format(stage_num)
-            if i % nonlocal_mod == nonlocal_mod - 1:
-                conv = add_space_nonlocal(conv, dim_in, dim_in,
-                                          nonlocal_name + '_{}'.format(i),
-                                          int(dim_in / 2))
-        return conv
-
-    def c1_stage(self, input):
-        out_chan = self._c1_out_chan_num
-
-        conv1_name = self.na.fix_c1_stage_name()
-
-        if self.variant in ['c', 'd']:
-            conv_def = [
-                [out_chan // 2, 3, 2, "conv1_1"],
-                [out_chan // 2, 3, 1, "conv1_2"],
-                [out_chan, 3, 1, "conv1_3"],
-            ]
-        else:
-            conv_def = [[out_chan, 7, 2, conv1_name]]
-
-        for (c, k, s, _name) in conv_def:
-            input = self._conv_norm(
-                input=input,
-                num_filters=c,
-                filter_size=k,
-                stride=s,
-                act='relu',
-                name=_name)
-
-        output = fluid.layers.pool2d(
-            input=input,
-            pool_size=3,
-            pool_stride=2,
-            pool_padding=1,
-            pool_type='max')
-        return output
-
-    def __call__(self, input):
-        assert isinstance(input, Variable)
-        assert not (set(self.feature_maps) - set([2, 3, 4, 5])), \
-            "feature maps {} not in [2, 3, 4, 5]".format(self.feature_maps)
-
-        res_endpoints = []
-
-        res = input
-        feature_maps = self.feature_maps
-        severed_head = getattr(self, 'severed_head', False)
-        if not severed_head:
-            res = self.c1_stage(res)
-            feature_maps = range(2, max(self.feature_maps) + 1)
-
-        for i in feature_maps:
-            res = self.layer_warp(res, i)
-            if i in self.feature_maps:
-                res_endpoints.append(res)
-            if self.freeze_at >= i:
-                res.stop_gradient = True
-        if self.get_prediction:
-            pool = fluid.layers.pool2d(
-                input=res, pool_type='avg', global_pooling=True)
-            stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
-
-            out = fluid.layers.fc(
-                input=pool,
-                size=self.class_dim,
-                param_attr=fluid.param_attr.ParamAttr(
-                    initializer=fluid.initializer.Uniform(-stdv, stdv)))
-            out = fluid.layers.softmax(out)
-            return out
-        return OrderedDict([('res{}_sum'.format(self.feature_maps[idx]), feat)
-                            for idx, feat in enumerate(res_endpoints)])
-
-
-class ResNetC5(ResNet):
-    def __init__(self,
-                 depth=50,
-                 freeze_at=2,
-                 norm_type='affine_channel',
-                 freeze_norm=True,
-                 norm_decay=0.,
-                 variant='b',
-                 feature_maps=[5],
-                 weight_prefix_name=''):
-        super(ResNetC5, self).__init__(depth, freeze_at, norm_type, freeze_norm,
-                                       norm_decay, variant, feature_maps)
-        self.severed_head = True
diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/roi_extractor.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/roi_extractor.py
deleted file mode 100644
index 6e3398d8..00000000
--- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/roi_extractor.py
+++ /dev/null
@@ -1,76 +0,0 @@
-# coding=utf-8
-import paddle.fluid as fluid
-
-__all__ = ['FPNRoIAlign']
-
-
-class FPNRoIAlign(object):
-    """
-    RoI align pooling for FPN feature maps
-    Args:
-        sampling_ratio (int): number of sampling points
-        min_level (int): lowest level of FPN layer
-        max_level (int): highest level of FPN layer
-        canconical_level (int): the canconical FPN feature map level
-        canonical_size (int): the canconical FPN feature map size
-        box_resolution (int): box resolution
-        mask_resolution (int): mask roi resolution
-    """
-
-    def __init__(self,
-                 sampling_ratio=0,
-                 min_level=2,
-                 max_level=5,
-                 canconical_level=4,
-                 canonical_size=224,
-                 box_resolution=7,
-                 mask_resolution=14):
-        super(FPNRoIAlign, self).__init__()
-        self.sampling_ratio = sampling_ratio
-        self.min_level = min_level
-        self.max_level = max_level
-        self.canconical_level = canconical_level
-        self.canonical_size = canonical_size
-        self.box_resolution = box_resolution
-        self.mask_resolution = mask_resolution
-
-    def __call__(self, head_inputs, rois, spatial_scale, is_mask=False):
-        """
-        Adopt RoI align onto several level of feature maps to get RoI features.
-        Distribute RoIs to different levels by area and get a list of RoI
-        features by distributed RoIs and their corresponding feature maps.
-
-        Returns:
-            roi_feat(Variable): RoI features with shape of [M, C, R, R],
-                where M is the number of RoIs and R is RoI resolution
-
-        """
-        k_min = self.min_level
-        k_max = self.max_level
-        num_roi_lvls = k_max - k_min + 1
-        name_list = list(head_inputs.keys())
-        input_name_list = name_list[-num_roi_lvls:]
-        spatial_scale = spatial_scale[-num_roi_lvls:]
-        rois_dist, restore_index = fluid.layers.distribute_fpn_proposals(
-            rois, k_min, k_max, self.canconical_level, self.canonical_size)
-        # rois_dist is in ascend order
-        roi_out_list = []
-        resolution = is_mask and self.mask_resolution or self.box_resolution
-        for lvl in range(num_roi_lvls):
-            name_index = num_roi_lvls - lvl - 1
-            rois_input = rois_dist[lvl]
-            head_input = head_inputs[input_name_list[name_index]]
-            sc = spatial_scale[name_index]
-            roi_out = fluid.layers.roi_align(
-                input=head_input,
-                rois=rois_input,
-                pooled_height=resolution,
-                pooled_width=resolution,
-                spatial_scale=sc,
-                sampling_ratio=self.sampling_ratio)
-            roi_out_list.append(roi_out)
-        roi_feat_shuffle = fluid.layers.concat(roi_out_list)
-        roi_feat_ = fluid.layers.gather(roi_feat_shuffle, restore_index)
-        roi_feat = fluid.layers.lod_reset(roi_feat_, rois)
-
-        return roi_feat
diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/rpn_head.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/rpn_head.py
deleted file mode 100644
index e1b69866..00000000
--- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/rpn_head.py
+++ /dev/null
@@ -1,533 +0,0 @@
-# coding=utf-8
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from paddle import fluid
-from paddle.fluid.param_attr import ParamAttr
-from paddle.fluid.initializer import Normal
-from paddle.fluid.regularizer import L2Decay
-
-__all__ = [
-    'AnchorGenerator', 'RPNTargetAssign', 'GenerateProposals', 'RPNHead',
-    'FPNRPNHead'
-]
-
-
-class AnchorGenerator(object):
-    # __op__ = fluid.layers.anchor_generator
-    def __init__(self,
-                 stride=[16.0, 16.0],
-                 anchor_sizes=[32, 64, 128, 256, 512],
-                 aspect_ratios=[0.5, 1., 2.],
-                 variance=[1., 1., 1., 1.]):
-        super(AnchorGenerator, self).__init__()
-        self.anchor_sizes = anchor_sizes
-        self.aspect_ratios = aspect_ratios
-        self.variance = variance
-        self.stride = stride
-
-
-class RPNTargetAssign(object):
-    # __op__ = fluid.layers.rpn_target_assign
-    def __init__(self,
-                 rpn_batch_size_per_im=256,
-                 rpn_straddle_thresh=0.,
-                 rpn_fg_fraction=0.5,
-                 rpn_positive_overlap=0.7,
-                 rpn_negative_overlap=0.3,
-                 use_random=True):
-        super(RPNTargetAssign, self).__init__()
-        self.rpn_batch_size_per_im = rpn_batch_size_per_im
-        self.rpn_straddle_thresh = rpn_straddle_thresh
-        self.rpn_fg_fraction = rpn_fg_fraction
-        self.rpn_positive_overlap = rpn_positive_overlap
-        self.rpn_negative_overlap = rpn_negative_overlap
-        self.use_random = use_random
-
-
-class GenerateProposals(object):
-    # __op__ = fluid.layers.generate_proposals
-    def __init__(self,
-                 pre_nms_top_n=6000,
-                 post_nms_top_n=1000,
-                 nms_thresh=.5,
-                 min_size=.1,
-                 eta=1.):
-        super(GenerateProposals, self).__init__()
-        self.pre_nms_top_n = pre_nms_top_n
-        self.post_nms_top_n = post_nms_top_n
-        self.nms_thresh = nms_thresh
-        self.min_size = min_size
-        self.eta = eta
-
-
-class RPNHead(object):
-    """
-    RPN Head
-
-    Args:
-        anchor_generator (object): `AnchorGenerator` instance
-        rpn_target_assign (object): `RPNTargetAssign` instance
-        train_proposal (object): `GenerateProposals` instance for training
-        test_proposal (object): `GenerateProposals` instance for testing
-        num_classes (int): number of classes in rpn output
-    """
-    __inject__ = [
-        'anchor_generator', 'rpn_target_assign', 'train_proposal',
-        'test_proposal'
-    ]
-
-    def __init__(self,
-                 anchor_generator,
-                 rpn_target_assign,
-                 train_proposal,
-                 test_proposal,
-                 num_classes=1):
-        super(RPNHead, self).__init__()
-        self.anchor_generator = anchor_generator
-        self.rpn_target_assign = rpn_target_assign
-        self.train_proposal = train_proposal
-        self.test_proposal = test_proposal
-        self.num_classes = num_classes
-
-    def _get_output(self, input):
-        """
-        Get anchor and RPN head output.
-
-        Args:
-            input(Variable): feature map from backbone with shape of [N, C, H, W]
-
-        Returns:
-            rpn_cls_score(Variable): Output of rpn head with shape of [N, num_anchors, H, W].
-            rpn_bbox_pred(Variable): Output of rpn head with shape of [N, num_anchors * 4, H, W].
-        """
-        dim_out = input.shape[1]
-        rpn_conv = fluid.layers.conv2d(
-            input=input,
-            num_filters=dim_out,
-            filter_size=3,
-            stride=1,
-            padding=1,
-            act='relu',
-            name='conv_rpn',
-            param_attr=ParamAttr(
-                name="conv_rpn_w", initializer=Normal(loc=0., scale=0.01)),
-            bias_attr=ParamAttr(
-                name="conv_rpn_b", learning_rate=2., regularizer=L2Decay(0.)))
-        # Generate anchors self.anchor_generator
-        self.anchor, self.anchor_var = fluid.layers.anchor_generator(
-            input=rpn_conv,
-            anchor_sizes=self.anchor_generator.anchor_sizes,
-            aspect_ratios=self.anchor_generator.aspect_ratios,
-            variance=self.anchor_generator.variance,
-            stride=self.anchor_generator.stride)
-
-        num_anchor = self.anchor.shape[2]
-        # Proposal classification scores
-        self.rpn_cls_score = fluid.layers.conv2d(
-            rpn_conv,
-            num_filters=num_anchor * self.num_classes,
-            filter_size=1,
-            stride=1,
-            padding=0,
-            act=None,
-            name='rpn_cls_score',
-            param_attr=ParamAttr(
-                name="rpn_cls_logits_w", initializer=Normal(loc=0.,
-                                                            scale=0.01)),
-            bias_attr=ParamAttr(
-                name="rpn_cls_logits_b",
-                learning_rate=2.,
-                regularizer=L2Decay(0.)))
-        # Proposal bbox regression deltas
-        self.rpn_bbox_pred = fluid.layers.conv2d(
-            rpn_conv,
-            num_filters=4 * num_anchor,
-            filter_size=1,
-            stride=1,
-            padding=0,
-            act=None,
-            name='rpn_bbox_pred',
-            param_attr=ParamAttr(
-                name="rpn_bbox_pred_w", initializer=Normal(loc=0., scale=0.01)),
-            bias_attr=ParamAttr(
-                name="rpn_bbox_pred_b",
-                learning_rate=2.,
-                regularizer=L2Decay(0.)))
-        return self.rpn_cls_score, self.rpn_bbox_pred
-
-    def get_proposals(self, body_feats, im_info, mode='train'):
-        """
-        Get proposals according to the output of backbone.
-
-        Args:
-            body_feats (dict): The dictionary of feature maps from backbone.
-            im_info(Variable): The information of image with shape [N, 3] with
-                shape (height, width, scale).
-            body_feat_names(list): A list of names of feature maps from
-                backbone.
-
-        Returns:
-            rpn_rois(Variable): Output proposals with shape of (rois_num, 4).
-        """
-        # In RPN Heads, only the last feature map of backbone is used.
-        # And body_feat_names[-1] represents the last level name of backbone.
-        body_feat = list(body_feats.values())[-1]
-        rpn_cls_score, rpn_bbox_pred = self._get_output(body_feat)
-
-        if self.num_classes == 1:
-            rpn_cls_prob = fluid.layers.sigmoid(
-                rpn_cls_score, name='rpn_cls_prob')
-        else:
-            rpn_cls_score = fluid.layers.transpose(
-                rpn_cls_score, perm=[0, 2, 3, 1])
-            rpn_cls_score = fluid.layers.reshape(
-                rpn_cls_score, shape=(0, 0, 0, -1, self.num_classes))
-            rpn_cls_prob_tmp = fluid.layers.softmax(
-                rpn_cls_score, use_cudnn=False, name='rpn_cls_prob')
-            rpn_cls_prob_slice = fluid.layers.slice(
-                rpn_cls_prob_tmp, axes=[4], starts=[1], ends=[self.num_classes])
-            rpn_cls_prob, _ = fluid.layers.topk(rpn_cls_prob_slice, 1)
-            rpn_cls_prob = fluid.layers.reshape(
-                rpn_cls_prob, shape=(0, 0, 0, -1))
-            rpn_cls_prob = fluid.layers.transpose(
-                rpn_cls_prob, perm=[0, 3, 1, 2])
-        prop_op = self.train_proposal if mode == 'train' else self.test_proposal
-        # prop_op
-        rpn_rois, rpn_roi_probs = fluid.layers.generate_proposals(
-            scores=rpn_cls_prob,
-            bbox_deltas=rpn_bbox_pred,
-            im_info=im_info,
-            anchors=self.anchor,
-            variances=self.anchor_var,
-            pre_nms_top_n=prop_op.pre_nms_top_n,
-            post_nms_top_n=prop_op.post_nms_top_n,
-            nms_thresh=prop_op.nms_thresh,
-            min_size=prop_op.min_size,
-            eta=prop_op.eta)
-        return rpn_rois
-
-    def _transform_input(self, rpn_cls_score, rpn_bbox_pred, anchor,
-                         anchor_var):
-        rpn_cls_score = fluid.layers.transpose(rpn_cls_score, perm=[0, 2, 3, 1])
-        rpn_bbox_pred = fluid.layers.transpose(rpn_bbox_pred, perm=[0, 2, 3, 1])
-        anchor = fluid.layers.reshape(anchor, shape=(-1, 4))
-        anchor_var = fluid.layers.reshape(anchor_var, shape=(-1, 4))
-        rpn_cls_score = fluid.layers.reshape(
-            x=rpn_cls_score, shape=(0, -1, self.num_classes))
-        rpn_bbox_pred = fluid.layers.reshape(x=rpn_bbox_pred, shape=(0, -1, 4))
-        return rpn_cls_score, rpn_bbox_pred, anchor, anchor_var
-
-    def _get_loss_input(self):
-        for attr in ['rpn_cls_score', 'rpn_bbox_pred', 'anchor', 'anchor_var']:
-            if not getattr(self, attr, None):
-                raise ValueError("self.{} should not be None,".format(attr),
-                                 "call RPNHead.get_proposals first")
-        return self._transform_input(self.rpn_cls_score, self.rpn_bbox_pred,
-                                     self.anchor, self.anchor_var)
-
-    def get_loss(self, im_info, gt_box, is_crowd, gt_label=None):
-        """
-        Sample proposals and Calculate rpn loss.
-
-        Args:
-            im_info(Variable): The information of image with shape [N, 3] with
-                shape (height, width, scale).
-            gt_box(Variable): The ground-truth bounding boxes with shape [M, 4].
-                M is the number of groundtruth.
-            is_crowd(Variable): Indicates groud-truth is crowd or not with
-                shape [M, 1]. M is the number of groundtruth.
-
-        Returns:
-            Type: dict
-                rpn_cls_loss(Variable): RPN classification loss.
-                rpn_bbox_loss(Variable): RPN bounding box regression loss.
-
-        """
-        rpn_cls, rpn_bbox, anchor, anchor_var = self._get_loss_input()
-        if self.num_classes == 1:
-            # self.rpn_target_assign
-            score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \
-                fluid.layers.rpn_target_assign(
-                    bbox_pred=rpn_bbox,
-                    cls_logits=rpn_cls,
-                    anchor_box=anchor,
-                    anchor_var=anchor_var,
-                    gt_boxes=gt_box,
-                    is_crowd=is_crowd,
-                    im_info=im_info,
-                    rpn_batch_size_per_im=self.rpn_target_assign.rpn_batch_size_per_im,
-                    rpn_straddle_thresh=self.rpn_target_assign.rpn_straddle_thresh,
-                    rpn_fg_fraction=self.rpn_target_assign.rpn_fg_fraction,
-                    rpn_positive_overlap=self.rpn_target_assign.rpn_positive_overlap,
-                    rpn_negative_overlap=self.rpn_target_assign.rpn_negative_overlap,
-                    use_random=self.rpn_target_assign.use_random)
-            score_tgt = fluid.layers.cast(x=score_tgt, dtype='float32')
-            score_tgt.stop_gradient = True
-            rpn_cls_loss = fluid.layers.sigmoid_cross_entropy_with_logits(
-                x=score_pred, label=score_tgt)
-        else:
-            score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \
-                self.rpn_target_assign(
-                    bbox_pred=rpn_bbox,
-                    cls_logits=rpn_cls,
-                    anchor_box=anchor,
-                    anchor_var=anchor_var,
-                    gt_boxes=gt_box,
-                    gt_labels=gt_label,
-                    is_crowd=is_crowd,
-                    num_classes=self.num_classes,
-                    im_info=im_info)
-            labels_int64 = fluid.layers.cast(x=score_tgt, dtype='int64')
-            labels_int64.stop_gradient = True
-            rpn_cls_loss = fluid.layers.softmax_with_cross_entropy(
-                logits=score_pred, label=labels_int64, numeric_stable_mode=True)
-
-        rpn_cls_loss = fluid.layers.reduce_mean(
-            rpn_cls_loss, name='loss_rpn_cls')
-
-        loc_tgt = fluid.layers.cast(x=loc_tgt, dtype='float32')
-        loc_tgt.stop_gradient = True
-        rpn_reg_loss = fluid.layers.smooth_l1(
-            x=loc_pred,
-            y=loc_tgt,
-            sigma=3.0,
-            inside_weight=bbox_weight,
-            outside_weight=bbox_weight)
-        rpn_reg_loss = fluid.layers.reduce_sum(
-            rpn_reg_loss, name='loss_rpn_bbox')
-        score_shape = fluid.layers.shape(score_tgt)
-        score_shape = fluid.layers.cast(x=score_shape, dtype='float32')
-        norm = fluid.layers.reduce_prod(score_shape)
-        norm.stop_gradient = True
-        rpn_reg_loss = rpn_reg_loss / norm
-        return {'rpn_cls_loss': rpn_cls_loss, 'rpn_reg_loss': rpn_reg_loss}
-
-
-class FPNRPNHead(RPNHead):
-    """
-    RPN Head that supports FPN input
-
-    Args:
-        anchor_generator (object): `AnchorGenerator` instance
-        rpn_target_assign (object): `RPNTargetAssign` instance
-        train_proposal (object): `GenerateProposals` instance for training
-        test_proposal (object): `GenerateProposals` instance for testing
-        anchor_start_size (int): size of anchor at the first scale
-        num_chan (int): number of FPN output channels
-        min_level (int): lowest level of FPN output
-        max_level (int): highest level of FPN output
-        num_classes (int): number of classes in rpn output
-    """
-
-    def __init__(self,
-                 anchor_generator,
-                 rpn_target_assign,
-                 train_proposal,
-                 test_proposal,
-                 anchor_start_size=32,
-                 num_chan=256,
-                 min_level=2,
-                 max_level=6,
-                 num_classes=1):
-        super(FPNRPNHead, self).__init__(anchor_generator, rpn_target_assign,
-                                         train_proposal, test_proposal)
-        self.anchor_start_size = anchor_start_size
-        self.num_chan = num_chan
-        self.min_level = min_level
-        self.max_level = max_level
-        self.num_classes = num_classes
-
-        self.fpn_rpn_list = []
-        self.anchors_list = []
-        self.anchor_var_list = []
-
-    def _get_output(self, input, feat_lvl):
-        """
-        Get anchor and FPN RPN head output at one level.
-
-        Args:
-            input(Variable): Body feature from backbone.
-            feat_lvl(int): Indicate the level of rpn output corresponding
-                to the level of feature map.
-
-        Return:
-            rpn_cls_score(Variable): Output of one level of fpn rpn head with
-                shape of [N, num_anchors, H, W].
-            rpn_bbox_pred(Variable): Output of one level of fpn rpn head with
-                shape of [N, num_anchors * 4, H, W].
-        """
-        slvl = str(feat_lvl)
-        conv_name = 'conv_rpn_fpn' + slvl
-        cls_name = 'rpn_cls_logits_fpn' + slvl
-        bbox_name = 'rpn_bbox_pred_fpn' + slvl
-        conv_share_name = 'conv_rpn_fpn' + str(self.min_level)
-        cls_share_name = 'rpn_cls_logits_fpn' + str(self.min_level)
-        bbox_share_name = 'rpn_bbox_pred_fpn' + str(self.min_level)
-
-        num_anchors = len(self.anchor_generator.aspect_ratios)
-        conv_rpn_fpn = fluid.layers.conv2d(
-            input=input,
-            num_filters=self.num_chan,
-            filter_size=3,
-            padding=1,
-            act='relu',
-            name=conv_name,
-            param_attr=ParamAttr(
-                name=conv_share_name + '_w',
-                initializer=Normal(loc=0., scale=0.01)),
-            bias_attr=ParamAttr(
-                name=conv_share_name + '_b',
-                learning_rate=2.,
-                regularizer=L2Decay(0.)))
-
-        # self.anchor_generator
-        self.anchors, self.anchor_var = fluid.layers.anchor_generator(
-            input=conv_rpn_fpn,
-            anchor_sizes=(self.anchor_start_size * 2.**
-                          (feat_lvl - self.min_level), ),
-            stride=(2.**feat_lvl, 2.**feat_lvl),
-            aspect_ratios=self.anchor_generator.aspect_ratios,
-            variance=self.anchor_generator.variance)
-
-        cls_num_filters = num_anchors * self.num_classes
-        self.rpn_cls_score = fluid.layers.conv2d(
-            input=conv_rpn_fpn,
-            num_filters=cls_num_filters,
-            filter_size=1,
-            act=None,
-            name=cls_name,
-            param_attr=ParamAttr(
-                name=cls_share_name + '_w',
-                initializer=Normal(loc=0., scale=0.01)),
-            bias_attr=ParamAttr(
-                name=cls_share_name + '_b',
-                learning_rate=2.,
-                regularizer=L2Decay(0.)))
-        self.rpn_bbox_pred = fluid.layers.conv2d(
-            input=conv_rpn_fpn,
-            num_filters=num_anchors * 4,
-            filter_size=1,
-            act=None,
-            name=bbox_name,
-            param_attr=ParamAttr(
-                name=bbox_share_name + '_w',
-                initializer=Normal(loc=0., scale=0.01)),
-            bias_attr=ParamAttr(
-                name=bbox_share_name + '_b',
-                learning_rate=2.,
-                regularizer=L2Decay(0.)))
-        return self.rpn_cls_score, self.rpn_bbox_pred
-
-    def _get_single_proposals(self, body_feat, im_info, feat_lvl, mode='train'):
-        """
-        Get proposals in one level according to the output of fpn rpn head
-
-        Args:
-            body_feat(Variable): the feature map from backone.
-            im_info(Variable): The information of image with shape [N, 3] with
-                format (height, width, scale).
-            feat_lvl(int): Indicate the level of proposals corresponding to
-                the feature maps.
-
-        Returns:
-            rpn_rois_fpn(Variable): Output proposals with shape of (rois_num, 4).
-            rpn_roi_probs_fpn(Variable): Scores of proposals with
-                shape of (rois_num, 1).
-        """
-
-        rpn_cls_score_fpn, rpn_bbox_pred_fpn = self._get_output(
-            body_feat, feat_lvl)
-
-        prop_op = self.train_proposal if mode == 'train' else self.test_proposal
-        if self.num_classes == 1:
-            rpn_cls_prob_fpn = fluid.layers.sigmoid(
-                rpn_cls_score_fpn, name='rpn_cls_prob_fpn' + str(feat_lvl))
-        else:
-            rpn_cls_score_fpn = fluid.layers.transpose(
-                rpn_cls_score_fpn, perm=[0, 2, 3, 1])
-            rpn_cls_score_fpn = fluid.layers.reshape(
-                rpn_cls_score_fpn, shape=(0, 0, 0, -1, self.num_classes))
-            rpn_cls_prob_fpn = fluid.layers.softmax(
-                rpn_cls_score_fpn,
-                use_cudnn=False,
-                name='rpn_cls_prob_fpn' + str(feat_lvl))
-            rpn_cls_prob_fpn = fluid.layers.slice(
-                rpn_cls_prob_fpn, axes=[4], starts=[1], ends=[self.num_classes])
-            rpn_cls_prob_fpn, _ = fluid.layers.topk(rpn_cls_prob_fpn, 1)
-            rpn_cls_prob_fpn = fluid.layers.reshape(
-                rpn_cls_prob_fpn, shape=(0, 0, 0, -1))
-            rpn_cls_prob_fpn = fluid.layers.transpose(
-                rpn_cls_prob_fpn, perm=[0, 3, 1, 2])
-        # prop_op
-        rpn_rois_fpn, rpn_roi_prob_fpn = fluid.layers.generate_proposals(
-            scores=rpn_cls_prob_fpn,
-            bbox_deltas=rpn_bbox_pred_fpn,
-            im_info=im_info,
-            anchors=self.anchors,
-            variances=self.anchor_var,
-            pre_nms_top_n=prop_op.pre_nms_top_n,
-            post_nms_top_n=prop_op.post_nms_top_n,
-            nms_thresh=prop_op.nms_thresh,
-            min_size=prop_op.min_size,
-            eta=prop_op.eta)
-        return rpn_rois_fpn, rpn_roi_prob_fpn
-
-    def get_proposals(self, fpn_feats, im_info, mode='train'):
-        """
-        Get proposals in multiple levels according to the output of fpn
-        rpn head
-
-        Args:
-            fpn_feats(dict): A dictionary represents the output feature map
-                of FPN with their name.
-            im_info(Variable): The information of image with shape [N, 3] with
-                format (height, width, scale).
-
-        Return:
-            rois_list(Variable): Output proposals in shape of [rois_num, 4]
-        """
-        rois_list = []
-        roi_probs_list = []
-        fpn_feat_names = list(fpn_feats.keys())
-        for lvl in range(self.min_level, self.max_level + 1):
-            fpn_feat_name = fpn_feat_names[self.max_level - lvl]
-            fpn_feat = fpn_feats[fpn_feat_name]
-            rois_fpn, roi_probs_fpn = self._get_single_proposals(
-                fpn_feat, im_info, lvl, mode)
-            self.fpn_rpn_list.append((self.rpn_cls_score, self.rpn_bbox_pred))
-            rois_list.append(rois_fpn)
-            roi_probs_list.append(roi_probs_fpn)
-            self.anchors_list.append(self.anchors)
-            self.anchor_var_list.append(self.anchor_var)
-        prop_op = self.train_proposal if mode == 'train' else self.test_proposal
-        post_nms_top_n = prop_op.post_nms_top_n
-        rois_collect = fluid.layers.collect_fpn_proposals(
-            rois_list,
-            roi_probs_list,
-            self.min_level,
-            self.max_level,
-            post_nms_top_n,
-            name='collect')
-        return rois_collect
-
-    def _get_loss_input(self):
-        rpn_clses = []
-        rpn_bboxes = []
-        anchors = []
-        anchor_vars = []
-        for i in range(len(self.fpn_rpn_list)):
-            single_input = self._transform_input(
-                self.fpn_rpn_list[i][0], self.fpn_rpn_list[i][1],
-                self.anchors_list[i], self.anchor_var_list[i])
-            rpn_clses.append(single_input[0])
-            rpn_bboxes.append(single_input[1])
-            anchors.append(single_input[2])
-            anchor_vars.append(single_input[3])
-
-        rpn_cls = fluid.layers.concat(rpn_clses, axis=1)
-        rpn_bbox = fluid.layers.concat(rpn_bboxes, axis=1)
-        anchors = fluid.layers.concat(anchors)
-        anchor_var = fluid.layers.concat(anchor_vars)
-        return rpn_cls, rpn_bbox, anchors, anchor_var
diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/test.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/test.py
new file mode 100644
index 00000000..0a775a4f
--- /dev/null
+++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/test.py
@@ -0,0 +1,108 @@
+import os
+import shutil
+import unittest
+
+import cv2
+import requests
+import paddlehub as hub
+
+
+class TestHubModule(unittest.TestCase):
+    """End-to-end checks for the faster_rcnn_resnet50_fpn_coco2017 hub module.
+
+    One sample image is downloaded, then detection is exercised through the
+    file-path and in-memory inputs and the inference-model export is checked.
+    setUpClass needs network access to fetch the sample image; tearDownClass
+    removes ./tests, ./inference and ./detection_result afterwards.
+    """
+
+    @classmethod
+    def setUpClass(cls) -> None:
+        """Download the sample image to tests/test.jpg and load the module once."""
+        img_url = 'https://ai-studio-static-online.cdn.bcebos.com/68313e182f5e4ad9907e69dac9ece8fc50840d7ffbd24fa88396f009958f969a'
+        os.makedirs('tests', exist_ok=True)
+        # A timeout keeps an unreachable host from hanging the whole suite.
+        response = requests.get(img_url, timeout=60)
+        assert response.status_code == 200, 'Network Error.'
+        with open('tests/test.jpg', 'wb') as f:
+            f.write(response.content)
+        cls.module = hub.Module(name="faster_rcnn_resnet50_fpn_coco2017")
+
+    @classmethod
+    def tearDownClass(cls) -> None:
+        """Delete the directories this suite may have produced.
+
+        ignore_errors=True because a directory may be absent (for example if
+        the test that would create it failed), and cleanup must not raise.
+        """
+        for path in ('tests', 'inference', 'detection_result'):
+            shutil.rmtree(path, ignore_errors=True)
+
+    def _check_first_detection(self, results):
+        """Assert the first box of the first result is the expected cat.
+
+        Args:
+            results: return value of ``object_detection``; only
+                ``results[0]['data'][0]`` is inspected.
+        """
+        bbox = results[0]['data'][0]
+
+        # bbox is passed as msg so a failure reports the offending values.
+        self.assertEqual(bbox['label'], 'cat', bbox)
+        self.assertGreater(bbox['confidence'], 0.5, bbox)
+        self.assertTrue(200 < bbox['left'] < 800, bbox)
+        self.assertTrue(2500 < bbox['right'] < 3500, bbox)
+        self.assertTrue(500 < bbox['top'] < 1500, bbox)
+        self.assertTrue(3500 < bbox['bottom'] < 4500, bbox)
+
+    def test_object_detection1(self):
+        """Detection from an image file path."""
+        results = self.module.object_detection(
+            paths=['tests/test.jpg']
+        )
+        self._check_first_detection(results)
+
+    def test_object_detection2(self):
+        """Detection from an image array already loaded with cv2.imread."""
+        results = self.module.object_detection(
+            images=[cv2.imread('tests/test.jpg')]
+        )
+        self._check_first_detection(results)
+
+    def test_object_detection3(self):
+        """Detection from an image array with visualization turned off."""
+        results = self.module.object_detection(
+            images=[cv2.imread('tests/test.jpg')],
+            visualization=False
+        )
+        self._check_first_detection(results)
+
+    def test_object_detection4(self):
+        """paths=['no.jpg'] is expected to raise AssertionError."""
+        # NOTE(review): presumably relies on 'no.jpg' not existing in the
+        # working directory -- confirm against the module's path check.
+        self.assertRaises(
+            AssertionError,
+            self.module.object_detection,
+            paths=['no.jpg']
+        )
+
+    def test_object_detection5(self):
+        """A str inside ``images`` is expected to raise cv2.error."""
+        self.assertRaises(
+            cv2.error,
+            self.module.object_detection,
+            images=['test.jpg']
+        )
+
+    def test_save_inference_model(self):
+        """Export under ./inference and check both expected files exist."""
+        self.module.save_inference_model('./inference/model')
+
+        for suffix in ('.pdmodel', '.pdiparams'):
+            path = './inference/model' + suffix
+            self.assertTrue(os.path.exists(path), path)
+
+
+if __name__ == "__main__":  # run the suite when this file is executed directly
+    unittest.main()
-- 
GitLab