From 01002e40f93ed8d59c2e96cb9f4d7d32bbd87f99 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 16 Sep 2022 14:27:14 +0800 Subject: [PATCH] update faster_rcnn_resnet50_fpn_coco2017 (#1948) * update faster_rcnn_resnet50_fpn_coco2017 * update unittest faster_rcnn_resnet50_fpn_coco2017 * update unittest * update unittest * update gpu config * update * add clean func * update save inference model Co-authored-by: chenjian --- .../README.md | 17 +- .../README_en.md | 17 +- .../bbox_assigner.py | 20 - .../bbox_head.py | 270 --------- .../data_feed.py | 3 - .../faster_rcnn_resnet50_fpn_coco2017/fpn.py | 296 ---------- .../module.py | 302 ++-------- .../name_adapter.py | 61 -- .../nonlocal_helper.py | 154 ----- .../processor.py | 8 +- .../resnet.py | 447 --------------- .../roi_extractor.py | 76 --- .../rpn_head.py | 533 ------------------ .../faster_rcnn_resnet50_fpn_coco2017/test.py | 108 ++++ 14 files changed, 162 insertions(+), 2150 deletions(-) delete mode 100644 modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_assigner.py delete mode 100644 modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_head.py delete mode 100644 modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/fpn.py delete mode 100644 modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/name_adapter.py delete mode 100644 modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/nonlocal_helper.py delete mode 100644 modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/resnet.py delete mode 100644 modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/roi_extractor.py delete mode 100644 modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/rpn_head.py create mode 100644 modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/test.py diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README.md b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README.md index 9d003b80..ef5324a7 100644 --- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README.md +++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README.md @@ -102,19 +102,13 @@ - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - 将模型保存到指定路径。 - **参数** - - dirname: 存在模型的目录名称;
- - model\_filename: 模型文件名称,默认为\_\_model\_\_;
- - params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效);
- - combined: 是否将参数保存到统一的一个文件中。 + - dirname: 模型保存路径
## 四、服务部署 @@ -167,6 +161,11 @@ * 1.0.1 修复numpy数据读取问题 + +* 1.1.0 + + 移除 fluid api + - ```shell - $ hub install faster_rcnn_resnet50_fpn_coco2017==1.0.1 + $ hub install faster_rcnn_resnet50_fpn_coco2017==1.1.0 ``` diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README_en.md b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README_en.md index d90beb64..bf4c7274 100644 --- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README_en.md +++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README_en.md @@ -101,19 +101,13 @@ - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - Save model to specific path - **Parameters** - - dirname: output dir for saving model - - model\_filename: filename for saving model - - params\_filename: filename for saving parameters - - combined: whether save parameters into one file + - dirname: model save path ## IV.Server Deployment @@ -166,6 +160,11 @@ * 1.0.1 Fix the problem of reading numpy + +* 1.1.0 + + Remove fluid api + - ```shell - $ hub install faster_rcnn_resnet50_fpn_coco2017==1.0.1 + $ hub install faster_rcnn_resnet50_fpn_coco2017==1.1.0 ``` diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_assigner.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_assigner.py deleted file mode 100644 index d033382c..00000000 --- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_assigner.py +++ /dev/null @@ -1,20 +0,0 @@ -class BBoxAssigner(object): - # __op__ = fluid.layers.generate_proposal_labels - def __init__(self, - batch_size_per_im=512, - fg_fraction=.25, - fg_thresh=.5, - bg_thresh_hi=.5, - bg_thresh_lo=0., - bbox_reg_weights=[0.1, 0.1, 0.2, 0.2], - class_nums=81, - shuffle_before_sample=True): - super(BBoxAssigner, self).__init__() - self.batch_size_per_im = batch_size_per_im - self.fg_fraction = fg_fraction - self.fg_thresh = fg_thresh - self.bg_thresh_hi = bg_thresh_hi - self.bg_thresh_lo = bg_thresh_lo - self.bbox_reg_weights = bbox_reg_weights - self.class_nums = class_nums - self.use_random = shuffle_before_sample diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_head.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_head.py deleted file mode 100644 index 8080ed22..00000000 --- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_head.py +++ /dev/null @@ -1,270 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import OrderedDict - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.initializer import Normal, Xavier -from paddle.fluid.regularizer import L2Decay -from paddle.fluid.initializer import MSRA - - -class MultiClassNMS(object): - # __op__ = fluid.layers.multiclass_nms - def __init__(self, - score_threshold=.05, - nms_top_k=-1, - keep_top_k=100, - nms_threshold=.5, - normalized=False, - nms_eta=1.0, - background_label=0): - super(MultiClassNMS, self).__init__() - self.score_threshold = score_threshold - self.nms_top_k = nms_top_k - self.keep_top_k = keep_top_k - self.nms_threshold = nms_threshold - self.normalized = normalized - self.nms_eta = nms_eta - self.background_label = background_label - - -class SmoothL1Loss(object): - ''' - Smooth L1 loss - Args: - sigma (float): hyper param in smooth l1 loss - ''' - - def __init__(self, sigma=1.0): - super(SmoothL1Loss, self).__init__() - self.sigma = sigma - - def __call__(self, x, y, inside_weight=None, outside_weight=None): - return fluid.layers.smooth_l1( - x, - y, - inside_weight=inside_weight, - outside_weight=outside_weight, - sigma=self.sigma) - - -class BoxCoder(object): - def __init__(self, - prior_box_var=[0.1, 0.1, 0.2, 0.2], - code_type='decode_center_size', - box_normalized=False, - axis=1): - super(BoxCoder, self).__init__() - self.prior_box_var = prior_box_var - self.code_type = code_type - self.box_normalized = box_normalized - self.axis = axis - - -class TwoFCHead(object): - """ - RCNN head with two Fully Connected layers - - Args: - mlp_dim (int): num of filters for the fc layers - """ - - def __init__(self, mlp_dim=1024): - super(TwoFCHead, self).__init__() - self.mlp_dim = mlp_dim - - def __call__(self, roi_feat): - fan = roi_feat.shape[1] * roi_feat.shape[2] * roi_feat.shape[3] - - fc6 = fluid.layers.fc( - input=roi_feat, - size=self.mlp_dim, - act='relu', - name='fc6', - param_attr=ParamAttr(name='fc6_w', initializer=Xavier(fan_out=fan)), - bias_attr=ParamAttr( - name='fc6_b', learning_rate=2., regularizer=L2Decay(0.))) - head_feat = fluid.layers.fc( - input=fc6, - size=self.mlp_dim, - act='relu', - name='fc7', - param_attr=ParamAttr(name='fc7_w', initializer=Xavier()), - bias_attr=ParamAttr( - name='fc7_b', learning_rate=2., regularizer=L2Decay(0.))) - - return head_feat - - -class BBoxHead(object): - """ - RCNN bbox head - - Args: - head (object): the head module instance, e.g., `ResNetC5`, `TwoFCHead` - box_coder (object): `BoxCoder` instance - nms (object): `MultiClassNMS` instance - num_classes: number of output classes - """ - __inject__ = ['head', 'box_coder', 'nms', 'bbox_loss'] - __shared__ = ['num_classes'] - - def __init__(self, - head, - box_coder=BoxCoder(), - nms=MultiClassNMS(), - bbox_loss=SmoothL1Loss(), - num_classes=81): - super(BBoxHead, self).__init__() - self.head = head - self.num_classes = num_classes - self.box_coder = box_coder - self.nms = nms - self.bbox_loss = bbox_loss - self.head_feat = None - - def get_head_feat(self, input=None): - """ - Get the bbox head feature map. - """ - - if input is not None: - feat = self.head(input) - if isinstance(feat, OrderedDict): - feat = list(feat.values())[0] - self.head_feat = feat - return self.head_feat - - def _get_output(self, roi_feat): - """ - Get bbox head output. - - Args: - roi_feat (Variable): RoI feature from RoIExtractor. - - Returns: - cls_score(Variable): Output of rpn head with shape of - [N, num_anchors, H, W]. - bbox_pred(Variable): Output of rpn head with shape of - [N, num_anchors * 4, H, W]. - """ - head_feat = self.get_head_feat(roi_feat) - # when ResNetC5 output a single feature map - if not isinstance(self.head, TwoFCHead): - head_feat = fluid.layers.pool2d( - head_feat, pool_type='avg', global_pooling=True) - cls_score = fluid.layers.fc( - input=head_feat, - size=self.num_classes, - act=None, - name='cls_score', - param_attr=ParamAttr( - name='cls_score_w', initializer=Normal(loc=0.0, scale=0.01)), - bias_attr=ParamAttr( - name='cls_score_b', learning_rate=2., regularizer=L2Decay(0.))) - bbox_pred = fluid.layers.fc( - input=head_feat, - size=4 * self.num_classes, - act=None, - name='bbox_pred', - param_attr=ParamAttr( - name='bbox_pred_w', initializer=Normal(loc=0.0, scale=0.001)), - bias_attr=ParamAttr( - name='bbox_pred_b', learning_rate=2., regularizer=L2Decay(0.))) - return cls_score, bbox_pred - - def get_loss(self, roi_feat, labels_int32, bbox_targets, - bbox_inside_weights, bbox_outside_weights): - """ - Get bbox_head loss. - - Args: - roi_feat (Variable): RoI feature from RoIExtractor. - labels_int32(Variable): Class label of a RoI with shape [P, 1]. - P is the number of RoI. - bbox_targets(Variable): Box label of a RoI with shape - [P, 4 * class_nums]. - bbox_inside_weights(Variable): Indicates whether a box should - contribute to loss. Same shape as bbox_targets. - bbox_outside_weights(Variable): Indicates whether a box should - contribute to loss. Same shape as bbox_targets. - - Return: - Type: Dict - loss_cls(Variable): bbox_head loss. - loss_bbox(Variable): bbox_head loss. - """ - - cls_score, bbox_pred = self._get_output(roi_feat) - - labels_int64 = fluid.layers.cast(x=labels_int32, dtype='int64') - labels_int64.stop_gradient = True - loss_cls = fluid.layers.softmax_with_cross_entropy( - logits=cls_score, label=labels_int64, numeric_stable_mode=True) - loss_cls = fluid.layers.reduce_mean(loss_cls) - loss_bbox = self.bbox_loss( - x=bbox_pred, - y=bbox_targets, - inside_weight=bbox_inside_weights, - outside_weight=bbox_outside_weights) - loss_bbox = fluid.layers.reduce_mean(loss_bbox) - return {'loss_cls': loss_cls, 'loss_bbox': loss_bbox} - - def get_prediction(self, - roi_feat, - rois, - im_info, - im_shape, - return_box_score=False): - """ - Get prediction bounding box in test stage. - - Args: - roi_feat (Variable): RoI feature from RoIExtractor. - rois (Variable): Output of generate_proposals in rpn head. - im_info (Variable): A 2-D LoDTensor with shape [B, 3]. B is the - number of input images, each element consists of im_height, - im_width, im_scale. - im_shape (Variable): Actual shape of original image with shape - [B, 3]. B is the number of images, each element consists of - original_height, original_width, 1 - - Returns: - pred_result(Variable): Prediction result with shape [N, 6]. Each - row has 6 values: [label, confidence, xmin, ymin, xmax, ymax]. - N is the total number of prediction. - """ - cls_score, bbox_pred = self._get_output(roi_feat) - - im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3]) - im_scale = fluid.layers.sequence_expand(im_scale, rois) - boxes = rois / im_scale - cls_prob = fluid.layers.softmax(cls_score, use_cudnn=False) - bbox_pred = fluid.layers.reshape(bbox_pred, (-1, self.num_classes, 4)) - # self.box_coder - decoded_box = fluid.layers.box_coder( - prior_box=boxes, - target_box=bbox_pred, - prior_box_var=self.box_coder.prior_box_var, - code_type=self.box_coder.code_type, - box_normalized=self.box_coder.box_normalized, - axis=self.box_coder.axis) - cliped_box = fluid.layers.box_clip(input=decoded_box, im_info=im_shape) - if return_box_score: - return {'bbox': cliped_box, 'score': cls_prob} - # self.nms - pred_result = fluid.layers.multiclass_nms( - bboxes=cliped_box, - scores=cls_prob, - score_threshold=self.nms.score_threshold, - nms_top_k=self.nms.nms_top_k, - keep_top_k=self.nms.keep_top_k, - nms_threshold=self.nms.nms_threshold, - normalized=self.nms.normalized, - nms_eta=self.nms.nms_eta, - background_label=self.nms.background_label) - return pred_result diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/data_feed.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/data_feed.py index b38501e5..c9e52d54 100644 --- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/data_feed.py +++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/data_feed.py @@ -4,12 +4,9 @@ from __future__ import print_function from __future__ import division import os -from collections import OrderedDict import cv2 import numpy as np -from PIL import Image, ImageEnhance -from paddle import fluid __all__ = ['test_reader'] diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/fpn.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/fpn.py deleted file mode 100644 index bd19c712..00000000 --- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/fpn.py +++ /dev/null @@ -1,296 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import copy -from collections import OrderedDict - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.initializer import Xavier -from paddle.fluid.regularizer import L2Decay - -__all__ = ['ConvNorm', 'FPN'] - - -def ConvNorm(input, - num_filters, - filter_size, - stride=1, - groups=1, - norm_decay=0., - norm_type='affine_channel', - norm_groups=32, - dilation=1, - lr_scale=1, - freeze_norm=False, - act=None, - norm_name=None, - initializer=None, - name=None): - fan = num_filters - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=((filter_size - 1) // 2) * dilation, - dilation=dilation, - groups=groups, - act=None, - param_attr=ParamAttr( - name=name + "_weights", - initializer=initializer, - learning_rate=lr_scale), - bias_attr=False, - name=name + '.conv2d.output.1') - - norm_lr = 0. if freeze_norm else 1. - pattr = ParamAttr( - name=norm_name + '_scale', - learning_rate=norm_lr * lr_scale, - regularizer=L2Decay(norm_decay)) - battr = ParamAttr( - name=norm_name + '_offset', - learning_rate=norm_lr * lr_scale, - regularizer=L2Decay(norm_decay)) - - if norm_type in ['bn', 'sync_bn']: - global_stats = True if freeze_norm else False - out = fluid.layers.batch_norm( - input=conv, - act=act, - name=norm_name + '.output.1', - param_attr=pattr, - bias_attr=battr, - moving_mean_name=norm_name + '_mean', - moving_variance_name=norm_name + '_variance', - use_global_stats=global_stats) - scale = fluid.framework._get_var(pattr.name) - bias = fluid.framework._get_var(battr.name) - elif norm_type == 'gn': - out = fluid.layers.group_norm( - input=conv, - act=act, - name=norm_name + '.output.1', - groups=norm_groups, - param_attr=pattr, - bias_attr=battr) - scale = fluid.framework._get_var(pattr.name) - bias = fluid.framework._get_var(battr.name) - elif norm_type == 'affine_channel': - scale = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=pattr, - default_initializer=fluid.initializer.Constant(1.)) - bias = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=battr, - default_initializer=fluid.initializer.Constant(0.)) - out = fluid.layers.affine_channel( - x=conv, scale=scale, bias=bias, act=act) - if freeze_norm: - scale.stop_gradient = True - bias.stop_gradient = True - return out - - -class FPN(object): - """ - Feature Pyramid Network, see https://arxiv.org/abs/1612.03144 - - Args: - num_chan (int): number of feature channels - min_level (int): lowest level of the backbone feature map to use - max_level (int): highest level of the backbone feature map to use - spatial_scale (list): feature map scaling factor - has_extra_convs (bool): whether has extral convolutions in higher levels - norm_type (str|None): normalization type, 'bn'/'sync_bn'/'affine_channel' - """ - __shared__ = ['norm_type', 'freeze_norm'] - - def __init__(self, - num_chan=256, - min_level=2, - max_level=6, - spatial_scale=[1. / 32., 1. / 16., 1. / 8., 1. / 4.], - has_extra_convs=False, - norm_type=None, - freeze_norm=False): - self.freeze_norm = freeze_norm - self.num_chan = num_chan - self.min_level = min_level - self.max_level = max_level - self.spatial_scale = spatial_scale - self.has_extra_convs = has_extra_convs - self.norm_type = norm_type - - def _add_topdown_lateral(self, body_name, body_input, upper_output): - lateral_name = 'fpn_inner_' + body_name + '_lateral' - topdown_name = 'fpn_topdown_' + body_name - fan = body_input.shape[1] - if self.norm_type: - initializer = Xavier(fan_out=fan) - lateral = ConvNorm( - body_input, - self.num_chan, - 1, - initializer=initializer, - norm_type=self.norm_type, - freeze_norm=self.freeze_norm, - name=lateral_name, - norm_name=lateral_name) - else: - lateral = fluid.layers.conv2d( - body_input, - self.num_chan, - 1, - param_attr=ParamAttr( - name=lateral_name + "_w", initializer=Xavier(fan_out=fan)), - bias_attr=ParamAttr( - name=lateral_name + "_b", - learning_rate=2., - regularizer=L2Decay(0.)), - name=lateral_name) - topdown = fluid.layers.resize_nearest( - upper_output, scale=2., name=topdown_name) - return lateral + topdown - - def get_output(self, body_dict): - """ - Add FPN onto backbone. - - Args: - body_dict(OrderedDict): Dictionary of variables and each element is the - output of backbone. - - Return: - fpn_dict(OrderedDict): A dictionary represents the output of FPN with - their name. - spatial_scale(list): A list of multiplicative spatial scale factor. - """ - spatial_scale = copy.deepcopy(self.spatial_scale) - body_name_list = list(body_dict.keys())[::-1] - num_backbone_stages = len(body_name_list) - self.fpn_inner_output = [[] for _ in range(num_backbone_stages)] - fpn_inner_name = 'fpn_inner_' + body_name_list[0] - body_input = body_dict[body_name_list[0]] - fan = body_input.shape[1] - if self.norm_type: - initializer = Xavier(fan_out=fan) - self.fpn_inner_output[0] = ConvNorm( - body_input, - self.num_chan, - 1, - initializer=initializer, - norm_type=self.norm_type, - freeze_norm=self.freeze_norm, - name=fpn_inner_name, - norm_name=fpn_inner_name) - else: - self.fpn_inner_output[0] = fluid.layers.conv2d( - body_input, - self.num_chan, - 1, - param_attr=ParamAttr( - name=fpn_inner_name + "_w", - initializer=Xavier(fan_out=fan)), - bias_attr=ParamAttr( - name=fpn_inner_name + "_b", - learning_rate=2., - regularizer=L2Decay(0.)), - name=fpn_inner_name) - for i in range(1, num_backbone_stages): - body_name = body_name_list[i] - body_input = body_dict[body_name] - top_output = self.fpn_inner_output[i - 1] - fpn_inner_single = self._add_topdown_lateral( - body_name, body_input, top_output) - self.fpn_inner_output[i] = fpn_inner_single - fpn_dict = {} - fpn_name_list = [] - for i in range(num_backbone_stages): - fpn_name = 'fpn_' + body_name_list[i] - fan = self.fpn_inner_output[i].shape[1] * 3 * 3 - if self.norm_type: - initializer = Xavier(fan_out=fan) - fpn_output = ConvNorm( - self.fpn_inner_output[i], - self.num_chan, - 3, - initializer=initializer, - norm_type=self.norm_type, - freeze_norm=self.freeze_norm, - name=fpn_name, - norm_name=fpn_name) - else: - fpn_output = fluid.layers.conv2d( - self.fpn_inner_output[i], - self.num_chan, - filter_size=3, - padding=1, - param_attr=ParamAttr( - name=fpn_name + "_w", initializer=Xavier(fan_out=fan)), - bias_attr=ParamAttr( - name=fpn_name + "_b", - learning_rate=2., - regularizer=L2Decay(0.)), - name=fpn_name) - fpn_dict[fpn_name] = fpn_output - fpn_name_list.append(fpn_name) - if not self.has_extra_convs and self.max_level - self.min_level == len( - spatial_scale): - body_top_name = fpn_name_list[0] - body_top_extension = fluid.layers.pool2d( - fpn_dict[body_top_name], - 1, - 'max', - pool_stride=2, - name=body_top_name + '_subsampled_2x') - fpn_dict[body_top_name + '_subsampled_2x'] = body_top_extension - fpn_name_list.insert(0, body_top_name + '_subsampled_2x') - spatial_scale.insert(0, spatial_scale[0] * 0.5) - # Coarser FPN levels introduced for RetinaNet - highest_backbone_level = self.min_level + len(spatial_scale) - 1 - if self.has_extra_convs and self.max_level > highest_backbone_level: - fpn_blob = body_dict[body_name_list[0]] - for i in range(highest_backbone_level + 1, self.max_level + 1): - fpn_blob_in = fpn_blob - fpn_name = 'fpn_' + str(i) - if i > highest_backbone_level + 1: - fpn_blob_in = fluid.layers.relu(fpn_blob) - fan = fpn_blob_in.shape[1] * 3 * 3 - fpn_blob = fluid.layers.conv2d( - input=fpn_blob_in, - num_filters=self.num_chan, - filter_size=3, - stride=2, - padding=1, - param_attr=ParamAttr( - name=fpn_name + "_w", initializer=Xavier(fan_out=fan)), - bias_attr=ParamAttr( - name=fpn_name + "_b", - learning_rate=2., - regularizer=L2Decay(0.)), - name=fpn_name) - fpn_dict[fpn_name] = fpn_blob - fpn_name_list.insert(0, fpn_name) - spatial_scale.insert(0, spatial_scale[0] * 0.5) - res_dict = OrderedDict([(k, fpn_dict[k]) for k in fpn_name_list]) - return res_dict, spatial_scale diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/module.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/module.py index b8dd5afa..65049189 100644 --- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/module.py +++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/module.py @@ -6,41 +6,32 @@ from __future__ import print_function import os import ast import argparse -from collections import OrderedDict -from functools import partial from math import ceil +import paddle import numpy as np -import paddle.fluid as fluid -import paddlehub as hub +import paddle.jit +import paddle.static from paddlehub.module.module import moduleinfo, runnable, serving -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.io.parser import txt_parser -from paddlehub.common.paddle_helper import add_vars_prefix - -from faster_rcnn_resnet50_fpn_coco2017.processor import load_label_info, postprocess, base64_to_cv2 -from faster_rcnn_resnet50_fpn_coco2017.data_feed import test_reader, padding_minibatch -from faster_rcnn_resnet50_fpn_coco2017.fpn import FPN -from faster_rcnn_resnet50_fpn_coco2017.resnet import ResNet -from faster_rcnn_resnet50_fpn_coco2017.rpn_head import AnchorGenerator, RPNTargetAssign, GenerateProposals, FPNRPNHead -from faster_rcnn_resnet50_fpn_coco2017.bbox_head import MultiClassNMS, BBoxHead, TwoFCHead -from faster_rcnn_resnet50_fpn_coco2017.bbox_assigner import BBoxAssigner -from faster_rcnn_resnet50_fpn_coco2017.roi_extractor import FPNRoIAlign +from paddle.inference import Config, create_predictor +from paddlehub.utils.parser import txt_parser +from .processor import load_label_info, postprocess, base64_to_cv2 +from .data_feed import test_reader, padding_minibatch @moduleinfo( name="faster_rcnn_resnet50_fpn_coco2017", - version="1.0.1", + version="1.1.0", type="cv/object_detection", summary= "Baidu's Faster-RCNN model for object detection, whose backbone is ResNet50, processed with Feature Pyramid Networks", author="paddlepaddle", author_email="paddle-dev@baidu.com") -class FasterRCNNResNet50RPN(hub.Module): - def _initialize(self): +class FasterRCNNResNet50RPN: + def __init__(self): # default pretrained model, Faster-RCNN with backbone ResNet50, shape of input tensor is [3, 800, 1333] self.default_pretrained_model_path = os.path.join( - self.directory, "faster_rcnn_resnet50_fpn_model") + self.directory, "faster_rcnn_resnet50_fpn_model", "model") self.label_names = load_label_info( os.path.join(self.directory, "label_file.txt")) self._set_config() @@ -49,10 +40,12 @@ class FasterRCNNResNet50RPN(hub.Module): """ predictor config setting """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) + model = self.default_pretrained_model_path+'.pdmodel' + params = self.default_pretrained_model_path+'.pdiparams' + cpu_config = Config(model, params) cpu_config.disable_glog_info() cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] @@ -61,245 +54,14 @@ class FasterRCNNResNet50RPN(hub.Module): except: use_gpu = False if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config = Config(model, params) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def context(self, - num_classes=81, - trainable=True, - pretrained=True, - phase='train'): - """ - Distill the Head Features, so as to perform transfer learning. - - Args: - num_classes (int): number of categories - trainable (bool): whether to set parameters trainable. - pretrained (bool): whether to load default pretrained model. - phase (str): optional choices are 'train' and 'predict'. - - Returns: - inputs (dict): the input variables. - outputs (dict): the output variables. - context_prog (Program): the program to execute transfer learning. - """ - context_prog = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(context_prog, startup_program): - with fluid.unique_name.guard(): - image = fluid.layers.data( - name='image', shape=[-1, 3, -1, -1], dtype='float32') - # backbone - backbone = ResNet( - norm_type='affine_channel', - depth=50, - feature_maps=[2, 3, 4, 5], - freeze_at=2) - body_feats = backbone(image) - # fpn - fpn = FPN( - max_level=6, - min_level=2, - num_chan=256, - spatial_scale=[0.03125, 0.0625, 0.125, 0.25]) - var_prefix = '@HUB_{}@'.format(self.name) - im_info = fluid.layers.data( - name='im_info', shape=[3], dtype='float32', lod_level=0) - im_shape = fluid.layers.data( - name='im_shape', shape=[3], dtype='float32', lod_level=0) - body_feat_names = list(body_feats.keys()) - body_feats, spatial_scale = fpn.get_output(body_feats) - # rpn_head: RPNHead - rpn_head = self.rpn_head() - rois = rpn_head.get_proposals(body_feats, im_info, mode=phase) - # train - if phase == 'train': - gt_bbox = fluid.layers.data( - name='gt_bbox', shape=[4], dtype='float32', lod_level=1) - is_crowd = fluid.layers.data( - name='is_crowd', shape=[1], dtype='int32', lod_level=1) - gt_class = fluid.layers.data( - name='gt_class', shape=[1], dtype='int32', lod_level=1) - rpn_loss = rpn_head.get_loss(im_info, gt_bbox, is_crowd) - # bbox_assigner: BBoxAssigner - bbox_assigner = self.bbox_assigner(num_classes) - outs = fluid.layers.generate_proposal_labels( - rpn_rois=rois, - gt_classes=gt_class, - is_crowd=is_crowd, - gt_boxes=gt_bbox, - im_info=im_info, - batch_size_per_im=bbox_assigner.batch_size_per_im, - fg_fraction=bbox_assigner.fg_fraction, - fg_thresh=bbox_assigner.fg_thresh, - bg_thresh_hi=bbox_assigner.bg_thresh_hi, - bg_thresh_lo=bbox_assigner.bg_thresh_lo, - bbox_reg_weights=bbox_assigner.bbox_reg_weights, - class_nums=bbox_assigner.class_nums, - use_random=bbox_assigner.use_random) - rois = outs[0] - - roi_extractor = self.roi_extractor() - roi_feat = roi_extractor( - head_inputs=body_feats, - rois=rois, - spatial_scale=spatial_scale) - # head_feat - bbox_head = self.bbox_head(num_classes) - head_feat = bbox_head.head(roi_feat) - if isinstance(head_feat, OrderedDict): - head_feat = list(head_feat.values())[0] - if phase == 'train': - inputs = { - 'image': var_prefix + image.name, - 'im_info': var_prefix + im_info.name, - 'im_shape': var_prefix + im_shape.name, - 'gt_class': var_prefix + gt_class.name, - 'gt_bbox': var_prefix + gt_bbox.name, - 'is_crowd': var_prefix + is_crowd.name - } - outputs = { - 'head_features': - var_prefix + head_feat.name, - 'rpn_cls_loss': - var_prefix + rpn_loss['rpn_cls_loss'].name, - 'rpn_reg_loss': - var_prefix + rpn_loss['rpn_reg_loss'].name, - 'generate_proposal_labels': - [var_prefix + var.name for var in outs] - } - elif phase == 'predict': - pred = bbox_head.get_prediction(roi_feat, rois, im_info, - im_shape) - inputs = { - 'image': var_prefix + image.name, - 'im_info': var_prefix + im_info.name, - 'im_shape': var_prefix + im_shape.name - } - outputs = { - 'head_features': var_prefix + head_feat.name, - 'rois': var_prefix + rois.name, - 'bbox_out': var_prefix + pred.name - } - add_vars_prefix(context_prog, var_prefix) - add_vars_prefix(startup_program, var_prefix) - - global_vars = context_prog.global_block().vars - inputs = { - key: global_vars[value] - for key, value in inputs.items() - } - outputs = { - key: global_vars[value] if not isinstance(value, list) else - [global_vars[var] for var in value] - for key, value in outputs.items() - } - - for param in context_prog.global_block().iter_parameters(): - param.trainable = trainable - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(startup_program) - if pretrained: - - def _if_exist(var): - if num_classes != 81: - if 'bbox_pred' in var.name or 'cls_score' in var.name: - return False - return os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - - fluid.io.load_vars( - exe, - self.default_pretrained_model_path, - predicate=_if_exist) - return inputs, outputs, context_prog - - def rpn_head(self): - return FPNRPNHead( - anchor_generator=AnchorGenerator( - anchor_sizes=[32, 64, 128, 256, 512], - aspect_ratios=[0.5, 1.0, 2.0], - stride=[16.0, 16.0], - variance=[1.0, 1.0, 1.0, 1.0]), - rpn_target_assign=RPNTargetAssign( - rpn_batch_size_per_im=256, - rpn_fg_fraction=0.5, - rpn_negative_overlap=0.3, - rpn_positive_overlap=0.7, - rpn_straddle_thresh=0.0), - train_proposal=GenerateProposals( - min_size=0.0, - nms_thresh=0.7, - post_nms_top_n=2000, - pre_nms_top_n=2000), - test_proposal=GenerateProposals( - min_size=0.0, - nms_thresh=0.7, - post_nms_top_n=1000, - pre_nms_top_n=1000), - anchor_start_size=32, - num_chan=256, - min_level=2, - max_level=6) - - def roi_extractor(self): - return FPNRoIAlign( - canconical_level=4, - canonical_size=224, - max_level=5, - min_level=2, - box_resolution=7, - sampling_ratio=2) - - def bbox_head(self, num_classes): - return BBoxHead( - head=TwoFCHead(mlp_dim=1024), - nms=MultiClassNMS( - keep_top_k=100, nms_threshold=0.5, score_threshold=0.05), - num_classes=num_classes) - - def bbox_assigner(self, num_classes): - return BBoxAssigner( - batch_size_per_im=512, - bbox_reg_weights=[0.1, 0.1, 0.2, 0.2], - bg_thresh_hi=0.5, - bg_thresh_lo=0.0, - fg_fraction=0.25, - fg_thresh=0.5, - class_nums=num_classes) - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) + self.gpu_predictor = create_predictor(gpu_config) def object_detection(self, paths=None, images=None, - data=None, use_gpu=False, batch_size=1, output_dir='detection_result', @@ -337,8 +99,6 @@ class FasterRCNNResNet50RPN(hub.Module): ) paths = paths if paths else list() - if data and 'image' in data: - paths += data['image'] all_images = list() for yield_data in test_reader(paths, images): @@ -360,29 +120,37 @@ class FasterRCNNResNet50RPN(hub.Module): padding_image, padding_info, padding_shape = padding_minibatch( batch_data, coarsest_stride=32, use_padded_im_info=True) - padding_image_tensor = PaddleTensor(padding_image.copy()) - padding_info_tensor = PaddleTensor(padding_info.copy()) - padding_shape_tensor = PaddleTensor(padding_shape.copy()) feed_list = [ - padding_image_tensor, padding_info_tensor, padding_shape_tensor + padding_image, padding_info, padding_shape ] - if use_gpu: - data_out = self.gpu_predictor.run(feed_list) - else: - data_out = self.cpu_predictor.run(feed_list) + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + + feed_list = [ + padding_image, padding_info, padding_shape + ] + + input_names = predictor.get_input_names() + + for i, input_name in enumerate(input_names): + data = np.asarray(feed_list[i], dtype=np.float32) + handle = predictor.get_input_handle(input_name) + handle.copy_from_cpu(data) + + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) output = postprocess( paths=paths, images=images, - data_out=data_out, + data_out=output_handle, score_thresh=score_thresh, label_names=self.label_names, output_dir=output_dir, handle_id=handle_id, visualization=visualization) res += output - return res def add_module_config_arg(self): diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/name_adapter.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/name_adapter.py deleted file mode 100644 index bebf8bde..00000000 --- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/name_adapter.py +++ /dev/null @@ -1,61 +0,0 @@ -# coding=utf-8 - - -class NameAdapter(object): - """Fix the backbones variable names for pretrained weight""" - - def __init__(self, model): - super(NameAdapter, self).__init__() - self.model = model - - @property - def model_type(self): - return getattr(self.model, '_model_type', '') - - @property - def variant(self): - return getattr(self.model, 'variant', '') - - def fix_conv_norm_name(self, name): - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - # the naming rule is same as pretrained weight - if self.model_type == 'SEResNeXt': - bn_name = name + "_bn" - return bn_name - - def fix_shortcut_name(self, name): - if self.model_type == 'SEResNeXt': - name = 'conv' + name + '_prj' - return name - - def fix_bottleneck_name(self, name): - if self.model_type == 'SEResNeXt': - conv_name1 = 'conv' + name + '_x1' - conv_name2 = 'conv' + name + '_x2' - conv_name3 = 'conv' + name + '_x3' - shortcut_name = name - else: - conv_name1 = name + "_branch2a" - conv_name2 = name + "_branch2b" - conv_name3 = name + "_branch2c" - shortcut_name = name + "_branch1" - return conv_name1, conv_name2, conv_name3, shortcut_name - - def fix_layer_warp_name(self, stage_num, count, i): - name = 'res' + str(stage_num) - if count > 10 and stage_num == 4: - if i == 0: - conv_name = name + "a" - else: - conv_name = name + "b" + str(i) - else: - conv_name = name + chr(ord("a") + i) - if self.model_type == 'SEResNeXt': - conv_name = str(stage_num + 2) + '_' + str(i + 1) - return conv_name - - def fix_c1_stage_name(self): - return "res_conv1" if self.model_type == 'ResNeXt' else "conv1" diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/nonlocal_helper.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/nonlocal_helper.py deleted file mode 100644 index 599b8dfa..00000000 --- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/nonlocal_helper.py +++ /dev/null @@ -1,154 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import paddle.fluid as fluid -from paddle.fluid import ParamAttr - -nonlocal_params = { - "use_zero_init_conv": False, - "conv_init_std": 0.01, - "no_bias": True, - "use_maxpool": False, - "use_softmax": True, - "use_bn": False, - "use_scale": True, # vital for the model prformance!!! - "use_affine": False, - "bn_momentum": 0.9, - "bn_epsilon": 1.0000001e-5, - "bn_init_gamma": 0.9, - "weight_decay_bn": 1.e-4, -} - - -def space_nonlocal(input, dim_in, dim_out, prefix, dim_inner, - max_pool_stride=2): - cur = input - theta = fluid.layers.conv2d(input = cur, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr=ParamAttr(name = prefix + '_theta' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_theta' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if not nonlocal_params["no_bias"] else False, \ - name = prefix + '_theta') - theta_shape = theta.shape - theta_shape_op = fluid.layers.shape(theta) - theta_shape_op.stop_gradient = True - - if nonlocal_params["use_maxpool"]: - max_pool = fluid.layers.pool2d(input = cur, \ - pool_size = [max_pool_stride, max_pool_stride], \ - pool_type = 'max', \ - pool_stride = [max_pool_stride, max_pool_stride], \ - pool_padding = [0, 0], \ - name = prefix + '_pool') - else: - max_pool = cur - - phi = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_phi' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_phi' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_phi') - phi_shape = phi.shape - - g = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_g' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_g' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_g') - g_shape = g.shape - # we have to use explicit batch size (to support arbitrary spacetime size) - # e.g. (8, 1024, 4, 14, 14) => (8, 1024, 784) - theta = fluid.layers.reshape(theta, shape=(0, 0, -1)) - theta = fluid.layers.transpose(theta, [0, 2, 1]) - phi = fluid.layers.reshape(phi, [0, 0, -1]) - theta_phi = fluid.layers.matmul(theta, phi, name=prefix + '_affinity') - g = fluid.layers.reshape(g, [0, 0, -1]) - - if nonlocal_params["use_softmax"]: - if nonlocal_params["use_scale"]: - theta_phi_sc = fluid.layers.scale(theta_phi, scale=dim_inner**-.5) - else: - theta_phi_sc = theta_phi - p = fluid.layers.softmax( - theta_phi_sc, name=prefix + '_affinity' + '_prob') - else: - # not clear about what is doing in xlw's code - p = None # not implemented - raise "Not implemented when not use softmax" - - # note g's axis[2] corresponds to p's axis[2] - # e.g. g(8, 1024, 784_2) * p(8, 784_1, 784_2) => (8, 1024, 784_1) - p = fluid.layers.transpose(p, [0, 2, 1]) - t = fluid.layers.matmul(g, p, name=prefix + '_y') - - # reshape back - # e.g. (8, 1024, 784) => (8, 1024, 4, 14, 14) - t_shape = t.shape - t_re = fluid.layers.reshape( - t, shape=list(theta_shape), actual_shape=theta_shape_op) - blob_out = t_re - blob_out = fluid.layers.conv2d(input = blob_out, num_filters = dim_out, \ - filter_size = [1, 1], stride = [1, 1], padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_out' + "_w", \ - initializer = fluid.initializer.Constant(value = 0.) \ - if nonlocal_params["use_zero_init_conv"] \ - else fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_out' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_out') - blob_out_shape = blob_out.shape - - if nonlocal_params["use_bn"]: - bn_name = prefix + "_bn" - blob_out = fluid.layers.batch_norm(blob_out, \ - # is_test = test_mode, \ - momentum = nonlocal_params["bn_momentum"], \ - epsilon = nonlocal_params["bn_epsilon"], \ - name = bn_name, \ - param_attr = ParamAttr(name = bn_name + "_s", \ - initializer = fluid.initializer.Constant(value = nonlocal_params["bn_init_gamma"]), \ - regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ - bias_attr = ParamAttr(name = bn_name + "_b", \ - regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ - moving_mean_name = bn_name + "_rm", \ - moving_variance_name = bn_name + "_riv") # add bn - - if nonlocal_params["use_affine"]: - affine_scale = fluid.layers.create_parameter(\ - shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ - attr=ParamAttr(name=prefix + '_affine' + '_s'), \ - default_initializer = fluid.initializer.Constant(value = 1.)) - affine_bias = fluid.layers.create_parameter(\ - shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ - attr=ParamAttr(name=prefix + '_affine' + '_b'), \ - default_initializer = fluid.initializer.Constant(value = 0.)) - blob_out = fluid.layers.affine_channel(blob_out, scale = affine_scale, \ - bias = affine_bias, name = prefix + '_affine') # add affine - - return blob_out - - -def add_space_nonlocal(input, dim_in, dim_out, prefix, dim_inner): - ''' - add_space_nonlocal: - Non-local Neural Networks: see https://arxiv.org/abs/1711.07971 - ''' - conv = space_nonlocal(input, dim_in, dim_out, prefix, dim_inner) - output = fluid.layers.elementwise_add(input, conv, name=prefix + '_sum') - return output diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/processor.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/processor.py index 2b3e1ce9..f1524564 100644 --- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/processor.py +++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/processor.py @@ -12,7 +12,6 @@ __all__ = [ 'postprocess', ] - def base64_to_cv2(b64str): data = base64.b64decode(b64str.encode('utf8')) data = np.fromstring(data, np.uint8) @@ -107,7 +106,7 @@ def postprocess(paths, handle_id, visualization=True): """ - postprocess the lod_tensor produced by fluid.Executor.run + postprocess the lod_tensor produced by Executor.run Args: paths (list[str]): the path of images. @@ -130,9 +129,8 @@ def postprocess(paths, confidence (float): The confidence of detection result. save_path (str): The path to save output images. """ - lod_tensor = data_out[0] - lod = lod_tensor.lod[0] - results = lod_tensor.as_ndarray() + lod = data_out.lod()[0] + results = data_out.copy_to_cpu() check_dir(output_dir) diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/resnet.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/resnet.py deleted file mode 100644 index 4bd6fb61..00000000 --- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/resnet.py +++ /dev/null @@ -1,447 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -from collections import OrderedDict -from numbers import Integral - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.framework import Variable -from paddle.fluid.regularizer import L2Decay -from paddle.fluid.initializer import Constant - -from .nonlocal_helper import add_space_nonlocal -from .name_adapter import NameAdapter - -__all__ = ['ResNet', 'ResNetC5'] - - -class ResNet(object): - """ - Residual Network, see https://arxiv.org/abs/1512.03385 - Args: - depth (int): ResNet depth, should be 34, 50. - freeze_at (int): freeze the backbone at which stage - norm_type (str): normalization type, 'bn'/'sync_bn'/'affine_channel' - freeze_norm (bool): freeze normalization layers - norm_decay (float): weight decay for normalization layer weights - variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently - feature_maps (list): index of stages whose feature maps are returned - dcn_v2_stages (list): index of stages who select deformable conv v2 - nonlocal_stages (list): index of stages who select nonlocal networks - """ - __shared__ = ['norm_type', 'freeze_norm', 'weight_prefix_name'] - - def __init__(self, - depth=50, - freeze_at=0, - norm_type='sync_bn', - freeze_norm=False, - norm_decay=0., - variant='b', - feature_maps=[3, 4, 5], - dcn_v2_stages=[], - weight_prefix_name='', - nonlocal_stages=[], - get_prediction=False, - class_dim=1000): - super(ResNet, self).__init__() - - if isinstance(feature_maps, Integral): - feature_maps = [feature_maps] - - assert depth in [34, 50], \ - "depth {} not in [34, 50]" - assert variant in ['a', 'b', 'c', 'd'], "invalid ResNet variant" - assert 0 <= freeze_at <= 4, "freeze_at should be 0, 1, 2, 3 or 4" - assert len(feature_maps) > 0, "need one or more feature maps" - assert norm_type in ['bn', 'sync_bn', 'affine_channel'] - assert not (len(nonlocal_stages)>0 and depth<50), \ - "non-local is not supported for resnet18 or resnet34" - - self.depth = depth - self.freeze_at = freeze_at - self.norm_type = norm_type - self.norm_decay = norm_decay - self.freeze_norm = freeze_norm - self.variant = variant - self._model_type = 'ResNet' - self.feature_maps = feature_maps - self.dcn_v2_stages = dcn_v2_stages - self.depth_cfg = { - 34: ([3, 4, 6, 3], self.basicblock), - 50: ([3, 4, 6, 3], self.bottleneck), - } - self.stage_filters = [64, 128, 256, 512] - self._c1_out_chan_num = 64 - self.na = NameAdapter(self) - self.prefix_name = weight_prefix_name - - self.nonlocal_stages = nonlocal_stages - self.nonlocal_mod_cfg = { - 50: 2, - 101: 5, - 152: 8, - 200: 12, - } - self.get_prediction = get_prediction - self.class_dim = class_dim - - def _conv_offset(self, - input, - filter_size, - stride, - padding, - act=None, - name=None): - out_channel = filter_size * filter_size * 3 - out = fluid.layers.conv2d( - input, - num_filters=out_channel, - filter_size=filter_size, - stride=stride, - padding=padding, - param_attr=ParamAttr(initializer=Constant(0.0), name=name + ".w_0"), - bias_attr=ParamAttr(initializer=Constant(0.0), name=name + ".b_0"), - act=act, - name=name) - return out - - def _conv_norm(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None, - dcn_v2=False): - _name = self.prefix_name + name if self.prefix_name != '' else name - if not dcn_v2: - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=_name + "_weights"), - bias_attr=False, - name=_name + '.conv2d.output.1') - else: - # select deformable conv" - offset_mask = self._conv_offset( - input=input, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - act=None, - name=_name + "_conv_offset") - offset_channel = filter_size**2 * 2 - mask_channel = filter_size**2 - offset, mask = fluid.layers.split( - input=offset_mask, - num_or_sections=[offset_channel, mask_channel], - dim=1) - mask = fluid.layers.sigmoid(mask) - conv = fluid.layers.deformable_conv( - input=input, - offset=offset, - mask=mask, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - deformable_groups=1, - im2col_step=1, - param_attr=ParamAttr(name=_name + "_weights"), - bias_attr=False, - name=_name + ".conv2d.output.1") - - bn_name = self.na.fix_conv_norm_name(name) - bn_name = self.prefix_name + bn_name if self.prefix_name != '' else bn_name - - norm_lr = 0. if self.freeze_norm else 1. - norm_decay = self.norm_decay - pattr = ParamAttr( - name=bn_name + '_scale', - learning_rate=norm_lr, - regularizer=L2Decay(norm_decay)) - battr = ParamAttr( - name=bn_name + '_offset', - learning_rate=norm_lr, - regularizer=L2Decay(norm_decay)) - - if self.norm_type in ['bn', 'sync_bn']: - global_stats = True if self.freeze_norm else False - out = fluid.layers.batch_norm( - input=conv, - act=act, - name=bn_name + '.output.1', - param_attr=pattr, - bias_attr=battr, - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance', - use_global_stats=global_stats) - scale = fluid.framework._get_var(pattr.name) - bias = fluid.framework._get_var(battr.name) - elif self.norm_type == 'affine_channel': - scale = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=pattr, - default_initializer=fluid.initializer.Constant(1.)) - bias = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=battr, - default_initializer=fluid.initializer.Constant(0.)) - out = fluid.layers.affine_channel( - x=conv, scale=scale, bias=bias, act=act) - if self.freeze_norm: - scale.stop_gradient = True - bias.stop_gradient = True - return out - - def _shortcut(self, input, ch_out, stride, is_first, name): - max_pooling_in_short_cut = self.variant == 'd' - ch_in = input.shape[1] - # the naming rule is same as pretrained weight - name = self.na.fix_shortcut_name(name) - std_senet = getattr(self, 'std_senet', False) - if ch_in != ch_out or stride != 1 or (self.depth < 50 and is_first): - if std_senet: - if is_first: - return self._conv_norm(input, ch_out, 1, stride, name=name) - else: - return self._conv_norm(input, ch_out, 3, stride, name=name) - if max_pooling_in_short_cut and not is_first: - input = fluid.layers.pool2d( - input=input, - pool_size=2, - pool_stride=2, - pool_padding=0, - ceil_mode=True, - pool_type='avg') - return self._conv_norm(input, ch_out, 1, 1, name=name) - return self._conv_norm(input, ch_out, 1, stride, name=name) - else: - return input - - def bottleneck(self, - input, - num_filters, - stride, - is_first, - name, - dcn_v2=False): - if self.variant == 'a': - stride1, stride2 = stride, 1 - else: - stride1, stride2 = 1, stride - - # ResNeXt - groups = getattr(self, 'groups', 1) - group_width = getattr(self, 'group_width', -1) - if groups == 1: - expand = 4 - elif (groups * group_width) == 256: - expand = 1 - else: # FIXME hard code for now, handles 32x4d, 64x4d and 32x8d - num_filters = num_filters // 2 - expand = 2 - - conv_name1, conv_name2, conv_name3, \ - shortcut_name = self.na.fix_bottleneck_name(name) - std_senet = getattr(self, 'std_senet', False) - if std_senet: - conv_def = [[ - int(num_filters / 2), 1, stride1, 'relu', 1, conv_name1 - ], [num_filters, 3, stride2, 'relu', groups, conv_name2], - [num_filters * expand, 1, 1, None, 1, conv_name3]] - else: - conv_def = [[num_filters, 1, stride1, 'relu', 1, conv_name1], - [num_filters, 3, stride2, 'relu', groups, conv_name2], - [num_filters * expand, 1, 1, None, 1, conv_name3]] - - residual = input - for i, (c, k, s, act, g, _name) in enumerate(conv_def): - residual = self._conv_norm( - input=residual, - num_filters=c, - filter_size=k, - stride=s, - act=act, - groups=g, - name=_name, - dcn_v2=(i == 1 and dcn_v2)) - short = self._shortcut( - input, - num_filters * expand, - stride, - is_first=is_first, - name=shortcut_name) - # Squeeze-and-Excitation - if callable(getattr(self, '_squeeze_excitation', None)): - residual = self._squeeze_excitation( - input=residual, num_channels=num_filters, name='fc' + name) - return fluid.layers.elementwise_add( - x=short, y=residual, act='relu', name=name + ".add.output.5") - - def basicblock(self, - input, - num_filters, - stride, - is_first, - name, - dcn_v2=False): - assert dcn_v2 is False, "Not implemented yet." - conv0 = self._conv_norm( - input=input, - num_filters=num_filters, - filter_size=3, - act='relu', - stride=stride, - name=name + "_branch2a") - conv1 = self._conv_norm( - input=conv0, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b") - short = self._shortcut( - input, num_filters, stride, is_first, name=name + "_branch1") - return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') - - def layer_warp(self, input, stage_num): - """ - Args: - input (Variable): input variable. - stage_num (int): the stage number, should be 2, 3, 4, 5 - - Returns: - The last variable in endpoint-th stage. - """ - assert stage_num in [2, 3, 4, 5] - - stages, block_func = self.depth_cfg[self.depth] - count = stages[stage_num - 2] - - ch_out = self.stage_filters[stage_num - 2] - is_first = False if stage_num != 2 else True - dcn_v2 = True if stage_num in self.dcn_v2_stages else False - - nonlocal_mod = 1000 - if stage_num in self.nonlocal_stages: - nonlocal_mod = self.nonlocal_mod_cfg[ - self.depth] if stage_num == 4 else 2 - - # Make the layer name and parameter name consistent - # with ImageNet pre-trained model - conv = input - for i in range(count): - conv_name = self.na.fix_layer_warp_name(stage_num, count, i) - if self.depth < 50: - is_first = True if i == 0 and stage_num == 2 else False - conv = block_func( - input=conv, - num_filters=ch_out, - stride=2 if i == 0 and stage_num != 2 else 1, - is_first=is_first, - name=conv_name, - dcn_v2=dcn_v2) - - # add non local model - dim_in = conv.shape[1] - nonlocal_name = "nonlocal_conv{}".format(stage_num) - if i % nonlocal_mod == nonlocal_mod - 1: - conv = add_space_nonlocal(conv, dim_in, dim_in, - nonlocal_name + '_{}'.format(i), - int(dim_in / 2)) - return conv - - def c1_stage(self, input): - out_chan = self._c1_out_chan_num - - conv1_name = self.na.fix_c1_stage_name() - - if self.variant in ['c', 'd']: - conv_def = [ - [out_chan // 2, 3, 2, "conv1_1"], - [out_chan // 2, 3, 1, "conv1_2"], - [out_chan, 3, 1, "conv1_3"], - ] - else: - conv_def = [[out_chan, 7, 2, conv1_name]] - - for (c, k, s, _name) in conv_def: - input = self._conv_norm( - input=input, - num_filters=c, - filter_size=k, - stride=s, - act='relu', - name=_name) - - output = fluid.layers.pool2d( - input=input, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - return output - - def __call__(self, input): - assert isinstance(input, Variable) - assert not (set(self.feature_maps) - set([2, 3, 4, 5])), \ - "feature maps {} not in [2, 3, 4, 5]".format(self.feature_maps) - - res_endpoints = [] - - res = input - feature_maps = self.feature_maps - severed_head = getattr(self, 'severed_head', False) - if not severed_head: - res = self.c1_stage(res) - feature_maps = range(2, max(self.feature_maps) + 1) - - for i in feature_maps: - res = self.layer_warp(res, i) - if i in self.feature_maps: - res_endpoints.append(res) - if self.freeze_at >= i: - res.stop_gradient = True - if self.get_prediction: - pool = fluid.layers.pool2d( - input=res, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - - out = fluid.layers.fc( - input=pool, - size=self.class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv))) - out = fluid.layers.softmax(out) - return out - return OrderedDict([('res{}_sum'.format(self.feature_maps[idx]), feat) - for idx, feat in enumerate(res_endpoints)]) - - -class ResNetC5(ResNet): - def __init__(self, - depth=50, - freeze_at=2, - norm_type='affine_channel', - freeze_norm=True, - norm_decay=0., - variant='b', - feature_maps=[5], - weight_prefix_name=''): - super(ResNetC5, self).__init__(depth, freeze_at, norm_type, freeze_norm, - norm_decay, variant, feature_maps) - self.severed_head = True diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/roi_extractor.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/roi_extractor.py deleted file mode 100644 index 6e3398d8..00000000 --- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/roi_extractor.py +++ /dev/null @@ -1,76 +0,0 @@ -# coding=utf-8 -import paddle.fluid as fluid - -__all__ = ['FPNRoIAlign'] - - -class FPNRoIAlign(object): - """ - RoI align pooling for FPN feature maps - Args: - sampling_ratio (int): number of sampling points - min_level (int): lowest level of FPN layer - max_level (int): highest level of FPN layer - canconical_level (int): the canconical FPN feature map level - canonical_size (int): the canconical FPN feature map size - box_resolution (int): box resolution - mask_resolution (int): mask roi resolution - """ - - def __init__(self, - sampling_ratio=0, - min_level=2, - max_level=5, - canconical_level=4, - canonical_size=224, - box_resolution=7, - mask_resolution=14): - super(FPNRoIAlign, self).__init__() - self.sampling_ratio = sampling_ratio - self.min_level = min_level - self.max_level = max_level - self.canconical_level = canconical_level - self.canonical_size = canonical_size - self.box_resolution = box_resolution - self.mask_resolution = mask_resolution - - def __call__(self, head_inputs, rois, spatial_scale, is_mask=False): - """ - Adopt RoI align onto several level of feature maps to get RoI features. - Distribute RoIs to different levels by area and get a list of RoI - features by distributed RoIs and their corresponding feature maps. - - Returns: - roi_feat(Variable): RoI features with shape of [M, C, R, R], - where M is the number of RoIs and R is RoI resolution - - """ - k_min = self.min_level - k_max = self.max_level - num_roi_lvls = k_max - k_min + 1 - name_list = list(head_inputs.keys()) - input_name_list = name_list[-num_roi_lvls:] - spatial_scale = spatial_scale[-num_roi_lvls:] - rois_dist, restore_index = fluid.layers.distribute_fpn_proposals( - rois, k_min, k_max, self.canconical_level, self.canonical_size) - # rois_dist is in ascend order - roi_out_list = [] - resolution = is_mask and self.mask_resolution or self.box_resolution - for lvl in range(num_roi_lvls): - name_index = num_roi_lvls - lvl - 1 - rois_input = rois_dist[lvl] - head_input = head_inputs[input_name_list[name_index]] - sc = spatial_scale[name_index] - roi_out = fluid.layers.roi_align( - input=head_input, - rois=rois_input, - pooled_height=resolution, - pooled_width=resolution, - spatial_scale=sc, - sampling_ratio=self.sampling_ratio) - roi_out_list.append(roi_out) - roi_feat_shuffle = fluid.layers.concat(roi_out_list) - roi_feat_ = fluid.layers.gather(roi_feat_shuffle, restore_index) - roi_feat = fluid.layers.lod_reset(roi_feat_, rois) - - return roi_feat diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/rpn_head.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/rpn_head.py deleted file mode 100644 index e1b69866..00000000 --- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/rpn_head.py +++ /dev/null @@ -1,533 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.initializer import Normal -from paddle.fluid.regularizer import L2Decay - -__all__ = [ - 'AnchorGenerator', 'RPNTargetAssign', 'GenerateProposals', 'RPNHead', - 'FPNRPNHead' -] - - -class AnchorGenerator(object): - # __op__ = fluid.layers.anchor_generator - def __init__(self, - stride=[16.0, 16.0], - anchor_sizes=[32, 64, 128, 256, 512], - aspect_ratios=[0.5, 1., 2.], - variance=[1., 1., 1., 1.]): - super(AnchorGenerator, self).__init__() - self.anchor_sizes = anchor_sizes - self.aspect_ratios = aspect_ratios - self.variance = variance - self.stride = stride - - -class RPNTargetAssign(object): - # __op__ = fluid.layers.rpn_target_assign - def __init__(self, - rpn_batch_size_per_im=256, - rpn_straddle_thresh=0., - rpn_fg_fraction=0.5, - rpn_positive_overlap=0.7, - rpn_negative_overlap=0.3, - use_random=True): - super(RPNTargetAssign, self).__init__() - self.rpn_batch_size_per_im = rpn_batch_size_per_im - self.rpn_straddle_thresh = rpn_straddle_thresh - self.rpn_fg_fraction = rpn_fg_fraction - self.rpn_positive_overlap = rpn_positive_overlap - self.rpn_negative_overlap = rpn_negative_overlap - self.use_random = use_random - - -class GenerateProposals(object): - # __op__ = fluid.layers.generate_proposals - def __init__(self, - pre_nms_top_n=6000, - post_nms_top_n=1000, - nms_thresh=.5, - min_size=.1, - eta=1.): - super(GenerateProposals, self).__init__() - self.pre_nms_top_n = pre_nms_top_n - self.post_nms_top_n = post_nms_top_n - self.nms_thresh = nms_thresh - self.min_size = min_size - self.eta = eta - - -class RPNHead(object): - """ - RPN Head - - Args: - anchor_generator (object): `AnchorGenerator` instance - rpn_target_assign (object): `RPNTargetAssign` instance - train_proposal (object): `GenerateProposals` instance for training - test_proposal (object): `GenerateProposals` instance for testing - num_classes (int): number of classes in rpn output - """ - __inject__ = [ - 'anchor_generator', 'rpn_target_assign', 'train_proposal', - 'test_proposal' - ] - - def __init__(self, - anchor_generator, - rpn_target_assign, - train_proposal, - test_proposal, - num_classes=1): - super(RPNHead, self).__init__() - self.anchor_generator = anchor_generator - self.rpn_target_assign = rpn_target_assign - self.train_proposal = train_proposal - self.test_proposal = test_proposal - self.num_classes = num_classes - - def _get_output(self, input): - """ - Get anchor and RPN head output. - - Args: - input(Variable): feature map from backbone with shape of [N, C, H, W] - - Returns: - rpn_cls_score(Variable): Output of rpn head with shape of [N, num_anchors, H, W]. - rpn_bbox_pred(Variable): Output of rpn head with shape of [N, num_anchors * 4, H, W]. - """ - dim_out = input.shape[1] - rpn_conv = fluid.layers.conv2d( - input=input, - num_filters=dim_out, - filter_size=3, - stride=1, - padding=1, - act='relu', - name='conv_rpn', - param_attr=ParamAttr( - name="conv_rpn_w", initializer=Normal(loc=0., scale=0.01)), - bias_attr=ParamAttr( - name="conv_rpn_b", learning_rate=2., regularizer=L2Decay(0.))) - # Generate anchors self.anchor_generator - self.anchor, self.anchor_var = fluid.layers.anchor_generator( - input=rpn_conv, - anchor_sizes=self.anchor_generator.anchor_sizes, - aspect_ratios=self.anchor_generator.aspect_ratios, - variance=self.anchor_generator.variance, - stride=self.anchor_generator.stride) - - num_anchor = self.anchor.shape[2] - # Proposal classification scores - self.rpn_cls_score = fluid.layers.conv2d( - rpn_conv, - num_filters=num_anchor * self.num_classes, - filter_size=1, - stride=1, - padding=0, - act=None, - name='rpn_cls_score', - param_attr=ParamAttr( - name="rpn_cls_logits_w", initializer=Normal(loc=0., - scale=0.01)), - bias_attr=ParamAttr( - name="rpn_cls_logits_b", - learning_rate=2., - regularizer=L2Decay(0.))) - # Proposal bbox regression deltas - self.rpn_bbox_pred = fluid.layers.conv2d( - rpn_conv, - num_filters=4 * num_anchor, - filter_size=1, - stride=1, - padding=0, - act=None, - name='rpn_bbox_pred', - param_attr=ParamAttr( - name="rpn_bbox_pred_w", initializer=Normal(loc=0., scale=0.01)), - bias_attr=ParamAttr( - name="rpn_bbox_pred_b", - learning_rate=2., - regularizer=L2Decay(0.))) - return self.rpn_cls_score, self.rpn_bbox_pred - - def get_proposals(self, body_feats, im_info, mode='train'): - """ - Get proposals according to the output of backbone. - - Args: - body_feats (dict): The dictionary of feature maps from backbone. - im_info(Variable): The information of image with shape [N, 3] with - shape (height, width, scale). - body_feat_names(list): A list of names of feature maps from - backbone. - - Returns: - rpn_rois(Variable): Output proposals with shape of (rois_num, 4). - """ - # In RPN Heads, only the last feature map of backbone is used. - # And body_feat_names[-1] represents the last level name of backbone. - body_feat = list(body_feats.values())[-1] - rpn_cls_score, rpn_bbox_pred = self._get_output(body_feat) - - if self.num_classes == 1: - rpn_cls_prob = fluid.layers.sigmoid( - rpn_cls_score, name='rpn_cls_prob') - else: - rpn_cls_score = fluid.layers.transpose( - rpn_cls_score, perm=[0, 2, 3, 1]) - rpn_cls_score = fluid.layers.reshape( - rpn_cls_score, shape=(0, 0, 0, -1, self.num_classes)) - rpn_cls_prob_tmp = fluid.layers.softmax( - rpn_cls_score, use_cudnn=False, name='rpn_cls_prob') - rpn_cls_prob_slice = fluid.layers.slice( - rpn_cls_prob_tmp, axes=[4], starts=[1], ends=[self.num_classes]) - rpn_cls_prob, _ = fluid.layers.topk(rpn_cls_prob_slice, 1) - rpn_cls_prob = fluid.layers.reshape( - rpn_cls_prob, shape=(0, 0, 0, -1)) - rpn_cls_prob = fluid.layers.transpose( - rpn_cls_prob, perm=[0, 3, 1, 2]) - prop_op = self.train_proposal if mode == 'train' else self.test_proposal - # prop_op - rpn_rois, rpn_roi_probs = fluid.layers.generate_proposals( - scores=rpn_cls_prob, - bbox_deltas=rpn_bbox_pred, - im_info=im_info, - anchors=self.anchor, - variances=self.anchor_var, - pre_nms_top_n=prop_op.pre_nms_top_n, - post_nms_top_n=prop_op.post_nms_top_n, - nms_thresh=prop_op.nms_thresh, - min_size=prop_op.min_size, - eta=prop_op.eta) - return rpn_rois - - def _transform_input(self, rpn_cls_score, rpn_bbox_pred, anchor, - anchor_var): - rpn_cls_score = fluid.layers.transpose(rpn_cls_score, perm=[0, 2, 3, 1]) - rpn_bbox_pred = fluid.layers.transpose(rpn_bbox_pred, perm=[0, 2, 3, 1]) - anchor = fluid.layers.reshape(anchor, shape=(-1, 4)) - anchor_var = fluid.layers.reshape(anchor_var, shape=(-1, 4)) - rpn_cls_score = fluid.layers.reshape( - x=rpn_cls_score, shape=(0, -1, self.num_classes)) - rpn_bbox_pred = fluid.layers.reshape(x=rpn_bbox_pred, shape=(0, -1, 4)) - return rpn_cls_score, rpn_bbox_pred, anchor, anchor_var - - def _get_loss_input(self): - for attr in ['rpn_cls_score', 'rpn_bbox_pred', 'anchor', 'anchor_var']: - if not getattr(self, attr, None): - raise ValueError("self.{} should not be None,".format(attr), - "call RPNHead.get_proposals first") - return self._transform_input(self.rpn_cls_score, self.rpn_bbox_pred, - self.anchor, self.anchor_var) - - def get_loss(self, im_info, gt_box, is_crowd, gt_label=None): - """ - Sample proposals and Calculate rpn loss. - - Args: - im_info(Variable): The information of image with shape [N, 3] with - shape (height, width, scale). - gt_box(Variable): The ground-truth bounding boxes with shape [M, 4]. - M is the number of groundtruth. - is_crowd(Variable): Indicates groud-truth is crowd or not with - shape [M, 1]. M is the number of groundtruth. - - Returns: - Type: dict - rpn_cls_loss(Variable): RPN classification loss. - rpn_bbox_loss(Variable): RPN bounding box regression loss. - - """ - rpn_cls, rpn_bbox, anchor, anchor_var = self._get_loss_input() - if self.num_classes == 1: - # self.rpn_target_assign - score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \ - fluid.layers.rpn_target_assign( - bbox_pred=rpn_bbox, - cls_logits=rpn_cls, - anchor_box=anchor, - anchor_var=anchor_var, - gt_boxes=gt_box, - is_crowd=is_crowd, - im_info=im_info, - rpn_batch_size_per_im=self.rpn_target_assign.rpn_batch_size_per_im, - rpn_straddle_thresh=self.rpn_target_assign.rpn_straddle_thresh, - rpn_fg_fraction=self.rpn_target_assign.rpn_fg_fraction, - rpn_positive_overlap=self.rpn_target_assign.rpn_positive_overlap, - rpn_negative_overlap=self.rpn_target_assign.rpn_negative_overlap, - use_random=self.rpn_target_assign.use_random) - score_tgt = fluid.layers.cast(x=score_tgt, dtype='float32') - score_tgt.stop_gradient = True - rpn_cls_loss = fluid.layers.sigmoid_cross_entropy_with_logits( - x=score_pred, label=score_tgt) - else: - score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \ - self.rpn_target_assign( - bbox_pred=rpn_bbox, - cls_logits=rpn_cls, - anchor_box=anchor, - anchor_var=anchor_var, - gt_boxes=gt_box, - gt_labels=gt_label, - is_crowd=is_crowd, - num_classes=self.num_classes, - im_info=im_info) - labels_int64 = fluid.layers.cast(x=score_tgt, dtype='int64') - labels_int64.stop_gradient = True - rpn_cls_loss = fluid.layers.softmax_with_cross_entropy( - logits=score_pred, label=labels_int64, numeric_stable_mode=True) - - rpn_cls_loss = fluid.layers.reduce_mean( - rpn_cls_loss, name='loss_rpn_cls') - - loc_tgt = fluid.layers.cast(x=loc_tgt, dtype='float32') - loc_tgt.stop_gradient = True - rpn_reg_loss = fluid.layers.smooth_l1( - x=loc_pred, - y=loc_tgt, - sigma=3.0, - inside_weight=bbox_weight, - outside_weight=bbox_weight) - rpn_reg_loss = fluid.layers.reduce_sum( - rpn_reg_loss, name='loss_rpn_bbox') - score_shape = fluid.layers.shape(score_tgt) - score_shape = fluid.layers.cast(x=score_shape, dtype='float32') - norm = fluid.layers.reduce_prod(score_shape) - norm.stop_gradient = True - rpn_reg_loss = rpn_reg_loss / norm - return {'rpn_cls_loss': rpn_cls_loss, 'rpn_reg_loss': rpn_reg_loss} - - -class FPNRPNHead(RPNHead): - """ - RPN Head that supports FPN input - - Args: - anchor_generator (object): `AnchorGenerator` instance - rpn_target_assign (object): `RPNTargetAssign` instance - train_proposal (object): `GenerateProposals` instance for training - test_proposal (object): `GenerateProposals` instance for testing - anchor_start_size (int): size of anchor at the first scale - num_chan (int): number of FPN output channels - min_level (int): lowest level of FPN output - max_level (int): highest level of FPN output - num_classes (int): number of classes in rpn output - """ - - def __init__(self, - anchor_generator, - rpn_target_assign, - train_proposal, - test_proposal, - anchor_start_size=32, - num_chan=256, - min_level=2, - max_level=6, - num_classes=1): - super(FPNRPNHead, self).__init__(anchor_generator, rpn_target_assign, - train_proposal, test_proposal) - self.anchor_start_size = anchor_start_size - self.num_chan = num_chan - self.min_level = min_level - self.max_level = max_level - self.num_classes = num_classes - - self.fpn_rpn_list = [] - self.anchors_list = [] - self.anchor_var_list = [] - - def _get_output(self, input, feat_lvl): - """ - Get anchor and FPN RPN head output at one level. - - Args: - input(Variable): Body feature from backbone. - feat_lvl(int): Indicate the level of rpn output corresponding - to the level of feature map. - - Return: - rpn_cls_score(Variable): Output of one level of fpn rpn head with - shape of [N, num_anchors, H, W]. - rpn_bbox_pred(Variable): Output of one level of fpn rpn head with - shape of [N, num_anchors * 4, H, W]. - """ - slvl = str(feat_lvl) - conv_name = 'conv_rpn_fpn' + slvl - cls_name = 'rpn_cls_logits_fpn' + slvl - bbox_name = 'rpn_bbox_pred_fpn' + slvl - conv_share_name = 'conv_rpn_fpn' + str(self.min_level) - cls_share_name = 'rpn_cls_logits_fpn' + str(self.min_level) - bbox_share_name = 'rpn_bbox_pred_fpn' + str(self.min_level) - - num_anchors = len(self.anchor_generator.aspect_ratios) - conv_rpn_fpn = fluid.layers.conv2d( - input=input, - num_filters=self.num_chan, - filter_size=3, - padding=1, - act='relu', - name=conv_name, - param_attr=ParamAttr( - name=conv_share_name + '_w', - initializer=Normal(loc=0., scale=0.01)), - bias_attr=ParamAttr( - name=conv_share_name + '_b', - learning_rate=2., - regularizer=L2Decay(0.))) - - # self.anchor_generator - self.anchors, self.anchor_var = fluid.layers.anchor_generator( - input=conv_rpn_fpn, - anchor_sizes=(self.anchor_start_size * 2.** - (feat_lvl - self.min_level), ), - stride=(2.**feat_lvl, 2.**feat_lvl), - aspect_ratios=self.anchor_generator.aspect_ratios, - variance=self.anchor_generator.variance) - - cls_num_filters = num_anchors * self.num_classes - self.rpn_cls_score = fluid.layers.conv2d( - input=conv_rpn_fpn, - num_filters=cls_num_filters, - filter_size=1, - act=None, - name=cls_name, - param_attr=ParamAttr( - name=cls_share_name + '_w', - initializer=Normal(loc=0., scale=0.01)), - bias_attr=ParamAttr( - name=cls_share_name + '_b', - learning_rate=2., - regularizer=L2Decay(0.))) - self.rpn_bbox_pred = fluid.layers.conv2d( - input=conv_rpn_fpn, - num_filters=num_anchors * 4, - filter_size=1, - act=None, - name=bbox_name, - param_attr=ParamAttr( - name=bbox_share_name + '_w', - initializer=Normal(loc=0., scale=0.01)), - bias_attr=ParamAttr( - name=bbox_share_name + '_b', - learning_rate=2., - regularizer=L2Decay(0.))) - return self.rpn_cls_score, self.rpn_bbox_pred - - def _get_single_proposals(self, body_feat, im_info, feat_lvl, mode='train'): - """ - Get proposals in one level according to the output of fpn rpn head - - Args: - body_feat(Variable): the feature map from backone. - im_info(Variable): The information of image with shape [N, 3] with - format (height, width, scale). - feat_lvl(int): Indicate the level of proposals corresponding to - the feature maps. - - Returns: - rpn_rois_fpn(Variable): Output proposals with shape of (rois_num, 4). - rpn_roi_probs_fpn(Variable): Scores of proposals with - shape of (rois_num, 1). - """ - - rpn_cls_score_fpn, rpn_bbox_pred_fpn = self._get_output( - body_feat, feat_lvl) - - prop_op = self.train_proposal if mode == 'train' else self.test_proposal - if self.num_classes == 1: - rpn_cls_prob_fpn = fluid.layers.sigmoid( - rpn_cls_score_fpn, name='rpn_cls_prob_fpn' + str(feat_lvl)) - else: - rpn_cls_score_fpn = fluid.layers.transpose( - rpn_cls_score_fpn, perm=[0, 2, 3, 1]) - rpn_cls_score_fpn = fluid.layers.reshape( - rpn_cls_score_fpn, shape=(0, 0, 0, -1, self.num_classes)) - rpn_cls_prob_fpn = fluid.layers.softmax( - rpn_cls_score_fpn, - use_cudnn=False, - name='rpn_cls_prob_fpn' + str(feat_lvl)) - rpn_cls_prob_fpn = fluid.layers.slice( - rpn_cls_prob_fpn, axes=[4], starts=[1], ends=[self.num_classes]) - rpn_cls_prob_fpn, _ = fluid.layers.topk(rpn_cls_prob_fpn, 1) - rpn_cls_prob_fpn = fluid.layers.reshape( - rpn_cls_prob_fpn, shape=(0, 0, 0, -1)) - rpn_cls_prob_fpn = fluid.layers.transpose( - rpn_cls_prob_fpn, perm=[0, 3, 1, 2]) - # prop_op - rpn_rois_fpn, rpn_roi_prob_fpn = fluid.layers.generate_proposals( - scores=rpn_cls_prob_fpn, - bbox_deltas=rpn_bbox_pred_fpn, - im_info=im_info, - anchors=self.anchors, - variances=self.anchor_var, - pre_nms_top_n=prop_op.pre_nms_top_n, - post_nms_top_n=prop_op.post_nms_top_n, - nms_thresh=prop_op.nms_thresh, - min_size=prop_op.min_size, - eta=prop_op.eta) - return rpn_rois_fpn, rpn_roi_prob_fpn - - def get_proposals(self, fpn_feats, im_info, mode='train'): - """ - Get proposals in multiple levels according to the output of fpn - rpn head - - Args: - fpn_feats(dict): A dictionary represents the output feature map - of FPN with their name. - im_info(Variable): The information of image with shape [N, 3] with - format (height, width, scale). - - Return: - rois_list(Variable): Output proposals in shape of [rois_num, 4] - """ - rois_list = [] - roi_probs_list = [] - fpn_feat_names = list(fpn_feats.keys()) - for lvl in range(self.min_level, self.max_level + 1): - fpn_feat_name = fpn_feat_names[self.max_level - lvl] - fpn_feat = fpn_feats[fpn_feat_name] - rois_fpn, roi_probs_fpn = self._get_single_proposals( - fpn_feat, im_info, lvl, mode) - self.fpn_rpn_list.append((self.rpn_cls_score, self.rpn_bbox_pred)) - rois_list.append(rois_fpn) - roi_probs_list.append(roi_probs_fpn) - self.anchors_list.append(self.anchors) - self.anchor_var_list.append(self.anchor_var) - prop_op = self.train_proposal if mode == 'train' else self.test_proposal - post_nms_top_n = prop_op.post_nms_top_n - rois_collect = fluid.layers.collect_fpn_proposals( - rois_list, - roi_probs_list, - self.min_level, - self.max_level, - post_nms_top_n, - name='collect') - return rois_collect - - def _get_loss_input(self): - rpn_clses = [] - rpn_bboxes = [] - anchors = [] - anchor_vars = [] - for i in range(len(self.fpn_rpn_list)): - single_input = self._transform_input( - self.fpn_rpn_list[i][0], self.fpn_rpn_list[i][1], - self.anchors_list[i], self.anchor_var_list[i]) - rpn_clses.append(single_input[0]) - rpn_bboxes.append(single_input[1]) - anchors.append(single_input[2]) - anchor_vars.append(single_input[3]) - - rpn_cls = fluid.layers.concat(rpn_clses, axis=1) - rpn_bbox = fluid.layers.concat(rpn_bboxes, axis=1) - anchors = fluid.layers.concat(anchors) - anchor_var = fluid.layers.concat(anchor_vars) - return rpn_cls, rpn_bbox, anchors, anchor_var diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/test.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/test.py new file mode 100644 index 00000000..0a775a4f --- /dev/null +++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/test.py @@ -0,0 +1,108 @@ +import os +import shutil +import unittest + +import cv2 +import requests +import paddlehub as hub + + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://ai-studio-static-online.cdn.bcebos.com/68313e182f5e4ad9907e69dac9ece8fc50840d7ffbd24fa88396f009958f969a' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' + with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="faster_rcnn_resnet50_fpn_coco2017") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + shutil.rmtree('detection_result') + + def test_object_detection1(self): + results = self.module.object_detection( + paths=['tests/test.jpg'] + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'cat') + self.assertTrue(confidence > 0.5) + self.assertTrue(200 < left < 800) + self.assertTrue(2500 < right < 3500) + self.assertTrue(500 < top < 1500) + self.assertTrue(3500 < bottom < 4500) + + def test_object_detection2(self): + results = self.module.object_detection( + images=[cv2.imread('tests/test.jpg')] + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'cat') + self.assertTrue(confidence > 0.5) + self.assertTrue(200 < left < 800) + self.assertTrue(2500 < right < 3500) + self.assertTrue(500 < top < 1500) + self.assertTrue(3500 < bottom < 4500) + + def test_object_detection3(self): + results = self.module.object_detection( + images=[cv2.imread('tests/test.jpg')], + visualization=False + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'cat') + self.assertTrue(confidence > 0.5) + self.assertTrue(200 < left < 800) + self.assertTrue(2500 < right < 3500) + self.assertTrue(500 < top < 1500) + self.assertTrue(3500 < bottom < 4500) + + def test_object_detection4(self): + self.assertRaises( + AssertionError, + self.module.object_detection, + paths=['no.jpg'] + ) + + def test_object_detection5(self): + self.assertRaises( + cv2.error, + self.module.object_detection, + images=['test.jpg'] + ) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() -- GitLab