diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README.md b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README.md
index 9d003b8009e0b69a891bf17b1c7be1a2c3fbb04c..ef5324a7b6d42ddbd83f2fd5de1494ec16da86a6 100644
--- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README.md
+++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README.md
@@ -102,19 +102,13 @@
- ```python
- def save_inference_model(dirname,
- model_filename=None,
- params_filename=None,
- combined=True)
+ def save_inference_model(dirname)
```
- 将模型保存到指定路径。
- **参数**
- - dirname: 存在模型的目录名称;
- - model\_filename: 模型文件名称,默认为\_\_model\_\_;
- - params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效);
- - combined: 是否将参数保存到统一的一个文件中。
+ - dirname: 模型保存路径
## 四、服务部署
@@ -167,6 +161,11 @@
* 1.0.1
修复numpy数据读取问题
+
+* 1.1.0
+
+ 移除 fluid API
+
- ```shell
- $ hub install faster_rcnn_resnet50_fpn_coco2017==1.0.1
+ $ hub install faster_rcnn_resnet50_fpn_coco2017==1.1.0
```
diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README_en.md b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README_en.md
index d90beb6499a3c47d69fee654f0fac04524837af2..bf4c7274d6bf6bd3c8eedd8460187e6fbd70767e 100644
--- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README_en.md
+++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README_en.md
@@ -101,19 +101,13 @@
- ```python
- def save_inference_model(dirname,
- model_filename=None,
- params_filename=None,
- combined=True)
+ def save_inference_model(dirname)
```
- Save model to specific path
- **Parameters**
- - dirname: output dir for saving model
- - model\_filename: filename for saving model
- - params\_filename: filename for saving parameters
- - combined: whether save parameters into one file
+ - dirname: path where the model is saved
## IV.Server Deployment
@@ -166,6 +160,11 @@
* 1.0.1
Fix the problem of reading numpy
+
+* 1.1.0
+
+ Remove the fluid API
+
- ```shell
- $ hub install faster_rcnn_resnet50_fpn_coco2017==1.0.1
+ $ hub install faster_rcnn_resnet50_fpn_coco2017==1.1.0
```
diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_assigner.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_assigner.py
deleted file mode 100644
index d033382c41a5d22ad33e16a3b584e2d3a15ba358..0000000000000000000000000000000000000000
--- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_assigner.py
+++ /dev/null
@@ -1,20 +0,0 @@
-class BBoxAssigner(object):
- # __op__ = fluid.layers.generate_proposal_labels
- def __init__(self,
- batch_size_per_im=512,
- fg_fraction=.25,
- fg_thresh=.5,
- bg_thresh_hi=.5,
- bg_thresh_lo=0.,
- bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
- class_nums=81,
- shuffle_before_sample=True):
- super(BBoxAssigner, self).__init__()
- self.batch_size_per_im = batch_size_per_im
- self.fg_fraction = fg_fraction
- self.fg_thresh = fg_thresh
- self.bg_thresh_hi = bg_thresh_hi
- self.bg_thresh_lo = bg_thresh_lo
- self.bbox_reg_weights = bbox_reg_weights
- self.class_nums = class_nums
- self.use_random = shuffle_before_sample
diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_head.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_head.py
deleted file mode 100644
index 8080ed22f5057ece32f890c6bb2e44564700f9e1..0000000000000000000000000000000000000000
--- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_head.py
+++ /dev/null
@@ -1,270 +0,0 @@
-# coding=utf-8
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from collections import OrderedDict
-
-from paddle import fluid
-from paddle.fluid.param_attr import ParamAttr
-from paddle.fluid.initializer import Normal, Xavier
-from paddle.fluid.regularizer import L2Decay
-from paddle.fluid.initializer import MSRA
-
-
-class MultiClassNMS(object):
- # __op__ = fluid.layers.multiclass_nms
- def __init__(self,
- score_threshold=.05,
- nms_top_k=-1,
- keep_top_k=100,
- nms_threshold=.5,
- normalized=False,
- nms_eta=1.0,
- background_label=0):
- super(MultiClassNMS, self).__init__()
- self.score_threshold = score_threshold
- self.nms_top_k = nms_top_k
- self.keep_top_k = keep_top_k
- self.nms_threshold = nms_threshold
- self.normalized = normalized
- self.nms_eta = nms_eta
- self.background_label = background_label
-
-
-class SmoothL1Loss(object):
- '''
- Smooth L1 loss
- Args:
- sigma (float): hyper param in smooth l1 loss
- '''
-
- def __init__(self, sigma=1.0):
- super(SmoothL1Loss, self).__init__()
- self.sigma = sigma
-
- def __call__(self, x, y, inside_weight=None, outside_weight=None):
- return fluid.layers.smooth_l1(
- x,
- y,
- inside_weight=inside_weight,
- outside_weight=outside_weight,
- sigma=self.sigma)
-
-
-class BoxCoder(object):
- def __init__(self,
- prior_box_var=[0.1, 0.1, 0.2, 0.2],
- code_type='decode_center_size',
- box_normalized=False,
- axis=1):
- super(BoxCoder, self).__init__()
- self.prior_box_var = prior_box_var
- self.code_type = code_type
- self.box_normalized = box_normalized
- self.axis = axis
-
-
-class TwoFCHead(object):
- """
- RCNN head with two Fully Connected layers
-
- Args:
- mlp_dim (int): num of filters for the fc layers
- """
-
- def __init__(self, mlp_dim=1024):
- super(TwoFCHead, self).__init__()
- self.mlp_dim = mlp_dim
-
- def __call__(self, roi_feat):
- fan = roi_feat.shape[1] * roi_feat.shape[2] * roi_feat.shape[3]
-
- fc6 = fluid.layers.fc(
- input=roi_feat,
- size=self.mlp_dim,
- act='relu',
- name='fc6',
- param_attr=ParamAttr(name='fc6_w', initializer=Xavier(fan_out=fan)),
- bias_attr=ParamAttr(
- name='fc6_b', learning_rate=2., regularizer=L2Decay(0.)))
- head_feat = fluid.layers.fc(
- input=fc6,
- size=self.mlp_dim,
- act='relu',
- name='fc7',
- param_attr=ParamAttr(name='fc7_w', initializer=Xavier()),
- bias_attr=ParamAttr(
- name='fc7_b', learning_rate=2., regularizer=L2Decay(0.)))
-
- return head_feat
-
-
-class BBoxHead(object):
- """
- RCNN bbox head
-
- Args:
- head (object): the head module instance, e.g., `ResNetC5`, `TwoFCHead`
- box_coder (object): `BoxCoder` instance
- nms (object): `MultiClassNMS` instance
- num_classes: number of output classes
- """
- __inject__ = ['head', 'box_coder', 'nms', 'bbox_loss']
- __shared__ = ['num_classes']
-
- def __init__(self,
- head,
- box_coder=BoxCoder(),
- nms=MultiClassNMS(),
- bbox_loss=SmoothL1Loss(),
- num_classes=81):
- super(BBoxHead, self).__init__()
- self.head = head
- self.num_classes = num_classes
- self.box_coder = box_coder
- self.nms = nms
- self.bbox_loss = bbox_loss
- self.head_feat = None
-
- def get_head_feat(self, input=None):
- """
- Get the bbox head feature map.
- """
-
- if input is not None:
- feat = self.head(input)
- if isinstance(feat, OrderedDict):
- feat = list(feat.values())[0]
- self.head_feat = feat
- return self.head_feat
-
- def _get_output(self, roi_feat):
- """
- Get bbox head output.
-
- Args:
- roi_feat (Variable): RoI feature from RoIExtractor.
-
- Returns:
- cls_score(Variable): Output of rpn head with shape of
- [N, num_anchors, H, W].
- bbox_pred(Variable): Output of rpn head with shape of
- [N, num_anchors * 4, H, W].
- """
- head_feat = self.get_head_feat(roi_feat)
- # when ResNetC5 output a single feature map
- if not isinstance(self.head, TwoFCHead):
- head_feat = fluid.layers.pool2d(
- head_feat, pool_type='avg', global_pooling=True)
- cls_score = fluid.layers.fc(
- input=head_feat,
- size=self.num_classes,
- act=None,
- name='cls_score',
- param_attr=ParamAttr(
- name='cls_score_w', initializer=Normal(loc=0.0, scale=0.01)),
- bias_attr=ParamAttr(
- name='cls_score_b', learning_rate=2., regularizer=L2Decay(0.)))
- bbox_pred = fluid.layers.fc(
- input=head_feat,
- size=4 * self.num_classes,
- act=None,
- name='bbox_pred',
- param_attr=ParamAttr(
- name='bbox_pred_w', initializer=Normal(loc=0.0, scale=0.001)),
- bias_attr=ParamAttr(
- name='bbox_pred_b', learning_rate=2., regularizer=L2Decay(0.)))
- return cls_score, bbox_pred
-
- def get_loss(self, roi_feat, labels_int32, bbox_targets,
- bbox_inside_weights, bbox_outside_weights):
- """
- Get bbox_head loss.
-
- Args:
- roi_feat (Variable): RoI feature from RoIExtractor.
- labels_int32(Variable): Class label of a RoI with shape [P, 1].
- P is the number of RoI.
- bbox_targets(Variable): Box label of a RoI with shape
- [P, 4 * class_nums].
- bbox_inside_weights(Variable): Indicates whether a box should
- contribute to loss. Same shape as bbox_targets.
- bbox_outside_weights(Variable): Indicates whether a box should
- contribute to loss. Same shape as bbox_targets.
-
- Return:
- Type: Dict
- loss_cls(Variable): bbox_head loss.
- loss_bbox(Variable): bbox_head loss.
- """
-
- cls_score, bbox_pred = self._get_output(roi_feat)
-
- labels_int64 = fluid.layers.cast(x=labels_int32, dtype='int64')
- labels_int64.stop_gradient = True
- loss_cls = fluid.layers.softmax_with_cross_entropy(
- logits=cls_score, label=labels_int64, numeric_stable_mode=True)
- loss_cls = fluid.layers.reduce_mean(loss_cls)
- loss_bbox = self.bbox_loss(
- x=bbox_pred,
- y=bbox_targets,
- inside_weight=bbox_inside_weights,
- outside_weight=bbox_outside_weights)
- loss_bbox = fluid.layers.reduce_mean(loss_bbox)
- return {'loss_cls': loss_cls, 'loss_bbox': loss_bbox}
-
- def get_prediction(self,
- roi_feat,
- rois,
- im_info,
- im_shape,
- return_box_score=False):
- """
- Get prediction bounding box in test stage.
-
- Args:
- roi_feat (Variable): RoI feature from RoIExtractor.
- rois (Variable): Output of generate_proposals in rpn head.
- im_info (Variable): A 2-D LoDTensor with shape [B, 3]. B is the
- number of input images, each element consists of im_height,
- im_width, im_scale.
- im_shape (Variable): Actual shape of original image with shape
- [B, 3]. B is the number of images, each element consists of
- original_height, original_width, 1
-
- Returns:
- pred_result(Variable): Prediction result with shape [N, 6]. Each
- row has 6 values: [label, confidence, xmin, ymin, xmax, ymax].
- N is the total number of prediction.
- """
- cls_score, bbox_pred = self._get_output(roi_feat)
-
- im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3])
- im_scale = fluid.layers.sequence_expand(im_scale, rois)
- boxes = rois / im_scale
- cls_prob = fluid.layers.softmax(cls_score, use_cudnn=False)
- bbox_pred = fluid.layers.reshape(bbox_pred, (-1, self.num_classes, 4))
- # self.box_coder
- decoded_box = fluid.layers.box_coder(
- prior_box=boxes,
- target_box=bbox_pred,
- prior_box_var=self.box_coder.prior_box_var,
- code_type=self.box_coder.code_type,
- box_normalized=self.box_coder.box_normalized,
- axis=self.box_coder.axis)
- cliped_box = fluid.layers.box_clip(input=decoded_box, im_info=im_shape)
- if return_box_score:
- return {'bbox': cliped_box, 'score': cls_prob}
- # self.nms
- pred_result = fluid.layers.multiclass_nms(
- bboxes=cliped_box,
- scores=cls_prob,
- score_threshold=self.nms.score_threshold,
- nms_top_k=self.nms.nms_top_k,
- keep_top_k=self.nms.keep_top_k,
- nms_threshold=self.nms.nms_threshold,
- normalized=self.nms.normalized,
- nms_eta=self.nms.nms_eta,
- background_label=self.nms.background_label)
- return pred_result
diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/data_feed.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/data_feed.py
index b38501e5bf18c479f4de0de565cba26c84fa0c9d..c9e52d54c8b35a275dbe02548749796c599c85e9 100644
--- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/data_feed.py
+++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/data_feed.py
@@ -4,12 +4,9 @@ from __future__ import print_function
from __future__ import division
import os
-from collections import OrderedDict
import cv2
import numpy as np
-from PIL import Image, ImageEnhance
-from paddle import fluid
__all__ = ['test_reader']
diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/fpn.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/fpn.py
deleted file mode 100644
index bd19c712ecc9b0112685c061046812f6ff418e42..0000000000000000000000000000000000000000
--- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/fpn.py
+++ /dev/null
@@ -1,296 +0,0 @@
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import copy
-from collections import OrderedDict
-
-from paddle import fluid
-from paddle.fluid.param_attr import ParamAttr
-from paddle.fluid.initializer import Xavier
-from paddle.fluid.regularizer import L2Decay
-
-__all__ = ['ConvNorm', 'FPN']
-
-
-def ConvNorm(input,
- num_filters,
- filter_size,
- stride=1,
- groups=1,
- norm_decay=0.,
- norm_type='affine_channel',
- norm_groups=32,
- dilation=1,
- lr_scale=1,
- freeze_norm=False,
- act=None,
- norm_name=None,
- initializer=None,
- name=None):
- fan = num_filters
- conv = fluid.layers.conv2d(
- input=input,
- num_filters=num_filters,
- filter_size=filter_size,
- stride=stride,
- padding=((filter_size - 1) // 2) * dilation,
- dilation=dilation,
- groups=groups,
- act=None,
- param_attr=ParamAttr(
- name=name + "_weights",
- initializer=initializer,
- learning_rate=lr_scale),
- bias_attr=False,
- name=name + '.conv2d.output.1')
-
- norm_lr = 0. if freeze_norm else 1.
- pattr = ParamAttr(
- name=norm_name + '_scale',
- learning_rate=norm_lr * lr_scale,
- regularizer=L2Decay(norm_decay))
- battr = ParamAttr(
- name=norm_name + '_offset',
- learning_rate=norm_lr * lr_scale,
- regularizer=L2Decay(norm_decay))
-
- if norm_type in ['bn', 'sync_bn']:
- global_stats = True if freeze_norm else False
- out = fluid.layers.batch_norm(
- input=conv,
- act=act,
- name=norm_name + '.output.1',
- param_attr=pattr,
- bias_attr=battr,
- moving_mean_name=norm_name + '_mean',
- moving_variance_name=norm_name + '_variance',
- use_global_stats=global_stats)
- scale = fluid.framework._get_var(pattr.name)
- bias = fluid.framework._get_var(battr.name)
- elif norm_type == 'gn':
- out = fluid.layers.group_norm(
- input=conv,
- act=act,
- name=norm_name + '.output.1',
- groups=norm_groups,
- param_attr=pattr,
- bias_attr=battr)
- scale = fluid.framework._get_var(pattr.name)
- bias = fluid.framework._get_var(battr.name)
- elif norm_type == 'affine_channel':
- scale = fluid.layers.create_parameter(
- shape=[conv.shape[1]],
- dtype=conv.dtype,
- attr=pattr,
- default_initializer=fluid.initializer.Constant(1.))
- bias = fluid.layers.create_parameter(
- shape=[conv.shape[1]],
- dtype=conv.dtype,
- attr=battr,
- default_initializer=fluid.initializer.Constant(0.))
- out = fluid.layers.affine_channel(
- x=conv, scale=scale, bias=bias, act=act)
- if freeze_norm:
- scale.stop_gradient = True
- bias.stop_gradient = True
- return out
-
-
-class FPN(object):
- """
- Feature Pyramid Network, see https://arxiv.org/abs/1612.03144
-
- Args:
- num_chan (int): number of feature channels
- min_level (int): lowest level of the backbone feature map to use
- max_level (int): highest level of the backbone feature map to use
- spatial_scale (list): feature map scaling factor
- has_extra_convs (bool): whether has extral convolutions in higher levels
- norm_type (str|None): normalization type, 'bn'/'sync_bn'/'affine_channel'
- """
- __shared__ = ['norm_type', 'freeze_norm']
-
- def __init__(self,
- num_chan=256,
- min_level=2,
- max_level=6,
- spatial_scale=[1. / 32., 1. / 16., 1. / 8., 1. / 4.],
- has_extra_convs=False,
- norm_type=None,
- freeze_norm=False):
- self.freeze_norm = freeze_norm
- self.num_chan = num_chan
- self.min_level = min_level
- self.max_level = max_level
- self.spatial_scale = spatial_scale
- self.has_extra_convs = has_extra_convs
- self.norm_type = norm_type
-
- def _add_topdown_lateral(self, body_name, body_input, upper_output):
- lateral_name = 'fpn_inner_' + body_name + '_lateral'
- topdown_name = 'fpn_topdown_' + body_name
- fan = body_input.shape[1]
- if self.norm_type:
- initializer = Xavier(fan_out=fan)
- lateral = ConvNorm(
- body_input,
- self.num_chan,
- 1,
- initializer=initializer,
- norm_type=self.norm_type,
- freeze_norm=self.freeze_norm,
- name=lateral_name,
- norm_name=lateral_name)
- else:
- lateral = fluid.layers.conv2d(
- body_input,
- self.num_chan,
- 1,
- param_attr=ParamAttr(
- name=lateral_name + "_w", initializer=Xavier(fan_out=fan)),
- bias_attr=ParamAttr(
- name=lateral_name + "_b",
- learning_rate=2.,
- regularizer=L2Decay(0.)),
- name=lateral_name)
- topdown = fluid.layers.resize_nearest(
- upper_output, scale=2., name=topdown_name)
- return lateral + topdown
-
- def get_output(self, body_dict):
- """
- Add FPN onto backbone.
-
- Args:
- body_dict(OrderedDict): Dictionary of variables and each element is the
- output of backbone.
-
- Return:
- fpn_dict(OrderedDict): A dictionary represents the output of FPN with
- their name.
- spatial_scale(list): A list of multiplicative spatial scale factor.
- """
- spatial_scale = copy.deepcopy(self.spatial_scale)
- body_name_list = list(body_dict.keys())[::-1]
- num_backbone_stages = len(body_name_list)
- self.fpn_inner_output = [[] for _ in range(num_backbone_stages)]
- fpn_inner_name = 'fpn_inner_' + body_name_list[0]
- body_input = body_dict[body_name_list[0]]
- fan = body_input.shape[1]
- if self.norm_type:
- initializer = Xavier(fan_out=fan)
- self.fpn_inner_output[0] = ConvNorm(
- body_input,
- self.num_chan,
- 1,
- initializer=initializer,
- norm_type=self.norm_type,
- freeze_norm=self.freeze_norm,
- name=fpn_inner_name,
- norm_name=fpn_inner_name)
- else:
- self.fpn_inner_output[0] = fluid.layers.conv2d(
- body_input,
- self.num_chan,
- 1,
- param_attr=ParamAttr(
- name=fpn_inner_name + "_w",
- initializer=Xavier(fan_out=fan)),
- bias_attr=ParamAttr(
- name=fpn_inner_name + "_b",
- learning_rate=2.,
- regularizer=L2Decay(0.)),
- name=fpn_inner_name)
- for i in range(1, num_backbone_stages):
- body_name = body_name_list[i]
- body_input = body_dict[body_name]
- top_output = self.fpn_inner_output[i - 1]
- fpn_inner_single = self._add_topdown_lateral(
- body_name, body_input, top_output)
- self.fpn_inner_output[i] = fpn_inner_single
- fpn_dict = {}
- fpn_name_list = []
- for i in range(num_backbone_stages):
- fpn_name = 'fpn_' + body_name_list[i]
- fan = self.fpn_inner_output[i].shape[1] * 3 * 3
- if self.norm_type:
- initializer = Xavier(fan_out=fan)
- fpn_output = ConvNorm(
- self.fpn_inner_output[i],
- self.num_chan,
- 3,
- initializer=initializer,
- norm_type=self.norm_type,
- freeze_norm=self.freeze_norm,
- name=fpn_name,
- norm_name=fpn_name)
- else:
- fpn_output = fluid.layers.conv2d(
- self.fpn_inner_output[i],
- self.num_chan,
- filter_size=3,
- padding=1,
- param_attr=ParamAttr(
- name=fpn_name + "_w", initializer=Xavier(fan_out=fan)),
- bias_attr=ParamAttr(
- name=fpn_name + "_b",
- learning_rate=2.,
- regularizer=L2Decay(0.)),
- name=fpn_name)
- fpn_dict[fpn_name] = fpn_output
- fpn_name_list.append(fpn_name)
- if not self.has_extra_convs and self.max_level - self.min_level == len(
- spatial_scale):
- body_top_name = fpn_name_list[0]
- body_top_extension = fluid.layers.pool2d(
- fpn_dict[body_top_name],
- 1,
- 'max',
- pool_stride=2,
- name=body_top_name + '_subsampled_2x')
- fpn_dict[body_top_name + '_subsampled_2x'] = body_top_extension
- fpn_name_list.insert(0, body_top_name + '_subsampled_2x')
- spatial_scale.insert(0, spatial_scale[0] * 0.5)
- # Coarser FPN levels introduced for RetinaNet
- highest_backbone_level = self.min_level + len(spatial_scale) - 1
- if self.has_extra_convs and self.max_level > highest_backbone_level:
- fpn_blob = body_dict[body_name_list[0]]
- for i in range(highest_backbone_level + 1, self.max_level + 1):
- fpn_blob_in = fpn_blob
- fpn_name = 'fpn_' + str(i)
- if i > highest_backbone_level + 1:
- fpn_blob_in = fluid.layers.relu(fpn_blob)
- fan = fpn_blob_in.shape[1] * 3 * 3
- fpn_blob = fluid.layers.conv2d(
- input=fpn_blob_in,
- num_filters=self.num_chan,
- filter_size=3,
- stride=2,
- padding=1,
- param_attr=ParamAttr(
- name=fpn_name + "_w", initializer=Xavier(fan_out=fan)),
- bias_attr=ParamAttr(
- name=fpn_name + "_b",
- learning_rate=2.,
- regularizer=L2Decay(0.)),
- name=fpn_name)
- fpn_dict[fpn_name] = fpn_blob
- fpn_name_list.insert(0, fpn_name)
- spatial_scale.insert(0, spatial_scale[0] * 0.5)
- res_dict = OrderedDict([(k, fpn_dict[k]) for k in fpn_name_list])
- return res_dict, spatial_scale
diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/module.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/module.py
index b8dd5afa497e050fea39a6bcc02c9d0ef7dd6caf..650491894db381d1bbc82a3baa3534b6eefd3e02 100644
--- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/module.py
+++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/module.py
@@ -6,41 +6,32 @@ from __future__ import print_function
import os
import ast
import argparse
-from collections import OrderedDict
-from functools import partial
from math import ceil
+import paddle
import numpy as np
-import paddle.fluid as fluid
-import paddlehub as hub
+import paddle.jit
+import paddle.static
from paddlehub.module.module import moduleinfo, runnable, serving
-from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
-from paddlehub.io.parser import txt_parser
-from paddlehub.common.paddle_helper import add_vars_prefix
-
-from faster_rcnn_resnet50_fpn_coco2017.processor import load_label_info, postprocess, base64_to_cv2
-from faster_rcnn_resnet50_fpn_coco2017.data_feed import test_reader, padding_minibatch
-from faster_rcnn_resnet50_fpn_coco2017.fpn import FPN
-from faster_rcnn_resnet50_fpn_coco2017.resnet import ResNet
-from faster_rcnn_resnet50_fpn_coco2017.rpn_head import AnchorGenerator, RPNTargetAssign, GenerateProposals, FPNRPNHead
-from faster_rcnn_resnet50_fpn_coco2017.bbox_head import MultiClassNMS, BBoxHead, TwoFCHead
-from faster_rcnn_resnet50_fpn_coco2017.bbox_assigner import BBoxAssigner
-from faster_rcnn_resnet50_fpn_coco2017.roi_extractor import FPNRoIAlign
+from paddle.inference import Config, create_predictor
+from paddlehub.utils.parser import txt_parser
+from .processor import load_label_info, postprocess, base64_to_cv2
+from .data_feed import test_reader, padding_minibatch
@moduleinfo(
name="faster_rcnn_resnet50_fpn_coco2017",
- version="1.0.1",
+ version="1.1.0",
type="cv/object_detection",
summary=
"Baidu's Faster-RCNN model for object detection, whose backbone is ResNet50, processed with Feature Pyramid Networks",
author="paddlepaddle",
author_email="paddle-dev@baidu.com")
-class FasterRCNNResNet50RPN(hub.Module):
- def _initialize(self):
+class FasterRCNNResNet50RPN:
+ def __init__(self):
# default pretrained model, Faster-RCNN with backbone ResNet50, shape of input tensor is [3, 800, 1333]
self.default_pretrained_model_path = os.path.join(
- self.directory, "faster_rcnn_resnet50_fpn_model")
+ self.directory, "faster_rcnn_resnet50_fpn_model", "model")
self.label_names = load_label_info(
os.path.join(self.directory, "label_file.txt"))
self._set_config()
@@ -49,10 +40,12 @@ class FasterRCNNResNet50RPN(hub.Module):
"""
predictor config setting
"""
- cpu_config = AnalysisConfig(self.default_pretrained_model_path)
+ model = self.default_pretrained_model_path+'.pdmodel'
+ params = self.default_pretrained_model_path+'.pdiparams'
+ cpu_config = Config(model, params)
cpu_config.disable_glog_info()
cpu_config.disable_gpu()
- self.cpu_predictor = create_paddle_predictor(cpu_config)
+ self.cpu_predictor = create_predictor(cpu_config)
try:
_places = os.environ["CUDA_VISIBLE_DEVICES"]
@@ -61,245 +54,14 @@ class FasterRCNNResNet50RPN(hub.Module):
except:
use_gpu = False
if use_gpu:
- gpu_config = AnalysisConfig(self.default_pretrained_model_path)
+ gpu_config = Config(model, params)
gpu_config.disable_glog_info()
gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0)
- self.gpu_predictor = create_paddle_predictor(gpu_config)
-
- def context(self,
- num_classes=81,
- trainable=True,
- pretrained=True,
- phase='train'):
- """
- Distill the Head Features, so as to perform transfer learning.
-
- Args:
- num_classes (int): number of categories
- trainable (bool): whether to set parameters trainable.
- pretrained (bool): whether to load default pretrained model.
- phase (str): optional choices are 'train' and 'predict'.
-
- Returns:
- inputs (dict): the input variables.
- outputs (dict): the output variables.
- context_prog (Program): the program to execute transfer learning.
- """
- context_prog = fluid.Program()
- startup_program = fluid.Program()
- with fluid.program_guard(context_prog, startup_program):
- with fluid.unique_name.guard():
- image = fluid.layers.data(
- name='image', shape=[-1, 3, -1, -1], dtype='float32')
- # backbone
- backbone = ResNet(
- norm_type='affine_channel',
- depth=50,
- feature_maps=[2, 3, 4, 5],
- freeze_at=2)
- body_feats = backbone(image)
- # fpn
- fpn = FPN(
- max_level=6,
- min_level=2,
- num_chan=256,
- spatial_scale=[0.03125, 0.0625, 0.125, 0.25])
- var_prefix = '@HUB_{}@'.format(self.name)
- im_info = fluid.layers.data(
- name='im_info', shape=[3], dtype='float32', lod_level=0)
- im_shape = fluid.layers.data(
- name='im_shape', shape=[3], dtype='float32', lod_level=0)
- body_feat_names = list(body_feats.keys())
- body_feats, spatial_scale = fpn.get_output(body_feats)
- # rpn_head: RPNHead
- rpn_head = self.rpn_head()
- rois = rpn_head.get_proposals(body_feats, im_info, mode=phase)
- # train
- if phase == 'train':
- gt_bbox = fluid.layers.data(
- name='gt_bbox', shape=[4], dtype='float32', lod_level=1)
- is_crowd = fluid.layers.data(
- name='is_crowd', shape=[1], dtype='int32', lod_level=1)
- gt_class = fluid.layers.data(
- name='gt_class', shape=[1], dtype='int32', lod_level=1)
- rpn_loss = rpn_head.get_loss(im_info, gt_bbox, is_crowd)
- # bbox_assigner: BBoxAssigner
- bbox_assigner = self.bbox_assigner(num_classes)
- outs = fluid.layers.generate_proposal_labels(
- rpn_rois=rois,
- gt_classes=gt_class,
- is_crowd=is_crowd,
- gt_boxes=gt_bbox,
- im_info=im_info,
- batch_size_per_im=bbox_assigner.batch_size_per_im,
- fg_fraction=bbox_assigner.fg_fraction,
- fg_thresh=bbox_assigner.fg_thresh,
- bg_thresh_hi=bbox_assigner.bg_thresh_hi,
- bg_thresh_lo=bbox_assigner.bg_thresh_lo,
- bbox_reg_weights=bbox_assigner.bbox_reg_weights,
- class_nums=bbox_assigner.class_nums,
- use_random=bbox_assigner.use_random)
- rois = outs[0]
-
- roi_extractor = self.roi_extractor()
- roi_feat = roi_extractor(
- head_inputs=body_feats,
- rois=rois,
- spatial_scale=spatial_scale)
- # head_feat
- bbox_head = self.bbox_head(num_classes)
- head_feat = bbox_head.head(roi_feat)
- if isinstance(head_feat, OrderedDict):
- head_feat = list(head_feat.values())[0]
- if phase == 'train':
- inputs = {
- 'image': var_prefix + image.name,
- 'im_info': var_prefix + im_info.name,
- 'im_shape': var_prefix + im_shape.name,
- 'gt_class': var_prefix + gt_class.name,
- 'gt_bbox': var_prefix + gt_bbox.name,
- 'is_crowd': var_prefix + is_crowd.name
- }
- outputs = {
- 'head_features':
- var_prefix + head_feat.name,
- 'rpn_cls_loss':
- var_prefix + rpn_loss['rpn_cls_loss'].name,
- 'rpn_reg_loss':
- var_prefix + rpn_loss['rpn_reg_loss'].name,
- 'generate_proposal_labels':
- [var_prefix + var.name for var in outs]
- }
- elif phase == 'predict':
- pred = bbox_head.get_prediction(roi_feat, rois, im_info,
- im_shape)
- inputs = {
- 'image': var_prefix + image.name,
- 'im_info': var_prefix + im_info.name,
- 'im_shape': var_prefix + im_shape.name
- }
- outputs = {
- 'head_features': var_prefix + head_feat.name,
- 'rois': var_prefix + rois.name,
- 'bbox_out': var_prefix + pred.name
- }
- add_vars_prefix(context_prog, var_prefix)
- add_vars_prefix(startup_program, var_prefix)
-
- global_vars = context_prog.global_block().vars
- inputs = {
- key: global_vars[value]
- for key, value in inputs.items()
- }
- outputs = {
- key: global_vars[value] if not isinstance(value, list) else
- [global_vars[var] for var in value]
- for key, value in outputs.items()
- }
-
- for param in context_prog.global_block().iter_parameters():
- param.trainable = trainable
-
- place = fluid.CPUPlace()
- exe = fluid.Executor(place)
- exe.run(startup_program)
- if pretrained:
-
- def _if_exist(var):
- if num_classes != 81:
- if 'bbox_pred' in var.name or 'cls_score' in var.name:
- return False
- return os.path.exists(
- os.path.join(self.default_pretrained_model_path,
- var.name))
-
- fluid.io.load_vars(
- exe,
- self.default_pretrained_model_path,
- predicate=_if_exist)
- return inputs, outputs, context_prog
-
- def rpn_head(self):
- return FPNRPNHead(
- anchor_generator=AnchorGenerator(
- anchor_sizes=[32, 64, 128, 256, 512],
- aspect_ratios=[0.5, 1.0, 2.0],
- stride=[16.0, 16.0],
- variance=[1.0, 1.0, 1.0, 1.0]),
- rpn_target_assign=RPNTargetAssign(
- rpn_batch_size_per_im=256,
- rpn_fg_fraction=0.5,
- rpn_negative_overlap=0.3,
- rpn_positive_overlap=0.7,
- rpn_straddle_thresh=0.0),
- train_proposal=GenerateProposals(
- min_size=0.0,
- nms_thresh=0.7,
- post_nms_top_n=2000,
- pre_nms_top_n=2000),
- test_proposal=GenerateProposals(
- min_size=0.0,
- nms_thresh=0.7,
- post_nms_top_n=1000,
- pre_nms_top_n=1000),
- anchor_start_size=32,
- num_chan=256,
- min_level=2,
- max_level=6)
-
- def roi_extractor(self):
- return FPNRoIAlign(
- canconical_level=4,
- canonical_size=224,
- max_level=5,
- min_level=2,
- box_resolution=7,
- sampling_ratio=2)
-
- def bbox_head(self, num_classes):
- return BBoxHead(
- head=TwoFCHead(mlp_dim=1024),
- nms=MultiClassNMS(
- keep_top_k=100, nms_threshold=0.5, score_threshold=0.05),
- num_classes=num_classes)
-
- def bbox_assigner(self, num_classes):
- return BBoxAssigner(
- batch_size_per_im=512,
- bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
- bg_thresh_hi=0.5,
- bg_thresh_lo=0.0,
- fg_fraction=0.25,
- fg_thresh=0.5,
- class_nums=num_classes)
-
- def save_inference_model(self,
- dirname,
- model_filename=None,
- params_filename=None,
- combined=True):
- if combined:
- model_filename = "__model__" if not model_filename else model_filename
- params_filename = "__params__" if not params_filename else params_filename
- place = fluid.CPUPlace()
- exe = fluid.Executor(place)
-
- program, feeded_var_names, target_vars = fluid.io.load_inference_model(
- dirname=self.default_pretrained_model_path, executor=exe)
-
- fluid.io.save_inference_model(
- dirname=dirname,
- main_program=program,
- executor=exe,
- feeded_var_names=feeded_var_names,
- target_vars=target_vars,
- model_filename=model_filename,
- params_filename=params_filename)
+ self.gpu_predictor = create_predictor(gpu_config)
def object_detection(self,
paths=None,
images=None,
- data=None,
use_gpu=False,
batch_size=1,
output_dir='detection_result',
@@ -337,8 +99,6 @@ class FasterRCNNResNet50RPN(hub.Module):
)
paths = paths if paths else list()
- if data and 'image' in data:
- paths += data['image']
all_images = list()
for yield_data in test_reader(paths, images):
@@ -360,29 +120,37 @@ class FasterRCNNResNet50RPN(hub.Module):
padding_image, padding_info, padding_shape = padding_minibatch(
batch_data, coarsest_stride=32, use_padded_im_info=True)
- padding_image_tensor = PaddleTensor(padding_image.copy())
- padding_info_tensor = PaddleTensor(padding_info.copy())
- padding_shape_tensor = PaddleTensor(padding_shape.copy())
feed_list = [
- padding_image_tensor, padding_info_tensor, padding_shape_tensor
+ padding_image, padding_info, padding_shape
]
- if use_gpu:
- data_out = self.gpu_predictor.run(feed_list)
- else:
- data_out = self.cpu_predictor.run(feed_list)
+ predictor = self.gpu_predictor if use_gpu else self.cpu_predictor
+
+ feed_list = [
+ padding_image, padding_info, padding_shape
+ ]
+
+ input_names = predictor.get_input_names()
+
+ for i, input_name in enumerate(input_names):
+ data = np.asarray(feed_list[i], dtype=np.float32)
+ handle = predictor.get_input_handle(input_name)
+ handle.copy_from_cpu(data)
+
+ predictor.run()
+ output_names = predictor.get_output_names()
+ output_handle = predictor.get_output_handle(output_names[0])
output = postprocess(
paths=paths,
images=images,
- data_out=data_out,
+ data_out=output_handle,
score_thresh=score_thresh,
label_names=self.label_names,
output_dir=output_dir,
handle_id=handle_id,
visualization=visualization)
res += output
-
return res
def add_module_config_arg(self):
diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/name_adapter.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/name_adapter.py
deleted file mode 100644
index bebf8bdeeec3aa76357d95cc52ba5a009e19d46f..0000000000000000000000000000000000000000
--- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/name_adapter.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# coding=utf-8
-
-
-class NameAdapter(object):
- """Fix the backbones variable names for pretrained weight"""
-
- def __init__(self, model):
- super(NameAdapter, self).__init__()
- self.model = model
-
- @property
- def model_type(self):
- return getattr(self.model, '_model_type', '')
-
- @property
- def variant(self):
- return getattr(self.model, 'variant', '')
-
- def fix_conv_norm_name(self, name):
- if name == "conv1":
- bn_name = "bn_" + name
- else:
- bn_name = "bn" + name[3:]
- # the naming rule is same as pretrained weight
- if self.model_type == 'SEResNeXt':
- bn_name = name + "_bn"
- return bn_name
-
- def fix_shortcut_name(self, name):
- if self.model_type == 'SEResNeXt':
- name = 'conv' + name + '_prj'
- return name
-
- def fix_bottleneck_name(self, name):
- if self.model_type == 'SEResNeXt':
- conv_name1 = 'conv' + name + '_x1'
- conv_name2 = 'conv' + name + '_x2'
- conv_name3 = 'conv' + name + '_x3'
- shortcut_name = name
- else:
- conv_name1 = name + "_branch2a"
- conv_name2 = name + "_branch2b"
- conv_name3 = name + "_branch2c"
- shortcut_name = name + "_branch1"
- return conv_name1, conv_name2, conv_name3, shortcut_name
-
- def fix_layer_warp_name(self, stage_num, count, i):
- name = 'res' + str(stage_num)
- if count > 10 and stage_num == 4:
- if i == 0:
- conv_name = name + "a"
- else:
- conv_name = name + "b" + str(i)
- else:
- conv_name = name + chr(ord("a") + i)
- if self.model_type == 'SEResNeXt':
- conv_name = str(stage_num + 2) + '_' + str(i + 1)
- return conv_name
-
- def fix_c1_stage_name(self):
- return "res_conv1" if self.model_type == 'ResNeXt' else "conv1"
diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/nonlocal_helper.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/nonlocal_helper.py
deleted file mode 100644
index 599b8dfa07b6372f25990649212b232c523e19de..0000000000000000000000000000000000000000
--- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/nonlocal_helper.py
+++ /dev/null
@@ -1,154 +0,0 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-from __future__ import unicode_literals
-
-import paddle.fluid as fluid
-from paddle.fluid import ParamAttr
-
-nonlocal_params = {
- "use_zero_init_conv": False,
- "conv_init_std": 0.01,
- "no_bias": True,
- "use_maxpool": False,
- "use_softmax": True,
- "use_bn": False,
- "use_scale": True, # vital for the model prformance!!!
- "use_affine": False,
- "bn_momentum": 0.9,
- "bn_epsilon": 1.0000001e-5,
- "bn_init_gamma": 0.9,
- "weight_decay_bn": 1.e-4,
-}
-
-
-def space_nonlocal(input, dim_in, dim_out, prefix, dim_inner,
- max_pool_stride=2):
- cur = input
- theta = fluid.layers.conv2d(input = cur, num_filters = dim_inner, \
- filter_size = [1, 1], stride = [1, 1], \
- padding = [0, 0], \
- param_attr=ParamAttr(name = prefix + '_theta' + "_w", \
- initializer = fluid.initializer.Normal(loc = 0.0,
- scale = nonlocal_params["conv_init_std"])), \
- bias_attr = ParamAttr(name = prefix + '_theta' + "_b", \
- initializer = fluid.initializer.Constant(value = 0.)) \
- if not nonlocal_params["no_bias"] else False, \
- name = prefix + '_theta')
- theta_shape = theta.shape
- theta_shape_op = fluid.layers.shape(theta)
- theta_shape_op.stop_gradient = True
-
- if nonlocal_params["use_maxpool"]:
- max_pool = fluid.layers.pool2d(input = cur, \
- pool_size = [max_pool_stride, max_pool_stride], \
- pool_type = 'max', \
- pool_stride = [max_pool_stride, max_pool_stride], \
- pool_padding = [0, 0], \
- name = prefix + '_pool')
- else:
- max_pool = cur
-
- phi = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \
- filter_size = [1, 1], stride = [1, 1], \
- padding = [0, 0], \
- param_attr = ParamAttr(name = prefix + '_phi' + "_w", \
- initializer = fluid.initializer.Normal(loc = 0.0,
- scale = nonlocal_params["conv_init_std"])), \
- bias_attr = ParamAttr(name = prefix + '_phi' + "_b", \
- initializer = fluid.initializer.Constant(value = 0.)) \
- if (nonlocal_params["no_bias"] == 0) else False, \
- name = prefix + '_phi')
- phi_shape = phi.shape
-
- g = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \
- filter_size = [1, 1], stride = [1, 1], \
- padding = [0, 0], \
- param_attr = ParamAttr(name = prefix + '_g' + "_w", \
- initializer = fluid.initializer.Normal(loc = 0.0, scale = nonlocal_params["conv_init_std"])), \
- bias_attr = ParamAttr(name = prefix + '_g' + "_b", \
- initializer = fluid.initializer.Constant(value = 0.)) if (nonlocal_params["no_bias"] == 0) else False, \
- name = prefix + '_g')
- g_shape = g.shape
- # we have to use explicit batch size (to support arbitrary spacetime size)
- # e.g. (8, 1024, 4, 14, 14) => (8, 1024, 784)
- theta = fluid.layers.reshape(theta, shape=(0, 0, -1))
- theta = fluid.layers.transpose(theta, [0, 2, 1])
- phi = fluid.layers.reshape(phi, [0, 0, -1])
- theta_phi = fluid.layers.matmul(theta, phi, name=prefix + '_affinity')
- g = fluid.layers.reshape(g, [0, 0, -1])
-
- if nonlocal_params["use_softmax"]:
- if nonlocal_params["use_scale"]:
- theta_phi_sc = fluid.layers.scale(theta_phi, scale=dim_inner**-.5)
- else:
- theta_phi_sc = theta_phi
- p = fluid.layers.softmax(
- theta_phi_sc, name=prefix + '_affinity' + '_prob')
- else:
- # not clear about what is doing in xlw's code
- p = None # not implemented
- raise "Not implemented when not use softmax"
-
- # note g's axis[2] corresponds to p's axis[2]
- # e.g. g(8, 1024, 784_2) * p(8, 784_1, 784_2) => (8, 1024, 784_1)
- p = fluid.layers.transpose(p, [0, 2, 1])
- t = fluid.layers.matmul(g, p, name=prefix + '_y')
-
- # reshape back
- # e.g. (8, 1024, 784) => (8, 1024, 4, 14, 14)
- t_shape = t.shape
- t_re = fluid.layers.reshape(
- t, shape=list(theta_shape), actual_shape=theta_shape_op)
- blob_out = t_re
- blob_out = fluid.layers.conv2d(input = blob_out, num_filters = dim_out, \
- filter_size = [1, 1], stride = [1, 1], padding = [0, 0], \
- param_attr = ParamAttr(name = prefix + '_out' + "_w", \
- initializer = fluid.initializer.Constant(value = 0.) \
- if nonlocal_params["use_zero_init_conv"] \
- else fluid.initializer.Normal(loc = 0.0,
- scale = nonlocal_params["conv_init_std"])), \
- bias_attr = ParamAttr(name = prefix + '_out' + "_b", \
- initializer = fluid.initializer.Constant(value = 0.)) \
- if (nonlocal_params["no_bias"] == 0) else False, \
- name = prefix + '_out')
- blob_out_shape = blob_out.shape
-
- if nonlocal_params["use_bn"]:
- bn_name = prefix + "_bn"
- blob_out = fluid.layers.batch_norm(blob_out, \
- # is_test = test_mode, \
- momentum = nonlocal_params["bn_momentum"], \
- epsilon = nonlocal_params["bn_epsilon"], \
- name = bn_name, \
- param_attr = ParamAttr(name = bn_name + "_s", \
- initializer = fluid.initializer.Constant(value = nonlocal_params["bn_init_gamma"]), \
- regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \
- bias_attr = ParamAttr(name = bn_name + "_b", \
- regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \
- moving_mean_name = bn_name + "_rm", \
- moving_variance_name = bn_name + "_riv") # add bn
-
- if nonlocal_params["use_affine"]:
- affine_scale = fluid.layers.create_parameter(\
- shape=[blob_out_shape[1]], dtype = blob_out.dtype, \
- attr=ParamAttr(name=prefix + '_affine' + '_s'), \
- default_initializer = fluid.initializer.Constant(value = 1.))
- affine_bias = fluid.layers.create_parameter(\
- shape=[blob_out_shape[1]], dtype = blob_out.dtype, \
- attr=ParamAttr(name=prefix + '_affine' + '_b'), \
- default_initializer = fluid.initializer.Constant(value = 0.))
- blob_out = fluid.layers.affine_channel(blob_out, scale = affine_scale, \
- bias = affine_bias, name = prefix + '_affine') # add affine
-
- return blob_out
-
-
-def add_space_nonlocal(input, dim_in, dim_out, prefix, dim_inner):
- '''
- add_space_nonlocal:
- Non-local Neural Networks: see https://arxiv.org/abs/1711.07971
- '''
- conv = space_nonlocal(input, dim_in, dim_out, prefix, dim_inner)
- output = fluid.layers.elementwise_add(input, conv, name=prefix + '_sum')
- return output
diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/processor.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/processor.py
index 2b3e1ce9c6fd4ae85dd69a43f2a359192a92bac5..f15245643f54635f4bf48b9fb0888d455bd4bd02 100644
--- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/processor.py
+++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/processor.py
@@ -12,7 +12,6 @@ __all__ = [
'postprocess',
]
-
def base64_to_cv2(b64str):
data = base64.b64decode(b64str.encode('utf8'))
data = np.fromstring(data, np.uint8)
@@ -107,7 +106,7 @@ def postprocess(paths,
handle_id,
visualization=True):
"""
- postprocess the lod_tensor produced by fluid.Executor.run
+ postprocess the LoD output tensor handle obtained from the inference predictor
Args:
paths (list[str]): the path of images.
@@ -130,9 +129,8 @@ def postprocess(paths,
confidence (float): The confidence of detection result.
save_path (str): The path to save output images.
"""
- lod_tensor = data_out[0]
- lod = lod_tensor.lod[0]
- results = lod_tensor.as_ndarray()
+ lod = data_out.lod()[0]
+ results = data_out.copy_to_cpu()
check_dir(output_dir)
diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/resnet.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/resnet.py
deleted file mode 100644
index 4bd6fb61e9a51c70b6197b25a7cf788c2ca5b790..0000000000000000000000000000000000000000
--- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/resnet.py
+++ /dev/null
@@ -1,447 +0,0 @@
-# coding=utf-8
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-from collections import OrderedDict
-from numbers import Integral
-
-from paddle import fluid
-from paddle.fluid.param_attr import ParamAttr
-from paddle.fluid.framework import Variable
-from paddle.fluid.regularizer import L2Decay
-from paddle.fluid.initializer import Constant
-
-from .nonlocal_helper import add_space_nonlocal
-from .name_adapter import NameAdapter
-
-__all__ = ['ResNet', 'ResNetC5']
-
-
-class ResNet(object):
- """
- Residual Network, see https://arxiv.org/abs/1512.03385
- Args:
- depth (int): ResNet depth, should be 34, 50.
- freeze_at (int): freeze the backbone at which stage
- norm_type (str): normalization type, 'bn'/'sync_bn'/'affine_channel'
- freeze_norm (bool): freeze normalization layers
- norm_decay (float): weight decay for normalization layer weights
- variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently
- feature_maps (list): index of stages whose feature maps are returned
- dcn_v2_stages (list): index of stages who select deformable conv v2
- nonlocal_stages (list): index of stages who select nonlocal networks
- """
- __shared__ = ['norm_type', 'freeze_norm', 'weight_prefix_name']
-
- def __init__(self,
- depth=50,
- freeze_at=0,
- norm_type='sync_bn',
- freeze_norm=False,
- norm_decay=0.,
- variant='b',
- feature_maps=[3, 4, 5],
- dcn_v2_stages=[],
- weight_prefix_name='',
- nonlocal_stages=[],
- get_prediction=False,
- class_dim=1000):
- super(ResNet, self).__init__()
-
- if isinstance(feature_maps, Integral):
- feature_maps = [feature_maps]
-
- assert depth in [34, 50], \
- "depth {} not in [34, 50]"
- assert variant in ['a', 'b', 'c', 'd'], "invalid ResNet variant"
- assert 0 <= freeze_at <= 4, "freeze_at should be 0, 1, 2, 3 or 4"
- assert len(feature_maps) > 0, "need one or more feature maps"
- assert norm_type in ['bn', 'sync_bn', 'affine_channel']
- assert not (len(nonlocal_stages)>0 and depth<50), \
- "non-local is not supported for resnet18 or resnet34"
-
- self.depth = depth
- self.freeze_at = freeze_at
- self.norm_type = norm_type
- self.norm_decay = norm_decay
- self.freeze_norm = freeze_norm
- self.variant = variant
- self._model_type = 'ResNet'
- self.feature_maps = feature_maps
- self.dcn_v2_stages = dcn_v2_stages
- self.depth_cfg = {
- 34: ([3, 4, 6, 3], self.basicblock),
- 50: ([3, 4, 6, 3], self.bottleneck),
- }
- self.stage_filters = [64, 128, 256, 512]
- self._c1_out_chan_num = 64
- self.na = NameAdapter(self)
- self.prefix_name = weight_prefix_name
-
- self.nonlocal_stages = nonlocal_stages
- self.nonlocal_mod_cfg = {
- 50: 2,
- 101: 5,
- 152: 8,
- 200: 12,
- }
- self.get_prediction = get_prediction
- self.class_dim = class_dim
-
- def _conv_offset(self,
- input,
- filter_size,
- stride,
- padding,
- act=None,
- name=None):
- out_channel = filter_size * filter_size * 3
- out = fluid.layers.conv2d(
- input,
- num_filters=out_channel,
- filter_size=filter_size,
- stride=stride,
- padding=padding,
- param_attr=ParamAttr(initializer=Constant(0.0), name=name + ".w_0"),
- bias_attr=ParamAttr(initializer=Constant(0.0), name=name + ".b_0"),
- act=act,
- name=name)
- return out
-
- def _conv_norm(self,
- input,
- num_filters,
- filter_size,
- stride=1,
- groups=1,
- act=None,
- name=None,
- dcn_v2=False):
- _name = self.prefix_name + name if self.prefix_name != '' else name
- if not dcn_v2:
- conv = fluid.layers.conv2d(
- input=input,
- num_filters=num_filters,
- filter_size=filter_size,
- stride=stride,
- padding=(filter_size - 1) // 2,
- groups=groups,
- act=None,
- param_attr=ParamAttr(name=_name + "_weights"),
- bias_attr=False,
- name=_name + '.conv2d.output.1')
- else:
- # select deformable conv"
- offset_mask = self._conv_offset(
- input=input,
- filter_size=filter_size,
- stride=stride,
- padding=(filter_size - 1) // 2,
- act=None,
- name=_name + "_conv_offset")
- offset_channel = filter_size**2 * 2
- mask_channel = filter_size**2
- offset, mask = fluid.layers.split(
- input=offset_mask,
- num_or_sections=[offset_channel, mask_channel],
- dim=1)
- mask = fluid.layers.sigmoid(mask)
- conv = fluid.layers.deformable_conv(
- input=input,
- offset=offset,
- mask=mask,
- num_filters=num_filters,
- filter_size=filter_size,
- stride=stride,
- padding=(filter_size - 1) // 2,
- groups=groups,
- deformable_groups=1,
- im2col_step=1,
- param_attr=ParamAttr(name=_name + "_weights"),
- bias_attr=False,
- name=_name + ".conv2d.output.1")
-
- bn_name = self.na.fix_conv_norm_name(name)
- bn_name = self.prefix_name + bn_name if self.prefix_name != '' else bn_name
-
- norm_lr = 0. if self.freeze_norm else 1.
- norm_decay = self.norm_decay
- pattr = ParamAttr(
- name=bn_name + '_scale',
- learning_rate=norm_lr,
- regularizer=L2Decay(norm_decay))
- battr = ParamAttr(
- name=bn_name + '_offset',
- learning_rate=norm_lr,
- regularizer=L2Decay(norm_decay))
-
- if self.norm_type in ['bn', 'sync_bn']:
- global_stats = True if self.freeze_norm else False
- out = fluid.layers.batch_norm(
- input=conv,
- act=act,
- name=bn_name + '.output.1',
- param_attr=pattr,
- bias_attr=battr,
- moving_mean_name=bn_name + '_mean',
- moving_variance_name=bn_name + '_variance',
- use_global_stats=global_stats)
- scale = fluid.framework._get_var(pattr.name)
- bias = fluid.framework._get_var(battr.name)
- elif self.norm_type == 'affine_channel':
- scale = fluid.layers.create_parameter(
- shape=[conv.shape[1]],
- dtype=conv.dtype,
- attr=pattr,
- default_initializer=fluid.initializer.Constant(1.))
- bias = fluid.layers.create_parameter(
- shape=[conv.shape[1]],
- dtype=conv.dtype,
- attr=battr,
- default_initializer=fluid.initializer.Constant(0.))
- out = fluid.layers.affine_channel(
- x=conv, scale=scale, bias=bias, act=act)
- if self.freeze_norm:
- scale.stop_gradient = True
- bias.stop_gradient = True
- return out
-
- def _shortcut(self, input, ch_out, stride, is_first, name):
- max_pooling_in_short_cut = self.variant == 'd'
- ch_in = input.shape[1]
- # the naming rule is same as pretrained weight
- name = self.na.fix_shortcut_name(name)
- std_senet = getattr(self, 'std_senet', False)
- if ch_in != ch_out or stride != 1 or (self.depth < 50 and is_first):
- if std_senet:
- if is_first:
- return self._conv_norm(input, ch_out, 1, stride, name=name)
- else:
- return self._conv_norm(input, ch_out, 3, stride, name=name)
- if max_pooling_in_short_cut and not is_first:
- input = fluid.layers.pool2d(
- input=input,
- pool_size=2,
- pool_stride=2,
- pool_padding=0,
- ceil_mode=True,
- pool_type='avg')
- return self._conv_norm(input, ch_out, 1, 1, name=name)
- return self._conv_norm(input, ch_out, 1, stride, name=name)
- else:
- return input
-
- def bottleneck(self,
- input,
- num_filters,
- stride,
- is_first,
- name,
- dcn_v2=False):
- if self.variant == 'a':
- stride1, stride2 = stride, 1
- else:
- stride1, stride2 = 1, stride
-
- # ResNeXt
- groups = getattr(self, 'groups', 1)
- group_width = getattr(self, 'group_width', -1)
- if groups == 1:
- expand = 4
- elif (groups * group_width) == 256:
- expand = 1
- else: # FIXME hard code for now, handles 32x4d, 64x4d and 32x8d
- num_filters = num_filters // 2
- expand = 2
-
- conv_name1, conv_name2, conv_name3, \
- shortcut_name = self.na.fix_bottleneck_name(name)
- std_senet = getattr(self, 'std_senet', False)
- if std_senet:
- conv_def = [[
- int(num_filters / 2), 1, stride1, 'relu', 1, conv_name1
- ], [num_filters, 3, stride2, 'relu', groups, conv_name2],
- [num_filters * expand, 1, 1, None, 1, conv_name3]]
- else:
- conv_def = [[num_filters, 1, stride1, 'relu', 1, conv_name1],
- [num_filters, 3, stride2, 'relu', groups, conv_name2],
- [num_filters * expand, 1, 1, None, 1, conv_name3]]
-
- residual = input
- for i, (c, k, s, act, g, _name) in enumerate(conv_def):
- residual = self._conv_norm(
- input=residual,
- num_filters=c,
- filter_size=k,
- stride=s,
- act=act,
- groups=g,
- name=_name,
- dcn_v2=(i == 1 and dcn_v2))
- short = self._shortcut(
- input,
- num_filters * expand,
- stride,
- is_first=is_first,
- name=shortcut_name)
- # Squeeze-and-Excitation
- if callable(getattr(self, '_squeeze_excitation', None)):
- residual = self._squeeze_excitation(
- input=residual, num_channels=num_filters, name='fc' + name)
- return fluid.layers.elementwise_add(
- x=short, y=residual, act='relu', name=name + ".add.output.5")
-
- def basicblock(self,
- input,
- num_filters,
- stride,
- is_first,
- name,
- dcn_v2=False):
- assert dcn_v2 is False, "Not implemented yet."
- conv0 = self._conv_norm(
- input=input,
- num_filters=num_filters,
- filter_size=3,
- act='relu',
- stride=stride,
- name=name + "_branch2a")
- conv1 = self._conv_norm(
- input=conv0,
- num_filters=num_filters,
- filter_size=3,
- act=None,
- name=name + "_branch2b")
- short = self._shortcut(
- input, num_filters, stride, is_first, name=name + "_branch1")
- return fluid.layers.elementwise_add(x=short, y=conv1, act='relu')
-
- def layer_warp(self, input, stage_num):
- """
- Args:
- input (Variable): input variable.
- stage_num (int): the stage number, should be 2, 3, 4, 5
-
- Returns:
- The last variable in endpoint-th stage.
- """
- assert stage_num in [2, 3, 4, 5]
-
- stages, block_func = self.depth_cfg[self.depth]
- count = stages[stage_num - 2]
-
- ch_out = self.stage_filters[stage_num - 2]
- is_first = False if stage_num != 2 else True
- dcn_v2 = True if stage_num in self.dcn_v2_stages else False
-
- nonlocal_mod = 1000
- if stage_num in self.nonlocal_stages:
- nonlocal_mod = self.nonlocal_mod_cfg[
- self.depth] if stage_num == 4 else 2
-
- # Make the layer name and parameter name consistent
- # with ImageNet pre-trained model
- conv = input
- for i in range(count):
- conv_name = self.na.fix_layer_warp_name(stage_num, count, i)
- if self.depth < 50:
- is_first = True if i == 0 and stage_num == 2 else False
- conv = block_func(
- input=conv,
- num_filters=ch_out,
- stride=2 if i == 0 and stage_num != 2 else 1,
- is_first=is_first,
- name=conv_name,
- dcn_v2=dcn_v2)
-
- # add non local model
- dim_in = conv.shape[1]
- nonlocal_name = "nonlocal_conv{}".format(stage_num)
- if i % nonlocal_mod == nonlocal_mod - 1:
- conv = add_space_nonlocal(conv, dim_in, dim_in,
- nonlocal_name + '_{}'.format(i),
- int(dim_in / 2))
- return conv
-
- def c1_stage(self, input):
- out_chan = self._c1_out_chan_num
-
- conv1_name = self.na.fix_c1_stage_name()
-
- if self.variant in ['c', 'd']:
- conv_def = [
- [out_chan // 2, 3, 2, "conv1_1"],
- [out_chan // 2, 3, 1, "conv1_2"],
- [out_chan, 3, 1, "conv1_3"],
- ]
- else:
- conv_def = [[out_chan, 7, 2, conv1_name]]
-
- for (c, k, s, _name) in conv_def:
- input = self._conv_norm(
- input=input,
- num_filters=c,
- filter_size=k,
- stride=s,
- act='relu',
- name=_name)
-
- output = fluid.layers.pool2d(
- input=input,
- pool_size=3,
- pool_stride=2,
- pool_padding=1,
- pool_type='max')
- return output
-
- def __call__(self, input):
- assert isinstance(input, Variable)
- assert not (set(self.feature_maps) - set([2, 3, 4, 5])), \
- "feature maps {} not in [2, 3, 4, 5]".format(self.feature_maps)
-
- res_endpoints = []
-
- res = input
- feature_maps = self.feature_maps
- severed_head = getattr(self, 'severed_head', False)
- if not severed_head:
- res = self.c1_stage(res)
- feature_maps = range(2, max(self.feature_maps) + 1)
-
- for i in feature_maps:
- res = self.layer_warp(res, i)
- if i in self.feature_maps:
- res_endpoints.append(res)
- if self.freeze_at >= i:
- res.stop_gradient = True
- if self.get_prediction:
- pool = fluid.layers.pool2d(
- input=res, pool_type='avg', global_pooling=True)
- stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
-
- out = fluid.layers.fc(
- input=pool,
- size=self.class_dim,
- param_attr=fluid.param_attr.ParamAttr(
- initializer=fluid.initializer.Uniform(-stdv, stdv)))
- out = fluid.layers.softmax(out)
- return out
- return OrderedDict([('res{}_sum'.format(self.feature_maps[idx]), feat)
- for idx, feat in enumerate(res_endpoints)])
-
-
-class ResNetC5(ResNet):
- def __init__(self,
- depth=50,
- freeze_at=2,
- norm_type='affine_channel',
- freeze_norm=True,
- norm_decay=0.,
- variant='b',
- feature_maps=[5],
- weight_prefix_name=''):
- super(ResNetC5, self).__init__(depth, freeze_at, norm_type, freeze_norm,
- norm_decay, variant, feature_maps)
- self.severed_head = True
diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/roi_extractor.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/roi_extractor.py
deleted file mode 100644
index 6e3398d8c4ceb4e78802f86de515c26d0a41e34b..0000000000000000000000000000000000000000
--- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/roi_extractor.py
+++ /dev/null
@@ -1,76 +0,0 @@
-# coding=utf-8
-import paddle.fluid as fluid
-
-__all__ = ['FPNRoIAlign']
-
-
-class FPNRoIAlign(object):
- """
- RoI align pooling for FPN feature maps
- Args:
- sampling_ratio (int): number of sampling points
- min_level (int): lowest level of FPN layer
- max_level (int): highest level of FPN layer
- canconical_level (int): the canconical FPN feature map level
- canonical_size (int): the canconical FPN feature map size
- box_resolution (int): box resolution
- mask_resolution (int): mask roi resolution
- """
-
- def __init__(self,
- sampling_ratio=0,
- min_level=2,
- max_level=5,
- canconical_level=4,
- canonical_size=224,
- box_resolution=7,
- mask_resolution=14):
- super(FPNRoIAlign, self).__init__()
- self.sampling_ratio = sampling_ratio
- self.min_level = min_level
- self.max_level = max_level
- self.canconical_level = canconical_level
- self.canonical_size = canonical_size
- self.box_resolution = box_resolution
- self.mask_resolution = mask_resolution
-
- def __call__(self, head_inputs, rois, spatial_scale, is_mask=False):
- """
- Adopt RoI align onto several level of feature maps to get RoI features.
- Distribute RoIs to different levels by area and get a list of RoI
- features by distributed RoIs and their corresponding feature maps.
-
- Returns:
- roi_feat(Variable): RoI features with shape of [M, C, R, R],
- where M is the number of RoIs and R is RoI resolution
-
- """
- k_min = self.min_level
- k_max = self.max_level
- num_roi_lvls = k_max - k_min + 1
- name_list = list(head_inputs.keys())
- input_name_list = name_list[-num_roi_lvls:]
- spatial_scale = spatial_scale[-num_roi_lvls:]
- rois_dist, restore_index = fluid.layers.distribute_fpn_proposals(
- rois, k_min, k_max, self.canconical_level, self.canonical_size)
- # rois_dist is in ascend order
- roi_out_list = []
- resolution = is_mask and self.mask_resolution or self.box_resolution
- for lvl in range(num_roi_lvls):
- name_index = num_roi_lvls - lvl - 1
- rois_input = rois_dist[lvl]
- head_input = head_inputs[input_name_list[name_index]]
- sc = spatial_scale[name_index]
- roi_out = fluid.layers.roi_align(
- input=head_input,
- rois=rois_input,
- pooled_height=resolution,
- pooled_width=resolution,
- spatial_scale=sc,
- sampling_ratio=self.sampling_ratio)
- roi_out_list.append(roi_out)
- roi_feat_shuffle = fluid.layers.concat(roi_out_list)
- roi_feat_ = fluid.layers.gather(roi_feat_shuffle, restore_index)
- roi_feat = fluid.layers.lod_reset(roi_feat_, rois)
-
- return roi_feat
diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/rpn_head.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/rpn_head.py
deleted file mode 100644
index e1b69866d3938764669e63aebda321eb1e4c5439..0000000000000000000000000000000000000000
--- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/rpn_head.py
+++ /dev/null
@@ -1,533 +0,0 @@
-# coding=utf-8
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from paddle import fluid
-from paddle.fluid.param_attr import ParamAttr
-from paddle.fluid.initializer import Normal
-from paddle.fluid.regularizer import L2Decay
-
-__all__ = [
- 'AnchorGenerator', 'RPNTargetAssign', 'GenerateProposals', 'RPNHead',
- 'FPNRPNHead'
-]
-
-
-class AnchorGenerator(object):
- # __op__ = fluid.layers.anchor_generator
- def __init__(self,
- stride=[16.0, 16.0],
- anchor_sizes=[32, 64, 128, 256, 512],
- aspect_ratios=[0.5, 1., 2.],
- variance=[1., 1., 1., 1.]):
- super(AnchorGenerator, self).__init__()
- self.anchor_sizes = anchor_sizes
- self.aspect_ratios = aspect_ratios
- self.variance = variance
- self.stride = stride
-
-
-class RPNTargetAssign(object):
- # __op__ = fluid.layers.rpn_target_assign
- def __init__(self,
- rpn_batch_size_per_im=256,
- rpn_straddle_thresh=0.,
- rpn_fg_fraction=0.5,
- rpn_positive_overlap=0.7,
- rpn_negative_overlap=0.3,
- use_random=True):
- super(RPNTargetAssign, self).__init__()
- self.rpn_batch_size_per_im = rpn_batch_size_per_im
- self.rpn_straddle_thresh = rpn_straddle_thresh
- self.rpn_fg_fraction = rpn_fg_fraction
- self.rpn_positive_overlap = rpn_positive_overlap
- self.rpn_negative_overlap = rpn_negative_overlap
- self.use_random = use_random
-
-
-class GenerateProposals(object):
- # __op__ = fluid.layers.generate_proposals
- def __init__(self,
- pre_nms_top_n=6000,
- post_nms_top_n=1000,
- nms_thresh=.5,
- min_size=.1,
- eta=1.):
- super(GenerateProposals, self).__init__()
- self.pre_nms_top_n = pre_nms_top_n
- self.post_nms_top_n = post_nms_top_n
- self.nms_thresh = nms_thresh
- self.min_size = min_size
- self.eta = eta
-
-
-class RPNHead(object):
- """
- RPN Head
-
- Args:
- anchor_generator (object): `AnchorGenerator` instance
- rpn_target_assign (object): `RPNTargetAssign` instance
- train_proposal (object): `GenerateProposals` instance for training
- test_proposal (object): `GenerateProposals` instance for testing
- num_classes (int): number of classes in rpn output
- """
- __inject__ = [
- 'anchor_generator', 'rpn_target_assign', 'train_proposal',
- 'test_proposal'
- ]
-
- def __init__(self,
- anchor_generator,
- rpn_target_assign,
- train_proposal,
- test_proposal,
- num_classes=1):
- super(RPNHead, self).__init__()
- self.anchor_generator = anchor_generator
- self.rpn_target_assign = rpn_target_assign
- self.train_proposal = train_proposal
- self.test_proposal = test_proposal
- self.num_classes = num_classes
-
- def _get_output(self, input):
- """
- Get anchor and RPN head output.
-
- Args:
- input(Variable): feature map from backbone with shape of [N, C, H, W]
-
- Returns:
- rpn_cls_score(Variable): Output of rpn head with shape of [N, num_anchors, H, W].
- rpn_bbox_pred(Variable): Output of rpn head with shape of [N, num_anchors * 4, H, W].
- """
- dim_out = input.shape[1]
- rpn_conv = fluid.layers.conv2d(
- input=input,
- num_filters=dim_out,
- filter_size=3,
- stride=1,
- padding=1,
- act='relu',
- name='conv_rpn',
- param_attr=ParamAttr(
- name="conv_rpn_w", initializer=Normal(loc=0., scale=0.01)),
- bias_attr=ParamAttr(
- name="conv_rpn_b", learning_rate=2., regularizer=L2Decay(0.)))
- # Generate anchors self.anchor_generator
- self.anchor, self.anchor_var = fluid.layers.anchor_generator(
- input=rpn_conv,
- anchor_sizes=self.anchor_generator.anchor_sizes,
- aspect_ratios=self.anchor_generator.aspect_ratios,
- variance=self.anchor_generator.variance,
- stride=self.anchor_generator.stride)
-
- num_anchor = self.anchor.shape[2]
- # Proposal classification scores
- self.rpn_cls_score = fluid.layers.conv2d(
- rpn_conv,
- num_filters=num_anchor * self.num_classes,
- filter_size=1,
- stride=1,
- padding=0,
- act=None,
- name='rpn_cls_score',
- param_attr=ParamAttr(
- name="rpn_cls_logits_w", initializer=Normal(loc=0.,
- scale=0.01)),
- bias_attr=ParamAttr(
- name="rpn_cls_logits_b",
- learning_rate=2.,
- regularizer=L2Decay(0.)))
- # Proposal bbox regression deltas
- self.rpn_bbox_pred = fluid.layers.conv2d(
- rpn_conv,
- num_filters=4 * num_anchor,
- filter_size=1,
- stride=1,
- padding=0,
- act=None,
- name='rpn_bbox_pred',
- param_attr=ParamAttr(
- name="rpn_bbox_pred_w", initializer=Normal(loc=0., scale=0.01)),
- bias_attr=ParamAttr(
- name="rpn_bbox_pred_b",
- learning_rate=2.,
- regularizer=L2Decay(0.)))
- return self.rpn_cls_score, self.rpn_bbox_pred
-
- def get_proposals(self, body_feats, im_info, mode='train'):
- """
- Get proposals according to the output of backbone.
-
- Args:
- body_feats (dict): The dictionary of feature maps from backbone.
- im_info(Variable): The information of image with shape [N, 3] with
- shape (height, width, scale).
- body_feat_names(list): A list of names of feature maps from
- backbone.
-
- Returns:
- rpn_rois(Variable): Output proposals with shape of (rois_num, 4).
- """
- # In RPN Heads, only the last feature map of backbone is used.
- # And body_feat_names[-1] represents the last level name of backbone.
- body_feat = list(body_feats.values())[-1]
- rpn_cls_score, rpn_bbox_pred = self._get_output(body_feat)
-
- if self.num_classes == 1:
- rpn_cls_prob = fluid.layers.sigmoid(
- rpn_cls_score, name='rpn_cls_prob')
- else:
- rpn_cls_score = fluid.layers.transpose(
- rpn_cls_score, perm=[0, 2, 3, 1])
- rpn_cls_score = fluid.layers.reshape(
- rpn_cls_score, shape=(0, 0, 0, -1, self.num_classes))
- rpn_cls_prob_tmp = fluid.layers.softmax(
- rpn_cls_score, use_cudnn=False, name='rpn_cls_prob')
- rpn_cls_prob_slice = fluid.layers.slice(
- rpn_cls_prob_tmp, axes=[4], starts=[1], ends=[self.num_classes])
- rpn_cls_prob, _ = fluid.layers.topk(rpn_cls_prob_slice, 1)
- rpn_cls_prob = fluid.layers.reshape(
- rpn_cls_prob, shape=(0, 0, 0, -1))
- rpn_cls_prob = fluid.layers.transpose(
- rpn_cls_prob, perm=[0, 3, 1, 2])
- prop_op = self.train_proposal if mode == 'train' else self.test_proposal
- # prop_op
- rpn_rois, rpn_roi_probs = fluid.layers.generate_proposals(
- scores=rpn_cls_prob,
- bbox_deltas=rpn_bbox_pred,
- im_info=im_info,
- anchors=self.anchor,
- variances=self.anchor_var,
- pre_nms_top_n=prop_op.pre_nms_top_n,
- post_nms_top_n=prop_op.post_nms_top_n,
- nms_thresh=prop_op.nms_thresh,
- min_size=prop_op.min_size,
- eta=prop_op.eta)
- return rpn_rois
-
- def _transform_input(self, rpn_cls_score, rpn_bbox_pred, anchor,
- anchor_var):
- rpn_cls_score = fluid.layers.transpose(rpn_cls_score, perm=[0, 2, 3, 1])
- rpn_bbox_pred = fluid.layers.transpose(rpn_bbox_pred, perm=[0, 2, 3, 1])
- anchor = fluid.layers.reshape(anchor, shape=(-1, 4))
- anchor_var = fluid.layers.reshape(anchor_var, shape=(-1, 4))
- rpn_cls_score = fluid.layers.reshape(
- x=rpn_cls_score, shape=(0, -1, self.num_classes))
- rpn_bbox_pred = fluid.layers.reshape(x=rpn_bbox_pred, shape=(0, -1, 4))
- return rpn_cls_score, rpn_bbox_pred, anchor, anchor_var
-
- def _get_loss_input(self):
- for attr in ['rpn_cls_score', 'rpn_bbox_pred', 'anchor', 'anchor_var']:
- if not getattr(self, attr, None):
- raise ValueError("self.{} should not be None,".format(attr),
- "call RPNHead.get_proposals first")
- return self._transform_input(self.rpn_cls_score, self.rpn_bbox_pred,
- self.anchor, self.anchor_var)
-
- def get_loss(self, im_info, gt_box, is_crowd, gt_label=None):
- """
- Sample proposals and Calculate rpn loss.
-
- Args:
- im_info(Variable): The information of image with shape [N, 3] with
- shape (height, width, scale).
- gt_box(Variable): The ground-truth bounding boxes with shape [M, 4].
- M is the number of groundtruth.
- is_crowd(Variable): Indicates groud-truth is crowd or not with
- shape [M, 1]. M is the number of groundtruth.
-
- Returns:
- Type: dict
- rpn_cls_loss(Variable): RPN classification loss.
- rpn_bbox_loss(Variable): RPN bounding box regression loss.
-
- """
- rpn_cls, rpn_bbox, anchor, anchor_var = self._get_loss_input()
- if self.num_classes == 1:
- # self.rpn_target_assign
- score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \
- fluid.layers.rpn_target_assign(
- bbox_pred=rpn_bbox,
- cls_logits=rpn_cls,
- anchor_box=anchor,
- anchor_var=anchor_var,
- gt_boxes=gt_box,
- is_crowd=is_crowd,
- im_info=im_info,
- rpn_batch_size_per_im=self.rpn_target_assign.rpn_batch_size_per_im,
- rpn_straddle_thresh=self.rpn_target_assign.rpn_straddle_thresh,
- rpn_fg_fraction=self.rpn_target_assign.rpn_fg_fraction,
- rpn_positive_overlap=self.rpn_target_assign.rpn_positive_overlap,
- rpn_negative_overlap=self.rpn_target_assign.rpn_negative_overlap,
- use_random=self.rpn_target_assign.use_random)
- score_tgt = fluid.layers.cast(x=score_tgt, dtype='float32')
- score_tgt.stop_gradient = True
- rpn_cls_loss = fluid.layers.sigmoid_cross_entropy_with_logits(
- x=score_pred, label=score_tgt)
- else:
- score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \
- self.rpn_target_assign(
- bbox_pred=rpn_bbox,
- cls_logits=rpn_cls,
- anchor_box=anchor,
- anchor_var=anchor_var,
- gt_boxes=gt_box,
- gt_labels=gt_label,
- is_crowd=is_crowd,
- num_classes=self.num_classes,
- im_info=im_info)
- labels_int64 = fluid.layers.cast(x=score_tgt, dtype='int64')
- labels_int64.stop_gradient = True
- rpn_cls_loss = fluid.layers.softmax_with_cross_entropy(
- logits=score_pred, label=labels_int64, numeric_stable_mode=True)
-
- rpn_cls_loss = fluid.layers.reduce_mean(
- rpn_cls_loss, name='loss_rpn_cls')
-
- loc_tgt = fluid.layers.cast(x=loc_tgt, dtype='float32')
- loc_tgt.stop_gradient = True
- rpn_reg_loss = fluid.layers.smooth_l1(
- x=loc_pred,
- y=loc_tgt,
- sigma=3.0,
- inside_weight=bbox_weight,
- outside_weight=bbox_weight)
- rpn_reg_loss = fluid.layers.reduce_sum(
- rpn_reg_loss, name='loss_rpn_bbox')
- score_shape = fluid.layers.shape(score_tgt)
- score_shape = fluid.layers.cast(x=score_shape, dtype='float32')
- norm = fluid.layers.reduce_prod(score_shape)
- norm.stop_gradient = True
- rpn_reg_loss = rpn_reg_loss / norm
- return {'rpn_cls_loss': rpn_cls_loss, 'rpn_reg_loss': rpn_reg_loss}
-
-
-class FPNRPNHead(RPNHead):
- """
- RPN Head that supports FPN input
-
- Args:
- anchor_generator (object): `AnchorGenerator` instance
- rpn_target_assign (object): `RPNTargetAssign` instance
- train_proposal (object): `GenerateProposals` instance for training
- test_proposal (object): `GenerateProposals` instance for testing
- anchor_start_size (int): size of anchor at the first scale
- num_chan (int): number of FPN output channels
- min_level (int): lowest level of FPN output
- max_level (int): highest level of FPN output
- num_classes (int): number of classes in rpn output
- """
-
- def __init__(self,
- anchor_generator,
- rpn_target_assign,
- train_proposal,
- test_proposal,
- anchor_start_size=32,
- num_chan=256,
- min_level=2,
- max_level=6,
- num_classes=1):
- super(FPNRPNHead, self).__init__(anchor_generator, rpn_target_assign,
- train_proposal, test_proposal)
- self.anchor_start_size = anchor_start_size
- self.num_chan = num_chan
- self.min_level = min_level
- self.max_level = max_level
- self.num_classes = num_classes
-
- self.fpn_rpn_list = []
- self.anchors_list = []
- self.anchor_var_list = []
-
- def _get_output(self, input, feat_lvl):
- """
- Get anchor and FPN RPN head output at one level.
-
- Args:
- input(Variable): Body feature from backbone.
- feat_lvl(int): Indicate the level of rpn output corresponding
- to the level of feature map.
-
- Return:
- rpn_cls_score(Variable): Output of one level of fpn rpn head with
- shape of [N, num_anchors, H, W].
- rpn_bbox_pred(Variable): Output of one level of fpn rpn head with
- shape of [N, num_anchors * 4, H, W].
- """
- slvl = str(feat_lvl)
- conv_name = 'conv_rpn_fpn' + slvl
- cls_name = 'rpn_cls_logits_fpn' + slvl
- bbox_name = 'rpn_bbox_pred_fpn' + slvl
- conv_share_name = 'conv_rpn_fpn' + str(self.min_level)
- cls_share_name = 'rpn_cls_logits_fpn' + str(self.min_level)
- bbox_share_name = 'rpn_bbox_pred_fpn' + str(self.min_level)
-
- num_anchors = len(self.anchor_generator.aspect_ratios)
- conv_rpn_fpn = fluid.layers.conv2d(
- input=input,
- num_filters=self.num_chan,
- filter_size=3,
- padding=1,
- act='relu',
- name=conv_name,
- param_attr=ParamAttr(
- name=conv_share_name + '_w',
- initializer=Normal(loc=0., scale=0.01)),
- bias_attr=ParamAttr(
- name=conv_share_name + '_b',
- learning_rate=2.,
- regularizer=L2Decay(0.)))
-
- # self.anchor_generator
- self.anchors, self.anchor_var = fluid.layers.anchor_generator(
- input=conv_rpn_fpn,
- anchor_sizes=(self.anchor_start_size * 2.**
- (feat_lvl - self.min_level), ),
- stride=(2.**feat_lvl, 2.**feat_lvl),
- aspect_ratios=self.anchor_generator.aspect_ratios,
- variance=self.anchor_generator.variance)
-
- cls_num_filters = num_anchors * self.num_classes
- self.rpn_cls_score = fluid.layers.conv2d(
- input=conv_rpn_fpn,
- num_filters=cls_num_filters,
- filter_size=1,
- act=None,
- name=cls_name,
- param_attr=ParamAttr(
- name=cls_share_name + '_w',
- initializer=Normal(loc=0., scale=0.01)),
- bias_attr=ParamAttr(
- name=cls_share_name + '_b',
- learning_rate=2.,
- regularizer=L2Decay(0.)))
- self.rpn_bbox_pred = fluid.layers.conv2d(
- input=conv_rpn_fpn,
- num_filters=num_anchors * 4,
- filter_size=1,
- act=None,
- name=bbox_name,
- param_attr=ParamAttr(
- name=bbox_share_name + '_w',
- initializer=Normal(loc=0., scale=0.01)),
- bias_attr=ParamAttr(
- name=bbox_share_name + '_b',
- learning_rate=2.,
- regularizer=L2Decay(0.)))
- return self.rpn_cls_score, self.rpn_bbox_pred
-
- def _get_single_proposals(self, body_feat, im_info, feat_lvl, mode='train'):
- """
- Get proposals in one level according to the output of fpn rpn head
-
- Args:
- body_feat(Variable): the feature map from backone.
- im_info(Variable): The information of image with shape [N, 3] with
- format (height, width, scale).
- feat_lvl(int): Indicate the level of proposals corresponding to
- the feature maps.
-
- Returns:
- rpn_rois_fpn(Variable): Output proposals with shape of (rois_num, 4).
- rpn_roi_probs_fpn(Variable): Scores of proposals with
- shape of (rois_num, 1).
- """
-
- rpn_cls_score_fpn, rpn_bbox_pred_fpn = self._get_output(
- body_feat, feat_lvl)
-
- prop_op = self.train_proposal if mode == 'train' else self.test_proposal
- if self.num_classes == 1:
- rpn_cls_prob_fpn = fluid.layers.sigmoid(
- rpn_cls_score_fpn, name='rpn_cls_prob_fpn' + str(feat_lvl))
- else:
- rpn_cls_score_fpn = fluid.layers.transpose(
- rpn_cls_score_fpn, perm=[0, 2, 3, 1])
- rpn_cls_score_fpn = fluid.layers.reshape(
- rpn_cls_score_fpn, shape=(0, 0, 0, -1, self.num_classes))
- rpn_cls_prob_fpn = fluid.layers.softmax(
- rpn_cls_score_fpn,
- use_cudnn=False,
- name='rpn_cls_prob_fpn' + str(feat_lvl))
- rpn_cls_prob_fpn = fluid.layers.slice(
- rpn_cls_prob_fpn, axes=[4], starts=[1], ends=[self.num_classes])
- rpn_cls_prob_fpn, _ = fluid.layers.topk(rpn_cls_prob_fpn, 1)
- rpn_cls_prob_fpn = fluid.layers.reshape(
- rpn_cls_prob_fpn, shape=(0, 0, 0, -1))
- rpn_cls_prob_fpn = fluid.layers.transpose(
- rpn_cls_prob_fpn, perm=[0, 3, 1, 2])
- # prop_op
- rpn_rois_fpn, rpn_roi_prob_fpn = fluid.layers.generate_proposals(
- scores=rpn_cls_prob_fpn,
- bbox_deltas=rpn_bbox_pred_fpn,
- im_info=im_info,
- anchors=self.anchors,
- variances=self.anchor_var,
- pre_nms_top_n=prop_op.pre_nms_top_n,
- post_nms_top_n=prop_op.post_nms_top_n,
- nms_thresh=prop_op.nms_thresh,
- min_size=prop_op.min_size,
- eta=prop_op.eta)
- return rpn_rois_fpn, rpn_roi_prob_fpn
-
- def get_proposals(self, fpn_feats, im_info, mode='train'):
- """
- Get proposals in multiple levels according to the output of fpn
- rpn head
-
- Args:
- fpn_feats(dict): A dictionary represents the output feature map
- of FPN with their name.
- im_info(Variable): The information of image with shape [N, 3] with
- format (height, width, scale).
-
- Return:
- rois_list(Variable): Output proposals in shape of [rois_num, 4]
- """
- rois_list = []
- roi_probs_list = []
- fpn_feat_names = list(fpn_feats.keys())
- for lvl in range(self.min_level, self.max_level + 1):
- fpn_feat_name = fpn_feat_names[self.max_level - lvl]
- fpn_feat = fpn_feats[fpn_feat_name]
- rois_fpn, roi_probs_fpn = self._get_single_proposals(
- fpn_feat, im_info, lvl, mode)
- self.fpn_rpn_list.append((self.rpn_cls_score, self.rpn_bbox_pred))
- rois_list.append(rois_fpn)
- roi_probs_list.append(roi_probs_fpn)
- self.anchors_list.append(self.anchors)
- self.anchor_var_list.append(self.anchor_var)
- prop_op = self.train_proposal if mode == 'train' else self.test_proposal
- post_nms_top_n = prop_op.post_nms_top_n
- rois_collect = fluid.layers.collect_fpn_proposals(
- rois_list,
- roi_probs_list,
- self.min_level,
- self.max_level,
- post_nms_top_n,
- name='collect')
- return rois_collect
-
- def _get_loss_input(self):
- rpn_clses = []
- rpn_bboxes = []
- anchors = []
- anchor_vars = []
- for i in range(len(self.fpn_rpn_list)):
- single_input = self._transform_input(
- self.fpn_rpn_list[i][0], self.fpn_rpn_list[i][1],
- self.anchors_list[i], self.anchor_var_list[i])
- rpn_clses.append(single_input[0])
- rpn_bboxes.append(single_input[1])
- anchors.append(single_input[2])
- anchor_vars.append(single_input[3])
-
- rpn_cls = fluid.layers.concat(rpn_clses, axis=1)
- rpn_bbox = fluid.layers.concat(rpn_bboxes, axis=1)
- anchors = fluid.layers.concat(anchors)
- anchor_var = fluid.layers.concat(anchor_vars)
- return rpn_cls, rpn_bbox, anchors, anchor_var
diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/test.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/test.py
new file mode 100644
index 0000000000000000000000000000000000000000..0a775a4f50c063e00e7aa0a2f8b5c534a342601b
--- /dev/null
+++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/test.py
@@ -0,0 +1,104 @@
+import os
+import shutil
+import unittest
+
+import cv2
+import requests
+import paddlehub as hub
+
+
+class TestHubModule(unittest.TestCase):
+    """Smoke tests for the faster_rcnn_resnet50_fpn_coco2017 hub module."""
+
+    # Scratch directory and sample image written by setUpClass.
+    TEST_DIR = 'tests'
+    TEST_IMAGE = 'tests/test.jpg'
+    # Seconds to wait for the sample image download before giving up.
+    DOWNLOAD_TIMEOUT = 30
+    # Exclusive (low, high) bounds expected for each edge of the first box.
+    EXPECTED_BOX = {
+        'left': (200, 800),
+        'right': (2500, 3500),
+        'top': (500, 1500),
+        'bottom': (3500, 4500),
+    }
+
+    @classmethod
+    def setUpClass(cls) -> None:
+        """Download the sample image and load the module under test."""
+        img_url = 'https://ai-studio-static-online.cdn.bcebos.com/68313e182f5e4ad9907e69dac9ece8fc50840d7ffbd24fa88396f009958f969a'
+        os.makedirs(cls.TEST_DIR, exist_ok=True)
+        # Without a timeout an unresponsive server would hang the test run.
+        response = requests.get(img_url, timeout=cls.DOWNLOAD_TIMEOUT)
+        # Raise explicitly: an assert statement is stripped under "python -O".
+        if response.status_code != 200:
+            raise RuntimeError('Network Error.')
+        with open(cls.TEST_IMAGE, 'wb') as f:
+            f.write(response.content)
+        cls.module = hub.Module(name="faster_rcnn_resnet50_fpn_coco2017")
+
+    @classmethod
+    def tearDownClass(cls) -> None:
+        """Remove the directories the tests write to."""
+        # ignore_errors: a directory that was never created (for example
+        # because the test producing it failed) must not abort the cleanup
+        # of the remaining ones.
+        for path in (cls.TEST_DIR, 'inference', 'detection_result'):
+            shutil.rmtree(path, ignore_errors=True)
+
+    def _assert_cat_detected(self, results) -> None:
+        """Check the first detection of the first result entry.
+
+        The first box must be labelled 'cat', have a confidence above 0.5 and
+        have every edge strictly inside the bounds listed in EXPECTED_BOX.
+        """
+        bbox = results[0]['data'][0]
+
+        self.assertEqual(bbox['label'], 'cat')
+        self.assertGreater(bbox['confidence'], 0.5)
+        for edge, (low, high) in self.EXPECTED_BOX.items():
+            self.assertTrue(
+                low < bbox[edge] < high,
+                '{}={} is outside ({}, {})'.format(edge, bbox[edge], low, high))
+
+    def test_object_detection1(self):
+        """Detect from an image path with default options."""
+        results = self.module.object_detection(paths=[self.TEST_IMAGE])
+        self._assert_cat_detected(results)
+
+    def test_object_detection2(self):
+        """Detect from an image loaded with cv2.imread, default options."""
+        results = self.module.object_detection(
+            images=[cv2.imread(self.TEST_IMAGE)])
+        self._assert_cat_detected(results)
+
+    def test_object_detection3(self):
+        """Detect from a loaded image with visualization disabled."""
+        results = self.module.object_detection(
+            images=[cv2.imread(self.TEST_IMAGE)], visualization=False)
+        self._assert_cat_detected(results)
+
+    def test_object_detection4(self):
+        """Passing the path 'no.jpg' is expected to raise AssertionError."""
+        self.assertRaises(
+            AssertionError,
+            self.module.object_detection,
+            paths=['no.jpg'])
+
+    def test_object_detection5(self):
+        """Passing a string in images is expected to raise cv2.error."""
+        self.assertRaises(
+            cv2.error,
+            self.module.object_detection,
+            images=['test.jpg'])
+
+    def test_save_inference_model(self):
+        """Saving must produce both the .pdmodel and .pdiparams files."""
+        self.module.save_inference_model('./inference/model')
+
+        self.assertTrue(os.path.exists('./inference/model.pdmodel'))
+        self.assertTrue(os.path.exists('./inference/model.pdiparams'))
+
+
+if __name__ == "__main__":
+    unittest.main()