未验证 提交 01002e40 编写于 作者: jm_12138's avatar jm_12138 提交者: GitHub

update faster_rcnn_resnet50_fpn_coco2017 (#1948)

* update faster_rcnn_resnet50_fpn_coco2017

* update unittest faster_rcnn_resnet50_fpn_coco2017

* update unittest

* update unittest

* update gpu config

* update

* add clean func

* update save inference model
Co-authored-by: Nchenjian <chenjian26@baidu.com>
上级 3fcdd778
......@@ -102,19 +102,13 @@
- ```python
def save_inference_model(dirname,
model_filename=None,
params_filename=None,
combined=True)
def save_inference_model(dirname)
```
- 将模型保存到指定路径。
- **参数**
- dirname: 存在模型的目录名称; <br/>
- model\_filename: 模型文件名称,默认为\_\_model\_\_; <br/>
- params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效);<br/>
- combined: 是否将参数保存到统一的一个文件中。
- dirname: 模型保存路径 <br/>
## 四、服务部署
......@@ -167,6 +161,11 @@
* 1.0.1
修复numpy数据读取问题
* 1.1.0
移除 fluid api
- ```shell
$ hub install faster_rcnn_resnet50_fpn_coco2017==1.0.1
$ hub install faster_rcnn_resnet50_fpn_coco2017==1.1.0
```
......@@ -101,19 +101,13 @@
- ```python
def save_inference_model(dirname,
model_filename=None,
params_filename=None,
combined=True)
def save_inference_model(dirname)
```
- Save the model to the specified path
- **Parameters**
- dirname: output dir for saving model
- model\_filename: filename for saving model
- params\_filename: filename for saving parameters
- combined: whether to save all parameters into a single file
- dirname: model save path
## IV.Server Deployment
......@@ -166,6 +160,11 @@
* 1.0.1
Fix a bug when reading numpy data
* 1.1.0
Remove fluid api
- ```shell
$ hub install faster_rcnn_resnet50_fpn_coco2017==1.0.1
$ hub install faster_rcnn_resnet50_fpn_coco2017==1.1.0
```
class BBoxAssigner(object):
    """
    Container for the hyper-parameters of proposal label generation.

    The instance only stores the values; it performs no computation itself.

    Args:
        batch_size_per_im (int): stored as ``batch_size_per_im``
        fg_fraction (float): stored as ``fg_fraction``
        fg_thresh (float): stored as ``fg_thresh``
        bg_thresh_hi (float): stored as ``bg_thresh_hi``
        bg_thresh_lo (float): stored as ``bg_thresh_lo``
        bbox_reg_weights (list): stored as ``bbox_reg_weights``; a list is
            copied so the instance never aliases the caller's (or the
            default) list
        class_nums (int): stored as ``class_nums``
        shuffle_before_sample (bool): stored as ``use_random``
    """

    # __op__ = fluid.layers.generate_proposal_labels
    def __init__(self,
                 batch_size_per_im=512,
                 fg_fraction=.25,
                 fg_thresh=.5,
                 bg_thresh_hi=.5,
                 bg_thresh_lo=0.,
                 bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
                 class_nums=81,
                 shuffle_before_sample=True):
        super(BBoxAssigner, self).__init__()
        self.batch_size_per_im = batch_size_per_im
        self.fg_fraction = fg_fraction
        self.fg_thresh = fg_thresh
        self.bg_thresh_hi = bg_thresh_hi
        self.bg_thresh_lo = bg_thresh_lo
        # The default list object is created once and shared by every call;
        # copy it so that mutating one instance cannot leak into the others.
        if isinstance(bbox_reg_weights, list):
            bbox_reg_weights = list(bbox_reg_weights)
        self.bbox_reg_weights = bbox_reg_weights
        self.class_nums = class_nums
        # Note the rename: the constructor argument is
        # `shuffle_before_sample`, the attribute is `use_random`.
        self.use_random = shuffle_before_sample
# coding=utf-8
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import OrderedDict
from paddle import fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Normal, Xavier
from paddle.fluid.regularizer import L2Decay
from paddle.fluid.initializer import MSRA
class MultiClassNMS(object):
    """
    Plain holder for multi-class NMS settings.

    Every constructor argument is stored unchanged under an attribute of the
    same name; no computation happens here.
    """

    # __op__ = fluid.layers.multiclass_nms
    def __init__(self,
                 score_threshold=.05,
                 nms_top_k=-1,
                 keep_top_k=100,
                 nms_threshold=.5,
                 normalized=False,
                 nms_eta=1.0,
                 background_label=0):
        super(MultiClassNMS, self).__init__()
        # thresholds
        self.score_threshold, self.nms_threshold = score_threshold, nms_threshold
        # candidate counts before / after NMS
        self.nms_top_k, self.keep_top_k = nms_top_k, keep_top_k
        # remaining options
        self.normalized = normalized
        self.nms_eta = nms_eta
        self.background_label = background_label
class SmoothL1Loss(object):
    """
    Callable wrapper that forwards to ``fluid.layers.smooth_l1``.

    Args:
        sigma (float): hyper-parameter of the smooth L1 loss, passed through
            on every call
    """

    def __init__(self, sigma=1.0):
        super(SmoothL1Loss, self).__init__()
        self.sigma = sigma

    def __call__(self, x, y, inside_weight=None, outside_weight=None):
        """Return ``fluid.layers.smooth_l1(x, y, ...)`` using the stored sigma."""
        options = dict(
            inside_weight=inside_weight,
            outside_weight=outside_weight,
            sigma=self.sigma)
        return fluid.layers.smooth_l1(x, y, **options)
class BoxCoder(object):
    """
    Container for box coder settings.

    Args:
        prior_box_var: stored as ``prior_box_var``; when a list is given it
            is copied so the instance never aliases the caller's (or the
            default) list, any other object is stored as-is
        code_type (str): stored as ``code_type``
        box_normalized (bool): stored as ``box_normalized``
        axis (int): stored as ``axis``
    """

    def __init__(self,
                 prior_box_var=[0.1, 0.1, 0.2, 0.2],
                 code_type='decode_center_size',
                 box_normalized=False,
                 axis=1):
        super(BoxCoder, self).__init__()
        # The default list is a single shared object; copy lists so that
        # in-place edits on one instance do not change the others.
        if isinstance(prior_box_var, list):
            prior_box_var = list(prior_box_var)
        self.prior_box_var = prior_box_var
        self.code_type = code_type
        self.box_normalized = box_normalized
        self.axis = axis
class TwoFCHead(object):
    """
    RCNN head made of two fully connected layers ('fc6' then 'fc7').

    Args:
        mlp_dim (int): output size of each fc layer
    """

    def __init__(self, mlp_dim=1024):
        super(TwoFCHead, self).__init__()
        self.mlp_dim = mlp_dim

    def __call__(self, roi_feat):
        """Apply fc6 and fc7 (both with relu) to `roi_feat`; return fc7's output."""
        dims = roi_feat.shape
        # product of dimensions 1..3 of the input, used for the fc6 initializer
        fan = dims[1] * dims[2] * dims[3]

        fc6_weight = ParamAttr(name='fc6_w', initializer=Xavier(fan_out=fan))
        fc6_bias = ParamAttr(
            name='fc6_b', learning_rate=2., regularizer=L2Decay(0.))
        hidden = fluid.layers.fc(
            input=roi_feat,
            size=self.mlp_dim,
            act='relu',
            name='fc6',
            param_attr=fc6_weight,
            bias_attr=fc6_bias)

        fc7_weight = ParamAttr(name='fc7_w', initializer=Xavier())
        fc7_bias = ParamAttr(
            name='fc7_b', learning_rate=2., regularizer=L2Decay(0.))
        return fluid.layers.fc(
            input=hidden,
            size=self.mlp_dim,
            act='relu',
            name='fc7',
            param_attr=fc7_weight,
            bias_attr=fc7_bias)
class BBoxHead(object):
    """
    RCNN bbox head: runs a head module on RoI features and adds two fully
    connected outputs ('cls_score' and 'bbox_pred') on top of it.

    Args:
        head (object): the head module instance, e.g., `ResNetC5`, `TwoFCHead`
        box_coder (object): `BoxCoder` instance
        nms (object): `MultiClassNMS` instance
        bbox_loss (object): callable used as box regression loss,
            `SmoothL1Loss` instance by default
        num_classes: number of output classes
    """
    # NOTE(review): these two lists are not read anywhere in this class;
    # presumably they are consumed by an external configuration mechanism.
    __inject__ = ['head', 'box_coder', 'nms', 'bbox_loss']
    __shared__ = ['num_classes']

    # The default box_coder / nms / bbox_loss objects are created once, when
    # this `def` is evaluated, and are shared by every BBoxHead relying on them.
    def __init__(self,
                 head,
                 box_coder=BoxCoder(),
                 nms=MultiClassNMS(),
                 bbox_loss=SmoothL1Loss(),
                 num_classes=81):
        super(BBoxHead, self).__init__()
        self.head = head
        self.num_classes = num_classes
        self.box_coder = box_coder
        self.nms = nms
        self.bbox_loss = bbox_loss
        # cache of the last head output, filled by get_head_feat
        self.head_feat = None

    def get_head_feat(self, input=None):
        """
        Get the bbox head feature map.

        When `input` is given, the head is run on it and the result is cached
        in `self.head_feat`; when it is None, the cached value is returned.
        If the head returns an OrderedDict, only its first value is kept.
        """
        if input is not None:
            feat = self.head(input)
            if isinstance(feat, OrderedDict):
                feat = list(feat.values())[0]
            self.head_feat = feat
        return self.head_feat

    def _get_output(self, roi_feat):
        """
        Get bbox head output.

        Args:
            roi_feat (Variable): RoI feature from RoIExtractor.

        Returns:
            cls_score(Variable): output of the 'cls_score' fc layer, whose
                size is num_classes.
            bbox_pred(Variable): output of the 'bbox_pred' fc layer, whose
                size is 4 * num_classes.
        """
        head_feat = self.get_head_feat(roi_feat)
        # any head other than TwoFCHead (e.g. ResNetC5, which outputs a single
        # feature map) is globally average-pooled before the fc layers
        if not isinstance(self.head, TwoFCHead):
            head_feat = fluid.layers.pool2d(
                head_feat, pool_type='avg', global_pooling=True)
        cls_score = fluid.layers.fc(
            input=head_feat,
            size=self.num_classes,
            act=None,
            name='cls_score',
            param_attr=ParamAttr(
                name='cls_score_w', initializer=Normal(loc=0.0, scale=0.01)),
            bias_attr=ParamAttr(
                name='cls_score_b', learning_rate=2., regularizer=L2Decay(0.)))
        bbox_pred = fluid.layers.fc(
            input=head_feat,
            size=4 * self.num_classes,
            act=None,
            name='bbox_pred',
            param_attr=ParamAttr(
                name='bbox_pred_w', initializer=Normal(loc=0.0, scale=0.001)),
            bias_attr=ParamAttr(
                name='bbox_pred_b', learning_rate=2., regularizer=L2Decay(0.)))
        return cls_score, bbox_pred

    def get_loss(self, roi_feat, labels_int32, bbox_targets,
                 bbox_inside_weights, bbox_outside_weights):
        """
        Get bbox_head loss.

        Args:
            roi_feat (Variable): RoI feature from RoIExtractor.
            labels_int32(Variable): Class label of a RoI with shape [P, 1].
                P is the number of RoI.
            bbox_targets(Variable): Box label of a RoI with shape
                [P, 4 * class_nums].
            bbox_inside_weights(Variable): Indicates whether a box should
                contribute to loss. Same shape as bbox_targets.
            bbox_outside_weights(Variable): Indicates whether a box should
                contribute to loss. Same shape as bbox_targets.

        Return:
            Type: Dict
                loss_cls(Variable): mean softmax cross entropy of cls_score.
                loss_bbox(Variable): mean of `self.bbox_loss` on bbox_pred.
        """

        cls_score, bbox_pred = self._get_output(roi_feat)

        # labels are cast to int64 and excluded from gradient computation
        labels_int64 = fluid.layers.cast(x=labels_int32, dtype='int64')
        labels_int64.stop_gradient = True
        loss_cls = fluid.layers.softmax_with_cross_entropy(
            logits=cls_score, label=labels_int64, numeric_stable_mode=True)
        loss_cls = fluid.layers.reduce_mean(loss_cls)
        loss_bbox = self.bbox_loss(
            x=bbox_pred,
            y=bbox_targets,
            inside_weight=bbox_inside_weights,
            outside_weight=bbox_outside_weights)
        loss_bbox = fluid.layers.reduce_mean(loss_bbox)
        return {'loss_cls': loss_cls, 'loss_bbox': loss_bbox}

    def get_prediction(self,
                       roi_feat,
                       rois,
                       im_info,
                       im_shape,
                       return_box_score=False):
        """
        Get prediction bounding box in test stage.

        Args:
            roi_feat (Variable): RoI feature from RoIExtractor.
            rois (Variable): Output of generate_proposals in rpn head.
            im_info (Variable): A 2-D LoDTensor with shape [B, 3]. B is the
                number of input images, each element consists of im_height,
                im_width, im_scale.
            im_shape (Variable): Actual shape of original image with shape
                [B, 3]. B is the number of images, each element consists of
                original_height, original_width, 1
            return_box_score (bool): when True, skip NMS and return the
                clipped boxes and class probabilities instead.

        Returns:
            pred_result(Variable): Prediction result with shape [N, 6]. Each
                row has 6 values: [label, confidence, xmin, ymin, xmax, ymax].
                N is the total number of prediction.
                If `return_box_score` is True, a dict
                {'bbox': clipped boxes, 'score': class probabilities} is
                returned instead.
        """
        cls_score, bbox_pred = self._get_output(roi_feat)

        # take column 2 of im_info (im_scale), expand it to match rois and
        # divide the rois by it
        im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3])
        im_scale = fluid.layers.sequence_expand(im_scale, rois)
        boxes = rois / im_scale
        cls_prob = fluid.layers.softmax(cls_score, use_cudnn=False)
        bbox_pred = fluid.layers.reshape(bbox_pred, (-1, self.num_classes, 4))
        # decode with the settings stored on self.box_coder
        decoded_box = fluid.layers.box_coder(
            prior_box=boxes,
            target_box=bbox_pred,
            prior_box_var=self.box_coder.prior_box_var,
            code_type=self.box_coder.code_type,
            box_normalized=self.box_coder.box_normalized,
            axis=self.box_coder.axis)
        cliped_box = fluid.layers.box_clip(input=decoded_box, im_info=im_shape)
        if return_box_score:
            return {'bbox': cliped_box, 'score': cls_prob}
        # run NMS with the settings stored on self.nms
        pred_result = fluid.layers.multiclass_nms(
            bboxes=cliped_box,
            scores=cls_prob,
            score_threshold=self.nms.score_threshold,
            nms_top_k=self.nms.nms_top_k,
            keep_top_k=self.nms.keep_top_k,
            nms_threshold=self.nms.nms_threshold,
            normalized=self.nms.normalized,
            nms_eta=self.nms.nms_eta,
            background_label=self.nms.background_label)
        return pred_result
......@@ -4,12 +4,9 @@ from __future__ import print_function
from __future__ import division
import os
from collections import OrderedDict
import cv2
import numpy as np
from PIL import Image, ImageEnhance
from paddle import fluid
__all__ = ['test_reader']
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import copy
from collections import OrderedDict
from paddle import fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Xavier
from paddle.fluid.regularizer import L2Decay
__all__ = ['ConvNorm', 'FPN']
def ConvNorm(input,
             num_filters,
             filter_size,
             stride=1,
             groups=1,
             norm_decay=0.,
             norm_type='affine_channel',
             norm_groups=32,
             dilation=1,
             lr_scale=1,
             freeze_norm=False,
             act=None,
             norm_name=None,
             initializer=None,
             name=None):
    """
    Bias-free 2-D convolution followed by a normalization layer.

    Args:
        input (Variable): input of the convolution
        num_filters (int): number of convolution filters
        filter_size (int): convolution kernel size; the padding is
            ((filter_size - 1) // 2) * dilation
        stride (int): convolution stride
        groups (int): convolution groups
        norm_decay (float): L2 decay applied to the norm scale and offset
        norm_type (str): one of 'bn', 'sync_bn', 'gn', 'affine_channel'
        norm_groups (int): number of groups, used only when norm_type is 'gn'
        dilation (int): convolution dilation
        lr_scale (float): learning rate multiplier of conv and norm parameters
        freeze_norm (bool): when True the norm parameters get a zero learning
            rate and stop_gradient, and 'bn'/'sync_bn' use global statistics
        act (str|None): activation applied by the normalization layer
        norm_name (str): prefix of the norm parameter names
        initializer: initializer of the convolution weight
        name (str): prefix of the convolution parameter / output names

    Returns:
        Variable: output of the normalization layer.

    Raises:
        ValueError: if `norm_type` is not one of the supported values.

    Note:
        `name` and `norm_name` are concatenated with string suffixes, so
        despite their `None` defaults a string must be supplied for both.
    """
    # Fail early with a clear message: previously an unsupported norm_type
    # only surfaced as an UnboundLocalError after the conv op was created.
    if norm_type not in ('bn', 'sync_bn', 'gn', 'affine_channel'):
        raise ValueError(
            "unsupported norm_type {!r}, expected one of 'bn', 'sync_bn', "
            "'gn', 'affine_channel'".format(norm_type))

    conv = fluid.layers.conv2d(
        input=input,
        num_filters=num_filters,
        filter_size=filter_size,
        stride=stride,
        padding=((filter_size - 1) // 2) * dilation,
        dilation=dilation,
        groups=groups,
        act=None,
        param_attr=ParamAttr(
            name=name + "_weights",
            initializer=initializer,
            learning_rate=lr_scale),
        bias_attr=False,
        name=name + '.conv2d.output.1')

    # a frozen norm layer gets a zero learning rate
    norm_lr = 0. if freeze_norm else 1.
    pattr = ParamAttr(
        name=norm_name + '_scale',
        learning_rate=norm_lr * lr_scale,
        regularizer=L2Decay(norm_decay))
    battr = ParamAttr(
        name=norm_name + '_offset',
        learning_rate=norm_lr * lr_scale,
        regularizer=L2Decay(norm_decay))

    if norm_type in ['bn', 'sync_bn']:
        global_stats = True if freeze_norm else False
        out = fluid.layers.batch_norm(
            input=conv,
            act=act,
            name=norm_name + '.output.1',
            param_attr=pattr,
            bias_attr=battr,
            moving_mean_name=norm_name + '_mean',
            moving_variance_name=norm_name + '_variance',
            use_global_stats=global_stats)
        scale = fluid.framework._get_var(pattr.name)
        bias = fluid.framework._get_var(battr.name)
    elif norm_type == 'gn':
        out = fluid.layers.group_norm(
            input=conv,
            act=act,
            name=norm_name + '.output.1',
            groups=norm_groups,
            param_attr=pattr,
            bias_attr=battr)
        scale = fluid.framework._get_var(pattr.name)
        bias = fluid.framework._get_var(battr.name)
    else:  # 'affine_channel'
        scale = fluid.layers.create_parameter(
            shape=[conv.shape[1]],
            dtype=conv.dtype,
            attr=pattr,
            default_initializer=fluid.initializer.Constant(1.))
        bias = fluid.layers.create_parameter(
            shape=[conv.shape[1]],
            dtype=conv.dtype,
            attr=battr,
            default_initializer=fluid.initializer.Constant(0.))
        out = fluid.layers.affine_channel(
            x=conv, scale=scale, bias=bias, act=act)

    if freeze_norm:
        scale.stop_gradient = True
        bias.stop_gradient = True
    return out
class FPN(object):
    """
    Feature Pyramid Network, see https://arxiv.org/abs/1612.03144

    Args:
        num_chan (int): number of feature channels
        min_level (int): lowest level of the backbone feature map to use
        max_level (int): highest level of the backbone feature map to use
        spatial_scale (list): feature map scaling factor
        has_extra_convs (bool): whether has extra convolutions in higher levels
        norm_type (str|None): normalization type, 'bn'/'sync_bn'/'affine_channel'
        freeze_norm (bool): forwarded to `ConvNorm` when `norm_type` is set
    """
    # NOTE(review): not read anywhere in this class; presumably consumed by
    # an external configuration mechanism.
    __shared__ = ['norm_type', 'freeze_norm']

    def __init__(self,
                 num_chan=256,
                 min_level=2,
                 max_level=6,
                 spatial_scale=[1. / 32., 1. / 16., 1. / 8., 1. / 4.],
                 has_extra_convs=False,
                 norm_type=None,
                 freeze_norm=False):
        self.freeze_norm = freeze_norm
        self.num_chan = num_chan
        self.min_level = min_level
        self.max_level = max_level
        # stored by reference; get_output works on a deep copy of it
        self.spatial_scale = spatial_scale
        self.has_extra_convs = has_extra_convs
        self.norm_type = norm_type

    def _add_topdown_lateral(self, body_name, body_input, upper_output):
        """
        Merge one backbone level with the level above it.

        A 1x1 convolution (with normalization if `norm_type` is set) is
        applied to `body_input`, `upper_output` is upsampled by 2 with
        nearest-neighbour resizing, and the two results are summed.
        """
        lateral_name = 'fpn_inner_' + body_name + '_lateral'
        topdown_name = 'fpn_topdown_' + body_name
        fan = body_input.shape[1]
        if self.norm_type:
            initializer = Xavier(fan_out=fan)
            lateral = ConvNorm(
                body_input,
                self.num_chan,
                1,
                initializer=initializer,
                norm_type=self.norm_type,
                freeze_norm=self.freeze_norm,
                name=lateral_name,
                norm_name=lateral_name)
        else:
            lateral = fluid.layers.conv2d(
                body_input,
                self.num_chan,
                1,
                param_attr=ParamAttr(
                    name=lateral_name + "_w", initializer=Xavier(fan_out=fan)),
                bias_attr=ParamAttr(
                    name=lateral_name + "_b",
                    learning_rate=2.,
                    regularizer=L2Decay(0.)),
                name=lateral_name)
        topdown = fluid.layers.resize_nearest(
            upper_output, scale=2., name=topdown_name)

        return lateral + topdown

    def get_output(self, body_dict):
        """
        Add FPN onto backbone.

        Args:
            body_dict(OrderedDict): Dictionary of variables and each element is the
                output of backbone.

        Return:
            fpn_dict(OrderedDict): A dictionary represents the output of FPN with
                their name.
            spatial_scale(list): A list of multiplicative spatial scale factor.
        """
        # work on a copy: entries are inserted below and the instance
        # attribute must stay untouched
        spatial_scale = copy.deepcopy(self.spatial_scale)
        # iterate the backbone outputs in reverse key order
        body_name_list = list(body_dict.keys())[::-1]
        num_backbone_stages = len(body_name_list)
        self.fpn_inner_output = [[] for _ in range(num_backbone_stages)]
        # the first level (last backbone output) only gets a 1x1 convolution
        fpn_inner_name = 'fpn_inner_' + body_name_list[0]
        body_input = body_dict[body_name_list[0]]
        fan = body_input.shape[1]
        if self.norm_type:
            initializer = Xavier(fan_out=fan)
            self.fpn_inner_output[0] = ConvNorm(
                body_input,
                self.num_chan,
                1,
                initializer=initializer,
                norm_type=self.norm_type,
                freeze_norm=self.freeze_norm,
                name=fpn_inner_name,
                norm_name=fpn_inner_name)
        else:
            self.fpn_inner_output[0] = fluid.layers.conv2d(
                body_input,
                self.num_chan,
                1,
                param_attr=ParamAttr(
                    name=fpn_inner_name + "_w",
                    initializer=Xavier(fan_out=fan)),
                bias_attr=ParamAttr(
                    name=fpn_inner_name + "_b",
                    learning_rate=2.,
                    regularizer=L2Decay(0.)),
                name=fpn_inner_name)
        # every following level is merged with the previously built one
        for i in range(1, num_backbone_stages):
            body_name = body_name_list[i]
            body_input = body_dict[body_name]
            top_output = self.fpn_inner_output[i - 1]
            fpn_inner_single = self._add_topdown_lateral(
                body_name, body_input, top_output)
            self.fpn_inner_output[i] = fpn_inner_single
        fpn_dict = {}
        fpn_name_list = []
        # a 3x3 convolution on each merged map produces the FPN outputs
        for i in range(num_backbone_stages):
            fpn_name = 'fpn_' + body_name_list[i]
            fan = self.fpn_inner_output[i].shape[1] * 3 * 3
            if self.norm_type:
                initializer = Xavier(fan_out=fan)
                fpn_output = ConvNorm(
                    self.fpn_inner_output[i],
                    self.num_chan,
                    3,
                    initializer=initializer,
                    norm_type=self.norm_type,
                    freeze_norm=self.freeze_norm,
                    name=fpn_name,
                    norm_name=fpn_name)
            else:
                fpn_output = fluid.layers.conv2d(
                    self.fpn_inner_output[i],
                    self.num_chan,
                    filter_size=3,
                    padding=1,
                    param_attr=ParamAttr(
                        name=fpn_name + "_w", initializer=Xavier(fan_out=fan)),
                    bias_attr=ParamAttr(
                        name=fpn_name + "_b",
                        learning_rate=2.,
                        regularizer=L2Decay(0.)),
                    name=fpn_name)
            fpn_dict[fpn_name] = fpn_output
            fpn_name_list.append(fpn_name)
        # without extra convs, one additional level is obtained by max pooling
        # (pool size 1, stride 2) the first FPN output; it is placed at the
        # front with half the spatial scale
        if not self.has_extra_convs and self.max_level - self.min_level == len(
                spatial_scale):
            body_top_name = fpn_name_list[0]
            body_top_extension = fluid.layers.pool2d(
                fpn_dict[body_top_name],
                1,
                'max',
                pool_stride=2,
                name=body_top_name + '_subsampled_2x')
            fpn_dict[body_top_name + '_subsampled_2x'] = body_top_extension
            fpn_name_list.insert(0, body_top_name + '_subsampled_2x')
            spatial_scale.insert(0, spatial_scale[0] * 0.5)
        # Coarser FPN levels introduced for RetinaNet
        highest_backbone_level = self.min_level + len(spatial_scale) - 1
        if self.has_extra_convs and self.max_level > highest_backbone_level:
            # the extra levels start from the raw backbone output, not from
            # an FPN output
            fpn_blob = body_dict[body_name_list[0]]
            for i in range(highest_backbone_level + 1, self.max_level + 1):
                fpn_blob_in = fpn_blob
                fpn_name = 'fpn_' + str(i)
                # relu is applied to the input of every extra level but the first
                if i > highest_backbone_level + 1:
                    fpn_blob_in = fluid.layers.relu(fpn_blob)
                fan = fpn_blob_in.shape[1] * 3 * 3
                fpn_blob = fluid.layers.conv2d(
                    input=fpn_blob_in,
                    num_filters=self.num_chan,
                    filter_size=3,
                    stride=2,
                    padding=1,
                    param_attr=ParamAttr(
                        name=fpn_name + "_w", initializer=Xavier(fan_out=fan)),
                    bias_attr=ParamAttr(
                        name=fpn_name + "_b",
                        learning_rate=2.,
                        regularizer=L2Decay(0.)),
                    name=fpn_name)
                fpn_dict[fpn_name] = fpn_blob
                fpn_name_list.insert(0, fpn_name)
                spatial_scale.insert(0, spatial_scale[0] * 0.5)
        # output keeps the order of fpn_name_list
        res_dict = OrderedDict([(k, fpn_dict[k]) for k in fpn_name_list])
        return res_dict, spatial_scale
......@@ -6,41 +6,32 @@ from __future__ import print_function
import os
import ast
import argparse
from collections import OrderedDict
from functools import partial
from math import ceil
import paddle
import numpy as np
import paddle.fluid as fluid
import paddlehub as hub
import paddle.jit
import paddle.static
from paddlehub.module.module import moduleinfo, runnable, serving
from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
from paddlehub.io.parser import txt_parser
from paddlehub.common.paddle_helper import add_vars_prefix
from faster_rcnn_resnet50_fpn_coco2017.processor import load_label_info, postprocess, base64_to_cv2
from faster_rcnn_resnet50_fpn_coco2017.data_feed import test_reader, padding_minibatch
from faster_rcnn_resnet50_fpn_coco2017.fpn import FPN
from faster_rcnn_resnet50_fpn_coco2017.resnet import ResNet
from faster_rcnn_resnet50_fpn_coco2017.rpn_head import AnchorGenerator, RPNTargetAssign, GenerateProposals, FPNRPNHead
from faster_rcnn_resnet50_fpn_coco2017.bbox_head import MultiClassNMS, BBoxHead, TwoFCHead
from faster_rcnn_resnet50_fpn_coco2017.bbox_assigner import BBoxAssigner
from faster_rcnn_resnet50_fpn_coco2017.roi_extractor import FPNRoIAlign
from paddle.inference import Config, create_predictor
from paddlehub.utils.parser import txt_parser
from .processor import load_label_info, postprocess, base64_to_cv2
from .data_feed import test_reader, padding_minibatch
@moduleinfo(
name="faster_rcnn_resnet50_fpn_coco2017",
version="1.0.1",
version="1.1.0",
type="cv/object_detection",
summary=
"Baidu's Faster-RCNN model for object detection, whose backbone is ResNet50, processed with Feature Pyramid Networks",
author="paddlepaddle",
author_email="paddle-dev@baidu.com")
class FasterRCNNResNet50RPN(hub.Module):
def _initialize(self):
class FasterRCNNResNet50RPN:
def __init__(self):
# default pretrained model, Faster-RCNN with backbone ResNet50, shape of input tensor is [3, 800, 1333]
self.default_pretrained_model_path = os.path.join(
self.directory, "faster_rcnn_resnet50_fpn_model")
self.directory, "faster_rcnn_resnet50_fpn_model", "model")
self.label_names = load_label_info(
os.path.join(self.directory, "label_file.txt"))
self._set_config()
......@@ -49,10 +40,12 @@ class FasterRCNNResNet50RPN(hub.Module):
"""
predictor config setting
"""
cpu_config = AnalysisConfig(self.default_pretrained_model_path)
model = self.default_pretrained_model_path+'.pdmodel'
params = self.default_pretrained_model_path+'.pdiparams'
cpu_config = Config(model, params)
cpu_config.disable_glog_info()
cpu_config.disable_gpu()
self.cpu_predictor = create_paddle_predictor(cpu_config)
self.cpu_predictor = create_predictor(cpu_config)
try:
_places = os.environ["CUDA_VISIBLE_DEVICES"]
......@@ -61,245 +54,14 @@ class FasterRCNNResNet50RPN(hub.Module):
except:
use_gpu = False
if use_gpu:
gpu_config = AnalysisConfig(self.default_pretrained_model_path)
gpu_config = Config(model, params)
gpu_config.disable_glog_info()
gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0)
self.gpu_predictor = create_paddle_predictor(gpu_config)
def context(self,
            num_classes=81,
            trainable=True,
            pretrained=True,
            phase='train'):
    """
    Distill the Head Features, so as to perform transfer learning.

    Args:
        num_classes (int): number of categories
        trainable (bool): whether to set parameters trainable.
        pretrained (bool): whether to load default pretrained model.
        phase (str): optional choices are 'train' and 'predict'.

    Returns:
        inputs (dict): the input variables.
        outputs (dict): the output variables.
        context_prog (Program): the program to execute transfer learning.

    Note:
        `inputs` and `outputs` are only assigned for the two phases listed
        above; any other `phase` value leaves them undefined.
    """
    context_prog = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(context_prog, startup_program):
        with fluid.unique_name.guard():
            image = fluid.layers.data(
                name='image', shape=[-1, 3, -1, -1], dtype='float32')
            # backbone
            backbone = ResNet(
                norm_type='affine_channel',
                depth=50,
                feature_maps=[2, 3, 4, 5],
                freeze_at=2)
            body_feats = backbone(image)
            # fpn
            fpn = FPN(
                max_level=6,
                min_level=2,
                num_chan=256,
                spatial_scale=[0.03125, 0.0625, 0.125, 0.25])
            # prefix added to every variable name of the two programs below
            var_prefix = '@HUB_{}@'.format(self.name)
            im_info = fluid.layers.data(
                name='im_info', shape=[3], dtype='float32', lod_level=0)
            im_shape = fluid.layers.data(
                name='im_shape', shape=[3], dtype='float32', lod_level=0)
            # NOTE(review): body_feat_names is not used later in this method
            body_feat_names = list(body_feats.keys())
            body_feats, spatial_scale = fpn.get_output(body_feats)
            # rpn_head: RPNHead
            rpn_head = self.rpn_head()
            rois = rpn_head.get_proposals(body_feats, im_info, mode=phase)
            # train
            if phase == 'train':
                gt_bbox = fluid.layers.data(
                    name='gt_bbox', shape=[4], dtype='float32', lod_level=1)
                is_crowd = fluid.layers.data(
                    name='is_crowd', shape=[1], dtype='int32', lod_level=1)
                gt_class = fluid.layers.data(
                    name='gt_class', shape=[1], dtype='int32', lod_level=1)
                rpn_loss = rpn_head.get_loss(im_info, gt_bbox, is_crowd)
                # bbox_assigner: BBoxAssigner
                bbox_assigner = self.bbox_assigner(num_classes)
                outs = fluid.layers.generate_proposal_labels(
                    rpn_rois=rois,
                    gt_classes=gt_class,
                    is_crowd=is_crowd,
                    gt_boxes=gt_bbox,
                    im_info=im_info,
                    batch_size_per_im=bbox_assigner.batch_size_per_im,
                    fg_fraction=bbox_assigner.fg_fraction,
                    fg_thresh=bbox_assigner.fg_thresh,
                    bg_thresh_hi=bbox_assigner.bg_thresh_hi,
                    bg_thresh_lo=bbox_assigner.bg_thresh_lo,
                    bbox_reg_weights=bbox_assigner.bbox_reg_weights,
                    class_nums=bbox_assigner.class_nums,
                    use_random=bbox_assigner.use_random)
                # in training the first output replaces the rpn proposals
                rois = outs[0]
            roi_extractor = self.roi_extractor()
            roi_feat = roi_extractor(
                head_inputs=body_feats,
                rois=rois,
                spatial_scale=spatial_scale)
            # head_feat
            bbox_head = self.bbox_head(num_classes)
            head_feat = bbox_head.head(roi_feat)
            if isinstance(head_feat, OrderedDict):
                head_feat = list(head_feat.values())[0]
            # at this point the dicts hold prefixed variable *names*; they are
            # resolved to variables after add_vars_prefix below
            if phase == 'train':
                inputs = {
                    'image': var_prefix + image.name,
                    'im_info': var_prefix + im_info.name,
                    'im_shape': var_prefix + im_shape.name,
                    'gt_class': var_prefix + gt_class.name,
                    'gt_bbox': var_prefix + gt_bbox.name,
                    'is_crowd': var_prefix + is_crowd.name
                }
                outputs = {
                    'head_features':
                    var_prefix + head_feat.name,
                    'rpn_cls_loss':
                    var_prefix + rpn_loss['rpn_cls_loss'].name,
                    'rpn_reg_loss':
                    var_prefix + rpn_loss['rpn_reg_loss'].name,
                    'generate_proposal_labels':
                    [var_prefix + var.name for var in outs]
                }
            elif phase == 'predict':
                pred = bbox_head.get_prediction(roi_feat, rois, im_info,
                                                im_shape)
                inputs = {
                    'image': var_prefix + image.name,
                    'im_info': var_prefix + im_info.name,
                    'im_shape': var_prefix + im_shape.name
                }
                outputs = {
                    'head_features': var_prefix + head_feat.name,
                    'rois': var_prefix + rois.name,
                    'bbox_out': var_prefix + pred.name
                }
            add_vars_prefix(context_prog, var_prefix)
            add_vars_prefix(startup_program, var_prefix)
            # look the prefixed names up in the program's global block
            global_vars = context_prog.global_block().vars
            inputs = {
                key: global_vars[value]
                for key, value in inputs.items()
            }
            outputs = {
                key: global_vars[value] if not isinstance(value, list) else
                [global_vars[var] for var in value]
                for key, value in outputs.items()
            }
            for param in context_prog.global_block().iter_parameters():
                param.trainable = trainable
            place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            exe.run(startup_program)
            if pretrained:

                def _if_exist(var):
                    # with a non-default class count the 'bbox_pred' and
                    # 'cls_score' variables are never loaded
                    if num_classes != 81:
                        if 'bbox_pred' in var.name or 'cls_score' in var.name:
                            return False
                    return os.path.exists(
                        os.path.join(self.default_pretrained_model_path,
                                     var.name))

                fluid.io.load_vars(
                    exe,
                    self.default_pretrained_model_path,
                    predicate=_if_exist)
            return inputs, outputs, context_prog
def rpn_head(self):
    """Build the `FPNRPNHead` with this module's fixed hyper-parameters."""
    anchors = AnchorGenerator(
        anchor_sizes=[32, 64, 128, 256, 512],
        aspect_ratios=[0.5, 1.0, 2.0],
        stride=[16.0, 16.0],
        variance=[1.0, 1.0, 1.0, 1.0])
    target_assign = RPNTargetAssign(
        rpn_batch_size_per_im=256,
        rpn_fg_fraction=0.5,
        rpn_negative_overlap=0.3,
        rpn_positive_overlap=0.7,
        rpn_straddle_thresh=0.0)
    # proposal generation keeps 2000 boxes in training and 1000 in testing
    proposals_for_train = GenerateProposals(
        min_size=0.0, nms_thresh=0.7, post_nms_top_n=2000, pre_nms_top_n=2000)
    proposals_for_test = GenerateProposals(
        min_size=0.0, nms_thresh=0.7, post_nms_top_n=1000, pre_nms_top_n=1000)
    return FPNRPNHead(
        anchor_generator=anchors,
        rpn_target_assign=target_assign,
        train_proposal=proposals_for_train,
        test_proposal=proposals_for_test,
        anchor_start_size=32,
        num_chan=256,
        min_level=2,
        max_level=6)
def roi_extractor(self):
    """Build the `FPNRoIAlign` extractor with this module's fixed settings."""
    settings = dict(
        canconical_level=4,
        canonical_size=224,
        max_level=5,
        min_level=2,
        box_resolution=7,
        sampling_ratio=2)
    return FPNRoIAlign(**settings)
def bbox_head(self, num_classes):
    """Build a `BBoxHead` (two-fc head plus NMS settings) for `num_classes`."""
    two_fc = TwoFCHead(mlp_dim=1024)
    nms_settings = MultiClassNMS(
        keep_top_k=100, nms_threshold=0.5, score_threshold=0.05)
    return BBoxHead(head=two_fc, nms=nms_settings, num_classes=num_classes)
def bbox_assigner(self, num_classes):
    """Build a `BBoxAssigner` with fixed sampling settings for `num_classes`."""
    settings = dict(
        batch_size_per_im=512,
        bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
        bg_thresh_hi=0.5,
        bg_thresh_lo=0.0,
        fg_fraction=0.25,
        fg_thresh=0.5,
        class_nums=num_classes)
    return BBoxAssigner(**settings)
def save_inference_model(self,
                         dirname,
                         model_filename=None,
                         params_filename=None,
                         combined=True):
    """
    Load the default pretrained inference model and save it again to `dirname`.

    When `combined` is true, empty file names fall back to "__model__" and
    "__params__"; otherwise the given names are passed through unchanged.
    """
    if combined:
        model_filename = model_filename or "__model__"
        params_filename = params_filename or "__params__"
    executor = fluid.Executor(fluid.CPUPlace())
    loaded = fluid.io.load_inference_model(
        dirname=self.default_pretrained_model_path, executor=executor)
    program, feeded_var_names, target_vars = loaded
    fluid.io.save_inference_model(
        dirname=dirname,
        main_program=program,
        executor=executor,
        feeded_var_names=feeded_var_names,
        target_vars=target_vars,
        model_filename=model_filename,
        params_filename=params_filename)
self.gpu_predictor = create_predictor(gpu_config)
def object_detection(self,
paths=None,
images=None,
data=None,
use_gpu=False,
batch_size=1,
output_dir='detection_result',
......@@ -337,8 +99,6 @@ class FasterRCNNResNet50RPN(hub.Module):
)
paths = paths if paths else list()
if data and 'image' in data:
paths += data['image']
all_images = list()
for yield_data in test_reader(paths, images):
......@@ -360,29 +120,37 @@ class FasterRCNNResNet50RPN(hub.Module):
padding_image, padding_info, padding_shape = padding_minibatch(
batch_data, coarsest_stride=32, use_padded_im_info=True)
padding_image_tensor = PaddleTensor(padding_image.copy())
padding_info_tensor = PaddleTensor(padding_info.copy())
padding_shape_tensor = PaddleTensor(padding_shape.copy())
feed_list = [
padding_image_tensor, padding_info_tensor, padding_shape_tensor
padding_image, padding_info, padding_shape
]
if use_gpu:
data_out = self.gpu_predictor.run(feed_list)
else:
data_out = self.cpu_predictor.run(feed_list)
predictor = self.gpu_predictor if use_gpu else self.cpu_predictor
feed_list = [
padding_image, padding_info, padding_shape
]
input_names = predictor.get_input_names()
for i, input_name in enumerate(input_names):
data = np.asarray(feed_list[i], dtype=np.float32)
handle = predictor.get_input_handle(input_name)
handle.copy_from_cpu(data)
predictor.run()
output_names = predictor.get_output_names()
output_handle = predictor.get_output_handle(output_names[0])
output = postprocess(
paths=paths,
images=images,
data_out=data_out,
data_out=output_handle,
score_thresh=score_thresh,
label_names=self.label_names,
output_dir=output_dir,
handle_id=handle_id,
visualization=visualization)
res += output
return res
def add_module_config_arg(self):
......
# coding=utf-8
class NameAdapter(object):
    """Fix the backbones variable names for pretrained weight"""

    def __init__(self, model):
        super(NameAdapter, self).__init__()
        self.model = model

    @property
    def model_type(self):
        """`_model_type` of the wrapped model, or '' when it has none."""
        return getattr(self.model, '_model_type', '')

    @property
    def variant(self):
        """`variant` of the wrapped model, or '' when it has none."""
        return getattr(self.model, 'variant', '')

    def fix_conv_norm_name(self, name):
        """Return the norm layer name that belongs to conv layer `name`."""
        default_name = "bn_" + name if name == "conv1" else "bn" + name[3:]
        # the naming rule is same as pretrained weight
        return name + "_bn" if self.model_type == 'SEResNeXt' else default_name

    def fix_shortcut_name(self, name):
        """Return the shortcut layer name; only SEResNeXt changes it."""
        return 'conv' + name + '_prj' if self.model_type == 'SEResNeXt' else name

    def fix_bottleneck_name(self, name):
        """Return (conv1, conv2, conv3, shortcut) names of a bottleneck block."""
        if self.model_type == 'SEResNeXt':
            stem = 'conv' + name
            return stem + '_x1', stem + '_x2', stem + '_x3', name
        suffixes = ("_branch2a", "_branch2b", "_branch2c", "_branch1")
        return tuple(name + suffix for suffix in suffixes)

    def fix_layer_warp_name(self, stage_num, count, i):
        """Return the name of block `i` in stage `stage_num` holding `count` blocks."""
        stage = 'res' + str(stage_num)
        if count > 10 and stage_num == 4:
            # long stage 4: 'a' for the first block, then 'b1', 'b2', ...
            block_name = stage + ("a" if i == 0 else "b" + str(i))
        else:
            block_name = stage + chr(ord("a") + i)
        if self.model_type == 'SEResNeXt':
            block_name = str(stage_num + 2) + '_' + str(i + 1)
        return block_name

    def fix_c1_stage_name(self):
        """Return the name of the first conv stage."""
        if self.model_type == 'ResNeXt':
            return "res_conv1"
        return "conv1"
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import paddle.fluid as fluid
from paddle.fluid import ParamAttr
# Module-level settings of the non-local block. In the visible part of this
# file, space_nonlocal reads "conv_init_std", "no_bias" and "use_maxpool".
nonlocal_params = {
    "use_zero_init_conv": False,
    "conv_init_std": 0.01,
    "no_bias": True,
    "use_maxpool": False,
    "use_softmax": True,
    "use_bn": False,
    "use_scale": True,  # vital for the model performance!!!
    "use_affine": False,
    "bn_momentum": 0.9,
    "bn_epsilon": 1.0000001e-5,
    "bn_init_gamma": 0.9,
    "weight_decay_bn": 1.e-4,
}
def _nonlocal_conv1x1(input, num_filters, name, zero_init=False):
    """Build one 1x1, stride-1, unpadded convolution of the non-local block.

    Args:
        input (Variable): feature map fed to the convolution.
        num_filters (int): number of output channels.
        name (str): layer name; the weight and bias parameters are named
            ``name + "_w"`` and ``name + "_b"``.
        zero_init (bool): initialize the weight with zeros instead of
            ``Normal(0, nonlocal_params["conv_init_std"])``.

    Returns:
        Variable: output of the convolution.
    """
    if zero_init:
        weight_init = fluid.initializer.Constant(value=0.)
    else:
        weight_init = fluid.initializer.Normal(
            loc=0.0, scale=nonlocal_params["conv_init_std"])

    # A bias parameter is only created when biases are enabled globally.
    if nonlocal_params["no_bias"]:
        bias_attr = False
    else:
        bias_attr = ParamAttr(
            name=name + "_b",
            initializer=fluid.initializer.Constant(value=0.))

    return fluid.layers.conv2d(
        input=input,
        num_filters=num_filters,
        filter_size=[1, 1],
        stride=[1, 1],
        padding=[0, 0],
        param_attr=ParamAttr(name=name + "_w", initializer=weight_init),
        bias_attr=bias_attr,
        name=name)


def space_nonlocal(input, dim_in, dim_out, prefix, dim_inner,
                   max_pool_stride=2):
    """Build the body of a spatial non-local block (without the residual add).

    Three 1x1 convolutions (``theta``, ``phi``, ``g``) are applied to the
    input, the softmax-normalized ``theta``/``phi`` affinity re-weights ``g``,
    and a final 1x1 convolution maps the result to ``dim_out`` channels.
    All behavior switches come from the module-level ``nonlocal_params``.

    Args:
        input (Variable): input feature map.
        dim_in (int): number of input channels. Not used by this function;
            kept for interface compatibility.
        dim_out (int): number of channels of the returned feature map.
        prefix (str): prefix of every layer / parameter name created here.
        dim_inner (int): number of channels of ``theta``, ``phi`` and ``g``.
        max_pool_stride (int): size and stride of the optional max pooling
            applied before ``phi`` / ``g`` (only with ``use_maxpool``).

    Returns:
        Variable: output of the block, reshaped back to the shape of the
        ``theta`` convolution output before the final 1x1 convolution.

    Raises:
        NotImplementedError: if ``nonlocal_params["use_softmax"]`` is false.
    """
    # Fail before any layer is added to the program. The original code tried
    # to ``raise`` a plain string here, which is itself a TypeError in
    # Python 3.
    if not nonlocal_params["use_softmax"]:
        raise NotImplementedError("Not implemented when not use softmax")

    cur = input
    theta = _nonlocal_conv1x1(cur, dim_inner, prefix + '_theta')
    # Remember the 4-D shape (static and run-time) to restore it later.
    theta_shape = theta.shape
    theta_shape_op = fluid.layers.shape(theta)
    theta_shape_op.stop_gradient = True

    if nonlocal_params["use_maxpool"]:
        max_pool = fluid.layers.pool2d(
            input=cur,
            pool_size=[max_pool_stride, max_pool_stride],
            pool_type='max',
            pool_stride=[max_pool_stride, max_pool_stride],
            pool_padding=[0, 0],
            name=prefix + '_pool')
    else:
        max_pool = cur

    phi = _nonlocal_conv1x1(max_pool, dim_inner, prefix + '_phi')
    g = _nonlocal_conv1x1(max_pool, dim_inner, prefix + '_g')

    # we have to use explicit batch size (to support arbitrary spacetime size)
    # e.g. (8, 1024, 4, 14, 14) => (8, 1024, 784)
    theta = fluid.layers.reshape(theta, shape=(0, 0, -1))
    theta = fluid.layers.transpose(theta, [0, 2, 1])
    phi = fluid.layers.reshape(phi, [0, 0, -1])
    theta_phi = fluid.layers.matmul(theta, phi, name=prefix + '_affinity')
    g = fluid.layers.reshape(g, [0, 0, -1])

    if nonlocal_params["use_scale"]:
        theta_phi_sc = fluid.layers.scale(theta_phi, scale=dim_inner**-.5)
    else:
        theta_phi_sc = theta_phi
    p = fluid.layers.softmax(
        theta_phi_sc, name=prefix + '_affinity' + '_prob')

    # note g's axis[2] corresponds to p's axis[2]
    # e.g. g(8, 1024, 784_2) * p(8, 784_1, 784_2) => (8, 1024, 784_1)
    p = fluid.layers.transpose(p, [0, 2, 1])
    t = fluid.layers.matmul(g, p, name=prefix + '_y')

    # reshape back
    # e.g. (8, 1024, 784) => (8, 1024, 4, 14, 14)
    t_re = fluid.layers.reshape(
        t, shape=list(theta_shape), actual_shape=theta_shape_op)

    blob_out = _nonlocal_conv1x1(
        t_re,
        dim_out,
        prefix + '_out',
        zero_init=nonlocal_params["use_zero_init_conv"])
    blob_out_shape = blob_out.shape

    if nonlocal_params["use_bn"]:
        bn_name = prefix + "_bn"
        # is_test is not passed, so batch_norm uses its default for it.
        blob_out = fluid.layers.batch_norm(
            blob_out,
            momentum=nonlocal_params["bn_momentum"],
            epsilon=nonlocal_params["bn_epsilon"],
            name=bn_name,
            param_attr=ParamAttr(
                name=bn_name + "_s",
                initializer=fluid.initializer.Constant(
                    value=nonlocal_params["bn_init_gamma"]),
                regularizer=fluid.regularizer.L2Decay(
                    nonlocal_params["weight_decay_bn"])),
            bias_attr=ParamAttr(
                name=bn_name + "_b",
                regularizer=fluid.regularizer.L2Decay(
                    nonlocal_params["weight_decay_bn"])),
            moving_mean_name=bn_name + "_rm",
            moving_variance_name=bn_name + "_riv")

    if nonlocal_params["use_affine"]:
        affine_scale = fluid.layers.create_parameter(
            shape=[blob_out_shape[1]],
            dtype=blob_out.dtype,
            attr=ParamAttr(name=prefix + '_affine' + '_s'),
            default_initializer=fluid.initializer.Constant(value=1.))
        affine_bias = fluid.layers.create_parameter(
            shape=[blob_out_shape[1]],
            dtype=blob_out.dtype,
            attr=ParamAttr(name=prefix + '_affine' + '_b'),
            default_initializer=fluid.initializer.Constant(value=0.))
        blob_out = fluid.layers.affine_channel(
            blob_out,
            scale=affine_scale,
            bias=affine_bias,
            name=prefix + '_affine')

    return blob_out
def add_space_nonlocal(input, dim_in, dim_out, prefix, dim_inner):
    """Wrap ``input`` in a residual spatial non-local block.

    Non-local Neural Networks: see https://arxiv.org/abs/1711.07971

    The non-local response built by ``space_nonlocal`` is added element-wise
    to ``input``; the sum layer is named ``prefix + '_sum'``.
    """
    nonlocal_branch = space_nonlocal(input, dim_in, dim_out, prefix, dim_inner)
    return fluid.layers.elementwise_add(
        input, nonlocal_branch, name=prefix + '_sum')
......@@ -12,7 +12,6 @@ __all__ = [
'postprocess',
]
def base64_to_cv2(b64str):
data = base64.b64decode(b64str.encode('utf8'))
data = np.fromstring(data, np.uint8)
......@@ -107,7 +106,7 @@ def postprocess(paths,
handle_id,
visualization=True):
"""
postprocess the lod_tensor produced by fluid.Executor.run
postprocess the lod_tensor produced by Executor.run
Args:
paths (list[str]): the path of images.
......@@ -130,9 +129,8 @@ def postprocess(paths,
confidence (float): The confidence of detection result.
save_path (str): The path to save output images.
"""
lod_tensor = data_out[0]
lod = lod_tensor.lod[0]
results = lod_tensor.as_ndarray()
lod = data_out.lod()[0]
results = data_out.copy_to_cpu()
check_dir(output_dir)
......
# coding=utf-8
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
from collections import OrderedDict
from numbers import Integral
from paddle import fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.framework import Variable
from paddle.fluid.regularizer import L2Decay
from paddle.fluid.initializer import Constant
from .nonlocal_helper import add_space_nonlocal
from .name_adapter import NameAdapter
__all__ = ['ResNet', 'ResNetC5']
class ResNet(object):
    """
    Residual Network, see https://arxiv.org/abs/1512.03385

    Args:
        depth (int): ResNet depth, should be 34, 50.
        freeze_at (int): freeze the backbone at which stage
        norm_type (str): normalization type, 'bn'/'sync_bn'/'affine_channel'
        freeze_norm (bool): freeze normalization layers
        norm_decay (float): weight decay for normalization layer weights
        variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently
        feature_maps (list|int): index of stages whose feature maps are
            returned; a single int is wrapped into a list
        dcn_v2_stages (list): index of stages who select deformable conv v2
        weight_prefix_name (str): prefix prepended to every parameter name
        nonlocal_stages (list): index of stages who select nonlocal networks
        get_prediction (bool): if True, ``__call__`` returns the softmax
            classification output instead of the feature maps
        class_dim (int): number of classes of the classification output
    """
    __shared__ = ['norm_type', 'freeze_norm', 'weight_prefix_name']

    def __init__(self,
                 depth=50,
                 freeze_at=0,
                 norm_type='sync_bn',
                 freeze_norm=False,
                 norm_decay=0.,
                 variant='b',
                 feature_maps=[3, 4, 5],
                 dcn_v2_stages=[],
                 weight_prefix_name='',
                 nonlocal_stages=[],
                 get_prediction=False,
                 class_dim=1000):
        super(ResNet, self).__init__()

        if isinstance(feature_maps, Integral):
            feature_maps = [feature_maps]

        assert depth in [34, 50], \
            "depth {} not in [34, 50]".format(depth)
        assert variant in ['a', 'b', 'c', 'd'], "invalid ResNet variant"
        assert 0 <= freeze_at <= 4, "freeze_at should be 0, 1, 2, 3 or 4"
        assert len(feature_maps) > 0, "need one or more feature maps"
        assert norm_type in ['bn', 'sync_bn', 'affine_channel']
        assert not (len(nonlocal_stages) > 0 and depth < 50), \
            "non-local is not supported for resnet18 or resnet34"

        self.depth = depth
        self.freeze_at = freeze_at
        self.norm_type = norm_type
        self.norm_decay = norm_decay
        self.freeze_norm = freeze_norm
        self.variant = variant
        self._model_type = 'ResNet'
        # Copy the list arguments so that the shared default lists (and lists
        # owned by the caller) are never aliased by the instance.
        self.feature_maps = list(feature_maps)
        self.dcn_v2_stages = list(dcn_v2_stages)
        # depth -> (number of blocks in stages 2..5, block builder)
        self.depth_cfg = {
            34: ([3, 4, 6, 3], self.basicblock),
            50: ([3, 4, 6, 3], self.bottleneck),
        }
        self.stage_filters = [64, 128, 256, 512]
        self._c1_out_chan_num = 64
        self.na = NameAdapter(self)
        self.prefix_name = weight_prefix_name

        self.nonlocal_stages = list(nonlocal_stages)
        # depth -> period (in blocks) of non-local insertion inside stage 4
        self.nonlocal_mod_cfg = {
            50: 2,
            101: 5,
            152: 8,
            200: 12,
        }

        self.get_prediction = get_prediction
        self.class_dim = class_dim

    def _conv_offset(self,
                     input,
                     filter_size,
                     stride,
                     padding,
                     act=None,
                     name=None):
        """Build the zero-initialized conv that predicts deformable-conv
        offsets and masks (``filter_size * filter_size * 3`` channels)."""
        out_channel = filter_size * filter_size * 3
        out = fluid.layers.conv2d(
            input,
            num_filters=out_channel,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            param_attr=ParamAttr(initializer=Constant(0.0), name=name + ".w_0"),
            bias_attr=ParamAttr(initializer=Constant(0.0), name=name + ".b_0"),
            act=act,
            name=name)
        return out

    def _conv_norm(self,
                   input,
                   num_filters,
                   filter_size,
                   stride=1,
                   groups=1,
                   act=None,
                   name=None,
                   dcn_v2=False):
        """Build a bias-free convolution followed by a normalization layer.

        Args:
            input (Variable): input feature map.
            num_filters (int): number of output channels.
            filter_size (int): square kernel size; padding is
                ``(filter_size - 1) // 2``.
            stride (int): convolution stride.
            groups (int): number of convolution groups.
            act (str|None): activation applied by the normalization layer.
            name (str): base layer name (before ``weight_prefix_name``).
            dcn_v2 (bool): use a deformable convolution (with predicted
                offsets and sigmoid mask) instead of a plain convolution.

        Returns:
            Variable: the normalized (and optionally activated) output.
        """
        _name = self.prefix_name + name if self.prefix_name != '' else name
        if not dcn_v2:
            conv = fluid.layers.conv2d(
                input=input,
                num_filters=num_filters,
                filter_size=filter_size,
                stride=stride,
                padding=(filter_size - 1) // 2,
                groups=groups,
                act=None,
                param_attr=ParamAttr(name=_name + "_weights"),
                bias_attr=False,
                name=_name + '.conv2d.output.1')
        else:
            # select deformable conv
            offset_mask = self._conv_offset(
                input=input,
                filter_size=filter_size,
                stride=stride,
                padding=(filter_size - 1) // 2,
                act=None,
                name=_name + "_conv_offset")
            # The 3 * k^2 predicted channels are 2 * k^2 offsets + k^2 masks.
            offset_channel = filter_size**2 * 2
            mask_channel = filter_size**2
            offset, mask = fluid.layers.split(
                input=offset_mask,
                num_or_sections=[offset_channel, mask_channel],
                dim=1)
            mask = fluid.layers.sigmoid(mask)
            conv = fluid.layers.deformable_conv(
                input=input,
                offset=offset,
                mask=mask,
                num_filters=num_filters,
                filter_size=filter_size,
                stride=stride,
                padding=(filter_size - 1) // 2,
                groups=groups,
                deformable_groups=1,
                im2col_step=1,
                param_attr=ParamAttr(name=_name + "_weights"),
                bias_attr=False,
                name=_name + ".conv2d.output.1")

        bn_name = self.na.fix_conv_norm_name(name)
        bn_name = self.prefix_name + bn_name if self.prefix_name != '' else bn_name

        # A zero learning rate keeps frozen normalization parameters fixed.
        norm_lr = 0. if self.freeze_norm else 1.
        norm_decay = self.norm_decay
        pattr = ParamAttr(
            name=bn_name + '_scale',
            learning_rate=norm_lr,
            regularizer=L2Decay(norm_decay))
        battr = ParamAttr(
            name=bn_name + '_offset',
            learning_rate=norm_lr,
            regularizer=L2Decay(norm_decay))

        if self.norm_type in ['bn', 'sync_bn']:
            global_stats = True if self.freeze_norm else False
            out = fluid.layers.batch_norm(
                input=conv,
                act=act,
                name=bn_name + '.output.1',
                param_attr=pattr,
                bias_attr=battr,
                moving_mean_name=bn_name + '_mean',
                moving_variance_name=bn_name + '_variance',
                use_global_stats=global_stats)
            scale = fluid.framework._get_var(pattr.name)
            bias = fluid.framework._get_var(battr.name)
        elif self.norm_type == 'affine_channel':
            scale = fluid.layers.create_parameter(
                shape=[conv.shape[1]],
                dtype=conv.dtype,
                attr=pattr,
                default_initializer=fluid.initializer.Constant(1.))
            bias = fluid.layers.create_parameter(
                shape=[conv.shape[1]],
                dtype=conv.dtype,
                attr=battr,
                default_initializer=fluid.initializer.Constant(0.))
            out = fluid.layers.affine_channel(
                x=conv, scale=scale, bias=bias, act=act)
        if self.freeze_norm:
            scale.stop_gradient = True
            bias.stop_gradient = True
        return out

    def _shortcut(self, input, ch_out, stride, is_first, name):
        """Build the shortcut branch of a residual block.

        Returns ``input`` unchanged when neither the channel count nor the
        stride changes (and the block is not the first one of a shallow
        network); otherwise returns a projection built with ``_conv_norm``.
        """
        max_pooling_in_short_cut = self.variant == 'd'
        ch_in = input.shape[1]
        # the naming rule is same as pretrained weight
        name = self.na.fix_shortcut_name(name)
        std_senet = getattr(self, 'std_senet', False)
        if ch_in != ch_out or stride != 1 or (self.depth < 50 and is_first):
            if std_senet:
                if is_first:
                    return self._conv_norm(input, ch_out, 1, stride, name=name)
                else:
                    return self._conv_norm(input, ch_out, 3, stride, name=name)
            if max_pooling_in_short_cut and not is_first:
                # Variant 'd': average-pool first, then a stride-1 projection.
                input = fluid.layers.pool2d(
                    input=input,
                    pool_size=2,
                    pool_stride=2,
                    pool_padding=0,
                    ceil_mode=True,
                    pool_type='avg')
                return self._conv_norm(input, ch_out, 1, 1, name=name)
            return self._conv_norm(input, ch_out, 1, stride, name=name)
        else:
            return input

    def bottleneck(self,
                   input,
                   num_filters,
                   stride,
                   is_first,
                   name,
                   dcn_v2=False):
        """Build one bottleneck block (1x1 -> 3x3 -> 1x1 plus shortcut).

        Args:
            input (Variable): input feature map.
            num_filters (int): base channel count of the block.
            stride (int): stride of the block; variant 'a' applies it in the
                first conv, the other variants in the 3x3 conv.
            is_first (bool): forwarded to ``_shortcut``.
            name (str): block name used to derive the layer names.
            dcn_v2 (bool): use a deformable conv for the 3x3 conv.

        Returns:
            Variable: ``relu(shortcut + residual)``.
        """
        if self.variant == 'a':
            stride1, stride2 = stride, 1
        else:
            stride1, stride2 = 1, stride

        # ResNeXt
        groups = getattr(self, 'groups', 1)
        group_width = getattr(self, 'group_width', -1)
        if groups == 1:
            expand = 4
        elif (groups * group_width) == 256:
            expand = 1
        else:  # FIXME hard code for now, handles 32x4d, 64x4d and 32x8d
            num_filters = num_filters // 2
            expand = 2

        conv_name1, conv_name2, conv_name3, \
            shortcut_name = self.na.fix_bottleneck_name(name)
        std_senet = getattr(self, 'std_senet', False)
        # Each entry: [channels, kernel, stride, activation, groups, name].
        if std_senet:
            conv_def = [
                [int(num_filters / 2), 1, stride1, 'relu', 1, conv_name1],
                [num_filters, 3, stride2, 'relu', groups, conv_name2],
                [num_filters * expand, 1, 1, None, 1, conv_name3],
            ]
        else:
            conv_def = [
                [num_filters, 1, stride1, 'relu', 1, conv_name1],
                [num_filters, 3, stride2, 'relu', groups, conv_name2],
                [num_filters * expand, 1, 1, None, 1, conv_name3],
            ]

        residual = input
        for i, (c, k, s, act, g, _name) in enumerate(conv_def):
            residual = self._conv_norm(
                input=residual,
                num_filters=c,
                filter_size=k,
                stride=s,
                act=act,
                groups=g,
                name=_name,
                # only the 3x3 conv may be deformable
                dcn_v2=(i == 1 and dcn_v2))
        short = self._shortcut(
            input,
            num_filters * expand,
            stride,
            is_first=is_first,
            name=shortcut_name)
        # Squeeze-and-Excitation
        if callable(getattr(self, '_squeeze_excitation', None)):
            residual = self._squeeze_excitation(
                input=residual, num_channels=num_filters, name='fc' + name)
        return fluid.layers.elementwise_add(
            x=short, y=residual, act='relu', name=name + ".add.output.5")

    def basicblock(self,
                   input,
                   num_filters,
                   stride,
                   is_first,
                   name,
                   dcn_v2=False):
        """Build one basic block (two 3x3 convs plus shortcut).

        ``dcn_v2`` must be False: deformable convs are not implemented for
        this block type.
        """
        assert dcn_v2 is False, "Not implemented yet."
        conv0 = self._conv_norm(
            input=input,
            num_filters=num_filters,
            filter_size=3,
            act='relu',
            stride=stride,
            name=name + "_branch2a")
        conv1 = self._conv_norm(
            input=conv0,
            num_filters=num_filters,
            filter_size=3,
            act=None,
            name=name + "_branch2b")
        short = self._shortcut(
            input, num_filters, stride, is_first, name=name + "_branch1")
        return fluid.layers.elementwise_add(x=short, y=conv1, act='relu')

    def layer_warp(self, input, stage_num):
        """
        Args:
            input (Variable): input variable.
            stage_num (int): the stage number, should be 2, 3, 4, 5

        Returns:
            The last variable in endpoint-th stage.
        """
        assert stage_num in [2, 3, 4, 5]

        stages, block_func = self.depth_cfg[self.depth]
        count = stages[stage_num - 2]

        ch_out = self.stage_filters[stage_num - 2]
        is_first = False if stage_num != 2 else True
        dcn_v2 = True if stage_num in self.dcn_v2_stages else False

        # 1000 exceeds every stage length, so by default no non-local block
        # is inserted.
        nonlocal_mod = 1000
        if stage_num in self.nonlocal_stages:
            nonlocal_mod = self.nonlocal_mod_cfg[
                self.depth] if stage_num == 4 else 2

        nonlocal_name = "nonlocal_conv{}".format(stage_num)

        # Make the layer name and parameter name consistent
        # with ImageNet pre-trained model
        conv = input
        for i in range(count):
            conv_name = self.na.fix_layer_warp_name(stage_num, count, i)
            if self.depth < 50:
                is_first = True if i == 0 and stage_num == 2 else False
            conv = block_func(
                input=conv,
                num_filters=ch_out,
                # every stage but the first downsamples in its first block
                stride=2 if i == 0 and stage_num != 2 else 1,
                is_first=is_first,
                name=conv_name,
                dcn_v2=dcn_v2)

            # add non local model
            dim_in = conv.shape[1]
            if i % nonlocal_mod == nonlocal_mod - 1:
                conv = add_space_nonlocal(conv, dim_in, dim_in,
                                          nonlocal_name + '_{}'.format(i),
                                          int(dim_in / 2))
        return conv

    def c1_stage(self, input):
        """Build the stem: one 7x7 conv (variants 'a'/'b') or three 3x3 convs
        (variants 'c'/'d'), followed by a 3x3 stride-2 max pooling."""
        out_chan = self._c1_out_chan_num

        conv1_name = self.na.fix_c1_stage_name()

        # Each entry: [channels, kernel, stride, name].
        if self.variant in ['c', 'd']:
            conv_def = [
                [out_chan // 2, 3, 2, "conv1_1"],
                [out_chan // 2, 3, 1, "conv1_2"],
                [out_chan, 3, 1, "conv1_3"],
            ]
        else:
            conv_def = [[out_chan, 7, 2, conv1_name]]

        for (c, k, s, _name) in conv_def:
            input = self._conv_norm(
                input=input,
                num_filters=c,
                filter_size=k,
                stride=s,
                act='relu',
                name=_name)

        output = fluid.layers.pool2d(
            input=input,
            pool_size=3,
            pool_stride=2,
            pool_padding=1,
            pool_type='max')
        return output

    def __call__(self, input):
        """Build the backbone on ``input``.

        Returns:
            The softmax classification output when ``get_prediction`` is
            set; otherwise an ``OrderedDict`` mapping ``'res<stage>_sum'`` to
            the output of every stage listed in ``feature_maps``.
        """
        assert isinstance(input, Variable)
        assert not (set(self.feature_maps) - set([2, 3, 4, 5])), \
            "feature maps {} not in [2, 3, 4, 5]".format(self.feature_maps)

        res_endpoints = []

        res = input
        feature_maps = self.feature_maps
        severed_head = getattr(self, 'severed_head', False)
        if not severed_head:
            res = self.c1_stage(res)
            feature_maps = range(2, max(self.feature_maps) + 1)

        for i in feature_maps:
            res = self.layer_warp(res, i)
            if i in self.feature_maps:
                # Keep the stage index with the feature so the output key is
                # right even when self.feature_maps is not sorted ascending.
                res_endpoints.append((i, res))
            if self.freeze_at >= i:
                res.stop_gradient = True

        if self.get_prediction:
            pool = fluid.layers.pool2d(
                input=res, pool_type='avg', global_pooling=True)
            stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)

            out = fluid.layers.fc(
                input=pool,
                size=self.class_dim,
                param_attr=fluid.param_attr.ParamAttr(
                    initializer=fluid.initializer.Uniform(-stdv, stdv)))
            out = fluid.layers.softmax(out)
            return out

        return OrderedDict([('res{}_sum'.format(stage), feat)
                            for stage, feat in res_endpoints])
class ResNetC5(ResNet):
    """ResNet variant that builds only the stages listed in ``feature_maps``.

    ``severed_head`` is set on the instance, which makes ``ResNet.__call__``
    skip the C1 stem and iterate over ``feature_maps`` directly (stage 5 by
    default) instead of building every stage from 2 upwards.

    Args:
        depth (int): ResNet depth, forwarded to ``ResNet``.
        freeze_at (int): freeze the backbone at which stage.
        norm_type (str): normalization type,
            'bn'/'sync_bn'/'affine_channel'.
        freeze_norm (bool): freeze normalization layers.
        norm_decay (float): weight decay for normalization layer weights.
        variant (str): ResNet variant, 'a', 'b', 'c' or 'd'.
        feature_maps (list|int): index of the stages to build and return.
        weight_prefix_name (str): prefix prepended to every parameter name.
    """

    def __init__(self,
                 depth=50,
                 freeze_at=2,
                 norm_type='affine_channel',
                 freeze_norm=True,
                 norm_decay=0.,
                 variant='b',
                 feature_maps=[5],
                 weight_prefix_name=''):
        # weight_prefix_name used to be accepted but silently dropped; pass
        # it on (by keyword, since it is not the next positional parameter of
        # ResNet.__init__) so that the prefix is actually applied.
        super(ResNetC5, self).__init__(
            depth,
            freeze_at,
            norm_type,
            freeze_norm,
            norm_decay,
            variant,
            feature_maps,
            weight_prefix_name=weight_prefix_name)
        self.severed_head = True
# coding=utf-8
import paddle.fluid as fluid
__all__ = ['FPNRoIAlign']
class FPNRoIAlign(object):
    """
    RoI align pooling for FPN feature maps

    Args:
        sampling_ratio (int): number of sampling points
        min_level (int): lowest level of FPN layer
        max_level (int): highest level of FPN layer
        canconical_level (int): the canonical FPN feature map level
            (the parameter name keeps its historical spelling)
        canonical_size (int): the canonical FPN feature map size
        box_resolution (int): box resolution
        mask_resolution (int): mask roi resolution
    """

    def __init__(self,
                 sampling_ratio=0,
                 min_level=2,
                 max_level=5,
                 canconical_level=4,
                 canonical_size=224,
                 box_resolution=7,
                 mask_resolution=14):
        super(FPNRoIAlign, self).__init__()
        self.sampling_ratio = sampling_ratio
        self.min_level = min_level
        self.max_level = max_level
        self.canconical_level = canconical_level
        self.canonical_size = canonical_size
        self.box_resolution = box_resolution
        self.mask_resolution = mask_resolution

    def __call__(self, head_inputs, rois, spatial_scale, is_mask=False):
        """
        Adopt RoI align onto several level of feature maps to get RoI features.
        Distribute RoIs to different levels by area and get a list of RoI
        features by distributed RoIs and their corresponding feature maps.

        Args:
            head_inputs (dict): feature maps by name; only the last
                ``max_level - min_level + 1`` entries (in key order) are used.
            rois (Variable): RoIs to pool features for.
            spatial_scale (list): scale of every feature map, in the same
                order as ``head_inputs``.
            is_mask (bool): pool at ``mask_resolution`` instead of
                ``box_resolution``.

        Returns:
            roi_feat(Variable): RoI features with shape of [M, C, R, R],
            where M is the number of RoIs and R is RoI resolution
        """
        k_min = self.min_level
        k_max = self.max_level
        num_roi_lvls = k_max - k_min + 1
        name_list = list(head_inputs.keys())
        input_name_list = name_list[-num_roi_lvls:]
        spatial_scale = spatial_scale[-num_roi_lvls:]
        rois_dist, restore_index = fluid.layers.distribute_fpn_proposals(
            rois, k_min, k_max, self.canconical_level, self.canonical_size)
        # rois_dist is in ascend order
        roi_out_list = []
        # A conditional expression (rather than ``and``/``or``) so that the
        # choice depends on is_mask only, never on the resolution's truthiness.
        resolution = self.mask_resolution if is_mask else self.box_resolution
        for lvl in range(num_roi_lvls):
            # The selected inputs are walked from the last entry backwards
            # while rois_dist is walked forwards.
            # NOTE(review): this presumably relies on head_inputs being
            # ordered from the highest FPN level to the lowest - confirm.
            name_index = num_roi_lvls - lvl - 1
            rois_input = rois_dist[lvl]
            head_input = head_inputs[input_name_list[name_index]]
            sc = spatial_scale[name_index]
            roi_out = fluid.layers.roi_align(
                input=head_input,
                rois=rois_input,
                pooled_height=resolution,
                pooled_width=resolution,
                spatial_scale=sc,
                sampling_ratio=self.sampling_ratio)
            roi_out_list.append(roi_out)
        # Concatenate the per-level features, then undo the per-level
        # shuffling with the index returned by distribute_fpn_proposals.
        roi_feat_shuffle = fluid.layers.concat(roi_out_list)
        roi_feat_ = fluid.layers.gather(roi_feat_shuffle, restore_index)
        roi_feat = fluid.layers.lod_reset(roi_feat_, rois)

        return roi_feat
import os
import shutil
import unittest
import cv2
import requests
import paddlehub as hub
class TestHubModule(unittest.TestCase):
    """End-to-end tests of the ``faster_rcnn_resnet50_fpn_coco2017`` module.

    A test image is downloaded once into ``tests/``; every directory the
    tests may create is removed again in ``tearDownClass``.
    """

    @classmethod
    def setUpClass(cls) -> None:
        img_url = 'https://ai-studio-static-online.cdn.bcebos.com/68313e182f5e4ad9907e69dac9ece8fc50840d7ffbd24fa88396f009958f969a'
        os.makedirs('tests', exist_ok=True)
        # A timeout keeps the suite from hanging on an unresponsive server.
        response = requests.get(img_url, timeout=60)
        assert response.status_code == 200, 'Network Error.'
        with open('tests/test.jpg', 'wb') as f:
            f.write(response.content)
        cls.module = hub.Module(name="faster_rcnn_resnet50_fpn_coco2017")

    @classmethod
    def tearDownClass(cls) -> None:
        # Some directories only exist if the corresponding test ran and
        # succeeded; a missing one must not abort the rest of the clean-up.
        for path in ('tests', 'inference', 'detection_result'):
            shutil.rmtree(path, ignore_errors=True)

    def _assert_cat_detected(self, results):
        """Check that the first detection of the first image is the cat."""
        bbox = results[0]['data'][0]
        label = bbox['label']
        confidence = bbox['confidence']
        left = bbox['left']
        right = bbox['right']
        top = bbox['top']
        bottom = bbox['bottom']

        self.assertEqual(label, 'cat')
        self.assertTrue(confidence > 0.5)
        self.assertTrue(200 < left < 800)
        self.assertTrue(2500 < right < 3500)
        self.assertTrue(500 < top < 1500)
        self.assertTrue(3500 < bottom < 4500)

    def test_object_detection1(self):
        """Detection from an image path."""
        results = self.module.object_detection(
            paths=['tests/test.jpg']
        )
        self._assert_cat_detected(results)

    def test_object_detection2(self):
        """Detection from an in-memory image."""
        results = self.module.object_detection(
            images=[cv2.imread('tests/test.jpg')]
        )
        self._assert_cat_detected(results)

    def test_object_detection3(self):
        """Detection from an in-memory image with visualization disabled."""
        results = self.module.object_detection(
            images=[cv2.imread('tests/test.jpg')],
            visualization=False
        )
        self._assert_cat_detected(results)

    def test_object_detection4(self):
        """A path that does not exist is rejected with AssertionError."""
        self.assertRaises(
            AssertionError,
            self.module.object_detection,
            paths=['no.jpg']
        )

    def test_object_detection5(self):
        """A string passed where an image array is expected raises cv2.error."""
        self.assertRaises(
            cv2.error,
            self.module.object_detection,
            images=['test.jpg']
        )

    def test_save_inference_model(self):
        """Saving writes both the model and the parameter file."""
        self.module.save_inference_model('./inference/model')

        self.assertTrue(os.path.exists('./inference/model.pdmodel'))
        self.assertTrue(os.path.exists('./inference/model.pdiparams'))
if __name__ == "__main__":
unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册