未验证 提交 646996f4 编写于 作者: W wangguanzhong 提交者: GitHub

refactor dygraph & add mask rcnn fpn (#1171)

* refactor dygraph & add mask rcnn fpn

* fix initialization & minor update

* refine architecture of mask_rcnn
上级 d8704f28
architecture: MaskRCNN
use_gpu: true
max_iters: 180000
log_smooth_window: 20
save_dir: output
snapshot_iter: 10000
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar
metric: COCO
weights: output/mask_rcnn_r50_fpn_1x/model_final
num_classes: 81
load_static_weights: True

# Model Achitecture
MaskRCNN:
  # model anchor info flow
  anchor: AnchorRPN
  proposal: Proposal
  mask: Mask
  # model feat info flow
  backbone: ResNet
  neck: FPN
  rpn_head: RPNHead
  bbox_head: BBoxHead
  mask_head: MaskHead

ResNet:
  # index 0 stands for res2
  depth: 50
  norm_type: bn
  freeze_at: 0
  return_idx: [0,1,2,3]
  num_stages: 4

FPN:
  in_channels: [256, 512, 1024, 2048]
  out_channel: 256
  min_level: 0
  max_level: 4
  spatial_scale: [0.25, 0.125, 0.0625, 0.03125]

RPNHead:
  rpn_feat:
    name: RPNFeat
    feat_in: 256
    feat_out: 256
  anchor_per_position: 3
  rpn_channel: 256

BBoxHead:
  bbox_feat:
    name: BBoxFeat
    roi_extractor:
      name: RoIExtractor
      resolution: 7
      sampling_ratio: 2
    head_feat:
      name: TwoFCHead
      in_dim: 256
      mlp_dim: 1024
  in_feat: 1024

MaskHead:
  mask_feat:
    name: MaskFeat
    num_convs: 4
    feat_in: 256
    feat_out: 256
    mask_roi_extractor:
      name: RoIExtractor
      resolution: 14
      sampling_ratio: 2
    share_bbox_feat: False
  feat_in: 256

AnchorRPN:
  anchor_generator:
    name: AnchorGeneratorRPN
    aspect_ratios: [0.5, 1.0, 2.0]
    anchor_start_size: 32
    stride: [4., 4.]
  anchor_target_generator:
    name: AnchorTargetGeneratorRPN
    batch_size_per_im: 256
    fg_fraction: 0.5
    negative_overlap: 0.3
    positive_overlap: 0.7
    straddle_thresh: 0.0

Proposal:
  proposal_generator:
    name: ProposalGenerator
    min_size: 0.0
    nms_thresh: 0.7
    train_pre_nms_top_n: 2000
    train_post_nms_top_n: 2000
    infer_pre_nms_top_n: 1000
    infer_post_nms_top_n: 1000
  proposal_target_generator:
    name: ProposalTargetGenerator
    batch_size_per_im: 512
    bbox_reg_weights: [[0.1, 0.1, 0.2, 0.2],]
    bg_thresh_hi: [0.5,]
    bg_thresh_lo: [0.0,]
    fg_thresh: [0.5,]
    fg_fraction: 0.25
  bbox_post_process: # used in infer
    name: BBoxPostProcess
    # decode -> clip -> nms
    decode_clip_nms:
      name: DecodeClipNms
      keep_top_k: 100
      score_threshold: 0.05
      nms_threshold: 0.5

Mask:
  mask_target_generator:
    name: MaskTargetGenerator
    mask_resolution: 28
  mask_post_process:
    name: MaskPostProcess
    mask_resolution: 28

# Train
LearningRate:
  base_lr: 0.01
  schedulers:
  - !PiecewiseDecay
    gamma: 0.1
    milestones: [120000, 160000]
  - !LinearWarmup
    start_factor: 0.3333
    steps: 500

OptimizerBuilder:
  optimizer:
    momentum: 0.9
    type: Momentum
  regularizer:
    factor: 0.0001
    type: L2

_READER_: 'mask_reader.yml'
......@@ -18,8 +18,8 @@ TrainReader:
mean: [0.485,0.456,0.406]
std: [0.229, 0.224,0.225]
- !ResizeImage
target_size: 512
max_size: 512
target_size: 800
max_size: 1333
interp: 1
use_cv2: true
- !Permute
......@@ -39,8 +39,6 @@ TrainReader:
EvalReader:
inputs_def:
fields: ['image', 'im_info', 'im_id', 'im_shape']
# for voc
#fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_difficult']
dataset:
!COCODataSet
image_dir: val2017
......
......@@ -127,7 +127,6 @@ class COCODataSet(DataSet):
if not self.load_image_only:
ins_anno_ids = coco.getAnnIds(imgIds=img_id, iscrowd=False)
instances = coco.loadAnns(ins_anno_ids)
bboxes = []
for inst in instances:
x, y, box_w, box_h = inst['bbox']
......@@ -135,6 +134,7 @@ class COCODataSet(DataSet):
y1 = max(0, y)
x2 = min(im_w - 1, x1 + max(0, box_w - 1))
y2 = min(im_h - 1, y1 + max(0, box_h - 1))
if inst['area'] > 0 and x2 >= x1 and y2 >= y1:
inst['clean_bbox'] = [x1, y1, x2, y2]
bboxes.append(inst)
......@@ -143,6 +143,7 @@ class COCODataSet(DataSet):
'Found an invalid bbox in annotations: im_id: {}, '
'area: {} x1: {}, y1: {}, x2: {}, y2: {}.'.format(
img_id, float(inst['area']), x1, y1, x2, y2))
num_bbox = len(bboxes)
gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -24,7 +24,6 @@ except Exception:
import logging
import cv2
import numpy as np
from .operators import register_op, BaseOperator
from .op_helper import jaccard_overlap, gaussian2D
......@@ -50,10 +49,11 @@ class PadBatch(BaseOperator):
height and width is divisible by `pad_to_stride`.
"""
def __init__(self, pad_to_stride=0, use_padded_im_info=True):
def __init__(self, pad_to_stride=0, use_padded_im_info=True, pad_gt=False):
super(PadBatch, self).__init__()
self.pad_to_stride = pad_to_stride
self.use_padded_im_info = use_padded_im_info
self.pad_gt = pad_gt
def __call__(self, samples, context=None):
"""
......@@ -61,11 +61,11 @@ class PadBatch(BaseOperator):
samples (list): a batch of sample, each is dict.
"""
coarsest_stride = self.pad_to_stride
if coarsest_stride == 0:
return samples
#if coarsest_stride == 0:
# return samples
max_shape = np.array([data['image'].shape for data in samples]).max(
axis=0)
if coarsest_stride > 0:
max_shape[1] = int(
np.ceil(max_shape[1] / coarsest_stride) * coarsest_stride)
......@@ -82,6 +82,52 @@ class PadBatch(BaseOperator):
data['image'] = padding_im
if self.use_padded_im_info:
data['im_info'][:2] = max_shape[1:3]
if self.pad_gt:
gt_num = []
if data['gt_poly'] is not None and len(data['gt_poly']) > 0:
pad_mask = True
else:
pad_mask = False
if pad_mask:
poly_num = []
poly_part_num = []
point_num = []
for data in samples:
gt_num.append(data['gt_bbox'].shape[0])
if pad_mask:
poly_num.append(len(data['gt_poly']))
for poly in data['gt_poly']:
poly_part_num.append(int(len(poly)))
for p_p in poly:
point_num.append(int(len(p_p) / 2))
gt_num_max = max(gt_num)
gt_box_data = np.zeros([gt_num_max, 4])
gt_class_data = np.zeros([gt_num_max])
is_crowd_data = np.ones([gt_num_max])
if pad_mask:
poly_num_max = max(poly_num)
poly_part_num_max = max(poly_part_num)
point_num_max = max(point_num)
gt_masks_data = -np.ones(
[poly_num_max, poly_part_num_max, point_num_max, 2])
for i, data in enumerate(samples):
gt_num = data['gt_bbox'].shape[0]
gt_box_data[0:gt_num, :] = data['gt_bbox']
gt_class_data[0:gt_num] = np.squeeze(data['gt_class'])
is_crowd_data[0:gt_num] = np.squeeze(data['is_crowd'])
if pad_mask:
for j, poly in enumerate(data['gt_poly']):
for k, p_p in enumerate(poly):
pp_np = np.array(p_p).reshape(-1, 2)
gt_masks_data[j, k, :pp_np.shape[0], :] = pp_np
data['gt_poly'] = gt_masks_data
data['gt_bbox'] = gt_box_data
data['gt_class'] = gt_class_data
data['is_crowd'] = is_crowd_data
return samples
......
......@@ -122,7 +122,6 @@ class DecodeImage(BaseOperator):
if self.to_rgb:
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
sample['image'] = im
if 'h' not in sample:
sample['h'] = im.shape[0]
elif sample['h'] != im.shape[0]:
......@@ -333,7 +332,6 @@ class ResizeImage(BaseOperator):
resize_w = selected_size
resize_h = selected_size
if self.use_cv2:
im = cv2.resize(
im,
......
......@@ -2,6 +2,7 @@ from . import ops
from . import bbox
from . import mask
from . import backbone
from . import neck
from . import head
from . import architecture
......@@ -9,5 +10,6 @@ from .ops import *
from .bbox import *
from .mask import *
from .backbone import *
from .neck import *
from .head import *
from .architecture import *
......@@ -4,7 +4,6 @@ from __future__ import print_function
from paddle import fluid
from ppdet.core.workspace import register
from ppdet.utils.data_structure import BufferDict
from .meta_arch import BaseArch
__all__ = ['MaskRCNN']
......@@ -18,84 +17,107 @@ class MaskRCNN(BaseArch):
'proposal',
'mask',
'backbone',
'neck',
'rpn_head',
'bbox_head',
'mask_head',
]
def __init__(self, anchor, proposal, mask, backbone, rpn_head, bbox_head,
mask_head, *args, **kwargs):
super(MaskRCNN, self).__init__(*args, **kwargs)
def __init__(self,
anchor,
proposal,
mask,
backbone,
rpn_head,
bbox_head,
mask_head,
neck=None):
super(MaskRCNN, self).__init__()
self.anchor = anchor
self.proposal = proposal
self.mask = mask
self.backbone = backbone
self.neck = neck
self.rpn_head = rpn_head
self.bbox_head = bbox_head
self.mask_head = mask_head
def model_arch(self, ):
def model_arch(self):
# Backbone
bb_out = self.backbone(self.gbd)
self.gbd.update(bb_out)
body_feats = self.backbone(self.inputs)
spatial_scale = None
# Neck
if self.neck is not None:
body_feats, spatial_scale = self.neck(body_feats)
# RPN
rpn_head_out = self.rpn_head(self.gbd)
self.gbd.update(rpn_head_out)
# rpn_head returns two list: rpn_feat, rpn_head_out
# each element in rpn_feats contains rpn feature on each level,
# and the length is 1 when the neck is not applied.
# each element in rpn_head_out contains (rpn_rois_score, rpn_rois_delta)
rpn_feat, self.rpn_head_out = self.rpn_head(self.inputs, body_feats)
# Anchor
anchor_out = self.anchor(self.gbd)
self.gbd.update(anchor_out)
# Proposal BBox
self.gbd['stage'] = 0
proposal_out = self.proposal(self.gbd)
self.gbd.update({'proposal_0': proposal_out})
# anchor_out returns a list,
# each element contains (anchor, anchor_var)
self.anchor_out = self.anchor(rpn_feat)
# Proposal RoI
# compute targets here when training
rois = self.proposal(self.inputs, self.rpn_head_out, self.anchor_out)
# BBox Head
bboxhead_out = self.bbox_head(self.gbd)
self.gbd.update({'bbox_head_0': bboxhead_out})
bbox_feat, self.bbox_head_out = self.bbox_head(body_feats, rois,
spatial_scale)
rois_has_mask_int32 = None
if self.inputs['mode'] == 'infer':
# Refine bbox by the output from bbox_head at test stage
self.bboxes = self.proposal.post_process(self.inputs,
self.bbox_head_out, rois)
else:
# Proposal RoI for Mask branch
# bboxes update at training stage only
bbox_targets = self.proposal.get_targets()[0]
self.bboxes, rois_has_mask_int32 = self.mask(self.inputs, rois,
bbox_targets)
# Mask Head
self.mask_head_out = self.mask_head(self.inputs, body_feats,
self.bboxes, bbox_feat,
rois_has_mask_int32, spatial_scale)
if self.gbd['mode'] == 'infer':
bbox_out = self.proposal.post_process(self.gbd)
self.gbd.update(bbox_out)
def loss(self, ):
loss = {}
# Mask
mask_out = self.mask(self.gbd)
self.gbd.update(mask_out)
# RPN loss
rpn_loss_inputs = self.anchor.generate_loss_inputs(
self.inputs, self.rpn_head_out, self.anchor_out)
loss_rpn = self.rpn_head.loss(rpn_loss_inputs)
loss.update(loss_rpn)
# Mask Head
mask_head_out = self.mask_head(self.gbd)
self.gbd.update(mask_head_out)
# BBox loss
bbox_targets = self.proposal.get_targets()
loss_bbox = self.bbox_head.loss(self.bbox_head_out, bbox_targets)
loss.update(loss_bbox)
if self.gbd['mode'] == 'infer':
mask_out = self.mask.post_process(self.gbd)
self.gbd.update(mask_out)
# Mask loss
mask_targets = self.mask.get_targets()
loss_mask = self.mask_head.loss(self.mask_head_out, mask_targets)
loss.update(loss_mask)
def loss(self, ):
losses = []
rpn_cls_loss, rpn_reg_loss = self.rpn_head.loss(self.gbd)
bbox_cls_loss, bbox_reg_loss = self.bbox_head.loss(self.gbd)
mask_loss = self.mask_head.loss(self.gbd)
losses = [
rpn_cls_loss, rpn_reg_loss, bbox_cls_loss, bbox_reg_loss, mask_loss
]
loss = fluid.layers.sum(losses)
out = {
'loss': loss,
'loss_rpn_cls': rpn_cls_loss,
'loss_rpn_reg': rpn_reg_loss,
'loss_bbox_cls': bbox_cls_loss,
'loss_bbox_reg': bbox_reg_loss,
'loss_mask': mask_loss
}
return out
total_loss = fluid.layers.sums(list(loss.values()))
loss.update({'loss': total_loss})
return loss
def infer(self, ):
outs = {
'bbox': self.gbd['predicted_bbox'].numpy(),
'bbox_nums': self.gbd['predicted_bbox_nums'].numpy(),
'mask': self.gbd['predicted_mask'].numpy(),
'im_id': self.gbd['im_id'].numpy()
mask = self.mask.post_process(self.bboxes, self.mask_head_out,
self.inputs['im_info'])
bbox, bbox_num = self.bboxes
output = {
'bbox': bbox.numpy(),
'bbox_num': bbox_num.numpy(),
'im_id': self.inputs['im_id'].numpy()
}
return inputs
output.update(mask)
return output
......@@ -13,39 +13,36 @@ __all__ = ['BaseArch']
@register
class BaseArch(Layer):
def __init__(self, *args, **kwargs):
def __init__(self):
super(BaseArch, self).__init__()
self.args = args
self.kwargs = kwargs
def forward(self, inputs, inputs_keys):
self.gbd = BufferDict()
self.gbd.update(self.kwargs)
assert self.gbd[
'mode'] is not None, "Please specify mode train or infer in config file!"
if self.kwargs['open_debug'] is None:
self.gbd['open_debug'] = False
self.build_inputs(inputs, inputs_keys)
def forward(self, data, input_def, mode):
    """Run the architecture end to end for one batch.

    Args:
        data (list): raw batch as produced by the reader; each sample's
            fields line up positionally with ``input_def``.
        input_def (list[str]): names of the model inputs.
        mode (str): 'train' to return losses, 'infer' to return predictions.

    Returns:
        dict: loss dict in 'train' mode, prediction dict in 'infer' mode.

    Raises:
        ValueError: if ``mode`` is neither 'train' nor 'infer'.
    """
    self.inputs = self.build_inputs(data, input_def)
    self.inputs['mode'] = mode
    self.model_arch()
    if mode == 'train':
        return self.loss()
    if mode == 'infer':
        return self.infer()
    # Original code raised a bare string, which is a TypeError in Python 3
    # (exceptions must derive from BaseException); raise a real exception.
    raise ValueError(
        "Now, only support train or infer mode, got '{}'!".format(mode))
def build_inputs(self, inputs, inputs_keys):
for i, k in enumerate(inputs_keys):
v = to_variable(np.array([x[i] for x in inputs]))
self.gbd.set(k, v)
def model_arch(self, ):
def build_inputs(self, data, input_def):
    """Convert a batch of raw sample tuples into a dict of batched variables.

    Args:
        data (list): batch of samples; each sample is a sequence whose
            entries correspond positionally to ``input_def``.
        input_def (list[str]): names of the inputs, in sample-field order.

    Returns:
        dict: mapping name -> variable with a leading batch dimension,
        built by stacking the per-sample arrays.
    """
    inputs = {name: [] for name in input_def}
    for sample in data:
        for name, value in zip(input_def, sample):
            # prepend a batch axis so the per-sample arrays concatenate
            # along dim 0 into a single batched array
            inputs[name].append(np.array(value)[np.newaxis, ...])
    for name in input_def:
        # to_variable comes from paddle.fluid.dygraph (imported at file top)
        inputs[name] = to_variable(np.concatenate(inputs[name]))
    return inputs
def model_arch(self, mode):
raise NotImplementedError("Should implement model_arch method!")
def loss(self, ):
......
class NameAdapter(object):
    """Fix the backbones variable names for pretrained weight.

    Maps the layer names used while building a backbone to the exact
    parameter names stored in the released pretrained weight files, so
    static-graph checkpoints load into the dygraph model.
    """

    def __init__(self, model):
        super(NameAdapter, self).__init__()
        # the backbone instance whose `_model_type` / `variant` attributes
        # drive the naming rules below
        self.model = model

    @property
    def model_type(self):
        """Backbone family tag (e.g. 'SEResNeXt'); '' when unset."""
        return getattr(self.model, '_model_type', '')

    @property
    def variant(self):
        """Backbone variant letter (e.g. 'b', 'd'); '' when unset."""
        return getattr(self.model, 'variant', '')

    def fix_conv_norm_name(self, name):
        """Return the norm-layer parameter name paired with conv ``name``."""
        if name == "conv1":
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]
        # the naming rule is same as pretrained weight
        if self.model_type == 'SEResNeXt':
            bn_name = name + "_bn"
        return bn_name

    def fix_shortcut_name(self, name):
        """Return the shortcut-branch conv name for block ``name``."""
        if self.model_type == 'SEResNeXt':
            name = 'conv' + name + '_prj'
        return name

    def fix_bottleneck_name(self, name):
        """Return the three conv names and shortcut name of a bottleneck."""
        if self.model_type == 'SEResNeXt':
            conv_name1 = 'conv' + name + '_x1'
            conv_name2 = 'conv' + name + '_x2'
            conv_name3 = 'conv' + name + '_x3'
            shortcut_name = name
        else:
            conv_name1 = name + "_branch2a"
            conv_name2 = name + "_branch2b"
            conv_name3 = name + "_branch2c"
            shortcut_name = name + "_branch1"
        return conv_name1, conv_name2, conv_name3, shortcut_name

    def fix_layer_warp_name(self, stage_num, count, i):
        """Return the name of block ``i`` in stage ``stage_num``.

        ``count`` is the number of blocks in the stage; deep res4 stages
        (e.g. ResNet-101/152) use the 'res4a' / 'res4b<i>' numbering.
        """
        name = 'res' + str(stage_num)
        if count > 10 and stage_num == 4:
            if i == 0:
                conv_name = name + "a"
            else:
                conv_name = name + "b" + str(i)
        else:
            conv_name = name + chr(ord("a") + i)
        if self.model_type == 'SEResNeXt':
            conv_name = str(stage_num + 2) + '_' + str(i + 1)
        return conv_name

    def fix_c1_stage_name(self):
        """Return the name of the stem (C1) conv stage."""
        return "res_conv1" if self.model_type == 'ResNeXt' else "conv1"
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Layer
from paddle.fluid.dygraph import Layer, Sequential
from paddle.fluid.dygraph import Conv2D, Pool2D, BatchNorm
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Constant
from ppdet.core.workspace import register, serializable
from paddle.fluid.regularizer import L2Decay
from .name_adapter import NameAdapter
from numbers import Integral
class ConvBNLayer(Layer):
class ConvNormLayer(Layer):
def __init__(self,
name_scope,
ch_in,
ch_out,
filter_size,
stride,
padding,
act='relu',
lr=1.0):
super(ConvBNLayer, self).__init__()
self.conv = Conv2D(
num_channels=ch_in,
num_filters=ch_out,
filter_size=filter_size,
stride=stride,
padding=padding,
groups=1,
act=act,
param_attr=ParamAttr(
name=name_scope + "_weights", learning_rate=lr),
bias_attr=ParamAttr(name=name_scope + "_bias"))
if name_scope == "conv1":
bn_name = "bn_" + name_scope
else:
bn_name = "bn" + name_scope[3:]
self.bn = BatchNorm(
num_channels=ch_out,
act=act,
param_attr=ParamAttr(name=bn_name + '_scale'),
bias_attr=ParamAttr(name=bn_name + '_offset'),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')
def forward(self, inputs):
out = self.conv(inputs)
out = self.bn(out)
return out
class ConvAffineLayer(Layer):
def __init__(self,
name_scope,
ch_in,
ch_out,
filter_size,
stride,
padding,
name_adapter,
act=None,
norm_type='bn',
norm_decay=0.,
freeze_norm=True,
lr=1.0,
act='relu'):
super(ConvAffineLayer, self).__init__()
name=None):
super(ConvNormLayer, self).__init__()
assert norm_type in ['bn', 'affine_channel']
self.norm_type = norm_type
self.act = act
self.conv = Conv2D(
num_channels=ch_in,
num_filters=ch_out,
filter_size=filter_size,
stride=stride,
padding=padding,
padding=(filter_size - 1) // 2,
groups=1,
act=None,
param_attr=ParamAttr(
name=name_scope + "_weights", learning_rate=lr),
learning_rate=lr, name=name + "_weights"),
bias_attr=False)
if name_scope == "conv1":
bn_name = "bn_" + name_scope
else:
bn_name = "bn" + name_scope[3:]
self.scale = fluid.layers.create_parameter(
shape=[ch_out],
dtype='float32',
attr=ParamAttr(
name=bn_name + '_scale', learning_rate=0.),
default_initializer=Constant(1.))
self.offset = fluid.layers.create_parameter(
shape=[ch_out],
dtype='float32',
attr=ParamAttr(
name=bn_name + '_offset', learning_rate=0.),
default_initializer=Constant(0.))
self.act = act
bn_name = name_adapter.fix_conv_norm_name(name)
norm_lr = 0. if freeze_norm else lr
param_attr = ParamAttr(
learning_rate=norm_lr,
regularizer=L2Decay(norm_decay),
name=bn_name + "_scale",
trainable=False if freeze_norm else True)
bias_attr = ParamAttr(
learning_rate=norm_lr,
regularizer=L2Decay(norm_decay),
name=bn_name + "_offset",
trainable=False if freeze_norm else True)
if norm_type in ['bn', 'sync_bn']:
global_stats = True if freeze_norm else False
self.norm = BatchNorm(
num_channels=ch_out,
act=act,
param_attr=param_attr,
bias_attr=bias_attr,
use_global_stats=global_stats,
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')
norm_params = self.norm.parameters()
elif norm_type == 'affine_channel':
self.scale = fluid.layers.create_parameter(
shape=[ch_out],
dtype='float32',
attr=param_attr,
default_initializer=Constant(1.))
self.offset = fluid.layers.create_parameter(
shape=[ch_out],
dtype='float32',
attr=bias_attr,
default_initializer=Constant(0.))
norm_params = [self.scale, self.offset]
if freeze_norm:
for param in norm_params:
param.stop_gradient = True
def forward(self, inputs):
out = self.conv(inputs)
out = fluid.layers.affine_channel(
out, scale=self.scale, bias=self.offset)
if self.act == 'relu':
out = fluid.layers.relu(out)
if self.norm_type == 'bn':
out = self.norm(out)
elif self.norm_type == 'affine_channel':
out = fluid.layers.affine_channel(
out, scale=self.scale, bias=self.offset, act=self.act)
return out
class BottleNeck(Layer):
def __init__(self,
name_scope,
ch_in,
ch_out,
stride,
shortcut=True,
shortcut,
name_adapter,
name,
variant='b',
lr=1.0,
norm_type='bn'):
norm_type='bn',
norm_decay=0.,
freeze_norm=True):
super(BottleNeck, self).__init__()
self.name_scope = name_scope
if norm_type == 'bn':
atom_block = ConvBNLayer
elif norm_type == 'affine':
atom_block = ConvAffineLayer
if variant == 'a':
stride1, stride2 = stride, 1
else:
atom_block = None
assert atom_block != None, 'NormType only support BatchNorm and Affine!'
stride1, stride2 = 1, stride
conv_name1, conv_name2, conv_name3, \
shortcut_name = name_adapter.fix_bottleneck_name(name)
self.shortcut = shortcut
if not shortcut:
self.branch1 = atom_block(
name_scope + "_branch1",
self.short = ConvNormLayer(
ch_in=ch_in,
ch_out=ch_out * 4,
filter_size=1,
stride=stride,
padding=0,
act=None,
lr=lr)
self.branch2a = atom_block(
name_scope + "_branch2a",
name_adapter=name_adapter,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
lr=lr,
name=shortcut_name)
self.branch2a = ConvNormLayer(
ch_in=ch_in,
ch_out=ch_out,
filter_size=1,
stride=stride,
padding=0,
lr=lr)
stride=stride1,
name_adapter=name_adapter,
act='relu',
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
lr=lr,
name=conv_name1)
self.branch2b = atom_block(
name_scope + "_branch2b",
self.branch2b = ConvNormLayer(
ch_in=ch_out,
ch_out=ch_out,
filter_size=3,
stride=1,
padding=1,
lr=lr)
stride=stride2,
name_adapter=name_adapter,
act='relu',
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
lr=lr,
name=conv_name2)
self.branch2c = atom_block(
name_scope + "_branch2c",
self.branch2c = ConvNormLayer(
ch_in=ch_out,
ch_out=ch_out * 4,
filter_size=1,
stride=1,
padding=0,
name_adapter=name_adapter,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
lr=lr,
act=None)
name=conv_name3)
def forward(self, inputs):
if self.shortcut:
short = inputs
else:
short = self.branch1(inputs)
short = self.short(inputs)
out = self.branch2a(inputs)
out = self.branch2b(out)
out = self.branch2c(out)
out = fluid.layers.elementwise_add(
x=short, y=out, act='relu', name=self.name_scope + ".add.output.5")
out = fluid.layers.elementwise_add(x=short, y=out, act='relu')
return out
class Blocks(Layer):
def __init__(self,
name_scope,
ch_in,
ch_out,
count,
stride,
name_adapter,
stage_num,
lr=1.0,
norm_type='bn'):
norm_type='bn',
norm_decay=0.,
freeze_norm=True):
super(Blocks, self).__init__()
self.blocks = []
for i in range(count):
if i == 0:
name = name_scope + "a"
self.stride = stride
self.shortcut = False
else:
name = name_scope + chr(ord("a") + i)
self.stride = 1
self.shortcut = True
conv_name = name_adapter.fix_layer_warp_name(stage_num, count, i)
block = self.add_sublayer(
name,
conv_name,
BottleNeck(
name,
ch_in=ch_in if i == 0 else ch_out * 4,
ch_out=ch_out,
stride=self.stride,
shortcut=self.shortcut,
stride=2 if i == 0 and stage_num != 2 else 1,
shortcut=False if i == 0 else True,
name_adapter=name_adapter,
name=conv_name,
variant=name_adapter.variant,
lr=lr,
norm_type=norm_type))
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm))
self.blocks.append(block)
shortcut = True
def forward(self, inputs):
res_out = self.blocks[0](inputs)
for block in self.blocks[1:]:
res_out = block(res_out)
return res_out
block_out = inputs
for block in self.blocks:
block_out = block(block_out)
return block_out
ResNet_cfg = {'50': [3, 4, 6, 3], '101': [3, 4, 23, 3], '152': [3, 8, 36, 3]}
ResNet_cfg = {50: [3, 4, 6, 3], 101: [3, 4, 23, 3], 152: [3, 8, 36, 3]}
@register
@serializable
class ResNet(Layer):
def __init__(self, depth=50, norm_type='bn', freeze_at='res2'):
def __init__(self,
depth=50,
variant='b',
lr_mult=1.,
norm_type='bn',
norm_decay=0,
freeze_norm=True,
freeze_at=0,
return_idx=[0, 1, 2, 3],
num_stages=4):
super(ResNet, self).__init__()
self.depth = depth
self.variant = variant
self.norm_type = norm_type
self.norm_decay = norm_decay
self.freeze_norm = freeze_norm
self.freeze_at = freeze_at
block_nums = ResNet_cfg[str(self.depth)]
if self.norm_type == 'bn':
atom_block = ConvBNLayer
elif self.norm_type == 'affine':
atom_block = ConvAffineLayer
if isinstance(return_idx, Integral):
return_idx = [return_idx]
assert max(return_idx) < num_stages, \
'the maximum return index must smaller than num_stages, ' \
'but received maximum return index is {} and num_stages ' \
'is {}'.format(max(return_idx), num_stages)
self.return_idx = return_idx
self.num_stages = num_stages
block_nums = ResNet_cfg[depth]
na = NameAdapter(self)
conv1_name = na.fix_c1_stage_name()
if variant in ['c', 'd']:
conv_def = [
[3, 32, 3, 2, "conv1_1"],
[32, 32, 3, 1, "conv1_2"],
[32, 64, 3, 1, "conv1_3"],
]
else:
atom_block = None
assert atom_block != None, 'NormType only support BatchNorm and Affine!'
self.conv1 = atom_block(
'conv1', ch_in=3, ch_out=64, filter_size=7, stride=2, padding=3)
conv_def = [[3, 64, 7, 2, conv1_name]]
self.conv1 = Sequential()
for (c_in, c_out, k, s, _name) in conv_def:
self.conv1.add_sublayer(
_name,
ConvNormLayer(
ch_in=c_in,
ch_out=c_out,
filter_size=k,
stride=s,
name_adapter=na,
act='relu',
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
lr=lr_mult,
name=_name))
self.pool = Pool2D(
pool_type='max', pool_size=3, pool_stride=2, pool_padding=1)
self.stage2 = Blocks(
"res2",
ch_in=64,
ch_out=64,
count=block_nums[0],
stride=1,
norm_type=norm_type)
self.stage3 = Blocks(
"res3",
ch_in=256,
ch_out=128,
count=block_nums[1],
stride=2,
norm_type=norm_type)
self.stage4 = Blocks(
"res4",
ch_in=512,
ch_out=256,
count=block_nums[2],
stride=2,
norm_type=norm_type)
ch_in_list = [64, 256, 512, 1024]
ch_out_list = [64, 128, 256, 512]
self.res_layers = []
for i in range(num_stages):
stage_num = i + 2
res_name = "res{}".format(stage_num)
res_layer = self.add_sublayer(
res_name,
Blocks(
ch_in_list[i],
ch_out_list[i],
count=block_nums[i],
name_adapter=na,
stage_num=stage_num,
lr=lr_mult,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm))
self.res_layers.append(res_layer)
def forward(self, inputs):
x = inputs['image']
conv1 = self.conv1(x)
pool1 = self.pool(conv1)
res2 = self.stage2(pool1)
res3 = self.stage3(res2)
res4 = self.stage4(res3)
outs = {
'res2': res2,
'res3': res3,
'res4': res4,
'res_norm_type': self.norm_type
}
outs[self.freeze_at].stop_gradient = True
x = self.pool(conv1)
outs = []
for idx, stage in enumerate(self.res_layers):
x = stage(x)
if idx == self.freeze_at:
x.stop_gradient = True
if idx in self.return_idx:
outs.append(x)
return outs
......@@ -5,51 +5,65 @@ from ppdet.core.workspace import register
@register
class BBoxPostProcess(object):
__shared__ = ['num_classes', 'num_stages']
__shared__ = ['num_classes']
__inject__ = ['decode_clip_nms']
def __init__(self,
decode_clip_nms,
num_classes=81,
num_stages=1,
cls_agnostic=False,
decode=None,
clip=None,
nms=None):
nms=None,
score_stage=[0, 1, 2],
delta_stage=[2]):
super(BBoxPostProcess, self).__init__()
self.num_classes = num_classes
self.num_stages = num_stages
self.decode = decode
self.clip = clip
self.nms = nms
self.decode_clip_nms = decode_clip_nms
self.score_stage = score_stage
self.delta_stage = delta_stage
self.out_dim = 2 if cls_agnostic else num_classes
self.cls_agnostic = cls_agnostic
def __call__(self, inputs):
def __call__(self, inputs, bboxheads, rois):
# TODO: split into 3 steps
# TODO: modify related ops for deploying
# decode
# clip
# nms
if self.num_stages > 0:
bbox_prob_list = []
for i in range(self.num_stages):
bbox_prob_list.append(inputs['bbox_head_' + str(i)][
'bbox_prob'])
bbox_prob = fluid.layers.sum(bbox_prob_list) / float(
len(bbox_prob_list))
bbox_delta = inputs['bbox_head_' + str(i)]['bbox_delta']
if inputs['bbox_head_0']['cls_agnostic_bbox_reg'] == 2:
bbox_delta = fluid.layers.slice(
bbox_delta, axes=1, starts=[1], ends=[2])
bbox_delta = fluid.layers.expand(bbox_delta,
[1, self.num_classes, 1])
if isinstance(rois, tuple):
proposal, proposal_num = rois
score, delta = bboxheads[0]
bbox_prob = fluid.layers.softmax(score)
delta = fluid.layers.reshape(delta, (-1, self.out_dim, 4))
else:
bbox_prob = inputs['bbox_prob']
bbox_delta = inputs['bbox_delta']
outs = self.decode_clip_nms(inputs['rpn_rois'], bbox_prob, bbox_delta,
inputs['im_info'])
outs = {"predicted_bbox_nums": outs[0], "predicted_bbox": outs[1]}
return outs
num_stage = len(rois)
proposal_list = []
prob_list = []
delta_list = []
for stage, (proposals, bboxhead) in zip(rois, bboxheads):
score, delta = bboxhead
proposal, proposal_num = proposals
if stage in self.score_stage:
bbox_prob = fluid.layers.softmax(score)
prob_list.append(bbox_prob)
if stage in self.delta_stage:
proposal_list.append(proposal)
delta_list.append(delta)
bbox_prob = fluid.layers.mean(prob_list)
delta = fluid.layers.mean(delta_list)
proposal = fluid.layers.mean(proposal_list)
delta = fluid.layers.reshape(delta, (-1, self.out_dim, 4))
if self.cls_agnostic:
delta = delta[:, 1:2, :]
delta = fluid.layers.expand(delta, [1, self.num_classes, 1])
bboxes = (proposal, proposal_num)
bboxes, bbox_nums = self.decode_clip_nms(bboxes, bbox_prob, delta,
inputs['im_info'])
return bboxes, bbox_nums
@register
......@@ -97,36 +111,51 @@ class AnchorRPN(object):
self.anchor_generator = anchor_generator
self.anchor_target_generator = anchor_target_generator
def __call__(self, inputs):
outs = self.generate_anchors(inputs)
return outs
def generate_anchors(self, inputs):
# TODO: update here to use int to specify featmap size
outs = self.anchor_generator(inputs['rpn_feat'])
outs = {'anchor': outs[0], 'anchor_var': outs[1], 'anchor_module': self}
return outs
def generate_anchors_target(self, inputs):
rpn_rois_score = fluid.layers.transpose(
inputs['rpn_rois_score'], perm=[0, 2, 3, 1])
rpn_rois_delta = fluid.layers.transpose(
inputs['rpn_rois_delta'], perm=[0, 2, 3, 1])
rpn_rois_score = fluid.layers.reshape(
x=rpn_rois_score, shape=(0, -1, 1))
rpn_rois_delta = fluid.layers.reshape(
x=rpn_rois_delta, shape=(0, -1, 4))
anchor = fluid.layers.reshape(inputs['anchor'], shape=(-1, 4))
def __call__(self, rpn_feats):
anchors = []
num_level = len(rpn_feats)
for i, rpn_feat in enumerate(rpn_feats):
anchor, var = self.anchor_generator(rpn_feat, i)
anchors.append((anchor, var))
return anchors
def _get_target_input(self, rpn_feats, anchors):
rpn_score_list = []
rpn_delta_list = []
anchor_list = []
for (rpn_score, rpn_delta), (anchor, var) in zip(rpn_feats, anchors):
rpn_score = fluid.layers.transpose(rpn_score, perm=[0, 2, 3, 1])
rpn_delta = fluid.layers.transpose(rpn_delta, perm=[0, 2, 3, 1])
rpn_score = fluid.layers.reshape(x=rpn_score, shape=(0, -1, 1))
rpn_delta = fluid.layers.reshape(x=rpn_delta, shape=(0, -1, 4))
anchor = fluid.layers.reshape(anchor, shape=(-1, 4))
var = fluid.layers.reshape(var, shape=(-1, 4))
rpn_score_list.append(rpn_score)
rpn_delta_list.append(rpn_delta)
anchor_list.append(anchor)
rpn_scores = fluid.layers.concat(rpn_score_list, axis=1)
rpn_deltas = fluid.layers.concat(rpn_delta_list, axis=1)
anchors = fluid.layers.concat(anchor_list)
return rpn_scores, rpn_deltas, anchors
def generate_loss_inputs(self, inputs, rpn_head_out, anchors):
assert len(rpn_head_out) == len(
anchors
), "rpn_head_out and anchors should have same length, but received rpn_head_out' length is {} and anchors' length is {}".format(
len(rpn_head_out), len(anchors))
rpn_score, rpn_delta, anchors = self._get_target_input(rpn_head_out,
anchors)
score_pred, roi_pred, score_tgt, roi_tgt, roi_weight = self.anchor_target_generator(
bbox_pred=rpn_rois_delta,
cls_logits=rpn_rois_score,
anchor_box=anchor,
bbox_pred=rpn_delta,
cls_logits=rpn_score,
anchor_box=anchors,
gt_boxes=inputs['gt_bbox'],
is_crowd=inputs['is_crowd'],
im_info=inputs['im_info'],
open_debug=inputs['open_debug'])
im_info=inputs['im_info'])
outs = {
'rpn_score_pred': score_pred,
'rpn_score_target': score_tgt,
......@@ -180,86 +209,107 @@ class Proposal(object):
self.proposal_target_generator = proposal_target_generator
self.bbox_post_process = bbox_post_process
def __call__(self, inputs):
outs = {}
if inputs['stage'] == 0:
proposal_out = self.generate_proposal(inputs)
inputs.update(proposal_out)
if inputs['mode'] == 'train':
proposal_target_out = self.generate_proposal_target(inputs)
outs.update(proposal_target_out)
return outs
def generate_proposal(self, inputs):
rpn_rois_prob = fluid.layers.sigmoid(
inputs['rpn_rois_score'], name='rpn_rois_prob')
outs = self.proposal_generator(
scores=rpn_rois_prob,
bbox_deltas=inputs['rpn_rois_delta'],
anchors=inputs['anchor'],
variances=inputs['anchor_var'],
im_info=inputs['im_info'],
mode=inputs['mode'])
outs = {
'rpn_rois': outs[0],
'rpn_rois_probs': outs[1],
'rpn_rois_nums': outs[2]
}
return outs
def generate_proposal_target(self, inputs):
if inputs['stage'] == 0:
rois = inputs['rpn_rois']
rois_num = inputs['rpn_rois_nums']
elif inputs['stage'] > 0:
last_proposal_out = inputs['proposal_' + str(inputs['stage'] - 1)]
rois = last_proposal_out['refined_bbox']
rois_num = last_proposal_out['rois_nums']
    def generate_proposal(self, inputs, rpn_head_out, anchor_out):
        # Generate RoIs from per-level RPN predictions, then (for FPN)
        # merge the per-level proposals into a single collection.
        # inputs: feed dict; reads 'im_info' and 'mode'.
        # rpn_head_out: list of (score, delta) tensors, one per level.
        # anchor_out: list of (anchor, variance) tensors, one per level.
        rpn_rois_list = []
        rpn_prob_list = []
        rpn_rois_num_list = []
        for (rpn_score, rpn_delta), (anchor, var) in zip(rpn_head_out,
                                                         anchor_out):
            rpn_prob = fluid.layers.sigmoid(rpn_score)
            rpn_rois, rpn_rois_prob, rpn_rois_num, post_nms_top_n = self.proposal_generator(
                scores=rpn_prob,
                bbox_deltas=rpn_delta,
                anchors=anchor,
                variances=var,
                im_info=inputs['im_info'],
                mode=inputs['mode'])
            # Single-level (non-FPN) case: no cross-level collection needed.
            if len(rpn_head_out) == 1:
                return rpn_rois, rpn_rois_num
            rpn_rois_list.append(rpn_rois)
            rpn_prob_list.append(rpn_rois_prob)
            rpn_rois_num_list.append(rpn_rois_num)
        # FPN levels are numbered from res2 (level 2) in collect_fpn_proposals.
        start_level = 2
        end_level = start_level + len(rpn_head_out)
        rois_collect, rois_num_collect = fluid.layers.collect_fpn_proposals(
            rpn_rois_list,
            rpn_prob_list,
            start_level,
            end_level,
            post_nms_top_n,
            rois_num_per_level=rpn_rois_num_list)
        return rois_collect, rois_num_collect
def generate_proposal_target(self, inputs, rois, rois_num, stage=0):
outs = self.proposal_target_generator(
rpn_rois=rois,
rpn_rois_nums=rois_num,
rpn_rois_num=rois_num,
gt_classes=inputs['gt_class'],
is_crowd=inputs['is_crowd'],
gt_boxes=inputs['gt_bbox'],
im_info=inputs['im_info'],
stage=inputs['stage'],
open_debug=inputs['open_debug'])
outs = {
'rois': outs[0],
stage=stage)
rois = outs[0]
rois_num = outs[-1]
targets = {
'labels_int32': outs[1],
'bbox_targets': outs[2],
'bbox_inside_weights': outs[3],
'bbox_outside_weights': outs[4],
'rois_nums': outs[5]
'bbox_outside_weights': outs[4]
}
return outs
def refine_bbox(self, inputs):
if inputs['mode'] == 'train':
rois = inputs['proposal_' + str(inputs['stage'])]['rois']
else:
rois = inputs['rpn_rois']
bbox_head_out = inputs['bbox_head_' + str(inputs['stage'])]
return rois, rois_num, targets
bbox_delta_r = fluid.layers.reshape(
bbox_head_out['bbox_delta'],
(-1, inputs['bbox_head_0']['cls_agnostic_bbox_reg'], 4))
def refine_bbox(self, rois, bbox_delta, stage=0):
out_dim = bbox_delta.shape[1] / 4
bbox_delta_r = fluid.layers.reshape(bbox_delta, (-1, out_dim, 4))
bbox_delta_s = fluid.layers.slice(
bbox_delta_r, axes=[1], starts=[1], ends=[2])
refined_bbox = fluid.layers.box_coder(
prior_box=rois,
prior_box_var=self.proposal_target_generator.bbox_reg_weights[
inputs['stage']],
stage],
target_box=bbox_delta_s,
code_type='decode_center_size',
box_normalized=False,
axis=1)
refined_bbox = fluid.layers.reshape(refined_bbox, shape=[-1, 4])
outs = {'refined_bbox': refined_bbox}
return outs
return refined_bbox
def __call__(self,
inputs,
rpn_head_out,
anchor_out,
stage=0,
proposal_out=None,
bbox_head_outs=None,
refined=False):
if refined:
assert proposal_out is not None, "If proposal has been refined, proposal_out should not be None."
return proposal_out
if stage == 0:
roi, rois_num = self.generate_proposal(inputs, rpn_head_out,
anchor_out)
self.proposals_list = []
self.targets_list = []
def post_process(self, inputs):
outs = self.bbox_post_process(inputs)
return outs
else:
bbox_delta = bbox_head_outs[stage][0]
roi = self.refine_bbox(proposal_out[0], bbox_delta, stage - 1)
rois_num = proposal_out[1]
if inputs['mode'] == 'train':
roi, rois_num, targets = self.generate_proposal_target(
inputs, roi, rois_num, stage)
self.targets_list.append(targets)
self.proposals_list.append((roi, rois_num))
return roi, rois_num
def get_targets(self):
return self.targets_list
def get_proposals(self):
return self.proposals_list
def post_process(self, inputs, bbox_head_out, rois):
bboxes = self.bbox_post_process(inputs, bbox_head_out, rois)
return bboxes
import paddle.fluid as fluid
from paddle.fluid.dygraph import Layer
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Normal, MSRA
from paddle.fluid.initializer import Normal, Xavier
from paddle.fluid.regularizer import L2Decay
from paddle.fluid.dygraph.nn import Conv2D, Pool2D
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
from ppdet.core.workspace import register
# TODO: del import and use inject
from ..backbone.resnet import Blocks
@register
class TwoFCHead(Layer):
    """RCNN head feature: two fully-connected layers (fc6/fc7) per stage."""

    __shared__ = ['num_stages']

    def __init__(self, in_dim=256, mlp_dim=1024, resolution=7, num_stages=1):
        super(TwoFCHead, self).__init__()
        self.in_dim = in_dim
        self.mlp_dim = mlp_dim
        self.num_stages = num_stages
        # fc6 fan equals the flattened RoI feature size.
        fan = in_dim * resolution * resolution
        self.fc6_list = []
        self.fc7_list = []
        for stage in range(num_stages):
            fc6_name = 'fc6_{}'.format(stage)
            fc7_name = 'fc7_{}'.format(stage)
            fc6 = self.add_sublayer(
                fc6_name,
                Linear(
                    in_dim * resolution * resolution,
                    mlp_dim,
                    act='relu',
                    param_attr=ParamAttr(initializer=Xavier(fan_out=fan)),
                    bias_attr=ParamAttr(
                        learning_rate=2., regularizer=L2Decay(0.))))
            fc7 = self.add_sublayer(
                fc7_name,
                Linear(
                    mlp_dim,
                    mlp_dim,
                    act='relu',
                    param_attr=ParamAttr(initializer=Xavier()),
                    bias_attr=ParamAttr(
                        learning_rate=2., regularizer=L2Decay(0.))))
            self.fc6_list.append(fc6)
            self.fc7_list.append(fc7)

    def forward(self, rois_feat, stage=0):
        """Flatten RoI features and run fc6 -> fc7 for the given stage."""
        rois_feat = fluid.layers.flatten(rois_feat)
        fc6 = self.fc6_list[stage](rois_feat)
        fc7 = self.fc7_list[stage](fc6)
        return fc7
@register
class BBoxFeat(Layer):
    """Extract RoI features and feed them through the bbox head feature net."""

    __inject__ = ['roi_extractor', 'head_feat']

    def __init__(self, roi_extractor, head_feat):
        super(BBoxFeat, self).__init__()
        self.roi_extractor = roi_extractor
        self.head_feat = head_feat

    def forward(self, body_feats, rois, spatial_scale, stage=0):
        """RoI-pool body features and transform them with `head_feat`."""
        rois_feat = self.roi_extractor(body_feats, rois, spatial_scale)
        bbox_feat = self.head_feat(rois_feat, stage)
        return bbox_feat
@register
class BBoxHead(Layer):
    """Per-stage box classification and regression head for (Cascade) RCNN."""

    __shared__ = ['num_classes', 'num_stages']
    __inject__ = ['bbox_feat']

    def __init__(self,
                 bbox_feat,
                 in_feat=1024,
                 num_classes=81,
                 cls_agnostic=False,
                 num_stages=1,
                 with_pool=False):
        super(BBoxHead, self).__init__()
        self.num_classes = num_classes
        # Class-agnostic regression predicts 2 delta groups (bg/fg),
        # otherwise one group per class.
        self.delta_dim = 2 if cls_agnostic else num_classes
        self.bbox_feat = bbox_feat
        self.num_stages = num_stages
        self.bbox_score_list = []
        self.bbox_delta_list = []
        self.with_pool = with_pool
        for stage in range(num_stages):
            score_name = 'bbox_score_{}'.format(stage)
            delta_name = 'bbox_delta_{}'.format(stage)
            bbox_score = self.add_sublayer(
                score_name,
                fluid.dygraph.Linear(
                    input_dim=in_feat,
                    output_dim=1 * self.num_classes,
                    act=None,
                    param_attr=ParamAttr(initializer=Normal(
                        loc=0.0, scale=0.01)),
                    bias_attr=ParamAttr(
                        learning_rate=2., regularizer=L2Decay(0.))))
            bbox_delta = self.add_sublayer(
                delta_name,
                fluid.dygraph.Linear(
                    input_dim=in_feat,
                    output_dim=4 * self.delta_dim,
                    act=None,
                    param_attr=ParamAttr(initializer=Normal(
                        loc=0.0, scale=0.001)),
                    bias_attr=ParamAttr(
                        learning_rate=2., regularizer=L2Decay(0.))))
            self.bbox_score_list.append(bbox_score)
            self.bbox_delta_list.append(bbox_delta)

    def forward(self, body_feats, rois, spatial_scale, stage=0):
        """Return (bbox_feat, [(scores, deltas)]) for the given stage."""
        bbox_feat = self.bbox_feat(body_feats, rois, spatial_scale, stage)
        if self.with_pool:
            bbox_feat = fluid.layers.pool2d(
                bbox_feat, pool_type='avg', global_pooling=True)
        bbox_head_out = []
        scores = self.bbox_score_list[stage](bbox_feat)
        deltas = self.bbox_delta_list[stage](bbox_feat)
        bbox_head_out.append((scores, deltas))
        return bbox_feat, bbox_head_out

    def _get_head_loss(self, score, delta, target):
        """Softmax CE classification + smooth-L1 regression loss."""
        # bbox cls
        labels_int64 = fluid.layers.cast(
            x=target['labels_int32'], dtype='int64')
        labels_int64.stop_gradient = True
        loss_bbox_cls = fluid.layers.softmax_with_cross_entropy(
            logits=score, label=labels_int64)
        loss_bbox_cls = fluid.layers.reduce_mean(loss_bbox_cls)
        # bbox reg
        loss_bbox_reg = fluid.layers.smooth_l1(
            x=delta,
            y=target['bbox_targets'],
            inside_weight=target['bbox_inside_weights'],
            outside_weight=target['bbox_outside_weights'],
            sigma=1.0)
        loss_bbox_reg = fluid.layers.reduce_mean(loss_bbox_reg)
        return loss_bbox_cls, loss_bbox_reg

    def loss(self, bbox_head_out, targets):
        """Accumulate per-stage cls/reg losses into a named dict."""
        loss_bbox = {}
        for lvl, (bboxhead, target) in enumerate(zip(bbox_head_out, targets)):
            score, delta = bboxhead
            cls_name = 'loss_bbox_cls_{}'.format(lvl)
            reg_name = 'loss_bbox_reg_{}'.format(lvl)
            loss_bbox_cls, loss_bbox_reg = self._get_head_loss(score, delta,
                                                               target)
            loss_bbox[cls_name] = loss_bbox_cls
            loss_bbox[reg_name] = loss_bbox_reg
        return loss_bbox
import paddle.fluid as fluid
from paddle.fluid.dygraph import Layer
from paddle.fluid.dygraph import Layer, Sequential
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Normal, MSRA
from paddle.fluid.initializer import MSRA
from paddle.fluid.regularizer import L2Decay
from paddle.fluid.dygraph.nn import Conv2D, Pool2D
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Conv2DTranspose
from ppdet.core.workspace import register
# TODO: del it and use inject
from ..backbone.resnet import Blocks
@register
class MaskFeat(Layer):
    """Mask branch feature: `num_convs` 3x3 convs then a 2x deconv upsample."""

    __inject__ = ['mask_roi_extractor']

    def __init__(self,
                 mask_roi_extractor,
                 num_convs=1,
                 feat_in=2048,
                 feat_out=256,
                 mask_num_stages=1,
                 share_bbox_feat=False):
        super(MaskFeat, self).__init__()
        self.num_convs = num_convs
        self.feat_in = feat_in
        self.feat_out = feat_out
        self.mask_roi_extractor = mask_roi_extractor
        self.mask_num_stages = mask_num_stages
        self.share_bbox_feat = share_bbox_feat
        self.upsample_module = []
        fan_conv = feat_out * 3 * 3
        fan_deconv = feat_out * 2 * 2
        for i in range(self.mask_num_stages):
            name = 'stage_{}'.format(i)
            mask_conv = Sequential()
            for j in range(self.num_convs):
                conv_name = 'mask_inter_feat_{}'.format(j + 1)
                mask_conv.add_sublayer(
                    conv_name,
                    Conv2D(
                        # Only the FIRST conv (j == 0) consumes the
                        # extractor's channel count; the rest see feat_out.
                        num_channels=feat_in if j == 0 else feat_out,
                        num_filters=feat_out,
                        filter_size=3,
                        act='relu',
                        padding=1,
                        param_attr=ParamAttr(initializer=MSRA(
                            uniform=False, fan_in=fan_conv)),
                        bias_attr=ParamAttr(
                            learning_rate=2., regularizer=L2Decay(0.))))
            # NOTE(review): the deconv input channels use feat_in, but when
            # num_convs > 0 the preceding convs output feat_out — confirm
            # feat_in == feat_out for that configuration.
            mask_conv.add_sublayer(
                'conv5_mask',
                Conv2DTranspose(
                    num_channels=self.feat_in,
                    num_filters=self.feat_out,
                    filter_size=2,
                    stride=2,
                    act='relu',
                    param_attr=ParamAttr(initializer=MSRA(
                        uniform=False, fan_in=fan_deconv)),
                    bias_attr=ParamAttr(
                        learning_rate=2., regularizer=L2Decay(0.))))
            upsample = self.add_sublayer(name, mask_conv)
            self.upsample_module.append(upsample)

    def forward(self,
                body_feats,
                bboxes,
                bbox_feat,
                mask_index,
                spatial_scale,
                stage=0):
        """Extract (or reuse) RoI features and upsample them for masking."""
        if self.share_bbox_feat:
            # Reuse the RoI features already computed by the bbox head.
            rois_feat = fluid.layers.gather(bbox_feat, mask_index)
        else:
            rois_feat = self.mask_roi_extractor(body_feats, bboxes,
                                                spatial_scale)
        mask_feat = self.upsample_module[stage](rois_feat)
        return mask_feat
@register
class MaskHead(Layer):
    """Mask prediction head: per-stage 1x1 conv producing class mask logits."""

    __shared__ = ['num_classes', 'mask_num_stages']
    __inject__ = ['mask_feat']

    def __init__(self,
                 mask_feat,
                 feat_in=256,
                 num_classes=81,
                 mask_num_stages=1):
        super(MaskHead, self).__init__()
        self.mask_feat = mask_feat
        self.feat_in = feat_in
        self.num_classes = num_classes
        self.mask_num_stages = mask_num_stages
        self.mask_fcn_logits = []
        for i in range(self.mask_num_stages):
            name = 'mask_fcn_logits_{}'.format(i)
            self.mask_fcn_logits.append(
                self.add_sublayer(
                    name,
                    fluid.dygraph.Conv2D(
                        num_channels=self.feat_in,
                        num_filters=self.num_classes,
                        filter_size=1,
                        param_attr=ParamAttr(initializer=MSRA(
                            uniform=False, fan_in=self.num_classes)),
                        bias_attr=ParamAttr(
                            learning_rate=2., regularizer=L2Decay(0.0)))))

    def forward_train(self,
                      body_feats,
                      bboxes,
                      bbox_feat,
                      mask_index,
                      spatial_scale,
                      stage=0):
        """Training path: raw mask logits on sampled foreground RoIs."""
        # feat
        mask_feat = self.mask_feat(body_feats, bboxes, bbox_feat, mask_index,
                                   spatial_scale, stage)
        # logits
        mask_head_out = self.mask_fcn_logits[stage](mask_feat)
        return mask_head_out

    def forward_test(self,
                     im_info,
                     body_feats,
                     bboxes,
                     bbox_feat,
                     mask_index,
                     spatial_scale,
                     stage=0):
        """Inference path: sigmoid mask probabilities on detected boxes."""
        bbox, bbox_num = bboxes
        if bbox.shape[0] == 0:
            # No detections: propagate the empty tensor unchanged.
            mask_head_out = bbox
        else:
            # Rescale boxes back to the network input coordinate system
            # using the per-image scale stored in im_info.
            im_info_expand = []
            for idx, num in enumerate(bbox_num):
                for n in range(num):
                    im_info_expand.append(im_info[idx, -1])
            im_info_expand = fluid.layers.concat(im_info_expand)
            scaled_bbox = fluid.layers.elementwise_mul(
                bbox[:, 2:], im_info_expand, axis=0)
            scaled_bboxes = (scaled_bbox, bbox_num)
            mask_feat = self.mask_feat(body_feats, scaled_bboxes, bbox_feat,
                                       mask_index, spatial_scale, stage)
            mask_logit = self.mask_fcn_logits[stage](mask_feat)
            mask_head_out = fluid.layers.sigmoid(mask_logit)
        return mask_head_out

    def forward(self,
                inputs,
                body_feats,
                bboxes,
                bbox_feat,
                mask_index,
                spatial_scale,
                stage=0):
        """Dispatch to the train or test path based on inputs['mode']."""
        if inputs['mode'] == 'train':
            mask_head_out = self.forward_train(body_feats, bboxes, bbox_feat,
                                               mask_index, spatial_scale,
                                               stage)
        else:
            im_info = inputs['im_info']
            mask_head_out = self.forward_test(im_info, body_feats, bboxes,
                                              bbox_feat, mask_index,
                                              spatial_scale, stage)
        return mask_head_out

    def loss(self, mask_head_out, mask_target):
        """Sigmoid CE mask loss, summed and normalized over valid pixels."""
        mask_logits = fluid.layers.flatten(mask_head_out)
        mask_label = fluid.layers.cast(x=mask_target, dtype='float32')
        mask_label.stop_gradient = True
        loss_mask = fluid.layers.sigmoid_cross_entropy_with_logits(
            x=mask_logits, label=mask_label, ignore_index=-1, normalize=True)
        loss_mask = fluid.layers.reduce_sum(loss_mask)
        return {'loss_mask': loss_mask}
......@@ -11,95 +11,105 @@ from ppdet.core.workspace import register
class RPNFeat(Layer):
    """3x3 conv + ReLU applied to every input level; weights are shared."""

    def __init__(self, feat_in=1024, feat_out=1024):
        super(RPNFeat, self).__init__()
        # rpn feat is shared with each level
        self.rpn_conv = Conv2D(
            num_channels=feat_in,
            num_filters=feat_out,
            filter_size=3,
            stride=1,
            padding=1,
            act='relu',
            param_attr=ParamAttr(initializer=Normal(
                loc=0., scale=0.01)),
            bias_attr=ParamAttr(
                learning_rate=2., regularizer=L2Decay(0.)))

    def forward(self, inputs, feats):
        """Apply the shared conv to each feature level in `feats`."""
        rpn_feats = []
        for feat in feats:
            rpn_feats.append(self.rpn_conv(feat))
        return rpn_feats
@register
class RPNHead(Layer):
    """RPN head shared across levels: objectness scores and box deltas."""

    __inject__ = ['rpn_feat']

    def __init__(self, rpn_feat, anchor_per_position=15, rpn_channel=1024):
        super(RPNHead, self).__init__()
        self.rpn_feat = rpn_feat
        if isinstance(rpn_feat, dict):
            self.rpn_feat = RPNFeat(**rpn_feat)
        # rpn head is shared with each level
        # rpn roi classification scores
        self.rpn_rois_score = Conv2D(
            num_channels=rpn_channel,
            num_filters=anchor_per_position,
            filter_size=1,
            stride=1,
            padding=0,
            act=None,
            param_attr=ParamAttr(initializer=Normal(
                loc=0., scale=0.01)),
            bias_attr=ParamAttr(
                learning_rate=2., regularizer=L2Decay(0.)))
        # rpn roi bbox regression deltas
        self.rpn_rois_delta = Conv2D(
            num_channels=rpn_channel,
            num_filters=4 * anchor_per_position,
            filter_size=1,
            stride=1,
            padding=0,
            act=None,
            param_attr=ParamAttr(initializer=Normal(
                loc=0., scale=0.01)),
            bias_attr=ParamAttr(
                learning_rate=2., regularizer=L2Decay(0.)))

    def forward(self, inputs, feats):
        """Return per-level RPN features and (score, delta) predictions."""
        rpn_feats = self.rpn_feat(inputs, feats)
        rpn_head_out = []
        for rpn_feat in rpn_feats:
            rrs = self.rpn_rois_score(rpn_feat)
            rrd = self.rpn_rois_delta(rpn_feat)
            rpn_head_out.append((rrs, rrd))
        return rpn_feats, rpn_head_out

    def loss(self, loss_inputs):
        """RPN losses: sigmoid CE for cls, normalized smooth-L1 for reg."""
        # cls loss
        score_tgt = fluid.layers.cast(
            x=loss_inputs['rpn_score_target'], dtype='float32')
        score_tgt.stop_gradient = True
        loss_rpn_cls = fluid.layers.sigmoid_cross_entropy_with_logits(
            x=loss_inputs['rpn_score_pred'], label=score_tgt)
        loss_rpn_cls = fluid.layers.reduce_mean(
            loss_rpn_cls, name='loss_rpn_cls')
        # reg loss
        loc_tgt = fluid.layers.cast(
            x=loss_inputs['rpn_rois_target'], dtype='float32')
        loc_tgt.stop_gradient = True
        loss_rpn_reg = fluid.layers.smooth_l1(
            x=loss_inputs['rpn_rois_pred'],
            y=loc_tgt,
            sigma=3.0,
            inside_weight=loss_inputs['rpn_rois_weight'],
            outside_weight=loss_inputs['rpn_rois_weight'])
        loss_rpn_reg = fluid.layers.reduce_sum(loss_rpn_reg)
        # Normalize the summed reg loss by the number of score targets.
        score_shape = fluid.layers.shape(score_tgt)
        score_shape = fluid.layers.cast(x=score_shape, dtype='float32')
        norm = fluid.layers.reduce_prod(score_shape)
        norm.stop_gradient = True
        loss_rpn_reg = loss_rpn_reg / norm
        return {'loss_rpn_cls': loss_rpn_cls, 'loss_rpn_reg': loss_rpn_reg}
......@@ -8,20 +8,22 @@ from ppdet.py_op.post_process import mask_post_process
@register
class MaskPostProcess(object):
    """Convert raw mask head output into final per-instance masks."""

    __shared__ = ['mask_resolution']

    def __init__(self, mask_resolution=28, binary_thresh=0.5):
        super(MaskPostProcess, self).__init__()
        self.mask_resolution = mask_resolution
        self.binary_thresh = binary_thresh

    def __call__(self, bboxes, mask_head_out, im_info):
        # TODO: modify related ops for deploying
        # Convert every tensor in the (bbox, bbox_num) pair to numpy.
        bboxes_np = (i.numpy() for i in bboxes)
        mask = mask_post_process(bboxes_np,
                                 mask_head_out.numpy(),
                                 im_info.numpy(), self.mask_resolution,
                                 self.binary_thresh)
        mask = {'mask': mask}
        return mask
@register
......@@ -33,29 +35,28 @@ class Mask(object):
self.mask_target_generator = mask_target_generator
self.mask_post_process = mask_post_process
def __call__(self, inputs):
outs = {}
if inputs['mode'] == 'train':
outs = self.generate_mask_target(inputs)
return outs
def __call__(self, inputs, rois, targets):
mask_rois, rois_has_mask_int32 = self.generate_mask_target(inputs, rois,
targets)
return mask_rois, rois_has_mask_int32
def generate_mask_target(self, inputs):
proposal_out = inputs['proposal_' + str(inputs['stage'])]
outs = self.mask_target_generator(
def generate_mask_target(self, inputs, rois, targets):
labels_int32 = targets['labels_int32']
proposals, proposals_num = rois
mask_rois, mask_rois_num, self.rois_has_mask_int32, self.mask_int32 = self.mask_target_generator(
im_info=inputs['im_info'],
gt_classes=inputs['gt_class'],
is_crowd=inputs['is_crowd'],
gt_segms=inputs['gt_mask'],
rois=proposal_out['rois'],
rois_nums=proposal_out['rois_nums'],
labels_int32=proposal_out['labels_int32'])
outs = {
'mask_rois': outs[0],
'rois_has_mask_int32': outs[1],
'mask_int32': outs[2]
}
return outs
def post_process(self, inputs):
outs = self.mask_post_process(inputs)
return outs
rois=proposals,
rois_num=proposals_num,
labels_int32=labels_int32)
self.mask_rois = (mask_rois, mask_rois_num)
return self.mask_rois, self.rois_has_mask_int32
def get_targets(self):
return self.mask_int32
def post_process(self, bboxes, mask_head_out, im_info):
mask = self.mask_post_process(bboxes, mask_head_out, im_info)
return mask
from . import fpn
from .fpn import *
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Layer
from paddle.fluid.dygraph import Conv2D, Pool2D, BatchNorm
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Xavier
from paddle.fluid.regularizer import L2Decay
from ppdet.core.workspace import register, serializable
@register
@serializable
class FPN(Layer):
    """Feature Pyramid Network neck.

    Builds `max_level - min_level` lateral 1x1 convs and 3x3 output convs,
    merges levels top-down with nearest-neighbor upsampling, and appends
    one extra max-pooled level on top.
    """

    def __init__(self,
                 in_channels,
                 out_channel,
                 min_level=0,
                 max_level=4,
                 spatial_scale=[0.25, 0.125, 0.0625, 0.03125]):
        super(FPN, self).__init__()
        self.lateral_convs = []
        self.fpn_convs = []
        fan = out_channel * 3 * 3
        for i in range(min_level, max_level):
            if i == 3:
                lateral_name = 'fpn_inner_res5_sum'
            else:
                lateral_name = 'fpn_inner_res{}_sum_lateral'.format(i + 2)
            in_c = in_channels[i]
            lateral = self.add_sublayer(
                lateral_name,
                Conv2D(
                    num_channels=in_c,
                    num_filters=out_channel,
                    filter_size=1,
                    param_attr=ParamAttr(initializer=Xavier(fan_out=in_c)),
                    bias_attr=ParamAttr(
                        learning_rate=2., regularizer=L2Decay(0.))))
            self.lateral_convs.append(lateral)
            fpn_name = 'fpn_res{}_sum'.format(i + 2)
            fpn_conv = self.add_sublayer(
                fpn_name,
                Conv2D(
                    num_channels=out_channel,
                    num_filters=out_channel,
                    filter_size=3,
                    padding=1,
                    param_attr=ParamAttr(initializer=Xavier(fan_out=fan)),
                    bias_attr=ParamAttr(
                        learning_rate=2., regularizer=L2Decay(0.))))
            self.fpn_convs.append(fpn_conv)
        self.min_level = min_level
        self.max_level = max_level
        self.spatial_scale = spatial_scale

    def forward(self, body_feats):
        # Index the conv lists RELATIVE to min_level: they hold only
        # max_level - min_level entries, so absolute-level indexing would
        # be wrong (or out of range) whenever min_level > 0.
        num_levels = self.max_level - self.min_level
        laterals = []
        for i in range(num_levels):
            laterals.append(
                self.lateral_convs[i](body_feats[self.min_level + i]))
        # Top-down pathway: upsample the coarser level and add it in.
        for i in range(num_levels - 1, 0, -1):
            upsample = fluid.layers.resize_nearest(laterals[i], scale=2.)
            laterals[i - 1] = laterals[i - 1] + upsample
        fpn_output = []
        for i in range(num_levels):
            fpn_output.append(self.fpn_convs[i](laterals[i]))
        # Extra coarsest level via stride-2 max pooling of the top output.
        extension = fluid.layers.pool2d(fpn_output[-1], 1, 'max', pool_stride=2)
        spatial_scale = self.spatial_scale + [self.spatial_scale[-1] * 0.5]
        fpn_output.append(extension)
        return fpn_output, spatial_scale
......@@ -14,21 +14,29 @@ class AnchorGeneratorRPN(object):
anchor_sizes=[32, 64, 128, 256, 512],
aspect_ratios=[0.5, 1.0, 2.0],
stride=[16.0, 16.0],
variance=[1.0, 1.0, 1.0, 1.0]):
variance=[1.0, 1.0, 1.0, 1.0],
anchor_start_size=None):
super(AnchorGeneratorRPN, self).__init__()
self.anchor_sizes = anchor_sizes
self.aspect_ratios = aspect_ratios
self.stride = stride
self.variance = variance
def __call__(self, inputs):
outs = fluid.layers.anchor_generator(
input=inputs,
anchor_sizes=self.anchor_sizes,
self.anchor_start_size = anchor_start_size
def __call__(self, input, level=None):
anchor_sizes = self.anchor_sizes if (
level is None or self.anchor_start_size is None) else (
self.anchor_start_size * 2**level)
stride = self.stride if (
level is None or self.anchor_start_size is None) else (
self.stride[0] * (2.**level), self.stride[1] * (2.**level))
anchor, var = fluid.layers.anchor_generator(
input=input,
anchor_sizes=anchor_sizes,
aspect_ratios=self.aspect_ratios,
stride=self.stride,
stride=stride,
variance=self.variance)
return outs
return anchor, var
@register
......@@ -49,20 +57,12 @@ class AnchorTargetGeneratorRPN(object):
self.negative_overlap = negative_overlap
self.use_random = use_random
def __call__(self,
cls_logits,
bbox_pred,
anchor_box,
gt_boxes,
is_crowd,
im_info,
open_debug=False):
def __call__(self, cls_logits, bbox_pred, anchor_box, gt_boxes, is_crowd,
im_info):
anchor_box = anchor_box.numpy()
gt_boxes = gt_boxes.numpy()
is_crowd = is_crowd.numpy()
im_info = im_info.numpy()
if open_debug:
self.use_random = False
loc_indexes, score_indexes, tgt_labels, tgt_bboxes, bbox_inside_weights = generate_rpn_anchor_target(
anchor_box, gt_boxes, is_crowd, im_info, self.straddle_thresh,
self.batch_size_per_im, self.positive_overlap,
......@@ -149,8 +149,7 @@ class ProposalGenerator(object):
infer_post_nms_top_n=1000,
nms_thresh=.5,
min_size=.1,
eta=1.,
return_rois_num=True):
eta=1.):
super(ProposalGenerator, self).__init__()
self.train_pre_nms_top_n = train_pre_nms_top_n
self.train_post_nms_top_n = train_post_nms_top_n
......@@ -159,7 +158,6 @@ class ProposalGenerator(object):
self.nms_thresh = nms_thresh
self.min_size = min_size
self.eta = eta
self.return_rois_num = return_rois_num
def __call__(self,
scores,
......@@ -170,7 +168,7 @@ class ProposalGenerator(object):
mode='train'):
pre_nms_top_n = self.train_pre_nms_top_n if mode == 'train' else self.infer_pre_nms_top_n
post_nms_top_n = self.train_post_nms_top_n if mode == 'train' else self.infer_post_nms_top_n
outs = fluid.layers.generate_proposals(
rpn_rois, rpn_rois_prob, rpn_rois_num = fluid.layers.generate_proposals(
scores,
bbox_deltas,
im_info,
......@@ -181,8 +179,8 @@ class ProposalGenerator(object):
nms_thresh=self.nms_thresh,
min_size=self.min_size,
eta=self.eta,
return_rois_num=self.return_rois_num)
return outs
return_rois_num=True)
return rpn_rois, rpn_rois_prob, rpn_rois_num, post_nms_top_n
@register
......@@ -210,34 +208,29 @@ class ProposalTargetGenerator(object):
self.bbox_reg_weights = bbox_reg_weights
self.num_classes = num_classes
self.use_random = use_random
self.is_cls_agnostic = is_cls_agnostic,
self.is_cls_agnostic = is_cls_agnostic
self.is_cascade_rcnn = is_cascade_rcnn
def __call__(self,
rpn_rois,
rpn_rois_nums,
rpn_rois_num,
gt_classes,
is_crowd,
gt_boxes,
im_info,
stage=0,
open_debug=False):
stage=0):
rpn_rois = rpn_rois.numpy()
rpn_rois_nums = rpn_rois_nums.numpy()
rpn_rois_num = rpn_rois_num.numpy()
gt_classes = gt_classes.numpy()
gt_boxes = gt_boxes.numpy()
is_crowd = is_crowd.numpy()
im_info = im_info.numpy()
if open_debug:
self.use_random = False
outs = generate_proposal_target(
rpn_rois, rpn_rois_nums, gt_classes, is_crowd, gt_boxes, im_info,
rpn_rois, rpn_rois_num, gt_classes, is_crowd, gt_boxes, im_info,
self.batch_size_per_im, self.fg_fraction, self.fg_thresh[stage],
self.bg_thresh_hi[stage], self.bg_thresh_lo[stage],
self.bbox_reg_weights[stage], self.num_classes, self.use_random,
self.is_cls_agnostic, self.is_cascade_rcnn)
outs = [to_variable(v) for v in outs]
for v in outs:
v.stop_gradient = True
......@@ -247,25 +240,25 @@ class ProposalTargetGenerator(object):
@register
@serializable
class MaskTargetGenerator(object):
__shared__ = ['num_classes']
__shared__ = ['num_classes', 'mask_resolution']
def __init__(self, num_classes=81, resolution=14):
def __init__(self, num_classes=81, mask_resolution=14):
super(MaskTargetGenerator, self).__init__()
self.num_classes = num_classes
self.resolution = resolution
self.mask_resolution = mask_resolution
def __call__(self, im_info, gt_classes, is_crowd, gt_segms, rois, rois_nums,
def __call__(self, im_info, gt_classes, is_crowd, gt_segms, rois, rois_num,
labels_int32):
im_info = im_info.numpy()
gt_classes = gt_classes.numpy()
is_crowd = is_crowd.numpy()
gt_segms = gt_segms.numpy()
rois = rois.numpy()
rois_nums = rois_nums.numpy()
rois_num = rois_num.numpy()
labels_int32 = labels_int32.numpy()
outs = generate_mask_target(im_info, gt_classes, is_crowd, gt_segms,
rois, rois_nums, labels_int32,
self.num_classes, self.resolution)
rois, rois_num, labels_int32,
self.num_classes, self.mask_resolution)
outs = [to_variable(v) for v in outs]
for v in outs:
......@@ -277,41 +270,54 @@ class MaskTargetGenerator(object):
class RoIExtractor(object):
def __init__(self,
resolution=14,
spatial_scale=1. / 16,
sampling_ratio=0,
extractor_type='RoIAlign'):
canconical_level=4,
canonical_size=224,
start_level=0,
end_level=3):
super(RoIExtractor, self).__init__()
if isinstance(resolution, Integral):
resolution = [resolution, resolution]
self.resolution = resolution
self.spatial_scale = spatial_scale
self.sampling_ratio = sampling_ratio
self.extractor_type = extractor_type
self.canconical_level = canconical_level
self.canonical_size = canonical_size
self.start_level = start_level
self.end_level = end_level
def __call__(self, feat, rois, rois_nums):
def __call__(self, feats, rois, spatial_scale):
roi, rois_num = rois
cur_l = 0
new_nums = [cur_l]
rois_nums_np = rois_nums.numpy()
for l in rois_nums_np:
cur_l += l
new_nums.append(cur_l)
nums_t = to_variable(np.asarray(new_nums))
if self.extractor_type == 'RoIAlign':
if self.start_level == self.end_level:
rois_feat = fluid.layers.roi_align(
feat,
rois,
self.resolution[0],
self.resolution[1],
self.spatial_scale,
rois_lod=nums_t)
elif self.extractor_type == 'RoIPool':
rois_feat = fluid.layers.roi_pool(
feat,
rois,
self.resolution[0],
self.resolution[1],
self.spatial_scale,
rois_lod=nums_t)
feats[self.start_level],
roi,
self.resolution,
self.resolution,
spatial_scale,
rois_num=rois_num)
return rois_feat
offset = 2
k_min = self.start_level + offset
k_max = self.end_level + offset
rois_dist, restore_index, rois_num_dist = fluid.layers.distribute_fpn_proposals(
roi,
k_min,
k_max,
self.canconical_level,
self.canonical_size,
rois_num=rois_num)
rois_feat_list = []
for lvl in range(self.start_level, self.end_level + 1):
roi_feat = fluid.layers.roi_align(
feats[lvl],
rois_dist[lvl],
self.resolution,
self.resolution,
spatial_scale[lvl],
sampling_ratio=self.sampling_ratio,
rois_num=rois_num_dist[lvl])
rois_feat_list.append(roi_feat)
rois_feat_shuffle = fluid.layers.concat(rois_feat_list)
rois_feat = fluid.layers.gather(rois_feat_shuffle, restore_index)
return rois_feat
......@@ -333,11 +339,13 @@ class DecodeClipNms(object):
self.score_threshold = score_threshold
self.nms_threshold = nms_threshold
def __call__(self, bbox, bbox_prob, bbox_delta, img_info):
outs = bbox_post_process(bbox.numpy(),
def __call__(self, bboxes, bbox_prob, bbox_delta, im_info):
bboxes_np = (i.numpy() for i in bboxes)
# bbox, bbox_num
outs = bbox_post_process(bboxes_np,
bbox_prob.numpy(),
bbox_delta.numpy(),
img_info.numpy(), self.keep_top_k,
im_info.numpy(), self.keep_top_k,
self.score_threshold, self.nms_threshold,
self.num_classes)
outs = [to_variable(v) for v in outs]
......
......@@ -126,12 +126,11 @@ def bbox_overlaps(bboxes1, bboxes2):
def nms(dets, thresh):
if dets.shape[0] == 0:
return []
x1 = dets[:, 0]
y1 = dets[:, 1]
x2 = dets[:, 2]
y2 = dets[:, 3]
scores = dets[:, 4]
scores = dets[:, 0]
x1 = dets[:, 1]
y1 = dets[:, 2]
x2 = dets[:, 3]
y2 = dets[:, 4]
areas = (x2 - x1 + 1) * (y2 - y1 + 1)
order = scores.argsort()[::-1]
......@@ -242,13 +241,13 @@ def compute_bbox_targets(bboxes1, bboxes2, labels, bbox_reg_weights):
np.float32, copy=False)
@jit
#@jit
def expand_bbox_targets(bbox_targets_input,
class_nums=81,
is_cls_agnostic=False):
class_labels = bbox_targets_input[:, 0]
fg_inds = np.where(class_labels > 0)[0]
if not is_cls_agnostic:
if is_cls_agnostic:
class_nums = 2
bbox_targets = np.zeros((class_labels.shape[0], 4 * class_nums))
bbox_inside_weights = np.zeros(bbox_targets.shape)
......
......@@ -180,7 +180,7 @@ def polys_to_mask_wrt_box(polygons, box, M):
return mask
@jit
#@jit
def expand_mask_targets(masks, mask_class_labels, resolution, num_classes):
"""Expand masks from shape (#masks, resolution ** 2)
to (#masks, #classes * resolution ** 2) to encode class
......
......@@ -3,44 +3,45 @@ import os
import numpy as np
from numba import jit
from .bbox import delta2bbox, clip_bbox, expand_bbox, nms
import pycocotools.mask as mask_util
import cv2
def bbox_post_process(bboxes,
bbox_nums,
bbox_probs,
bbox_prob,
bbox_deltas,
im_info,
keep_top_k=100,
score_thresh=0.05,
nms_thresh=0.5,
class_nums=81,
bbox_reg_weights=[0.1, 0.1, 0.2, 0.2]):
new_bboxes = [[] for _ in range(len(bbox_nums))]
new_bbox_nums = [0]
bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
with_background=True):
bbox, bbox_num = bboxes
new_bbox = [[] for _ in range(len(bbox_num))]
new_bbox_num = []
st_num = 0
end_num = 0
for i in range(len(bbox_nums)):
bbox_num = bbox_nums[i]
end_num += bbox_num
bbox = bboxes[st_num:end_num, :] # bbox
bbox = bbox / im_info[i][2] # scale
bbox_delta = bbox_deltas[st_num:end_num, :] # bbox delta
for i in range(len(bbox_num)):
box_num = bbox_num[i]
end_num += box_num
boxes = bbox[st_num:end_num, :] # bbox
boxes = boxes / im_info[i][2] # scale
bbox_delta = bbox_deltas[st_num:end_num, :, :] # bbox delta
bbox_delta = np.reshape(bbox_delta, (box_num, -1))
# step1: decode
bbox = delta2bbox(bbox_delta, bbox, bbox_reg_weights)
boxes = delta2bbox(bbox_delta, boxes, bbox_reg_weights)
# step2: clip
bbox = clip_bbox(bbox, im_info[i][:2] / im_info[i][2])
boxes = clip_bbox(boxes, im_info[i][:2] / im_info[i][2])
# step3: nms
cls_boxes = [[] for _ in range(class_nums)]
scores_n = bbox_probs[st_num:end_num, :]
for j in range(1, class_nums):
scores_n = bbox_prob[st_num:end_num, :]
for j in range(with_background, class_nums):
inds = np.where(scores_n[:, j] > score_thresh)[0]
scores_j = scores_n[inds, j]
rois_j = bbox[inds, j * 4:(j + 1) * 4]
rois_j = boxes[inds, j * 4:(j + 1) * 4]
dets_j = np.hstack((scores_j[:, np.newaxis], rois_j)).astype(
np.float32, copy=False)
keep = nms(dets_j, nms_thresh)
......@@ -51,32 +52,34 @@ def bbox_post_process(bboxes,
np.float32, copy=False)
cls_boxes[j] = nms_dets
st_num += bbox_num
st_num += box_num
# Limit to max_per_image detections **over all classes**
image_scores = np.hstack(
[cls_boxes[j][:, 1] for j in range(1, class_nums)])
[cls_boxes[j][:, 1] for j in range(with_background, class_nums)])
if len(image_scores) > keep_top_k:
image_thresh = np.sort(image_scores)[-keep_top_k]
for j in range(1, class_nums):
for j in range(with_background, class_nums):
keep = np.where(cls_boxes[j][:, 1] >= image_thresh)[0]
cls_boxes[j] = cls_boxes[j][keep, :]
new_bboxes_n = np.vstack([cls_boxes[j] for j in range(1, class_nums)])
new_bboxes[i] = new_bboxes_n
new_bbox_nums.append(len(new_bboxes_n))
labels = new_bboxes_n[:, 0]
scores = new_bboxes_n[:, 1]
boxes = new_bboxes_n[:, 2:]
new_bboxes = np.vstack([new_bboxes[k] for k in range(len(bbox_nums) - 1)])
new_bbox_nums = np.array(new_bbox_nums)
return new_bbox_nums, new_bboxes
new_bbox_n = np.vstack(
[cls_boxes[j] for j in range(with_background, class_nums)])
new_bbox[i] = new_bbox_n
new_bbox_num.append(len(new_bbox_n))
new_bbox = np.vstack([new_bbox[k] for k in range(len(bbox_num))])
new_bbox_num = np.array(new_bbox_num).astype('int32')
return new_bbox, new_bbox_num
@jit
def mask_post_process(bboxes, bbox_nums, masks, im_info, resolution=14):
scale = (resolution + 2.0) / resolution
boxes = bboxes[:, 2:]
labels = bboxes[:, 0]
def mask_post_process(bboxes, masks, im_info, resolution=14, binary_thresh=0.5):
if masks.shape[0] == 0:
return masks
bbox, bbox_nums = bboxes
M = resolution
scale = (M + 2.0) / M
boxes = bbox[:, 2:]
labels = bbox[:, 0]
segms_results = [[] for _ in range(len(bbox_nums))]
sum = 0
st_num = 0
......@@ -92,7 +95,7 @@ def mask_post_process(bboxes, bbox_nums, masks, im_info, resolution=14):
im_h = int(round(im_info[i][0] / im_info[i][2]))
im_w = int(round(im_info[i][1] / im_info[i][2]))
boxes_n = expand_boxes(boxes_n, scale)
boxes_n = expand_bbox(boxes_n, scale)
boxes_n = boxes_n.astype(np.int32)
padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)
for j in range(len(boxes_n)):
......@@ -106,7 +109,7 @@ def mask_post_process(bboxes, bbox_nums, masks, im_info, resolution=14):
h = np.maximum(h, 1)
mask = cv2.resize(padded_mask, (w, h))
mask = np.array(mask > cfg.mrcnn_thresh_binarize, dtype=np.uint8)
mask = np.array(mask > binary_thresh, dtype=np.uint8)
im_mask = np.zeros((im_h, im_w), dtype=np.uint8)
x_0 = max(ref_box[0], 0)
......@@ -121,20 +124,18 @@ def mask_post_process(bboxes, bbox_nums, masks, im_info, resolution=14):
im_mask[:, :, np.newaxis], order='F'))[0]
cls_segms.append(rle)
segms_results[i] = np.array(cls_segms)[:, np.newaxis]
st_num += bbox_num
segms_results = np.vstack([segms_results[k] for k in range(len(bbox_nums))])
bboxes = np.hstack([segms_results, bboxes])
bboxes = np.hstack([segms_results, bbox])
return bboxes[:, :3]
@jit
def get_det_res(bboxes, bbox_nums, image_id, num_id_to_cat_id_map,
batch_size=1):
def get_det_res(bboxes, bbox_nums, image_id, num_id_to_cat_id_map):
det_res = []
k = 0
for i in range(len(bbox_nums)):
image_id = int(image_id[i][0])
image_width = int(image_shape[i][1])
image_height = int(image_shape[i][2])
det_nums = bbox_nums[i]
for j in range(det_nums):
......
......@@ -89,7 +89,7 @@ def generate_rpn_anchor_target(anchors,
@jit
def label_anchor(anchors, gt_boxes):
iou = compute_iou(anchors, gt_boxes)
iou = bbox_overlaps(anchors, gt_boxes)
# every gt's anchor's index
gt_bbox_anchor_inds = iou.argmax(axis=0)
......@@ -150,7 +150,7 @@ def sample_anchor(anchor_gt_bbox_iou,
@jit
def generate_proposal_target(rpn_rois,
rpn_rois_nums,
rpn_rois_num,
gt_classes,
is_crowd,
gt_boxes,
......@@ -171,12 +171,12 @@ def generate_proposal_target(rpn_rois,
tgt_deltas = []
rois_inside_weights = []
rois_outside_weights = []
rois_nums = []
new_rois_num = []
st_num = 0
end_num = 0
for im_i in range(len(rpn_rois_nums)):
rpn_rois_num = rpn_rois_nums[im_i]
end_num += rpn_rois_num
for im_i in range(len(rpn_rois_num)):
length = rpn_rois_num[im_i]
end_num += length
rpn_roi = rpn_rois[st_num:end_num]
im_scale = im_info[im_i][2]
......@@ -220,10 +220,10 @@ def generate_proposal_target(rpn_rois,
bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype)
roi = sampled_boxes * im_scale
st_num += rpn_rois_num
st_num += length
rois.append(roi)
rois_nums.append(roi.shape[0])
new_rois_num.append(roi.shape[0])
tgt_labels.append(sampled_labels)
tgt_deltas.append(sampled_deltas)
rois_inside_weights.append(bbox_inside_weights)
......@@ -237,9 +237,8 @@ def generate_proposal_target(rpn_rois,
rois_inside_weights, axis=0).astype(np.float32)
rois_outside_weights = np.concatenate(
rois_outside_weights, axis=0).astype(np.float32)
rois_nums = np.asarray(rois_nums, np.int32)
return rois, tgt_labels, tgt_deltas, rois_inside_weights, rois_outside_weights, rois_nums
new_rois_num = np.asarray(new_rois_num, np.int32)
return rois, tgt_labels, tgt_deltas, rois_inside_weights, rois_outside_weights, new_rois_num
@jit
......@@ -250,7 +249,7 @@ def label_bbox(boxes,
class_nums=81,
is_cascade_rcnn=False):
iou = compute_iou(boxes, gt_boxes)
iou = bbox_overlaps(boxes, gt_boxes)
# every roi's gt box's index
roi_gt_bbox_inds = np.zeros((boxes.shape[0]), dtype=np.int32)
......@@ -318,15 +317,16 @@ def sample_bbox(roi_gt_bbox_iou,
@jit
def generate_mask_target(im_info, gt_classes, is_crowd, gt_segms, rois,
rois_nums, labels_int32, num_classes, resolution):
rois_num, labels_int32, num_classes, resolution):
mask_rois = []
mask_rois_num = []
rois_has_mask_int32 = []
mask_int32 = []
st_num = 0
end_num = 0
for k in range(len(rois_nums)):
rois_num = rois_nums[k]
end_num += rois_num
for k in range(len(rois_num)):
length = rois_num[k]
end_num += length
# remove padding
gt_polys = gt_segms[k]
......@@ -345,37 +345,32 @@ def generate_mask_target(im_info, gt_classes, is_crowd, gt_segms, rois,
if len(new_poly) > 0:
gt_segs.append(new_poly)
new_gt_polys.append(gt_segs)
im_scale = im_info[k][2]
boxes = rois[st_num:end_num] / im_scale
bbox_fg, bbox_has_mask, masks = sample_mask(
boxes, new_gt_polys, labels_int32[st_num:rois_num], gt_classes[k],
boxes, new_gt_polys, labels_int32[st_num:end_num], gt_classes[k],
is_crowd[k], num_classes, resolution)
st_num += rois_num
st_num += length
mask_rois.append(bbox_fg * im_scale)
mask_rois_num.append(len(bbox_fg))
rois_has_mask_int32.append(bbox_has_mask)
mask_int32.append(masks)
mask_rois = np.concatenate(mask_rois, axis=0).astype(np.float32)
mask_rois_num = np.array(mask_rois_num).astype(np.int32)
rois_has_mask_int32 = np.concatenate(
rois_has_mask_int32, axis=0).astype(np.int32)
mask_int32 = np.concatenate(mask_int32, axis=0).astype(np.int32)
return mask_rois, rois_has_mask_int32, mask_int32
return mask_rois, mask_rois_num, rois_has_mask_int32, mask_int32
@jit
def sample_mask(
boxes,
gt_polys,
label_int32,
gt_classes,
is_crowd,
num_classes,
resolution, ):
def sample_mask(boxes, gt_polys, label_int32, gt_classes, is_crowd, num_classes,
resolution):
gt_polys_inds = np.where((gt_classes > 0) & (is_crowd == 0))[0]
_gt_polys = [gt_polys[i] for i in gt_polys_inds]
......@@ -405,7 +400,5 @@ def sample_mask(
masks_fg = -np.ones((1, resolution**2), dtype=np.int32)
labels_fg = np.zeros((1, ))
bbox_has_mask = np.append(bbox_has_mask, 0)
masks = expand_mask_targets(masks_fg, labels_fg, resolution, num_classes)
return bbox_fg, bbox_has_mask, masks
......@@ -45,37 +45,46 @@ def get_ckpt_path(path):
def load_dygraph_ckpt(model,
optimizer,
optimizer=None,
pretrain_ckpt=None,
ckpt=None,
ckpt_type='pretrain',
ckpt_type=None,
exclude_params=[],
open_debug=False):
load_static_weights=False):
if ckpt_type == 'pretrain':
assert ckpt_type in ['pretrain', 'resume', 'finetune', None]
if ckpt_type == 'pretrain' and ckpt is None:
ckpt = pretrain_ckpt
ckpt = get_ckpt_path(ckpt)
if ckpt is not None and os.path.exists(ckpt):
param_state_dict, optim_state_dict = fluid.load_dygraph(ckpt)
if open_debug:
print("Loading Weights: ", param_state_dict.keys())
assert os.path.exists(ckpt), "Path {} does not exist.".format(ckpt)
if load_static_weights:
pre_state_dict = fluid.load_program_state(ckpt)
param_state_dict = {}
model_dict = model.state_dict()
for key in model_dict.keys():
weight_name = model_dict[key].name
if weight_name in pre_state_dict.keys():
print('Load weight: {}, shape: {}'.format(
weight_name, pre_state_dict[weight_name].shape))
param_state_dict[key] = pre_state_dict[weight_name]
else:
param_state_dict[key] = model_dict[key]
model.set_dict(param_state_dict)
return model
param_state_dict, optim_state_dict = fluid.load_dygraph(ckpt)
if len(exclude_params) != 0:
for k in exclude_params:
param_state_dict.pop(k, None)
if len(exclude_params) != 0:
for k in exclude_params:
param_state_dict.pop(k, None)
if ckpt_type == 'pretrain':
model.backbone.set_dict(param_state_dict)
elif ckpt_type == 'finetune':
model.set_dict(param_state_dict, use_structured_name=True)
else:
model.set_dict(param_state_dict)
if ckpt_type == 'pretrain':
model.backbone.set_dict(param_state_dict)
else:
model.set_dict(param_state_dict)
if ckpt_type == 'resume':
if optim_state_dict is None:
print("Can't Resume Last Training's Optimizer State!!!")
else:
optimizer.set_dict(optim_state_dict)
if ckpt_type == 'resume':
assert optim_state_dict, "Can't Resume Last Training's Optimizer State!!!"
optimizer.set_dict(optim_state_dict)
return model
......
......@@ -28,10 +28,7 @@ def json_eval_results(metric, json_directory=None, dataset=None):
logger.info("{} not exists!".format(v_json))
def coco_eval_results(outs_res=None,
include_mask=False,
batch_size=1,
dataset=None):
def coco_eval_results(outs_res=None, include_mask=False, dataset=None):
print("start evaluate bbox using coco api")
import io
import six
......@@ -49,14 +46,14 @@ def coco_eval_results(outs_res=None,
if outs_res is not None and len(outs_res) > 0:
det_res = []
for outs in outs_res:
det_res += get_det_res(outs['bbox_nums'], outs['bbox'],
outs['im_id'], catid, batch_size)
det_res += get_det_res(outs['bbox'], outs['bbox_num'],
outs['im_id'], catid)
with io.open("bbox_eval.json", 'w') as outfile:
with io.open("bbox.json", 'w') as outfile:
encode_func = unicode if six.PY2 else str
outfile.write(encode_func(json.dumps(det_res)))
cocoDt = cocoGt.loadRes("bbox_eval.json")
cocoDt = cocoGt.loadRes("bbox.json")
cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
cocoEval.evaluate()
cocoEval.accumulate()
......@@ -65,14 +62,15 @@ def coco_eval_results(outs_res=None,
if outs_res is not None and len(outs_res) > 0 and include_mask:
seg_res = []
for outs in outs_res:
seg_res += get_seg_res(outs['bbox_nums'], outs['mask'],
outs['im_id'], catid, batch_size)
seg_res += get_seg_res(outs['mask'], outs['bbox_num'],
outs['im_id'], catid)
with io.open("mask_eval.json", 'w') as outfile:
with io.open("mask.json", 'w') as outfile:
encode_func = unicode if six.PY2 else str
outfile.write(encode_func(json.dumps(seg_res)))
cocoSg = cocoGt.loadRes("mask_eval.json")
cocoEval = COCOeval(cocoGt, cocoSg, 'bbox')
cocoSg = cocoGt.loadRes("mask.json")
cocoEval = COCOeval(cocoGt, cocoSg, 'segm')
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()
......@@ -47,7 +47,7 @@ class TrainingStats(object):
def update(self, stats):
for k, v in self.smoothed_losses_and_metrics.items():
v.add_value(stats[k])
v.add_value(stats[k].numpy())
def get(self, extras=None):
stats = collections.OrderedDict()
......
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import os, sys
# add python path of PadleDetection to sys.path
parent_path = os.path.abspath(os.path.join(__file__, *(['..'] * 2)))
if parent_path not in sys.path:
sys.path.append(parent_path)
import time
# ignore numba warning
import warnings
......@@ -14,6 +19,7 @@ from ppdet.utils.check import check_gpu, check_version, check_config
from ppdet.utils.cli import ArgsParser
from ppdet.utils.eval_utils import coco_eval_results
from ppdet.data.reader import create_reader
from ppdet.utils.checkpoint import load_dygraph_ckpt, save_dygraph_ckpt
def parse_args():
......@@ -38,11 +44,10 @@ def run(FLAGS, cfg):
# Model
main_arch = cfg.architecture
model = create(cfg.architecture, mode='infer', open_debug=cfg.open_debug)
model = create(cfg.architecture)
# Init Model
param_state_dict = fluid.dygraph.load_dygraph(cfg.weights)[0]
model.set_dict(param_state_dict)
model = load_dygraph_ckpt(model, ckpt=cfg.weights)
# Data Reader
if FLAGS.use_gpu:
......@@ -58,7 +63,7 @@ def run(FLAGS, cfg):
# forward
model.eval()
outs = model(data, cfg['EvalReader']['inputs_def']['fields'])
outs = model(data, cfg['EvalReader']['inputs_def']['fields'], 'infer')
outs_res.append(outs)
# log
......@@ -68,7 +73,7 @@ def run(FLAGS, cfg):
# Metric
coco_eval_results(
outs_res,
include_mask=True if 'MaskHed' in cfg else False,
include_mask=True if 'MaskHead' in cfg else False,
dataset=cfg['EvalReader']['dataset'])
......
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import os, sys
# add python path of PadleDetection to sys.path
parent_path = os.path.abspath(os.path.join(__file__, *(['..'] * 2)))
if parent_path not in sys.path:
sys.path.append(parent_path)
import time
# ignore numba warning
import warnings
warnings.filterwarnings('ignore')
import random
import datetime
import numpy as np
from collections import deque
import paddle.fluid as fluid
from ppdet.core.workspace import load_config, merge_config, create
from ppdet.data.reader import create_reader
from ppdet.utils.stats import TrainingStats
from ppdet.utils.check import check_gpu, check_version, check_config
from ppdet.utils.cli import ArgsParser
from ppdet.utils.checkpoint import load_dygraph_ckpt, save_dygraph_ckpt
from paddle.fluid.dygraph.parallel import ParallelEnv
import logging
FORMAT = '%(asctime)s-%(levelname)s: %(message)s'
logging.basicConfig(level=logging.INFO, format=FORMAT)
logger = logging.getLogger(__name__)
def parse_args():
......@@ -24,7 +37,6 @@ def parse_args():
type=str,
help="Loading Checkpoints only support 'pretrain', 'finetune', 'resume'."
)
parser.add_argument(
"--fp16",
action='store_true',
......@@ -63,11 +75,6 @@ def parse_args():
"This flag is only used for internal test.")
parser.add_argument(
"--use_gpu", action='store_true', default=False, help="data parallel")
parser.add_argument(
"--use_parallel",
action='store_true',
default=False,
help="data parallel")
parser.add_argument(
'--is_profiler',
......@@ -88,13 +95,13 @@ def run(FLAGS, cfg):
random.seed(local_seed)
np.random.seed(local_seed)
if FLAGS.enable_ce or cfg.open_debug:
if FLAGS.enable_ce:
random.seed(0)
np.random.seed(0)
# Model
main_arch = cfg.architecture
model = create(cfg.architecture, mode='train', open_debug=cfg.open_debug)
model = create(cfg.architecture)
# Optimizer
lr = create('LearningRate')()
......@@ -105,12 +112,11 @@ def run(FLAGS, cfg):
model,
optimizer,
cfg.pretrain_weights,
cfg.weights,
FLAGS.ckpt_type,
open_debug=cfg.open_debug)
ckpt_type=FLAGS.ckpt_type,
load_static_weights=cfg.load_static_weights)
# Parallel Model
if FLAGS.use_parallel:
if ParallelEnv().nranks > 1:
strategy = fluid.dygraph.parallel.prepare_context()
model = fluid.dygraph.parallel.DataParallel(model, strategy)
......@@ -122,21 +128,29 @@ def run(FLAGS, cfg):
devices_num = int(os.environ.get('CPU_NUM', 1))
train_reader = create_reader(
cfg.TrainReader, (cfg.max_iters - start_iter) * devices_num,
cfg,
devices_num=devices_num)
cfg.TrainReader, (cfg.max_iters - start_iter), cfg, devices_num=1)
time_stat = deque(maxlen=cfg.log_smooth_window)
start_time = time.time()
end_time = time.time()
# Run Train
for iter_id, data in enumerate(train_reader()):
start_time = time.time()
start_time = end_time
end_time = time.time()
time_stat.append(end_time - start_time)
time_cost = np.mean(time_stat)
eta_sec = (cfg.max_iters - iter_id) * time_cost
eta = str(datetime.timedelta(seconds=int(eta_sec)))
# Model Forward
model.train()
outputs = model(data, cfg['TrainReader']['inputs_def']['fields'])
outputs = model(data, cfg['TrainReader']['inputs_def']['fields'],
'train')
# Model Backward
loss = outputs['loss']
if FLAGS.use_parallel:
if ParallelEnv().nranks > 1:
loss = model.scale_loss(loss)
loss.backward()
model.apply_collective_grads()
......@@ -144,30 +158,27 @@ def run(FLAGS, cfg):
loss.backward()
optimizer.minimize(loss)
model.clear_gradients()
# Log state
cost_time = time.time() - start_time
# TODO: check this method
curr_lr = optimizer.current_step_lr()
log_info = "iter: {}, time: {:.4f}, lr: {:.6f}".format(
iter_id, cost_time, curr_lr)
for k, v in outputs.items():
log_info += ", {}: {:.6f}".format(k, v.numpy()[0])
print(log_info)
# Debug
if cfg.open_debug and iter_id > 10:
break
# Save Stage
if iter_id > 0 and iter_id % int(
cfg.snapshot_iter) == 0 and fluid.dygraph.parallel.Env(
).local_rank == 0:
cfg_name = os.path.basename(FLAGS.config).split('.')[0]
save_name = str(
iter_id) if iter_id != cfg.max_iters - 1 else "model_final"
save_dir = os.path.join(cfg.save_dir, cfg_name, save_name)
save_dygraph_ckpt(model, optimizer, save_dir)
if ParallelEnv().nranks < 2 or ParallelEnv().local_rank == 0:
# Log state
if iter_id == 0:
train_stats = TrainingStats(cfg.log_smooth_window,
outputs.keys())
train_stats.update(outputs)
logs = train_stats.log()
if iter_id % cfg.log_iter == 0:
strs = 'iter: {}, lr: {:.6f}, {}, time: {:.3f}, eta: {}'.format(
iter_id, curr_lr, logs, time_cost, eta)
logger.info(strs)
# Save Stage
if iter_id > 0 and iter_id % int(
cfg.snapshot_iter) == 0 or iter_id == cfg.max_iters - 1:
cfg_name = os.path.basename(FLAGS.config).split('.')[0]
save_name = str(
iter_id) if iter_id != cfg.max_iters - 1 else "model_final"
save_dir = os.path.join(cfg.save_dir, cfg_name, save_name)
save_dygraph_ckpt(model, optimizer, save_dir)
def main():
......@@ -179,7 +190,7 @@ def main():
check_gpu(cfg.use_gpu)
check_version()
place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
place = fluid.CUDAPlace(ParallelEnv().dev_id) \
if cfg.use_gpu else fluid.CPUPlace()
with fluid.dygraph.guard(place):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册