未验证 提交 646996f4 编写于 作者: W wangguanzhong 提交者: GitHub

refactor dygraph & add mask rcnn fpn (#1171)

* refactor dygraph & add mask rcnn fpn

* fix initialization & minor update

* refine architecture of mask_rcnn
上级 d8704f28
architecture: MaskRCNN
use_gpu: true
max_iters: 180000
log_smooth_window: 20
save_dir: output
snapshot_iter: 10000
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar
metric: COCO
weights: output/mask_rcnn_r50_fpn_1x/model_final
num_classes: 81
load_static_weights: True
# Model Achitecture
MaskRCNN:
# model anchor info flow
anchor: AnchorRPN
proposal: Proposal
mask: Mask
# model feat info flow
backbone: ResNet
neck: FPN
rpn_head: RPNHead
bbox_head: BBoxHead
mask_head: MaskHead
ResNet:
# index 0 stands for res2
depth: 50
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
FPN:
in_channels: [256, 512, 1024, 2048]
out_channel: 256
min_level: 0
max_level: 4
spatial_scale: [0.25, 0.125, 0.0625, 0.03125]
RPNHead:
rpn_feat:
name: RPNFeat
feat_in: 256
feat_out: 256
anchor_per_position: 3
rpn_channel: 256
BBoxHead:
bbox_feat:
name: BBoxFeat
roi_extractor:
name: RoIExtractor
resolution: 7
sampling_ratio: 2
head_feat:
name: TwoFCHead
in_dim: 256
mlp_dim: 1024
in_feat: 1024
MaskHead:
mask_feat:
name: MaskFeat
num_convs: 4
feat_in: 256
feat_out: 256
mask_roi_extractor:
name: RoIExtractor
resolution: 14
sampling_ratio: 2
share_bbox_feat: False
feat_in: 256
AnchorRPN:
anchor_generator:
name: AnchorGeneratorRPN
aspect_ratios: [0.5, 1.0, 2.0]
anchor_start_size: 32
stride: [4., 4.]
anchor_target_generator:
name: AnchorTargetGeneratorRPN
batch_size_per_im: 256
fg_fraction: 0.5
negative_overlap: 0.3
positive_overlap: 0.7
straddle_thresh: 0.0
Proposal:
proposal_generator:
name: ProposalGenerator
min_size: 0.0
nms_thresh: 0.7
train_pre_nms_top_n: 2000
train_post_nms_top_n: 2000
infer_pre_nms_top_n: 1000
infer_post_nms_top_n: 1000
proposal_target_generator:
name: ProposalTargetGenerator
batch_size_per_im: 512
bbox_reg_weights: [[0.1, 0.1, 0.2, 0.2],]
bg_thresh_hi: [0.5,]
bg_thresh_lo: [0.0,]
fg_thresh: [0.5,]
fg_fraction: 0.25
bbox_post_process: # used in infer
name: BBoxPostProcess
# decode -> clip -> nms
decode_clip_nms:
name: DecodeClipNms
keep_top_k: 100
score_threshold: 0.05
nms_threshold: 0.5
Mask:
mask_target_generator:
name: MaskTargetGenerator
mask_resolution: 28
mask_post_process:
name: MaskPostProcess
mask_resolution: 28
# Train
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [120000, 160000]
- !LinearWarmup
start_factor: 0.3333
steps: 500
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
_READER_: 'mask_reader.yml'
...@@ -18,8 +18,8 @@ TrainReader: ...@@ -18,8 +18,8 @@ TrainReader:
mean: [0.485,0.456,0.406] mean: [0.485,0.456,0.406]
std: [0.229, 0.224,0.225] std: [0.229, 0.224,0.225]
- !ResizeImage - !ResizeImage
target_size: 512 target_size: 800
max_size: 512 max_size: 1333
interp: 1 interp: 1
use_cv2: true use_cv2: true
- !Permute - !Permute
...@@ -39,8 +39,6 @@ TrainReader: ...@@ -39,8 +39,6 @@ TrainReader:
EvalReader: EvalReader:
inputs_def: inputs_def:
fields: ['image', 'im_info', 'im_id', 'im_shape'] fields: ['image', 'im_info', 'im_id', 'im_shape']
# for voc
#fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_difficult']
dataset: dataset:
!COCODataSet !COCODataSet
image_dir: val2017 image_dir: val2017
......
...@@ -127,7 +127,6 @@ class COCODataSet(DataSet): ...@@ -127,7 +127,6 @@ class COCODataSet(DataSet):
if not self.load_image_only: if not self.load_image_only:
ins_anno_ids = coco.getAnnIds(imgIds=img_id, iscrowd=False) ins_anno_ids = coco.getAnnIds(imgIds=img_id, iscrowd=False)
instances = coco.loadAnns(ins_anno_ids) instances = coco.loadAnns(ins_anno_ids)
bboxes = [] bboxes = []
for inst in instances: for inst in instances:
x, y, box_w, box_h = inst['bbox'] x, y, box_w, box_h = inst['bbox']
...@@ -135,6 +134,7 @@ class COCODataSet(DataSet): ...@@ -135,6 +134,7 @@ class COCODataSet(DataSet):
y1 = max(0, y) y1 = max(0, y)
x2 = min(im_w - 1, x1 + max(0, box_w - 1)) x2 = min(im_w - 1, x1 + max(0, box_w - 1))
y2 = min(im_h - 1, y1 + max(0, box_h - 1)) y2 = min(im_h - 1, y1 + max(0, box_h - 1))
if inst['area'] > 0 and x2 >= x1 and y2 >= y1: if inst['area'] > 0 and x2 >= x1 and y2 >= y1:
inst['clean_bbox'] = [x1, y1, x2, y2] inst['clean_bbox'] = [x1, y1, x2, y2]
bboxes.append(inst) bboxes.append(inst)
...@@ -143,6 +143,7 @@ class COCODataSet(DataSet): ...@@ -143,6 +143,7 @@ class COCODataSet(DataSet):
'Found an invalid bbox in annotations: im_id: {}, ' 'Found an invalid bbox in annotations: im_id: {}, '
'area: {} x1: {}, y1: {}, x2: {}, y2: {}.'.format( 'area: {} x1: {}, y1: {}, x2: {}, y2: {}.'.format(
img_id, float(inst['area']), x1, y1, x2, y2)) img_id, float(inst['area']), x1, y1, x2, y2))
num_bbox = len(bboxes) num_bbox = len(bboxes)
gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32) gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -24,7 +24,6 @@ except Exception: ...@@ -24,7 +24,6 @@ except Exception:
import logging import logging
import cv2 import cv2
import numpy as np import numpy as np
from .operators import register_op, BaseOperator from .operators import register_op, BaseOperator
from .op_helper import jaccard_overlap, gaussian2D from .op_helper import jaccard_overlap, gaussian2D
...@@ -50,10 +49,11 @@ class PadBatch(BaseOperator): ...@@ -50,10 +49,11 @@ class PadBatch(BaseOperator):
height and width is divisible by `pad_to_stride`. height and width is divisible by `pad_to_stride`.
""" """
def __init__(self, pad_to_stride=0, use_padded_im_info=True): def __init__(self, pad_to_stride=0, use_padded_im_info=True, pad_gt=False):
super(PadBatch, self).__init__() super(PadBatch, self).__init__()
self.pad_to_stride = pad_to_stride self.pad_to_stride = pad_to_stride
self.use_padded_im_info = use_padded_im_info self.use_padded_im_info = use_padded_im_info
self.pad_gt = pad_gt
def __call__(self, samples, context=None): def __call__(self, samples, context=None):
""" """
...@@ -61,11 +61,11 @@ class PadBatch(BaseOperator): ...@@ -61,11 +61,11 @@ class PadBatch(BaseOperator):
samples (list): a batch of sample, each is dict. samples (list): a batch of sample, each is dict.
""" """
coarsest_stride = self.pad_to_stride coarsest_stride = self.pad_to_stride
if coarsest_stride == 0: #if coarsest_stride == 0:
return samples # return samples
max_shape = np.array([data['image'].shape for data in samples]).max( max_shape = np.array([data['image'].shape for data in samples]).max(
axis=0) axis=0)
if coarsest_stride > 0: if coarsest_stride > 0:
max_shape[1] = int( max_shape[1] = int(
np.ceil(max_shape[1] / coarsest_stride) * coarsest_stride) np.ceil(max_shape[1] / coarsest_stride) * coarsest_stride)
...@@ -82,6 +82,52 @@ class PadBatch(BaseOperator): ...@@ -82,6 +82,52 @@ class PadBatch(BaseOperator):
data['image'] = padding_im data['image'] = padding_im
if self.use_padded_im_info: if self.use_padded_im_info:
data['im_info'][:2] = max_shape[1:3] data['im_info'][:2] = max_shape[1:3]
if self.pad_gt:
gt_num = []
if data['gt_poly'] is not None and len(data['gt_poly']) > 0:
pad_mask = True
else:
pad_mask = False
if pad_mask:
poly_num = []
poly_part_num = []
point_num = []
for data in samples:
gt_num.append(data['gt_bbox'].shape[0])
if pad_mask:
poly_num.append(len(data['gt_poly']))
for poly in data['gt_poly']:
poly_part_num.append(int(len(poly)))
for p_p in poly:
point_num.append(int(len(p_p) / 2))
gt_num_max = max(gt_num)
gt_box_data = np.zeros([gt_num_max, 4])
gt_class_data = np.zeros([gt_num_max])
is_crowd_data = np.ones([gt_num_max])
if pad_mask:
poly_num_max = max(poly_num)
poly_part_num_max = max(poly_part_num)
point_num_max = max(point_num)
gt_masks_data = -np.ones(
[poly_num_max, poly_part_num_max, point_num_max, 2])
for i, data in enumerate(samples):
gt_num = data['gt_bbox'].shape[0]
gt_box_data[0:gt_num, :] = data['gt_bbox']
gt_class_data[0:gt_num] = np.squeeze(data['gt_class'])
is_crowd_data[0:gt_num] = np.squeeze(data['is_crowd'])
if pad_mask:
for j, poly in enumerate(data['gt_poly']):
for k, p_p in enumerate(poly):
pp_np = np.array(p_p).reshape(-1, 2)
gt_masks_data[j, k, :pp_np.shape[0], :] = pp_np
data['gt_poly'] = gt_masks_data
data['gt_bbox'] = gt_box_data
data['gt_class'] = gt_class_data
data['is_crowd'] = is_crowd_data
return samples return samples
......
...@@ -122,7 +122,6 @@ class DecodeImage(BaseOperator): ...@@ -122,7 +122,6 @@ class DecodeImage(BaseOperator):
if self.to_rgb: if self.to_rgb:
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
sample['image'] = im sample['image'] = im
if 'h' not in sample: if 'h' not in sample:
sample['h'] = im.shape[0] sample['h'] = im.shape[0]
elif sample['h'] != im.shape[0]: elif sample['h'] != im.shape[0]:
...@@ -333,7 +332,6 @@ class ResizeImage(BaseOperator): ...@@ -333,7 +332,6 @@ class ResizeImage(BaseOperator):
resize_w = selected_size resize_w = selected_size
resize_h = selected_size resize_h = selected_size
if self.use_cv2: if self.use_cv2:
im = cv2.resize( im = cv2.resize(
im, im,
......
...@@ -2,6 +2,7 @@ from . import ops ...@@ -2,6 +2,7 @@ from . import ops
from . import bbox from . import bbox
from . import mask from . import mask
from . import backbone from . import backbone
from . import neck
from . import head from . import head
from . import architecture from . import architecture
...@@ -9,5 +10,6 @@ from .ops import * ...@@ -9,5 +10,6 @@ from .ops import *
from .bbox import * from .bbox import *
from .mask import * from .mask import *
from .backbone import * from .backbone import *
from .neck import *
from .head import * from .head import *
from .architecture import * from .architecture import *
...@@ -4,7 +4,6 @@ from __future__ import print_function ...@@ -4,7 +4,6 @@ from __future__ import print_function
from paddle import fluid from paddle import fluid
from ppdet.core.workspace import register from ppdet.core.workspace import register
from ppdet.utils.data_structure import BufferDict
from .meta_arch import BaseArch from .meta_arch import BaseArch
__all__ = ['MaskRCNN'] __all__ = ['MaskRCNN']
...@@ -18,84 +17,107 @@ class MaskRCNN(BaseArch): ...@@ -18,84 +17,107 @@ class MaskRCNN(BaseArch):
'proposal', 'proposal',
'mask', 'mask',
'backbone', 'backbone',
'neck',
'rpn_head', 'rpn_head',
'bbox_head', 'bbox_head',
'mask_head', 'mask_head',
] ]
def __init__(self, anchor, proposal, mask, backbone, rpn_head, bbox_head, def __init__(self,
mask_head, *args, **kwargs): anchor,
super(MaskRCNN, self).__init__(*args, **kwargs) proposal,
mask,
backbone,
rpn_head,
bbox_head,
mask_head,
neck=None):
super(MaskRCNN, self).__init__()
self.anchor = anchor self.anchor = anchor
self.proposal = proposal self.proposal = proposal
self.mask = mask self.mask = mask
self.backbone = backbone self.backbone = backbone
self.neck = neck
self.rpn_head = rpn_head self.rpn_head = rpn_head
self.bbox_head = bbox_head self.bbox_head = bbox_head
self.mask_head = mask_head self.mask_head = mask_head
def model_arch(self, ): def model_arch(self):
# Backbone # Backbone
bb_out = self.backbone(self.gbd) body_feats = self.backbone(self.inputs)
self.gbd.update(bb_out) spatial_scale = None
# Neck
if self.neck is not None:
body_feats, spatial_scale = self.neck(body_feats)
# RPN # RPN
rpn_head_out = self.rpn_head(self.gbd) # rpn_head returns two list: rpn_feat, rpn_head_out
self.gbd.update(rpn_head_out) # each element in rpn_feats contains rpn feature on each level,
# and the length is 1 when the neck is not applied.
# each element in rpn_head_out contains (rpn_rois_score, rpn_rois_delta)
rpn_feat, self.rpn_head_out = self.rpn_head(self.inputs, body_feats)
# Anchor # Anchor
anchor_out = self.anchor(self.gbd) # anchor_out returns a list,
self.gbd.update(anchor_out) # each element contains (anchor, anchor_var)
self.anchor_out = self.anchor(rpn_feat)
# Proposal BBox
self.gbd['stage'] = 0
proposal_out = self.proposal(self.gbd)
self.gbd.update({'proposal_0': proposal_out})
# Proposal RoI
# compute targets here when training
rois = self.proposal(self.inputs, self.rpn_head_out, self.anchor_out)
# BBox Head # BBox Head
bboxhead_out = self.bbox_head(self.gbd) bbox_feat, self.bbox_head_out = self.bbox_head(body_feats, rois,
self.gbd.update({'bbox_head_0': bboxhead_out}) spatial_scale)
rois_has_mask_int32 = None
if self.inputs['mode'] == 'infer':
# Refine bbox by the output from bbox_head at test stage
self.bboxes = self.proposal.post_process(self.inputs,
self.bbox_head_out, rois)
else:
# Proposal RoI for Mask branch
# bboxes update at training stage only
bbox_targets = self.proposal.get_targets()[0]
self.bboxes, rois_has_mask_int32 = self.mask(self.inputs, rois,
bbox_targets)
# Mask Head
self.mask_head_out = self.mask_head(self.inputs, body_feats,
self.bboxes, bbox_feat,
rois_has_mask_int32, spatial_scale)
if self.gbd['mode'] == 'infer': def loss(self, ):
bbox_out = self.proposal.post_process(self.gbd) loss = {}
self.gbd.update(bbox_out)
# Mask # RPN loss
mask_out = self.mask(self.gbd) rpn_loss_inputs = self.anchor.generate_loss_inputs(
self.gbd.update(mask_out) self.inputs, self.rpn_head_out, self.anchor_out)
loss_rpn = self.rpn_head.loss(rpn_loss_inputs)
loss.update(loss_rpn)
# Mask Head # BBox loss
mask_head_out = self.mask_head(self.gbd) bbox_targets = self.proposal.get_targets()
self.gbd.update(mask_head_out) loss_bbox = self.bbox_head.loss(self.bbox_head_out, bbox_targets)
loss.update(loss_bbox)
if self.gbd['mode'] == 'infer': # Mask loss
mask_out = self.mask.post_process(self.gbd) mask_targets = self.mask.get_targets()
self.gbd.update(mask_out) loss_mask = self.mask_head.loss(self.mask_head_out, mask_targets)
loss.update(loss_mask)
def loss(self, ): total_loss = fluid.layers.sums(list(loss.values()))
losses = [] loss.update({'loss': total_loss})
rpn_cls_loss, rpn_reg_loss = self.rpn_head.loss(self.gbd) return loss
bbox_cls_loss, bbox_reg_loss = self.bbox_head.loss(self.gbd)
mask_loss = self.mask_head.loss(self.gbd)
losses = [
rpn_cls_loss, rpn_reg_loss, bbox_cls_loss, bbox_reg_loss, mask_loss
]
loss = fluid.layers.sum(losses)
out = {
'loss': loss,
'loss_rpn_cls': rpn_cls_loss,
'loss_rpn_reg': rpn_reg_loss,
'loss_bbox_cls': bbox_cls_loss,
'loss_bbox_reg': bbox_reg_loss,
'loss_mask': mask_loss
}
return out
def infer(self, ): def infer(self, ):
outs = { mask = self.mask.post_process(self.bboxes, self.mask_head_out,
'bbox': self.gbd['predicted_bbox'].numpy(), self.inputs['im_info'])
'bbox_nums': self.gbd['predicted_bbox_nums'].numpy(), bbox, bbox_num = self.bboxes
'mask': self.gbd['predicted_mask'].numpy(), output = {
'im_id': self.gbd['im_id'].numpy() 'bbox': bbox.numpy(),
'bbox_num': bbox_num.numpy(),
'im_id': self.inputs['im_id'].numpy()
} }
return inputs output.update(mask)
return output
...@@ -13,39 +13,36 @@ __all__ = ['BaseArch'] ...@@ -13,39 +13,36 @@ __all__ = ['BaseArch']
@register @register
class BaseArch(Layer): class BaseArch(Layer):
def __init__(self, *args, **kwargs): def __init__(self):
super(BaseArch, self).__init__() super(BaseArch, self).__init__()
self.args = args
self.kwargs = kwargs
def forward(self, inputs, inputs_keys):
self.gbd = BufferDict()
self.gbd.update(self.kwargs)
assert self.gbd[
'mode'] is not None, "Please specify mode train or infer in config file!"
if self.kwargs['open_debug'] is None:
self.gbd['open_debug'] = False
self.build_inputs(inputs, inputs_keys)
def forward(self, data, input_def, mode):
self.inputs = self.build_inputs(data, input_def)
self.inputs['mode'] = mode
self.model_arch() self.model_arch()
self.gbd.debug() if mode == 'train':
if self.gbd['mode'] == 'train':
out = self.loss() out = self.loss()
elif self.gbd['mode'] == 'infer': elif mode == 'infer':
out = self.infer() out = self.infer()
else: else:
raise "Now, only support train or infer mode!" raise "Now, only support train or infer mode!"
return out return out
def build_inputs(self, inputs, inputs_keys): def build_inputs(self, data, input_def):
for i, k in enumerate(inputs_keys): inputs = {}
v = to_variable(np.array([x[i] for x in inputs])) for name in input_def:
self.gbd.set(k, v) inputs[name] = []
batch_size = len(data)
def model_arch(self, ): for bs in range(batch_size):
for name, input in zip(input_def, data[bs]):
input_v = np.array(input)[np.newaxis, ...]
inputs[name].append(input_v)
for name in input_def:
inputs[name] = to_variable(np.concatenate(inputs[name]))
return inputs
def model_arch(self, mode):
raise NotImplementedError("Should implement model_arch method!") raise NotImplementedError("Should implement model_arch method!")
def loss(self, ): def loss(self, ):
......
class NameAdapter(object):
"""Fix the backbones variable names for pretrained weight"""
def __init__(self, model):
super(NameAdapter, self).__init__()
self.model = model
@property
def model_type(self):
return getattr(self.model, '_model_type', '')
@property
def variant(self):
return getattr(self.model, 'variant', '')
def fix_conv_norm_name(self, name):
if name == "conv1":
bn_name = "bn_" + name
else:
bn_name = "bn" + name[3:]
# the naming rule is same as pretrained weight
if self.model_type == 'SEResNeXt':
bn_name = name + "_bn"
return bn_name
def fix_shortcut_name(self, name):
if self.model_type == 'SEResNeXt':
name = 'conv' + name + '_prj'
return name
def fix_bottleneck_name(self, name):
if self.model_type == 'SEResNeXt':
conv_name1 = 'conv' + name + '_x1'
conv_name2 = 'conv' + name + '_x2'
conv_name3 = 'conv' + name + '_x3'
shortcut_name = name
else:
conv_name1 = name + "_branch2a"
conv_name2 = name + "_branch2b"
conv_name3 = name + "_branch2c"
shortcut_name = name + "_branch1"
return conv_name1, conv_name2, conv_name3, shortcut_name
def fix_layer_warp_name(self, stage_num, count, i):
name = 'res' + str(stage_num)
if count > 10 and stage_num == 4:
if i == 0:
conv_name = name + "a"
else:
conv_name = name + "b" + str(i)
else:
conv_name = name + chr(ord("a") + i)
if self.model_type == 'SEResNeXt':
conv_name = str(stage_num + 2) + '_' + str(i + 1)
return conv_name
def fix_c1_stage_name(self):
return "res_conv1" if self.model_type == 'ResNeXt' else "conv1"
import numpy as np import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.dygraph import Layer from paddle.fluid.dygraph import Layer, Sequential
from paddle.fluid.dygraph import Conv2D, Pool2D, BatchNorm from paddle.fluid.dygraph import Conv2D, Pool2D, BatchNorm
from paddle.fluid.param_attr import ParamAttr from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Constant from paddle.fluid.initializer import Constant
from ppdet.core.workspace import register, serializable from ppdet.core.workspace import register, serializable
from paddle.fluid.regularizer import L2Decay
from .name_adapter import NameAdapter
from numbers import Integral
class ConvBNLayer(Layer): class ConvNormLayer(Layer):
def __init__(self, def __init__(self,
name_scope,
ch_in, ch_in,
ch_out, ch_out,
filter_size, filter_size,
stride, stride,
padding, name_adapter,
act='relu', act=None,
lr=1.0): norm_type='bn',
super(ConvBNLayer, self).__init__() norm_decay=0.,
freeze_norm=True,
self.conv = Conv2D(
num_channels=ch_in,
num_filters=ch_out,
filter_size=filter_size,
stride=stride,
padding=padding,
groups=1,
act=act,
param_attr=ParamAttr(
name=name_scope + "_weights", learning_rate=lr),
bias_attr=ParamAttr(name=name_scope + "_bias"))
if name_scope == "conv1":
bn_name = "bn_" + name_scope
else:
bn_name = "bn" + name_scope[3:]
self.bn = BatchNorm(
num_channels=ch_out,
act=act,
param_attr=ParamAttr(name=bn_name + '_scale'),
bias_attr=ParamAttr(name=bn_name + '_offset'),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')
def forward(self, inputs):
out = self.conv(inputs)
out = self.bn(out)
return out
class ConvAffineLayer(Layer):
def __init__(self,
name_scope,
ch_in,
ch_out,
filter_size,
stride,
padding,
lr=1.0, lr=1.0,
act='relu'): name=None):
super(ConvAffineLayer, self).__init__() super(ConvNormLayer, self).__init__()
assert norm_type in ['bn', 'affine_channel']
self.norm_type = norm_type
self.act = act
self.conv = Conv2D( self.conv = Conv2D(
num_channels=ch_in, num_channels=ch_in,
num_filters=ch_out, num_filters=ch_out,
filter_size=filter_size, filter_size=filter_size,
stride=stride, stride=stride,
padding=padding, padding=(filter_size - 1) // 2,
groups=1,
act=None, act=None,
param_attr=ParamAttr( param_attr=ParamAttr(
name=name_scope + "_weights", learning_rate=lr), learning_rate=lr, name=name + "_weights"),
bias_attr=False) bias_attr=False)
if name_scope == "conv1":
bn_name = "bn_" + name_scope
else:
bn_name = "bn" + name_scope[3:]
self.scale = fluid.layers.create_parameter(
shape=[ch_out],
dtype='float32',
attr=ParamAttr(
name=bn_name + '_scale', learning_rate=0.),
default_initializer=Constant(1.))
self.offset = fluid.layers.create_parameter(
shape=[ch_out],
dtype='float32',
attr=ParamAttr(
name=bn_name + '_offset', learning_rate=0.),
default_initializer=Constant(0.))
self.act = act bn_name = name_adapter.fix_conv_norm_name(name)
norm_lr = 0. if freeze_norm else lr
param_attr = ParamAttr(
learning_rate=norm_lr,
regularizer=L2Decay(norm_decay),
name=bn_name + "_scale",
trainable=False if freeze_norm else True)
bias_attr = ParamAttr(
learning_rate=norm_lr,
regularizer=L2Decay(norm_decay),
name=bn_name + "_offset",
trainable=False if freeze_norm else True)
if norm_type in ['bn', 'sync_bn']:
global_stats = True if freeze_norm else False
self.norm = BatchNorm(
num_channels=ch_out,
act=act,
param_attr=param_attr,
bias_attr=bias_attr,
use_global_stats=global_stats,
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')
norm_params = self.norm.parameters()
elif norm_type == 'affine_channel':
self.scale = fluid.layers.create_parameter(
shape=[ch_out],
dtype='float32',
attr=param_attr,
default_initializer=Constant(1.))
self.offset = fluid.layers.create_parameter(
shape=[ch_out],
dtype='float32',
attr=bias_attr,
default_initializer=Constant(0.))
norm_params = [self.scale, self.offset]
if freeze_norm:
for param in norm_params:
param.stop_gradient = True
def forward(self, inputs): def forward(self, inputs):
out = self.conv(inputs) out = self.conv(inputs)
out = fluid.layers.affine_channel( if self.norm_type == 'bn':
out, scale=self.scale, bias=self.offset) out = self.norm(out)
if self.act == 'relu': elif self.norm_type == 'affine_channel':
out = fluid.layers.relu(out) out = fluid.layers.affine_channel(
out, scale=self.scale, bias=self.offset, act=self.act)
return out return out
class BottleNeck(Layer): class BottleNeck(Layer):
def __init__(self, def __init__(self,
name_scope,
ch_in, ch_in,
ch_out, ch_out,
stride, stride,
shortcut=True, shortcut,
name_adapter,
name,
variant='b',
lr=1.0, lr=1.0,
norm_type='bn'): norm_type='bn',
norm_decay=0.,
freeze_norm=True):
super(BottleNeck, self).__init__() super(BottleNeck, self).__init__()
self.name_scope = name_scope if variant == 'a':
if norm_type == 'bn': stride1, stride2 = stride, 1
atom_block = ConvBNLayer
elif norm_type == 'affine':
atom_block = ConvAffineLayer
else: else:
atom_block = None stride1, stride2 = 1, stride
assert atom_block != None, 'NormType only support BatchNorm and Affine!'
conv_name1, conv_name2, conv_name3, \
shortcut_name = name_adapter.fix_bottleneck_name(name)
self.shortcut = shortcut self.shortcut = shortcut
if not shortcut: if not shortcut:
self.branch1 = atom_block( self.short = ConvNormLayer(
name_scope + "_branch1",
ch_in=ch_in, ch_in=ch_in,
ch_out=ch_out * 4, ch_out=ch_out * 4,
filter_size=1, filter_size=1,
stride=stride, stride=stride,
padding=0, name_adapter=name_adapter,
act=None, norm_type=norm_type,
lr=lr) norm_decay=norm_decay,
freeze_norm=freeze_norm,
self.branch2a = atom_block( lr=lr,
name_scope + "_branch2a", name=shortcut_name)
self.branch2a = ConvNormLayer(
ch_in=ch_in, ch_in=ch_in,
ch_out=ch_out, ch_out=ch_out,
filter_size=1, filter_size=1,
stride=stride, stride=stride1,
padding=0, name_adapter=name_adapter,
lr=lr) act='relu',
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
lr=lr,
name=conv_name1)
self.branch2b = atom_block( self.branch2b = ConvNormLayer(
name_scope + "_branch2b",
ch_in=ch_out, ch_in=ch_out,
ch_out=ch_out, ch_out=ch_out,
filter_size=3, filter_size=3,
stride=1, stride=stride2,
padding=1, name_adapter=name_adapter,
lr=lr) act='relu',
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
lr=lr,
name=conv_name2)
self.branch2c = atom_block( self.branch2c = ConvNormLayer(
name_scope + "_branch2c",
ch_in=ch_out, ch_in=ch_out,
ch_out=ch_out * 4, ch_out=ch_out * 4,
filter_size=1, filter_size=1,
stride=1, stride=1,
padding=0, name_adapter=name_adapter,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
lr=lr, lr=lr,
act=None) name=conv_name3)
def forward(self, inputs): def forward(self, inputs):
if self.shortcut: if self.shortcut:
short = inputs short = inputs
else: else:
short = self.branch1(inputs) short = self.short(inputs)
out = self.branch2a(inputs) out = self.branch2a(inputs)
out = self.branch2b(out) out = self.branch2b(out)
out = self.branch2c(out) out = self.branch2c(out)
out = fluid.layers.elementwise_add( out = fluid.layers.elementwise_add(x=short, y=out, act='relu')
x=short, y=out, act='relu', name=self.name_scope + ".add.output.5")
return out return out
class Blocks(Layer): class Blocks(Layer):
def __init__(self, def __init__(self,
name_scope,
ch_in, ch_in,
ch_out, ch_out,
count, count,
stride, name_adapter,
stage_num,
lr=1.0, lr=1.0,
norm_type='bn'): norm_type='bn',
norm_decay=0.,
freeze_norm=True):
super(Blocks, self).__init__() super(Blocks, self).__init__()
self.blocks = [] self.blocks = []
for i in range(count): for i in range(count):
if i == 0: conv_name = name_adapter.fix_layer_warp_name(stage_num, count, i)
name = name_scope + "a"
self.stride = stride
self.shortcut = False
else:
name = name_scope + chr(ord("a") + i)
self.stride = 1
self.shortcut = True
block = self.add_sublayer( block = self.add_sublayer(
name, conv_name,
BottleNeck( BottleNeck(
name,
ch_in=ch_in if i == 0 else ch_out * 4, ch_in=ch_in if i == 0 else ch_out * 4,
ch_out=ch_out, ch_out=ch_out,
stride=self.stride, stride=2 if i == 0 and stage_num != 2 else 1,
shortcut=self.shortcut, shortcut=False if i == 0 else True,
name_adapter=name_adapter,
name=conv_name,
variant=name_adapter.variant,
lr=lr, lr=lr,
norm_type=norm_type)) norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm))
self.blocks.append(block) self.blocks.append(block)
shortcut = True
def forward(self, inputs): def forward(self, inputs):
res_out = self.blocks[0](inputs) block_out = inputs
for block in self.blocks[1:]: for block in self.blocks:
res_out = block(res_out) block_out = block(block_out)
return res_out return block_out
ResNet_cfg = {'50': [3, 4, 6, 3], '101': [3, 4, 23, 3], '152': [3, 8, 36, 3]} ResNet_cfg = {50: [3, 4, 6, 3], 101: [3, 4, 23, 3], 152: [3, 8, 36, 3]}
@register @register
@serializable @serializable
class ResNet(Layer): class ResNet(Layer):
def __init__(self, depth=50, norm_type='bn', freeze_at='res2'): def __init__(self,
depth=50,
variant='b',
lr_mult=1.,
norm_type='bn',
norm_decay=0,
freeze_norm=True,
freeze_at=0,
return_idx=[0, 1, 2, 3],
num_stages=4):
super(ResNet, self).__init__() super(ResNet, self).__init__()
self.depth = depth self.depth = depth
self.variant = variant
self.norm_type = norm_type self.norm_type = norm_type
self.norm_decay = norm_decay
self.freeze_norm = freeze_norm
self.freeze_at = freeze_at self.freeze_at = freeze_at
if isinstance(return_idx, Integral):
block_nums = ResNet_cfg[str(self.depth)] return_idx = [return_idx]
if self.norm_type == 'bn': assert max(return_idx) < num_stages, \
atom_block = ConvBNLayer 'the maximum return index must smaller than num_stages, ' \
elif self.norm_type == 'affine': 'but received maximum return index is {} and num_stages ' \
atom_block = ConvAffineLayer 'is {}'.format(max(return_idx), num_stages)
self.return_idx = return_idx
self.num_stages = num_stages
block_nums = ResNet_cfg[depth]
na = NameAdapter(self)
conv1_name = na.fix_c1_stage_name()
if variant in ['c', 'd']:
conv_def = [
[3, 32, 3, 2, "conv1_1"],
[32, 32, 3, 1, "conv1_2"],
[32, 64, 3, 1, "conv1_3"],
]
else: else:
atom_block = None conv_def = [[3, 64, 7, 2, conv1_name]]
assert atom_block != None, 'NormType only support BatchNorm and Affine!' self.conv1 = Sequential()
for (c_in, c_out, k, s, _name) in conv_def:
self.conv1 = atom_block( self.conv1.add_sublayer(
'conv1', ch_in=3, ch_out=64, filter_size=7, stride=2, padding=3) _name,
ConvNormLayer(
ch_in=c_in,
ch_out=c_out,
filter_size=k,
stride=s,
name_adapter=na,
act='relu',
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
lr=lr_mult,
name=_name))
self.pool = Pool2D( self.pool = Pool2D(
pool_type='max', pool_size=3, pool_stride=2, pool_padding=1) pool_type='max', pool_size=3, pool_stride=2, pool_padding=1)
self.stage2 = Blocks( ch_in_list = [64, 256, 512, 1024]
"res2", ch_out_list = [64, 128, 256, 512]
ch_in=64,
ch_out=64, self.res_layers = []
count=block_nums[0], for i in range(num_stages):
stride=1, stage_num = i + 2
norm_type=norm_type) res_name = "res{}".format(stage_num)
res_layer = self.add_sublayer(
self.stage3 = Blocks( res_name,
"res3", Blocks(
ch_in=256, ch_in_list[i],
ch_out=128, ch_out_list[i],
count=block_nums[1], count=block_nums[i],
stride=2, name_adapter=na,
norm_type=norm_type) stage_num=stage_num,
lr=lr_mult,
self.stage4 = Blocks( norm_type=norm_type,
"res4", norm_decay=norm_decay,
ch_in=512, freeze_norm=freeze_norm))
ch_out=256, self.res_layers.append(res_layer)
count=block_nums[2],
stride=2,
norm_type=norm_type)
def forward(self, inputs): def forward(self, inputs):
x = inputs['image'] x = inputs['image']
conv1 = self.conv1(x) conv1 = self.conv1(x)
x = self.pool(conv1)
pool1 = self.pool(conv1) outs = []
for idx, stage in enumerate(self.res_layers):
res2 = self.stage2(pool1) x = stage(x)
if idx == self.freeze_at:
res3 = self.stage3(res2) x.stop_gradient = True
if idx in self.return_idx:
res4 = self.stage4(res3) outs.append(x)
outs = {
'res2': res2,
'res3': res3,
'res4': res4,
'res_norm_type': self.norm_type
}
outs[self.freeze_at].stop_gradient = True
return outs return outs
...@@ -5,51 +5,65 @@ from ppdet.core.workspace import register ...@@ -5,51 +5,65 @@ from ppdet.core.workspace import register
@register @register
class BBoxPostProcess(object): class BBoxPostProcess(object):
__shared__ = ['num_classes', 'num_stages'] __shared__ = ['num_classes']
__inject__ = ['decode_clip_nms'] __inject__ = ['decode_clip_nms']
def __init__(self, def __init__(self,
decode_clip_nms, decode_clip_nms,
num_classes=81, num_classes=81,
num_stages=1, cls_agnostic=False,
decode=None, decode=None,
clip=None, clip=None,
nms=None): nms=None,
score_stage=[0, 1, 2],
delta_stage=[2]):
super(BBoxPostProcess, self).__init__() super(BBoxPostProcess, self).__init__()
self.num_classes = num_classes self.num_classes = num_classes
self.num_stages = num_stages
self.decode = decode self.decode = decode
self.clip = clip self.clip = clip
self.nms = nms self.nms = nms
self.decode_clip_nms = decode_clip_nms self.decode_clip_nms = decode_clip_nms
self.score_stage = score_stage
self.delta_stage = delta_stage
self.out_dim = 2 if cls_agnostic else num_classes
self.cls_agnostic = cls_agnostic
def __call__(self, inputs): def __call__(self, inputs, bboxheads, rois):
# TODO: split into 3 steps # TODO: split into 3 steps
# TODO: modify related ops for deploying # TODO: modify related ops for deploying
# decode # decode
# clip # clip
# nms # nms
if self.num_stages > 0: if isinstance(rois, tuple):
bbox_prob_list = [] proposal, proposal_num = rois
for i in range(self.num_stages): score, delta = bboxheads[0]
bbox_prob_list.append(inputs['bbox_head_' + str(i)][ bbox_prob = fluid.layers.softmax(score)
'bbox_prob']) delta = fluid.layers.reshape(delta, (-1, self.out_dim, 4))
bbox_prob = fluid.layers.sum(bbox_prob_list) / float(
len(bbox_prob_list))
bbox_delta = inputs['bbox_head_' + str(i)]['bbox_delta']
if inputs['bbox_head_0']['cls_agnostic_bbox_reg'] == 2:
bbox_delta = fluid.layers.slice(
bbox_delta, axes=1, starts=[1], ends=[2])
bbox_delta = fluid.layers.expand(bbox_delta,
[1, self.num_classes, 1])
else: else:
bbox_prob = inputs['bbox_prob'] num_stage = len(rois)
bbox_delta = inputs['bbox_delta'] proposal_list = []
prob_list = []
outs = self.decode_clip_nms(inputs['rpn_rois'], bbox_prob, bbox_delta, delta_list = []
inputs['im_info']) for stage, (proposals, bboxhead) in zip(rois, bboxheads):
outs = {"predicted_bbox_nums": outs[0], "predicted_bbox": outs[1]} score, delta = bboxhead
return outs proposal, proposal_num = proposals
if stage in self.score_stage:
bbox_prob = fluid.layers.softmax(score)
prob_list.append(bbox_prob)
if stage in self.delta_stage:
proposal_list.append(proposal)
delta_list.append(delta)
bbox_prob = fluid.layers.mean(prob_list)
delta = fluid.layers.mean(delta_list)
proposal = fluid.layers.mean(proposal_list)
delta = fluid.layers.reshape(delta, (-1, self.out_dim, 4))
if self.cls_agnostic:
delta = delta[:, 1:2, :]
delta = fluid.layers.expand(delta, [1, self.num_classes, 1])
bboxes = (proposal, proposal_num)
bboxes, bbox_nums = self.decode_clip_nms(bboxes, bbox_prob, delta,
inputs['im_info'])
return bboxes, bbox_nums
@register @register
...@@ -97,36 +111,51 @@ class AnchorRPN(object): ...@@ -97,36 +111,51 @@ class AnchorRPN(object):
self.anchor_generator = anchor_generator self.anchor_generator = anchor_generator
self.anchor_target_generator = anchor_target_generator self.anchor_target_generator = anchor_target_generator
def __call__(self, inputs): def __call__(self, rpn_feats):
outs = self.generate_anchors(inputs) anchors = []
return outs num_level = len(rpn_feats)
for i, rpn_feat in enumerate(rpn_feats):
def generate_anchors(self, inputs): anchor, var = self.anchor_generator(rpn_feat, i)
# TODO: update here to use int to specify featmap size anchors.append((anchor, var))
outs = self.anchor_generator(inputs['rpn_feat']) return anchors
outs = {'anchor': outs[0], 'anchor_var': outs[1], 'anchor_module': self}
return outs def _get_target_input(self, rpn_feats, anchors):
rpn_score_list = []
def generate_anchors_target(self, inputs): rpn_delta_list = []
rpn_rois_score = fluid.layers.transpose( anchor_list = []
inputs['rpn_rois_score'], perm=[0, 2, 3, 1]) for (rpn_score, rpn_delta), (anchor, var) in zip(rpn_feats, anchors):
rpn_rois_delta = fluid.layers.transpose( rpn_score = fluid.layers.transpose(rpn_score, perm=[0, 2, 3, 1])
inputs['rpn_rois_delta'], perm=[0, 2, 3, 1]) rpn_delta = fluid.layers.transpose(rpn_delta, perm=[0, 2, 3, 1])
rpn_rois_score = fluid.layers.reshape( rpn_score = fluid.layers.reshape(x=rpn_score, shape=(0, -1, 1))
x=rpn_rois_score, shape=(0, -1, 1)) rpn_delta = fluid.layers.reshape(x=rpn_delta, shape=(0, -1, 4))
rpn_rois_delta = fluid.layers.reshape(
x=rpn_rois_delta, shape=(0, -1, 4)) anchor = fluid.layers.reshape(anchor, shape=(-1, 4))
var = fluid.layers.reshape(var, shape=(-1, 4))
anchor = fluid.layers.reshape(inputs['anchor'], shape=(-1, 4))
rpn_score_list.append(rpn_score)
rpn_delta_list.append(rpn_delta)
anchor_list.append(anchor)
rpn_scores = fluid.layers.concat(rpn_score_list, axis=1)
rpn_deltas = fluid.layers.concat(rpn_delta_list, axis=1)
anchors = fluid.layers.concat(anchor_list)
return rpn_scores, rpn_deltas, anchors
def generate_loss_inputs(self, inputs, rpn_head_out, anchors):
assert len(rpn_head_out) == len(
anchors
), "rpn_head_out and anchors should have same length, but received rpn_head_out' length is {} and anchors' length is {}".format(
len(rpn_head_out), len(anchors))
rpn_score, rpn_delta, anchors = self._get_target_input(rpn_head_out,
anchors)
score_pred, roi_pred, score_tgt, roi_tgt, roi_weight = self.anchor_target_generator( score_pred, roi_pred, score_tgt, roi_tgt, roi_weight = self.anchor_target_generator(
bbox_pred=rpn_rois_delta, bbox_pred=rpn_delta,
cls_logits=rpn_rois_score, cls_logits=rpn_score,
anchor_box=anchor, anchor_box=anchors,
gt_boxes=inputs['gt_bbox'], gt_boxes=inputs['gt_bbox'],
is_crowd=inputs['is_crowd'], is_crowd=inputs['is_crowd'],
im_info=inputs['im_info'], im_info=inputs['im_info'])
open_debug=inputs['open_debug'])
outs = { outs = {
'rpn_score_pred': score_pred, 'rpn_score_pred': score_pred,
'rpn_score_target': score_tgt, 'rpn_score_target': score_tgt,
...@@ -180,86 +209,107 @@ class Proposal(object): ...@@ -180,86 +209,107 @@ class Proposal(object):
self.proposal_target_generator = proposal_target_generator self.proposal_target_generator = proposal_target_generator
self.bbox_post_process = bbox_post_process self.bbox_post_process = bbox_post_process
def __call__(self, inputs): def generate_proposal(self, inputs, rpn_head_out, anchor_out):
outs = {} rpn_rois_list = []
if inputs['stage'] == 0: rpn_prob_list = []
proposal_out = self.generate_proposal(inputs) rpn_rois_num_list = []
inputs.update(proposal_out) for (rpn_score, rpn_delta), (anchor, var) in zip(rpn_head_out,
if inputs['mode'] == 'train': anchor_out):
proposal_target_out = self.generate_proposal_target(inputs) rpn_prob = fluid.layers.sigmoid(rpn_score)
outs.update(proposal_target_out) rpn_rois, rpn_rois_prob, rpn_rois_num, post_nms_top_n = self.proposal_generator(
return outs scores=rpn_prob,
bbox_deltas=rpn_delta,
def generate_proposal(self, inputs): anchors=anchor,
rpn_rois_prob = fluid.layers.sigmoid( variances=var,
inputs['rpn_rois_score'], name='rpn_rois_prob') im_info=inputs['im_info'],
outs = self.proposal_generator( mode=inputs['mode'])
scores=rpn_rois_prob, if len(rpn_head_out) == 1:
bbox_deltas=inputs['rpn_rois_delta'], return rpn_rois, rpn_rois_num
anchors=inputs['anchor'], rpn_rois_list.append(rpn_rois)
variances=inputs['anchor_var'], rpn_prob_list.append(rpn_rois_prob)
im_info=inputs['im_info'], rpn_rois_num_list.append(rpn_rois_num)
mode=inputs['mode'])
outs = { start_level = 2
'rpn_rois': outs[0], end_level = start_level + len(rpn_head_out)
'rpn_rois_probs': outs[1], rois_collect, rois_num_collect = fluid.layers.collect_fpn_proposals(
'rpn_rois_nums': outs[2] rpn_rois_list,
} rpn_prob_list,
return outs start_level,
end_level,
def generate_proposal_target(self, inputs): post_nms_top_n,
if inputs['stage'] == 0: rois_num_per_level=rpn_rois_num_list)
rois = inputs['rpn_rois'] return rois_collect, rois_num_collect
rois_num = inputs['rpn_rois_nums']
elif inputs['stage'] > 0: def generate_proposal_target(self, inputs, rois, rois_num, stage=0):
last_proposal_out = inputs['proposal_' + str(inputs['stage'] - 1)]
rois = last_proposal_out['refined_bbox']
rois_num = last_proposal_out['rois_nums']
outs = self.proposal_target_generator( outs = self.proposal_target_generator(
rpn_rois=rois, rpn_rois=rois,
rpn_rois_nums=rois_num, rpn_rois_num=rois_num,
gt_classes=inputs['gt_class'], gt_classes=inputs['gt_class'],
is_crowd=inputs['is_crowd'], is_crowd=inputs['is_crowd'],
gt_boxes=inputs['gt_bbox'], gt_boxes=inputs['gt_bbox'],
im_info=inputs['im_info'], im_info=inputs['im_info'],
stage=inputs['stage'], stage=stage)
open_debug=inputs['open_debug']) rois = outs[0]
outs = { rois_num = outs[-1]
'rois': outs[0], targets = {
'labels_int32': outs[1], 'labels_int32': outs[1],
'bbox_targets': outs[2], 'bbox_targets': outs[2],
'bbox_inside_weights': outs[3], 'bbox_inside_weights': outs[3],
'bbox_outside_weights': outs[4], 'bbox_outside_weights': outs[4]
'rois_nums': outs[5]
} }
return outs return rois, rois_num, targets
def refine_bbox(self, inputs):
if inputs['mode'] == 'train':
rois = inputs['proposal_' + str(inputs['stage'])]['rois']
else:
rois = inputs['rpn_rois']
bbox_head_out = inputs['bbox_head_' + str(inputs['stage'])]
bbox_delta_r = fluid.layers.reshape( def refine_bbox(self, rois, bbox_delta, stage=0):
bbox_head_out['bbox_delta'], out_dim = bbox_delta.shape[1] / 4
(-1, inputs['bbox_head_0']['cls_agnostic_bbox_reg'], 4)) bbox_delta_r = fluid.layers.reshape(bbox_delta, (-1, out_dim, 4))
bbox_delta_s = fluid.layers.slice( bbox_delta_s = fluid.layers.slice(
bbox_delta_r, axes=[1], starts=[1], ends=[2]) bbox_delta_r, axes=[1], starts=[1], ends=[2])
refined_bbox = fluid.layers.box_coder( refined_bbox = fluid.layers.box_coder(
prior_box=rois, prior_box=rois,
prior_box_var=self.proposal_target_generator.bbox_reg_weights[ prior_box_var=self.proposal_target_generator.bbox_reg_weights[
inputs['stage']], stage],
target_box=bbox_delta_s, target_box=bbox_delta_s,
code_type='decode_center_size', code_type='decode_center_size',
box_normalized=False, box_normalized=False,
axis=1) axis=1)
refined_bbox = fluid.layers.reshape(refined_bbox, shape=[-1, 4]) refined_bbox = fluid.layers.reshape(refined_bbox, shape=[-1, 4])
outs = {'refined_bbox': refined_bbox} return refined_bbox
return outs
def __call__(self,
inputs,
rpn_head_out,
anchor_out,
stage=0,
proposal_out=None,
bbox_head_outs=None,
refined=False):
if refined:
assert proposal_out is not None, "If proposal has been refined, proposal_out should not be None."
return proposal_out
if stage == 0:
roi, rois_num = self.generate_proposal(inputs, rpn_head_out,
anchor_out)
self.proposals_list = []
self.targets_list = []
def post_process(self, inputs): else:
outs = self.bbox_post_process(inputs) bbox_delta = bbox_head_outs[stage][0]
return outs roi = self.refine_bbox(proposal_out[0], bbox_delta, stage - 1)
rois_num = proposal_out[1]
if inputs['mode'] == 'train':
roi, rois_num, targets = self.generate_proposal_target(
inputs, roi, rois_num, stage)
self.targets_list.append(targets)
self.proposals_list.append((roi, rois_num))
return roi, rois_num
def get_targets(self):
return self.targets_list
def get_proposals(self):
return self.proposals_list
def post_process(self, inputs, bbox_head_out, rois):
bboxes = self.bbox_post_process(inputs, bbox_head_out, rois)
return bboxes
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.dygraph import Layer from paddle.fluid.dygraph import Layer
from paddle.fluid.param_attr import ParamAttr from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Normal, MSRA from paddle.fluid.initializer import Normal, Xavier
from paddle.fluid.regularizer import L2Decay from paddle.fluid.regularizer import L2Decay
from paddle.fluid.dygraph.nn import Conv2D, Pool2D from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
from ppdet.core.workspace import register from ppdet.core.workspace import register
# TODO: del import and use inject
from ..backbone.resnet import Blocks
@register @register
class BBoxFeat(Layer): class TwoFCHead(Layer):
__inject__ = ['roi_extractor']
__shared__ = ['num_stages'] __shared__ = ['num_stages']
def __init__(self, roi_extractor, feat_in=1024, feat_out=512, num_stages=1): def __init__(self, in_dim=256, mlp_dim=1024, resolution=7, num_stages=1):
super(TwoFCHead, self).__init__()
self.in_dim = in_dim
self.mlp_dim = mlp_dim
self.num_stages = num_stages
fan = in_dim * resolution * resolution
self.fc6_list = []
self.fc7_list = []
for stage in range(num_stages):
fc6_name = 'fc6_{}'.format(stage)
fc7_name = 'fc7_{}'.format(stage)
fc6 = self.add_sublayer(
fc6_name,
Linear(
in_dim * resolution * resolution,
mlp_dim,
act='relu',
param_attr=ParamAttr(
#name='fc6_w',
initializer=Xavier(fan_out=fan)),
bias_attr=ParamAttr(
#name='fc6_b',
learning_rate=2.,
regularizer=L2Decay(0.))))
fc7 = self.add_sublayer(
fc7_name,
Linear(
mlp_dim,
mlp_dim,
act='relu',
param_attr=ParamAttr(
#name='fc7_w',
initializer=Xavier()),
bias_attr=ParamAttr(
#name='fc7_b',
learning_rate=2.,
regularizer=L2Decay(0.))))
self.fc6_list.append(fc6)
self.fc7_list.append(fc7)
def forward(self, rois_feat, stage=0):
rois_feat = fluid.layers.flatten(rois_feat)
fc6 = self.fc6_list[stage](rois_feat)
fc7 = self.fc7_list[stage](fc6)
return fc7
@register
class BBoxFeat(Layer):
__inject__ = ['roi_extractor', 'head_feat']
def __init__(self, roi_extractor, head_feat):
super(BBoxFeat, self).__init__() super(BBoxFeat, self).__init__()
self.roi_extractor = roi_extractor self.roi_extractor = roi_extractor
self.num_stages = num_stages self.head_feat = head_feat
self.res5s = []
for i in range(self.num_stages): def forward(self, body_feats, rois, spatial_scale, stage=0):
if i == 0: rois_feat = self.roi_extractor(body_feats, rois, spatial_scale)
postfix = '' bbox_feat = self.head_feat(rois_feat, stage)
else: return bbox_feat
postfix = '_' + str(i)
# TODO: set norm type
res5 = Blocks(
"res5" + postfix,
ch_in=feat_in,
ch_out=feat_out,
count=3,
stride=2)
self.res5s.append(res5)
self.res5_pool = fluid.dygraph.Pool2D(
pool_type='avg', global_pooling=True)
def forward(self, inputs):
if inputs['mode'] == 'train':
in_rois = inputs['proposal_' + str(inputs['stage'])]
rois = in_rois['rois']
rois_num = in_rois['rois_nums']
elif inputs['mode'] == 'infer':
rois = inputs['rpn_rois']
rois_num = inputs['rpn_rois_nums']
else:
raise "BBoxFeat only support train or infer mode!"
rois_feat = self.roi_extractor(inputs['res4'], rois, rois_num)
# TODO: add others
y_res5 = self.res5s[inputs['stage']](rois_feat)
y = self.res5_pool(y_res5)
y = fluid.layers.squeeze(y, axes=[2, 3])
outs = {
'rois_feat': rois_feat,
'res5': y_res5,
"bbox_feat": y,
'shared_res5_block': self.res5s[inputs['stage']],
'shared_roi_extractor': self.roi_extractor
}
return outs
@register @register
class BBoxHead(Layer): class BBoxHead(Layer):
__inject__ = ['bbox_feat']
__shared__ = ['num_classes', 'num_stages'] __shared__ = ['num_classes', 'num_stages']
__inject__ = ['bbox_feat']
def __init__(self, def __init__(self,
bbox_feat, bbox_feat,
feat_in=2048, in_feat=1024,
num_classes=81, num_classes=81,
cls_agnostic_bbox_reg=81, cls_agnostic=False,
num_stages=1): num_stages=1,
with_pool=False):
super(BBoxHead, self).__init__() super(BBoxHead, self).__init__()
self.bbox_feat = bbox_feat
self.num_classes = num_classes self.num_classes = num_classes
self.cls_agnostic_bbox_reg = cls_agnostic_bbox_reg self.delta_dim = 2 if cls_agnostic else num_classes
self.bbox_feat = bbox_feat
self.num_stages = num_stages self.num_stages = num_stages
self.bbox_score_list = []
self.bbox_scores = [] self.bbox_delta_list = []
self.bbox_deltas = [] self.with_pool = with_pool
for i in range(self.num_stages): for stage in range(num_stages):
if i == 0: score_name = 'bbox_score_{}'.format(stage)
postfix = '' delta_name = 'bbox_delta_{}'.format(stage)
else: bbox_score = self.add_sublayer(
postfix = '_' + str(i) score_name,
bbox_score = fluid.dygraph.Linear( fluid.dygraph.Linear(
input_dim=feat_in, input_dim=in_feat,
output_dim=1 * self.num_classes, output_dim=1 * self.num_classes,
act=None, act=None,
param_attr=ParamAttr( param_attr=ParamAttr(
name='cls_score_w' + postfix, #name='cls_score_w',
initializer=Normal( initializer=Normal(
loc=0.0, scale=0.001)), loc=0.0, scale=0.01)),
bias_attr=ParamAttr( bias_attr=ParamAttr(
name='cls_score_b' + postfix, #name='cls_score_b',
learning_rate=2., learning_rate=2.,
regularizer=L2Decay(0.))) regularizer=L2Decay(0.))))
bbox_delta = fluid.dygraph.Linear( bbox_delta = self.add_sublayer(
input_dim=feat_in, delta_name,
output_dim=4 * self.cls_agnostic_bbox_reg, fluid.dygraph.Linear(
act=None, input_dim=in_feat,
param_attr=ParamAttr( output_dim=4 * self.delta_dim,
name='bbox_pred_w' + postfix, act=None,
initializer=Normal( param_attr=ParamAttr(
loc=0.0, scale=0.01)), #name='bbox_pred_w',
bias_attr=ParamAttr( initializer=Normal(
name='bbox_pred_b' + postfix, loc=0.0, scale=0.001)),
learning_rate=2., bias_attr=ParamAttr(
regularizer=L2Decay(0.))) #name='bbox_pred_b',
self.bbox_scores.append(bbox_score) learning_rate=2.,
self.bbox_deltas.append(bbox_delta) regularizer=L2Decay(0.))))
self.bbox_score_list.append(bbox_score)
def forward(self, inputs): self.bbox_delta_list.append(bbox_delta)
outs = self.bbox_feat(inputs)
x = outs['bbox_feat'] def forward(self, body_feats, rois, spatial_scale, stage=0):
bs = self.bbox_scores[inputs['stage']](x) bbox_feat = self.bbox_feat(body_feats, rois, spatial_scale, stage)
bd = self.bbox_deltas[inputs['stage']](x) if self.with_pool:
outs.update({'bbox_score': bs, 'bbox_delta': bd}) bbox_feat = fluid.layers.pool2d(
if inputs['stage'] == 0: bbox_feat, pool_type='avg', global_pooling=True)
outs.update({"cls_agnostic_bbox_reg": self.cls_agnostic_bbox_reg}) bbox_head_out = []
if inputs['mode'] == 'infer': scores = self.bbox_score_list[stage](bbox_feat)
bbox_prob = fluid.layers.softmax(bs, use_cudnn=False) deltas = self.bbox_delta_list[stage](bbox_feat)
outs['bbox_prob'] = bbox_prob bbox_head_out.append((scores, deltas))
return outs return bbox_feat, bbox_head_out
def loss(self, inputs): def _get_head_loss(self, score, delta, target):
bbox_out = inputs['bbox_head_' + str(inputs['stage'])]
bbox_target = inputs['proposal_' + str(inputs['stage'])]
# bbox cls # bbox cls
labels_int64 = fluid.layers.cast( labels_int64 = fluid.layers.cast(
x=bbox_target['labels_int32'], dtype='int64') x=target['labels_int32'], dtype='int64')
labels_int64.stop_gradient = True labels_int64.stop_gradient = True
bbox_score = fluid.layers.reshape(bbox_out['bbox_score'],
(-1, self.num_classes))
loss_bbox_cls = fluid.layers.softmax_with_cross_entropy( loss_bbox_cls = fluid.layers.softmax_with_cross_entropy(
logits=bbox_score, label=labels_int64) logits=score, label=labels_int64)
loss_bbox_cls = fluid.layers.reduce_mean( loss_bbox_cls = fluid.layers.reduce_mean(loss_bbox_cls)
loss_bbox_cls, name='loss_bbox_cls_' + str(inputs['stage']))
# bbox reg # bbox reg
loss_bbox_reg = fluid.layers.smooth_l1( loss_bbox_reg = fluid.layers.smooth_l1(
x=bbox_out['bbox_delta'], x=delta,
y=bbox_target['bbox_targets'], y=target['bbox_targets'],
inside_weight=bbox_target['bbox_inside_weights'], inside_weight=target['bbox_inside_weights'],
outside_weight=bbox_target['bbox_outside_weights'], outside_weight=target['bbox_outside_weights'],
sigma=1.0) sigma=1.0)
loss_bbox_reg = fluid.layers.reduce_mean( loss_bbox_reg = fluid.layers.reduce_mean(loss_bbox_reg)
loss_bbox_reg, name='loss_bbox_loc_' + str(inputs['stage']))
return loss_bbox_cls, loss_bbox_reg return loss_bbox_cls, loss_bbox_reg
def loss(self, bbox_head_out, targets):
loss_bbox = {}
for lvl, (bboxhead, target) in enumerate(zip(bbox_head_out, targets)):
score, delta = bboxhead
cls_name = 'loss_bbox_cls_{}'.format(lvl)
reg_name = 'loss_bbox_reg_{}'.format(lvl)
loss_bbox_cls, loss_bbox_reg = self._get_head_loss(score, delta,
target)
loss_bbox[cls_name] = loss_bbox_cls
loss_bbox[reg_name] = loss_bbox_reg
return loss_bbox
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.dygraph import Layer from paddle.fluid.dygraph import Layer, Sequential
from paddle.fluid.param_attr import ParamAttr from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Normal, MSRA from paddle.fluid.initializer import MSRA
from paddle.fluid.regularizer import L2Decay from paddle.fluid.regularizer import L2Decay
from paddle.fluid.dygraph.nn import Conv2D, Pool2D from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Conv2DTranspose
from ppdet.core.workspace import register from ppdet.core.workspace import register
# TODO: del it and use inject
from ..backbone.resnet import Blocks
@register @register
class MaskFeat(Layer): class MaskFeat(Layer):
def __init__(self, feat_in=2048, feat_out=256, mask_stages=1): __inject__ = ['mask_roi_extractor']
def __init__(self,
mask_roi_extractor,
num_convs=1,
feat_in=2048,
feat_out=256,
mask_num_stages=1,
share_bbox_feat=False):
super(MaskFeat, self).__init__() super(MaskFeat, self).__init__()
self.num_convs = num_convs
self.feat_in = feat_in self.feat_in = feat_in
self.feat_out = feat_out self.feat_out = feat_out
self.mask_stages = mask_stages self.mask_roi_extractor = mask_roi_extractor
self.mask_num_stages = mask_num_stages
for i in range(self.mask_stages): self.share_bbox_feat = share_bbox_feat
if i == 0: self.upsample_module = []
postfix = '' fan_conv = feat_out * 3 * 3
else: fan_deconv = feat_out * 2 * 2
postfix = '_' + str(i) for i in range(self.mask_num_stages):
self.upsample = fluid.dygraph.Conv2DTranspose( name = 'stage_{}'.format(i)
num_channels=self.feat_in, mask_conv = Sequential()
num_filters=self.feat_out, for j in range(self.num_convs):
filter_size=2, conv_name = 'mask_inter_feat_{}'.format(j + 1)
stride=2, mask_conv.add_sublayer(
act='relu', conv_name,
param_attr=ParamAttr( Conv2D(
name='conv5_mask_w' + postfix, num_channels=feat_in if j == 1 else feat_out,
initializer=MSRA(uniform=False)), num_filters=feat_out,
bias_attr=ParamAttr( filter_size=3,
name='conv5_mask_b' + postfix, act='relu',
learning_rate=2., padding=1,
regularizer=L2Decay(0.))) param_attr=ParamAttr(
#name=conv_name+'_w',
def forward(self, inputs): initializer=MSRA(
bbox_head_out = inputs['bbox_head_' + str(inputs['stage'])] uniform=False, fan_in=fan_conv)),
if inputs['mode'] == 'train': bias_attr=ParamAttr(
x = bbox_head_out['res5'] #name=conv_name+'_b',
rois_feat = fluid.layers.gather(x, inputs['rois_has_mask_int32']) learning_rate=2.,
elif inputs['mode'] == 'infer': regularizer=L2Decay(0.))))
rois = inputs['predicted_bbox'][:, 2:] * inputs['im_info'][:, 2] mask_conv.add_sublayer(
rois_num = inputs['predicted_bbox_nums'] 'conv5_mask',
# TODO: optim here Conv2DTranspose(
shared_roi_ext = bbox_head_out['shared_roi_extractor'] num_channels=self.feat_in,
if callable(shared_roi_ext): num_filters=self.feat_out,
rois_feat = shared_roi_ext(inputs['res4'], rois, rois_num) filter_size=2,
stride=2,
shared_res5 = bbox_head_out['shared_res5_block'] act='relu',
if callable(shared_res5): param_attr=ParamAttr(
rois_feat = shared_res5(rois_feat) #name='conv5_mask_w',
initializer=MSRA(
uniform=False, fan_in=fan_deconv)),
bias_attr=ParamAttr(
#name='conv5_mask_b',
learning_rate=2.,
regularizer=L2Decay(0.))))
upsample = self.add_sublayer(name, mask_conv)
self.upsample_module.append(upsample)
def forward(self,
body_feats,
bboxes,
bbox_feat,
mask_index,
spatial_scale,
stage=0):
if self.share_bbox_feat:
rois_feat = fluid.layers.gather(bbox_feat, mask_index)
else:
rois_feat = self.mask_roi_extractor(body_feats, bboxes,
spatial_scale)
# upsample # upsample
y = self.upsample(rois_feat) mask_feat = self.upsample_module[stage](rois_feat)
outs = {'mask_feat': y} return mask_feat
return outs
@register @register
class MaskHead(Layer): class MaskHead(Layer):
__shared__ = ['num_classes'] __shared__ = ['num_classes', 'mask_num_stages']
__inject__ = ['mask_feat'] __inject__ = ['mask_feat']
def __init__(self, def __init__(self,
mask_feat, mask_feat,
num_classes=81,
feat_in=256, feat_in=256,
resolution=14, num_classes=81,
mask_stages=1): mask_num_stages=1):
super(MaskHead, self).__init__() super(MaskHead, self).__init__()
self.mask_feat = mask_feat self.mask_feat = mask_feat
self.feat_in = feat_in self.feat_in = feat_in
self.resolution = resolution
self.num_classes = num_classes self.num_classes = num_classes
self.mask_stages = mask_stages self.mask_num_stages = mask_num_stages
self.mask_fcn_logits = []
for i in range(self.mask_stages): for i in range(self.mask_num_stages):
if i == 0: name = 'mask_fcn_logits_{}'.format(i)
postfix = '' self.mask_fcn_logits.append(
else: self.add_sublayer(
postfix = '_' + str(i) name,
self.mask_fcn_logits = fluid.dygraph.Conv2D( fluid.dygraph.Conv2D(
num_channels=self.feat_in, num_channels=self.feat_in,
num_filters=self.num_classes, num_filters=self.num_classes,
filter_size=1, filter_size=1,
param_attr=ParamAttr( param_attr=ParamAttr(
name='mask_fcn_logits_w' + postfix, #name='mask_fcn_logits_w',
initializer=MSRA(uniform=False)), initializer=MSRA(
bias_attr=ParamAttr( uniform=False, fan_in=self.num_classes)),
name='mask_fcn_logits_b' + postfix, bias_attr=ParamAttr(
learning_rate=2., #name='mask_fcn_logits_b',
regularizer=L2Decay(0.0))) learning_rate=2.,
regularizer=L2Decay(0.0)))))
def forward(self, inputs):
# feat def forward_train(self,
outs = self.mask_feat(inputs) body_feats,
x = outs['mask_feat'] bboxes,
# logits bbox_feat,
mask_logits = self.mask_fcn_logits(x) mask_index,
if inputs['mode'] == 'infer': spatial_scale,
pred_bbox = inputs['predicted_bbox'] stage=0):
shape = reduce((lambda x, y: x * y), pred_bbox.shape) # feat
shape = np.asarray(shape).reshape((1, 1)) mask_feat = self.mask_feat(body_feats, bboxes, bbox_feat, mask_index,
ones = np.ones((1, 1), dtype=np.int32) spatial_scale, stage)
cond = (shape == ones).all() # logits
if cond: mask_head_out = self.mask_fcn_logits[stage](mask_feat)
mask_logits = pred_bbox return mask_head_out
outs['mask_logits'] = mask_logits def forward_test(self,
im_info,
return outs body_feats,
bboxes,
def loss(self, inputs): bbox_feat,
reshape_dim = self.num_classes * self.resolution * self.resolution mask_index,
mask_logits = fluid.layers.reshape(inputs['mask_logits'], spatial_scale,
(-1, reshape_dim)) stage=0):
mask_label = fluid.layers.cast(x=inputs['mask_int32'], dtype='float32') bbox, bbox_num = bboxes
if bbox.shape[0] == 0:
mask_head_out = bbox
else:
im_info_expand = []
for idx, num in enumerate(bbox_num):
for n in range(num):
im_info_expand.append(im_info[idx, -1])
im_info_expand = fluid.layers.concat(im_info_expand)
scaled_bbox = fluid.layers.elementwise_mul(
bbox[:, 2:], im_info_expand, axis=0)
scaled_bboxes = (scaled_bbox, bbox_num)
mask_feat = self.mask_feat(body_feats, scaled_bboxes, bbox_feat,
mask_index, spatial_scale, stage)
mask_logit = self.mask_fcn_logits[stage](mask_feat)
mask_head_out = fluid.layers.sigmoid(mask_logit)
return mask_head_out
def forward(self,
inputs,
body_feats,
bboxes,
bbox_feat,
mask_index,
spatial_scale,
stage=0):
if inputs['mode'] == 'train':
mask_head_out = self.forward_train(body_feats, bboxes, bbox_feat,
mask_index, spatial_scale, stage)
else:
im_info = inputs['im_info']
mask_head_out = self.forward_test(im_info, body_feats, bboxes,
bbox_feat, mask_index,
spatial_scale, stage)
return mask_head_out
def loss(self, mask_head_out, mask_target):
mask_logits = fluid.layers.flatten(mask_head_out)
mask_label = fluid.layers.cast(x=mask_target, dtype='float32')
mask_label.stop_gradient = True
loss_mask = fluid.layers.sigmoid_cross_entropy_with_logits( loss_mask = fluid.layers.sigmoid_cross_entropy_with_logits(
x=mask_logits, label=mask_label, ignore_index=-1, normalize=True) x=mask_logits, label=mask_label, ignore_index=-1, normalize=True)
loss_mask = fluid.layers.reduce_sum(loss_mask, name='loss_mask') loss_mask = fluid.layers.reduce_sum(loss_mask)
return loss_mask return {'loss_mask': loss_mask}
...@@ -11,95 +11,105 @@ from ppdet.core.workspace import register ...@@ -11,95 +11,105 @@ from ppdet.core.workspace import register
class RPNFeat(Layer): class RPNFeat(Layer):
def __init__(self, feat_in=1024, feat_out=1024): def __init__(self, feat_in=1024, feat_out=1024):
super(RPNFeat, self).__init__() super(RPNFeat, self).__init__()
# rpn feat is shared with each level
self.rpn_conv = Conv2D( self.rpn_conv = Conv2D(
num_channels=1024, num_channels=feat_in,
num_filters=1024, num_filters=feat_out,
filter_size=3, filter_size=3,
stride=1,
padding=1, padding=1,
act='relu', act='relu',
param_attr=ParamAttr( param_attr=ParamAttr(
name="conv_rpn_w", initializer=Normal( #name="conv_rpn_fpn2_w",
initializer=Normal(
loc=0., scale=0.01)), loc=0., scale=0.01)),
bias_attr=ParamAttr( bias_attr=ParamAttr(
name="conv_rpn_b", learning_rate=2., regularizer=L2Decay(0.))) #name="conv_rpn_fpn2_b",
learning_rate=2.,
regularizer=L2Decay(0.)))
def forward(self, inputs): def forward(self, inputs, feats):
x = inputs.get('res4') rpn_feats = []
y = self.rpn_conv(x) for feat in feats:
outs = {'rpn_feat': y} rpn_feats.append(self.rpn_conv(feat))
return outs return rpn_feats
@register @register
class RPNHead(Layer): class RPNHead(Layer):
__inject__ = ['rpn_feat'] __inject__ = ['rpn_feat']
def __init__(self, rpn_feat, anchor_per_position=15): def __init__(self, rpn_feat, anchor_per_position=15, rpn_channel=1024):
super(RPNHead, self).__init__() super(RPNHead, self).__init__()
self.rpn_feat = rpn_feat self.rpn_feat = rpn_feat
self.anchor_per_position = anchor_per_position if isinstance(rpn_feat, dict):
self.rpn_feat = RPNFeat(**rpn_feat)
# rpn head is shared with each level
# rpn roi classification scores # rpn roi classification scores
self.rpn_rois_score = Conv2D( self.rpn_rois_score = Conv2D(
num_channels=1024, num_channels=rpn_channel,
num_filters=1 * self.anchor_per_position, num_filters=anchor_per_position,
filter_size=1, filter_size=1,
stride=1,
padding=0, padding=0,
act=None, act=None,
param_attr=ParamAttr( param_attr=ParamAttr(
name="rpn_cls_logits_w", initializer=Normal( #name="rpn_cls_logits_fpn2_w",
initializer=Normal(
loc=0., scale=0.01)), loc=0., scale=0.01)),
bias_attr=ParamAttr( bias_attr=ParamAttr(
name="rpn_cls_logits_b", #name="rpn_cls_logits_fpn2_b",
learning_rate=2., learning_rate=2.,
regularizer=L2Decay(0.))) regularizer=L2Decay(0.)))
# rpn roi bbox regression deltas # rpn roi bbox regression deltas
self.rpn_rois_delta = Conv2D( self.rpn_rois_delta = Conv2D(
num_channels=1024, num_channels=rpn_channel,
num_filters=4 * self.anchor_per_position, num_filters=4 * anchor_per_position,
filter_size=1, filter_size=1,
stride=1,
padding=0, padding=0,
act=None, act=None,
param_attr=ParamAttr( param_attr=ParamAttr(
name="rpn_bbox_pred_w", initializer=Normal( #name="rpn_bbox_pred_fpn2_w",
initializer=Normal(
loc=0., scale=0.01)), loc=0., scale=0.01)),
bias_attr=ParamAttr( bias_attr=ParamAttr(
name="rpn_bbox_pred_b", #name="rpn_bbox_pred_fpn2_b",
learning_rate=2., learning_rate=2.,
regularizer=L2Decay(0.))) regularizer=L2Decay(0.)))
def forward(self, inputs): def forward(self, inputs, feats):
outs = self.rpn_feat(inputs) rpn_feats = self.rpn_feat(inputs, feats)
x = outs['rpn_feat'] rpn_head_out = []
rrs = self.rpn_rois_score(x) for rpn_feat in rpn_feats:
rrd = self.rpn_rois_delta(x) rrs = self.rpn_rois_score(rpn_feat)
outs.update({'rpn_rois_score': rrs, 'rpn_rois_delta': rrd}) rrd = self.rpn_rois_delta(rpn_feat)
return outs rpn_head_out.append((rrs, rrd))
return rpn_feats, rpn_head_out
def loss(self, inputs): def loss(self, loss_inputs):
if callable(inputs['anchor_module']):
rpn_targets = inputs['anchor_module'].generate_anchors_target(
inputs)
# cls loss # cls loss
score_tgt = fluid.layers.cast( score_tgt = fluid.layers.cast(
x=rpn_targets['rpn_score_target'], dtype='float32') x=loss_inputs['rpn_score_target'], dtype='float32')
rpn_cls_loss = fluid.layers.sigmoid_cross_entropy_with_logits( score_tgt.stop_gradient = True
x=rpn_targets['rpn_score_pred'], label=score_tgt) loss_rpn_cls = fluid.layers.sigmoid_cross_entropy_with_logits(
rpn_cls_loss = fluid.layers.reduce_mean( x=loss_inputs['rpn_score_pred'], label=score_tgt)
rpn_cls_loss, name='loss_rpn_cls') loss_rpn_cls = fluid.layers.reduce_mean(
loss_rpn_cls, name='loss_rpn_cls')
# reg loss # reg loss
rpn_reg_loss = fluid.layers.smooth_l1( loc_tgt = fluid.layers.cast(
x=rpn_targets['rpn_rois_pred'], x=loss_inputs['rpn_rois_target'], dtype='float32')
y=rpn_targets['rpn_rois_target'], loc_tgt.stop_gradient = True
loss_rpn_reg = fluid.layers.smooth_l1(
x=loss_inputs['rpn_rois_pred'],
y=loc_tgt,
sigma=3.0, sigma=3.0,
inside_weight=rpn_targets['rpn_rois_weight'], inside_weight=loss_inputs['rpn_rois_weight'],
outside_weight=rpn_targets['rpn_rois_weight']) outside_weight=loss_inputs['rpn_rois_weight'])
rpn_reg_loss = fluid.layers.reduce_mean( loss_rpn_reg = fluid.layers.reduce_sum(loss_rpn_reg)
rpn_reg_loss, name='loss_rpn_reg') score_shape = fluid.layers.shape(score_tgt)
score_shape = fluid.layers.cast(x=score_shape, dtype='float32')
norm = fluid.layers.reduce_prod(score_shape)
norm.stop_gradient = True
loss_rpn_reg = loss_rpn_reg / norm
return rpn_cls_loss, rpn_reg_loss return {'loss_rpn_cls': loss_rpn_cls, 'loss_rpn_reg': loss_rpn_reg}
...@@ -8,20 +8,22 @@ from ppdet.py_op.post_process import mask_post_process ...@@ -8,20 +8,22 @@ from ppdet.py_op.post_process import mask_post_process
@register @register
class MaskPostProcess(object): class MaskPostProcess(object):
__shared__ = ['num_classes'] __shared__ = ['mask_resolution']
def __init__(self, num_classes=81): def __init__(self, mask_resolution=28, binary_thresh=0.5):
super(MaskPostProcess, self).__init__() super(MaskPostProcess, self).__init__()
self.num_classes = num_classes self.mask_resolution = mask_resolution
self.binary_thresh = binary_thresh
def __call__(self, inputs): def __call__(self, bboxes, mask_head_out, im_info):
# TODO: modify related ops for deploying # TODO: modify related ops for deploying
outs = mask_post_process(inputs['predicted_bbox_nums'].numpy(), bboxes_np = (i.numpy() for i in bboxes)
inputs['predicted_bbox'].numpy(), mask = mask_post_process(bboxes_np,
inputs['mask_logits'].numpy(), mask_head_out.numpy(),
inputs['im_info'].numpy()) im_info.numpy(), self.mask_resolution,
outs = {'predicted_mask': outs} self.binary_thresh)
return outs mask = {'mask': mask}
return mask
@register @register
...@@ -33,29 +35,28 @@ class Mask(object): ...@@ -33,29 +35,28 @@ class Mask(object):
self.mask_target_generator = mask_target_generator self.mask_target_generator = mask_target_generator
self.mask_post_process = mask_post_process self.mask_post_process = mask_post_process
def __call__(self, inputs): def __call__(self, inputs, rois, targets):
outs = {} mask_rois, rois_has_mask_int32 = self.generate_mask_target(inputs, rois,
if inputs['mode'] == 'train': targets)
outs = self.generate_mask_target(inputs) return mask_rois, rois_has_mask_int32
return outs
def generate_mask_target(self, inputs): def generate_mask_target(self, inputs, rois, targets):
proposal_out = inputs['proposal_' + str(inputs['stage'])] labels_int32 = targets['labels_int32']
outs = self.mask_target_generator( proposals, proposals_num = rois
mask_rois, mask_rois_num, self.rois_has_mask_int32, self.mask_int32 = self.mask_target_generator(
im_info=inputs['im_info'], im_info=inputs['im_info'],
gt_classes=inputs['gt_class'], gt_classes=inputs['gt_class'],
is_crowd=inputs['is_crowd'], is_crowd=inputs['is_crowd'],
gt_segms=inputs['gt_mask'], gt_segms=inputs['gt_mask'],
rois=proposal_out['rois'], rois=proposals,
rois_nums=proposal_out['rois_nums'], rois_num=proposals_num,
labels_int32=proposal_out['labels_int32']) labels_int32=labels_int32)
outs = { self.mask_rois = (mask_rois, mask_rois_num)
'mask_rois': outs[0], return self.mask_rois, self.rois_has_mask_int32
'rois_has_mask_int32': outs[1],
'mask_int32': outs[2] def get_targets(self):
} return self.mask_int32
return outs
def post_process(self, bboxes, mask_head_out, im_info):
def post_process(self, inputs): mask = self.mask_post_process(bboxes, mask_head_out, im_info)
outs = self.mask_post_process(inputs) return mask
return outs
from . import fpn
from .fpn import *
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Layer
from paddle.fluid.dygraph import Conv2D, Pool2D, BatchNorm
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Xavier
from paddle.fluid.regularizer import L2Decay
from ppdet.core.workspace import register, serializable
@register
@serializable
class FPN(Layer):
    """Feature Pyramid Network (FPN).

    Builds a top-down feature pyramid from backbone features: each selected
    input level is projected to ``out_channel`` channels with a 1x1 lateral
    conv, the next-coarser merged map is upsampled (nearest, 2x) and added,
    and a 3x3 conv smooths each merged map. A final stride-2 max-pool
    extends the pyramid by one extra (coarser) level.

    Args:
        in_channels (list[int]): channel count of each backbone feature map,
            indexed by absolute level (finest first).
        out_channel (int): channel count of every output feature map.
        min_level (int): first backbone level to use (inclusive).
        max_level (int): last backbone level to use (exclusive).
        spatial_scale (list[float]|None): 1/stride of each used level; the
            extra pooled level gets half of the last scale appended at
            forward time. Defaults to [0.25, 0.125, 0.0625, 0.03125].
    """

    def __init__(self,
                 in_channels,
                 out_channel,
                 min_level=0,
                 max_level=4,
                 spatial_scale=None):
        super(FPN, self).__init__()
        # Sentinel instead of a mutable default argument; same default value.
        if spatial_scale is None:
            spatial_scale = [0.25, 0.125, 0.0625, 0.03125]
        self.lateral_convs = []
        self.fpn_convs = []
        fan = out_channel * 3 * 3
        for i in range(min_level, max_level):
            # Layer names follow the original static-graph naming so that
            # pretrained static weights can still be matched by name.
            if i == 3:
                lateral_name = 'fpn_inner_res5_sum'
            else:
                lateral_name = 'fpn_inner_res{}_sum_lateral'.format(i + 2)
            in_c = in_channels[i]
            lateral = self.add_sublayer(
                lateral_name,
                Conv2D(
                    num_channels=in_c,
                    num_filters=out_channel,
                    filter_size=1,
                    param_attr=ParamAttr(initializer=Xavier(fan_out=in_c)),
                    bias_attr=ParamAttr(
                        learning_rate=2., regularizer=L2Decay(0.))))
            self.lateral_convs.append(lateral)
            fpn_name = 'fpn_res{}_sum'.format(i + 2)
            fpn_conv = self.add_sublayer(
                fpn_name,
                Conv2D(
                    num_channels=out_channel,
                    num_filters=out_channel,
                    filter_size=3,
                    padding=1,
                    param_attr=ParamAttr(initializer=Xavier(fan_out=fan)),
                    bias_attr=ParamAttr(
                        learning_rate=2., regularizer=L2Decay(0.))))
            self.fpn_convs.append(fpn_conv)
        self.min_level = min_level
        self.max_level = max_level
        self.spatial_scale = spatial_scale

    def forward(self, body_feats):
        """Run the pyramid.

        Args:
            body_feats (list): backbone feature maps indexed by absolute
                level; only levels [min_level, max_level) are consumed.

        Returns:
            tuple: (list of output feature maps, finest first, including the
            extra pooled level; list of matching spatial scales).
        """
        num_levels = self.max_level - self.min_level
        # Index the conv lists by *offset*, not absolute level: they were
        # appended starting at 0, so absolute-level indexing raises
        # IndexError whenever min_level != 0 (bug in the original).
        laterals = [
            self.lateral_convs[i](body_feats[self.min_level + i])
            for i in range(num_levels)
        ]
        # Top-down pathway: upsample the coarser merged map and add it in.
        for i in range(num_levels - 1, 0, -1):
            upsample = fluid.layers.resize_nearest(laterals[i], scale=2.)
            laterals[i - 1] = laterals[i - 1] + upsample
        fpn_output = [
            self.fpn_convs[i](laterals[i]) for i in range(num_levels)
        ]
        # Extra coarse level via stride-2 max pooling over the last output.
        extension = fluid.layers.pool2d(fpn_output[-1], 1, 'max', pool_stride=2)
        spatial_scale = self.spatial_scale + [self.spatial_scale[-1] * 0.5]
        fpn_output.append(extension)
        return fpn_output, spatial_scale
...@@ -14,21 +14,29 @@ class AnchorGeneratorRPN(object): ...@@ -14,21 +14,29 @@ class AnchorGeneratorRPN(object):
anchor_sizes=[32, 64, 128, 256, 512], anchor_sizes=[32, 64, 128, 256, 512],
aspect_ratios=[0.5, 1.0, 2.0], aspect_ratios=[0.5, 1.0, 2.0],
stride=[16.0, 16.0], stride=[16.0, 16.0],
variance=[1.0, 1.0, 1.0, 1.0]): variance=[1.0, 1.0, 1.0, 1.0],
anchor_start_size=None):
super(AnchorGeneratorRPN, self).__init__() super(AnchorGeneratorRPN, self).__init__()
self.anchor_sizes = anchor_sizes self.anchor_sizes = anchor_sizes
self.aspect_ratios = aspect_ratios self.aspect_ratios = aspect_ratios
self.stride = stride self.stride = stride
self.variance = variance self.variance = variance
self.anchor_start_size = anchor_start_size
def __call__(self, inputs):
outs = fluid.layers.anchor_generator( def __call__(self, input, level=None):
input=inputs, anchor_sizes = self.anchor_sizes if (
anchor_sizes=self.anchor_sizes, level is None or self.anchor_start_size is None) else (
self.anchor_start_size * 2**level)
stride = self.stride if (
level is None or self.anchor_start_size is None) else (
self.stride[0] * (2.**level), self.stride[1] * (2.**level))
anchor, var = fluid.layers.anchor_generator(
input=input,
anchor_sizes=anchor_sizes,
aspect_ratios=self.aspect_ratios, aspect_ratios=self.aspect_ratios,
stride=self.stride, stride=stride,
variance=self.variance) variance=self.variance)
return outs return anchor, var
@register @register
...@@ -49,20 +57,12 @@ class AnchorTargetGeneratorRPN(object): ...@@ -49,20 +57,12 @@ class AnchorTargetGeneratorRPN(object):
self.negative_overlap = negative_overlap self.negative_overlap = negative_overlap
self.use_random = use_random self.use_random = use_random
def __call__(self, def __call__(self, cls_logits, bbox_pred, anchor_box, gt_boxes, is_crowd,
cls_logits, im_info):
bbox_pred,
anchor_box,
gt_boxes,
is_crowd,
im_info,
open_debug=False):
anchor_box = anchor_box.numpy() anchor_box = anchor_box.numpy()
gt_boxes = gt_boxes.numpy() gt_boxes = gt_boxes.numpy()
is_crowd = is_crowd.numpy() is_crowd = is_crowd.numpy()
im_info = im_info.numpy() im_info = im_info.numpy()
if open_debug:
self.use_random = False
loc_indexes, score_indexes, tgt_labels, tgt_bboxes, bbox_inside_weights = generate_rpn_anchor_target( loc_indexes, score_indexes, tgt_labels, tgt_bboxes, bbox_inside_weights = generate_rpn_anchor_target(
anchor_box, gt_boxes, is_crowd, im_info, self.straddle_thresh, anchor_box, gt_boxes, is_crowd, im_info, self.straddle_thresh,
self.batch_size_per_im, self.positive_overlap, self.batch_size_per_im, self.positive_overlap,
...@@ -149,8 +149,7 @@ class ProposalGenerator(object): ...@@ -149,8 +149,7 @@ class ProposalGenerator(object):
infer_post_nms_top_n=1000, infer_post_nms_top_n=1000,
nms_thresh=.5, nms_thresh=.5,
min_size=.1, min_size=.1,
eta=1., eta=1.):
return_rois_num=True):
super(ProposalGenerator, self).__init__() super(ProposalGenerator, self).__init__()
self.train_pre_nms_top_n = train_pre_nms_top_n self.train_pre_nms_top_n = train_pre_nms_top_n
self.train_post_nms_top_n = train_post_nms_top_n self.train_post_nms_top_n = train_post_nms_top_n
...@@ -159,7 +158,6 @@ class ProposalGenerator(object): ...@@ -159,7 +158,6 @@ class ProposalGenerator(object):
self.nms_thresh = nms_thresh self.nms_thresh = nms_thresh
self.min_size = min_size self.min_size = min_size
self.eta = eta self.eta = eta
self.return_rois_num = return_rois_num
def __call__(self, def __call__(self,
scores, scores,
...@@ -170,7 +168,7 @@ class ProposalGenerator(object): ...@@ -170,7 +168,7 @@ class ProposalGenerator(object):
mode='train'): mode='train'):
pre_nms_top_n = self.train_pre_nms_top_n if mode == 'train' else self.infer_pre_nms_top_n pre_nms_top_n = self.train_pre_nms_top_n if mode == 'train' else self.infer_pre_nms_top_n
post_nms_top_n = self.train_post_nms_top_n if mode == 'train' else self.infer_post_nms_top_n post_nms_top_n = self.train_post_nms_top_n if mode == 'train' else self.infer_post_nms_top_n
outs = fluid.layers.generate_proposals( rpn_rois, rpn_rois_prob, rpn_rois_num = fluid.layers.generate_proposals(
scores, scores,
bbox_deltas, bbox_deltas,
im_info, im_info,
...@@ -181,8 +179,8 @@ class ProposalGenerator(object): ...@@ -181,8 +179,8 @@ class ProposalGenerator(object):
nms_thresh=self.nms_thresh, nms_thresh=self.nms_thresh,
min_size=self.min_size, min_size=self.min_size,
eta=self.eta, eta=self.eta,
return_rois_num=self.return_rois_num) return_rois_num=True)
return outs return rpn_rois, rpn_rois_prob, rpn_rois_num, post_nms_top_n
@register @register
...@@ -210,34 +208,29 @@ class ProposalTargetGenerator(object): ...@@ -210,34 +208,29 @@ class ProposalTargetGenerator(object):
self.bbox_reg_weights = bbox_reg_weights self.bbox_reg_weights = bbox_reg_weights
self.num_classes = num_classes self.num_classes = num_classes
self.use_random = use_random self.use_random = use_random
self.is_cls_agnostic = is_cls_agnostic, self.is_cls_agnostic = is_cls_agnostic
self.is_cascade_rcnn = is_cascade_rcnn self.is_cascade_rcnn = is_cascade_rcnn
def __call__(self, def __call__(self,
rpn_rois, rpn_rois,
rpn_rois_nums, rpn_rois_num,
gt_classes, gt_classes,
is_crowd, is_crowd,
gt_boxes, gt_boxes,
im_info, im_info,
stage=0, stage=0):
open_debug=False):
rpn_rois = rpn_rois.numpy() rpn_rois = rpn_rois.numpy()
rpn_rois_nums = rpn_rois_nums.numpy() rpn_rois_num = rpn_rois_num.numpy()
gt_classes = gt_classes.numpy() gt_classes = gt_classes.numpy()
gt_boxes = gt_boxes.numpy() gt_boxes = gt_boxes.numpy()
is_crowd = is_crowd.numpy() is_crowd = is_crowd.numpy()
im_info = im_info.numpy() im_info = im_info.numpy()
if open_debug:
self.use_random = False
outs = generate_proposal_target( outs = generate_proposal_target(
rpn_rois, rpn_rois_nums, gt_classes, is_crowd, gt_boxes, im_info, rpn_rois, rpn_rois_num, gt_classes, is_crowd, gt_boxes, im_info,
self.batch_size_per_im, self.fg_fraction, self.fg_thresh[stage], self.batch_size_per_im, self.fg_fraction, self.fg_thresh[stage],
self.bg_thresh_hi[stage], self.bg_thresh_lo[stage], self.bg_thresh_hi[stage], self.bg_thresh_lo[stage],
self.bbox_reg_weights[stage], self.num_classes, self.use_random, self.bbox_reg_weights[stage], self.num_classes, self.use_random,
self.is_cls_agnostic, self.is_cascade_rcnn) self.is_cls_agnostic, self.is_cascade_rcnn)
outs = [to_variable(v) for v in outs] outs = [to_variable(v) for v in outs]
for v in outs: for v in outs:
v.stop_gradient = True v.stop_gradient = True
...@@ -247,25 +240,25 @@ class ProposalTargetGenerator(object): ...@@ -247,25 +240,25 @@ class ProposalTargetGenerator(object):
@register @register
@serializable @serializable
class MaskTargetGenerator(object): class MaskTargetGenerator(object):
__shared__ = ['num_classes'] __shared__ = ['num_classes', 'mask_resolution']
def __init__(self, num_classes=81, resolution=14): def __init__(self, num_classes=81, mask_resolution=14):
super(MaskTargetGenerator, self).__init__() super(MaskTargetGenerator, self).__init__()
self.num_classes = num_classes self.num_classes = num_classes
self.resolution = resolution self.mask_resolution = mask_resolution
def __call__(self, im_info, gt_classes, is_crowd, gt_segms, rois, rois_nums, def __call__(self, im_info, gt_classes, is_crowd, gt_segms, rois, rois_num,
labels_int32): labels_int32):
im_info = im_info.numpy() im_info = im_info.numpy()
gt_classes = gt_classes.numpy() gt_classes = gt_classes.numpy()
is_crowd = is_crowd.numpy() is_crowd = is_crowd.numpy()
gt_segms = gt_segms.numpy() gt_segms = gt_segms.numpy()
rois = rois.numpy() rois = rois.numpy()
rois_nums = rois_nums.numpy() rois_num = rois_num.numpy()
labels_int32 = labels_int32.numpy() labels_int32 = labels_int32.numpy()
outs = generate_mask_target(im_info, gt_classes, is_crowd, gt_segms, outs = generate_mask_target(im_info, gt_classes, is_crowd, gt_segms,
rois, rois_nums, labels_int32, rois, rois_num, labels_int32,
self.num_classes, self.resolution) self.num_classes, self.mask_resolution)
outs = [to_variable(v) for v in outs] outs = [to_variable(v) for v in outs]
for v in outs: for v in outs:
...@@ -277,41 +270,54 @@ class MaskTargetGenerator(object): ...@@ -277,41 +270,54 @@ class MaskTargetGenerator(object):
class RoIExtractor(object): class RoIExtractor(object):
def __init__(self, def __init__(self,
resolution=14, resolution=14,
spatial_scale=1. / 16,
sampling_ratio=0, sampling_ratio=0,
extractor_type='RoIAlign'): canconical_level=4,
canonical_size=224,
start_level=0,
end_level=3):
super(RoIExtractor, self).__init__() super(RoIExtractor, self).__init__()
if isinstance(resolution, Integral):
resolution = [resolution, resolution]
self.resolution = resolution self.resolution = resolution
self.spatial_scale = spatial_scale
self.sampling_ratio = sampling_ratio self.sampling_ratio = sampling_ratio
self.extractor_type = extractor_type self.canconical_level = canconical_level
self.canonical_size = canonical_size
self.start_level = start_level
self.end_level = end_level
def __call__(self, feat, rois, rois_nums): def __call__(self, feats, rois, spatial_scale):
roi, rois_num = rois
cur_l = 0 cur_l = 0
new_nums = [cur_l] if self.start_level == self.end_level:
rois_nums_np = rois_nums.numpy()
for l in rois_nums_np:
cur_l += l
new_nums.append(cur_l)
nums_t = to_variable(np.asarray(new_nums))
if self.extractor_type == 'RoIAlign':
rois_feat = fluid.layers.roi_align( rois_feat = fluid.layers.roi_align(
feat, feats[self.start_level],
rois, roi,
self.resolution[0], self.resolution,
self.resolution[1], self.resolution,
self.spatial_scale, spatial_scale,
rois_lod=nums_t) rois_num=rois_num)
elif self.extractor_type == 'RoIPool': return rois_feat
rois_feat = fluid.layers.roi_pool( offset = 2
feat, k_min = self.start_level + offset
rois, k_max = self.end_level + offset
self.resolution[0], rois_dist, restore_index, rois_num_dist = fluid.layers.distribute_fpn_proposals(
self.resolution[1], roi,
self.spatial_scale, k_min,
rois_lod=nums_t) k_max,
self.canconical_level,
self.canonical_size,
rois_num=rois_num)
rois_feat_list = []
for lvl in range(self.start_level, self.end_level + 1):
roi_feat = fluid.layers.roi_align(
feats[lvl],
rois_dist[lvl],
self.resolution,
self.resolution,
spatial_scale[lvl],
sampling_ratio=self.sampling_ratio,
rois_num=rois_num_dist[lvl])
rois_feat_list.append(roi_feat)
rois_feat_shuffle = fluid.layers.concat(rois_feat_list)
rois_feat = fluid.layers.gather(rois_feat_shuffle, restore_index)
return rois_feat return rois_feat
...@@ -333,11 +339,13 @@ class DecodeClipNms(object): ...@@ -333,11 +339,13 @@ class DecodeClipNms(object):
self.score_threshold = score_threshold self.score_threshold = score_threshold
self.nms_threshold = nms_threshold self.nms_threshold = nms_threshold
def __call__(self, bbox, bbox_prob, bbox_delta, img_info): def __call__(self, bboxes, bbox_prob, bbox_delta, im_info):
outs = bbox_post_process(bbox.numpy(), bboxes_np = (i.numpy() for i in bboxes)
# bbox, bbox_num
outs = bbox_post_process(bboxes_np,
bbox_prob.numpy(), bbox_prob.numpy(),
bbox_delta.numpy(), bbox_delta.numpy(),
img_info.numpy(), self.keep_top_k, im_info.numpy(), self.keep_top_k,
self.score_threshold, self.nms_threshold, self.score_threshold, self.nms_threshold,
self.num_classes) self.num_classes)
outs = [to_variable(v) for v in outs] outs = [to_variable(v) for v in outs]
......
...@@ -126,12 +126,11 @@ def bbox_overlaps(bboxes1, bboxes2): ...@@ -126,12 +126,11 @@ def bbox_overlaps(bboxes1, bboxes2):
def nms(dets, thresh): def nms(dets, thresh):
if dets.shape[0] == 0: if dets.shape[0] == 0:
return [] return []
x1 = dets[:, 0] scores = dets[:, 0]
y1 = dets[:, 1] x1 = dets[:, 1]
x2 = dets[:, 2] y1 = dets[:, 2]
y2 = dets[:, 3] x2 = dets[:, 3]
scores = dets[:, 4] y2 = dets[:, 4]
areas = (x2 - x1 + 1) * (y2 - y1 + 1) areas = (x2 - x1 + 1) * (y2 - y1 + 1)
order = scores.argsort()[::-1] order = scores.argsort()[::-1]
...@@ -242,13 +241,13 @@ def compute_bbox_targets(bboxes1, bboxes2, labels, bbox_reg_weights): ...@@ -242,13 +241,13 @@ def compute_bbox_targets(bboxes1, bboxes2, labels, bbox_reg_weights):
np.float32, copy=False) np.float32, copy=False)
@jit #@jit
def expand_bbox_targets(bbox_targets_input, def expand_bbox_targets(bbox_targets_input,
class_nums=81, class_nums=81,
is_cls_agnostic=False): is_cls_agnostic=False):
class_labels = bbox_targets_input[:, 0] class_labels = bbox_targets_input[:, 0]
fg_inds = np.where(class_labels > 0)[0] fg_inds = np.where(class_labels > 0)[0]
if not is_cls_agnostic: if is_cls_agnostic:
class_nums = 2 class_nums = 2
bbox_targets = np.zeros((class_labels.shape[0], 4 * class_nums)) bbox_targets = np.zeros((class_labels.shape[0], 4 * class_nums))
bbox_inside_weights = np.zeros(bbox_targets.shape) bbox_inside_weights = np.zeros(bbox_targets.shape)
......
...@@ -180,7 +180,7 @@ def polys_to_mask_wrt_box(polygons, box, M): ...@@ -180,7 +180,7 @@ def polys_to_mask_wrt_box(polygons, box, M):
return mask return mask
@jit #@jit
def expand_mask_targets(masks, mask_class_labels, resolution, num_classes): def expand_mask_targets(masks, mask_class_labels, resolution, num_classes):
"""Expand masks from shape (#masks, resolution ** 2) """Expand masks from shape (#masks, resolution ** 2)
to (#masks, #classes * resolution ** 2) to encode class to (#masks, #classes * resolution ** 2) to encode class
......
...@@ -3,44 +3,45 @@ import os ...@@ -3,44 +3,45 @@ import os
import numpy as np import numpy as np
from numba import jit from numba import jit
from .bbox import delta2bbox, clip_bbox, expand_bbox, nms from .bbox import delta2bbox, clip_bbox, expand_bbox, nms
import pycocotools.mask as mask_util
import cv2
def bbox_post_process(bboxes, def bbox_post_process(bboxes,
bbox_nums, bbox_prob,
bbox_probs,
bbox_deltas, bbox_deltas,
im_info, im_info,
keep_top_k=100, keep_top_k=100,
score_thresh=0.05, score_thresh=0.05,
nms_thresh=0.5, nms_thresh=0.5,
class_nums=81, class_nums=81,
bbox_reg_weights=[0.1, 0.1, 0.2, 0.2]): bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
with_background=True):
new_bboxes = [[] for _ in range(len(bbox_nums))] bbox, bbox_num = bboxes
new_bbox_nums = [0] new_bbox = [[] for _ in range(len(bbox_num))]
new_bbox_num = []
st_num = 0 st_num = 0
end_num = 0 end_num = 0
for i in range(len(bbox_nums)): for i in range(len(bbox_num)):
bbox_num = bbox_nums[i] box_num = bbox_num[i]
end_num += bbox_num end_num += box_num
bbox = bboxes[st_num:end_num, :] # bbox boxes = bbox[st_num:end_num, :] # bbox
bbox = bbox / im_info[i][2] # scale boxes = boxes / im_info[i][2] # scale
bbox_delta = bbox_deltas[st_num:end_num, :] # bbox delta bbox_delta = bbox_deltas[st_num:end_num, :, :] # bbox delta
bbox_delta = np.reshape(bbox_delta, (box_num, -1))
# step1: decode # step1: decode
bbox = delta2bbox(bbox_delta, bbox, bbox_reg_weights) boxes = delta2bbox(bbox_delta, boxes, bbox_reg_weights)
# step2: clip # step2: clip
bbox = clip_bbox(bbox, im_info[i][:2] / im_info[i][2]) boxes = clip_bbox(boxes, im_info[i][:2] / im_info[i][2])
# step3: nms # step3: nms
cls_boxes = [[] for _ in range(class_nums)] cls_boxes = [[] for _ in range(class_nums)]
scores_n = bbox_probs[st_num:end_num, :] scores_n = bbox_prob[st_num:end_num, :]
for j in range(1, class_nums): for j in range(with_background, class_nums):
inds = np.where(scores_n[:, j] > score_thresh)[0] inds = np.where(scores_n[:, j] > score_thresh)[0]
scores_j = scores_n[inds, j] scores_j = scores_n[inds, j]
rois_j = bbox[inds, j * 4:(j + 1) * 4] rois_j = boxes[inds, j * 4:(j + 1) * 4]
dets_j = np.hstack((scores_j[:, np.newaxis], rois_j)).astype( dets_j = np.hstack((scores_j[:, np.newaxis], rois_j)).astype(
np.float32, copy=False) np.float32, copy=False)
keep = nms(dets_j, nms_thresh) keep = nms(dets_j, nms_thresh)
...@@ -51,32 +52,34 @@ def bbox_post_process(bboxes, ...@@ -51,32 +52,34 @@ def bbox_post_process(bboxes,
np.float32, copy=False) np.float32, copy=False)
cls_boxes[j] = nms_dets cls_boxes[j] = nms_dets
st_num += bbox_num st_num += box_num
# Limit to max_per_image detections **over all classes** # Limit to max_per_image detections **over all classes**
image_scores = np.hstack( image_scores = np.hstack(
[cls_boxes[j][:, 1] for j in range(1, class_nums)]) [cls_boxes[j][:, 1] for j in range(with_background, class_nums)])
if len(image_scores) > keep_top_k: if len(image_scores) > keep_top_k:
image_thresh = np.sort(image_scores)[-keep_top_k] image_thresh = np.sort(image_scores)[-keep_top_k]
for j in range(1, class_nums): for j in range(with_background, class_nums):
keep = np.where(cls_boxes[j][:, 1] >= image_thresh)[0] keep = np.where(cls_boxes[j][:, 1] >= image_thresh)[0]
cls_boxes[j] = cls_boxes[j][keep, :] cls_boxes[j] = cls_boxes[j][keep, :]
new_bboxes_n = np.vstack([cls_boxes[j] for j in range(1, class_nums)]) new_bbox_n = np.vstack(
new_bboxes[i] = new_bboxes_n [cls_boxes[j] for j in range(with_background, class_nums)])
new_bbox_nums.append(len(new_bboxes_n)) new_bbox[i] = new_bbox_n
labels = new_bboxes_n[:, 0] new_bbox_num.append(len(new_bbox_n))
scores = new_bboxes_n[:, 1] new_bbox = np.vstack([new_bbox[k] for k in range(len(bbox_num))])
boxes = new_bboxes_n[:, 2:] new_bbox_num = np.array(new_bbox_num).astype('int32')
new_bboxes = np.vstack([new_bboxes[k] for k in range(len(bbox_nums) - 1)]) return new_bbox, new_bbox_num
new_bbox_nums = np.array(new_bbox_nums)
return new_bbox_nums, new_bboxes
@jit @jit
def mask_post_process(bboxes, bbox_nums, masks, im_info, resolution=14): def mask_post_process(bboxes, masks, im_info, resolution=14, binary_thresh=0.5):
scale = (resolution + 2.0) / resolution if masks.shape[0] == 0:
boxes = bboxes[:, 2:] return masks
labels = bboxes[:, 0] bbox, bbox_nums = bboxes
M = resolution
scale = (M + 2.0) / M
boxes = bbox[:, 2:]
labels = bbox[:, 0]
segms_results = [[] for _ in range(len(bbox_nums))] segms_results = [[] for _ in range(len(bbox_nums))]
sum = 0 sum = 0
st_num = 0 st_num = 0
...@@ -92,7 +95,7 @@ def mask_post_process(bboxes, bbox_nums, masks, im_info, resolution=14): ...@@ -92,7 +95,7 @@ def mask_post_process(bboxes, bbox_nums, masks, im_info, resolution=14):
im_h = int(round(im_info[i][0] / im_info[i][2])) im_h = int(round(im_info[i][0] / im_info[i][2]))
im_w = int(round(im_info[i][1] / im_info[i][2])) im_w = int(round(im_info[i][1] / im_info[i][2]))
boxes_n = expand_boxes(boxes_n, scale) boxes_n = expand_bbox(boxes_n, scale)
boxes_n = boxes_n.astype(np.int32) boxes_n = boxes_n.astype(np.int32)
padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32) padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)
for j in range(len(boxes_n)): for j in range(len(boxes_n)):
...@@ -106,7 +109,7 @@ def mask_post_process(bboxes, bbox_nums, masks, im_info, resolution=14): ...@@ -106,7 +109,7 @@ def mask_post_process(bboxes, bbox_nums, masks, im_info, resolution=14):
h = np.maximum(h, 1) h = np.maximum(h, 1)
mask = cv2.resize(padded_mask, (w, h)) mask = cv2.resize(padded_mask, (w, h))
mask = np.array(mask > cfg.mrcnn_thresh_binarize, dtype=np.uint8) mask = np.array(mask > binary_thresh, dtype=np.uint8)
im_mask = np.zeros((im_h, im_w), dtype=np.uint8) im_mask = np.zeros((im_h, im_w), dtype=np.uint8)
x_0 = max(ref_box[0], 0) x_0 = max(ref_box[0], 0)
...@@ -121,20 +124,18 @@ def mask_post_process(bboxes, bbox_nums, masks, im_info, resolution=14): ...@@ -121,20 +124,18 @@ def mask_post_process(bboxes, bbox_nums, masks, im_info, resolution=14):
im_mask[:, :, np.newaxis], order='F'))[0] im_mask[:, :, np.newaxis], order='F'))[0]
cls_segms.append(rle) cls_segms.append(rle)
segms_results[i] = np.array(cls_segms)[:, np.newaxis] segms_results[i] = np.array(cls_segms)[:, np.newaxis]
st_num += bbox_num
segms_results = np.vstack([segms_results[k] for k in range(len(bbox_nums))]) segms_results = np.vstack([segms_results[k] for k in range(len(bbox_nums))])
bboxes = np.hstack([segms_results, bboxes]) bboxes = np.hstack([segms_results, bbox])
return bboxes[:, :3] return bboxes[:, :3]
@jit @jit
def get_det_res(bboxes, bbox_nums, image_id, num_id_to_cat_id_map, def get_det_res(bboxes, bbox_nums, image_id, num_id_to_cat_id_map):
batch_size=1):
det_res = [] det_res = []
k = 0 k = 0
for i in range(len(bbox_nums)): for i in range(len(bbox_nums)):
image_id = int(image_id[i][0]) image_id = int(image_id[i][0])
image_width = int(image_shape[i][1])
image_height = int(image_shape[i][2])
det_nums = bbox_nums[i] det_nums = bbox_nums[i]
for j in range(det_nums): for j in range(det_nums):
......
...@@ -89,7 +89,7 @@ def generate_rpn_anchor_target(anchors, ...@@ -89,7 +89,7 @@ def generate_rpn_anchor_target(anchors,
@jit @jit
def label_anchor(anchors, gt_boxes): def label_anchor(anchors, gt_boxes):
iou = compute_iou(anchors, gt_boxes) iou = bbox_overlaps(anchors, gt_boxes)
# every gt's anchor's index # every gt's anchor's index
gt_bbox_anchor_inds = iou.argmax(axis=0) gt_bbox_anchor_inds = iou.argmax(axis=0)
...@@ -150,7 +150,7 @@ def sample_anchor(anchor_gt_bbox_iou, ...@@ -150,7 +150,7 @@ def sample_anchor(anchor_gt_bbox_iou,
@jit @jit
def generate_proposal_target(rpn_rois, def generate_proposal_target(rpn_rois,
rpn_rois_nums, rpn_rois_num,
gt_classes, gt_classes,
is_crowd, is_crowd,
gt_boxes, gt_boxes,
...@@ -171,12 +171,12 @@ def generate_proposal_target(rpn_rois, ...@@ -171,12 +171,12 @@ def generate_proposal_target(rpn_rois,
tgt_deltas = [] tgt_deltas = []
rois_inside_weights = [] rois_inside_weights = []
rois_outside_weights = [] rois_outside_weights = []
rois_nums = [] new_rois_num = []
st_num = 0 st_num = 0
end_num = 0 end_num = 0
for im_i in range(len(rpn_rois_nums)): for im_i in range(len(rpn_rois_num)):
rpn_rois_num = rpn_rois_nums[im_i] length = rpn_rois_num[im_i]
end_num += rpn_rois_num end_num += length
rpn_roi = rpn_rois[st_num:end_num] rpn_roi = rpn_rois[st_num:end_num]
im_scale = im_info[im_i][2] im_scale = im_info[im_i][2]
...@@ -220,10 +220,10 @@ def generate_proposal_target(rpn_rois, ...@@ -220,10 +220,10 @@ def generate_proposal_target(rpn_rois,
bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype) bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype)
roi = sampled_boxes * im_scale roi = sampled_boxes * im_scale
st_num += rpn_rois_num st_num += length
rois.append(roi) rois.append(roi)
rois_nums.append(roi.shape[0]) new_rois_num.append(roi.shape[0])
tgt_labels.append(sampled_labels) tgt_labels.append(sampled_labels)
tgt_deltas.append(sampled_deltas) tgt_deltas.append(sampled_deltas)
rois_inside_weights.append(bbox_inside_weights) rois_inside_weights.append(bbox_inside_weights)
...@@ -237,9 +237,8 @@ def generate_proposal_target(rpn_rois, ...@@ -237,9 +237,8 @@ def generate_proposal_target(rpn_rois,
rois_inside_weights, axis=0).astype(np.float32) rois_inside_weights, axis=0).astype(np.float32)
rois_outside_weights = np.concatenate( rois_outside_weights = np.concatenate(
rois_outside_weights, axis=0).astype(np.float32) rois_outside_weights, axis=0).astype(np.float32)
rois_nums = np.asarray(rois_nums, np.int32) new_rois_num = np.asarray(new_rois_num, np.int32)
return rois, tgt_labels, tgt_deltas, rois_inside_weights, rois_outside_weights, new_rois_num
return rois, tgt_labels, tgt_deltas, rois_inside_weights, rois_outside_weights, rois_nums
@jit @jit
...@@ -250,7 +249,7 @@ def label_bbox(boxes, ...@@ -250,7 +249,7 @@ def label_bbox(boxes,
class_nums=81, class_nums=81,
is_cascade_rcnn=False): is_cascade_rcnn=False):
iou = compute_iou(boxes, gt_boxes) iou = bbox_overlaps(boxes, gt_boxes)
# every roi's gt box's index # every roi's gt box's index
roi_gt_bbox_inds = np.zeros((boxes.shape[0]), dtype=np.int32) roi_gt_bbox_inds = np.zeros((boxes.shape[0]), dtype=np.int32)
...@@ -318,15 +317,16 @@ def sample_bbox(roi_gt_bbox_iou, ...@@ -318,15 +317,16 @@ def sample_bbox(roi_gt_bbox_iou,
@jit @jit
def generate_mask_target(im_info, gt_classes, is_crowd, gt_segms, rois, def generate_mask_target(im_info, gt_classes, is_crowd, gt_segms, rois,
rois_nums, labels_int32, num_classes, resolution): rois_num, labels_int32, num_classes, resolution):
mask_rois = [] mask_rois = []
mask_rois_num = []
rois_has_mask_int32 = [] rois_has_mask_int32 = []
mask_int32 = [] mask_int32 = []
st_num = 0 st_num = 0
end_num = 0 end_num = 0
for k in range(len(rois_nums)): for k in range(len(rois_num)):
rois_num = rois_nums[k] length = rois_num[k]
end_num += rois_num end_num += length
# remove padding # remove padding
gt_polys = gt_segms[k] gt_polys = gt_segms[k]
...@@ -345,37 +345,32 @@ def generate_mask_target(im_info, gt_classes, is_crowd, gt_segms, rois, ...@@ -345,37 +345,32 @@ def generate_mask_target(im_info, gt_classes, is_crowd, gt_segms, rois,
if len(new_poly) > 0: if len(new_poly) > 0:
gt_segs.append(new_poly) gt_segs.append(new_poly)
new_gt_polys.append(gt_segs) new_gt_polys.append(gt_segs)
im_scale = im_info[k][2] im_scale = im_info[k][2]
boxes = rois[st_num:end_num] / im_scale boxes = rois[st_num:end_num] / im_scale
bbox_fg, bbox_has_mask, masks = sample_mask( bbox_fg, bbox_has_mask, masks = sample_mask(
boxes, new_gt_polys, labels_int32[st_num:rois_num], gt_classes[k], boxes, new_gt_polys, labels_int32[st_num:end_num], gt_classes[k],
is_crowd[k], num_classes, resolution) is_crowd[k], num_classes, resolution)
st_num += rois_num st_num += length
mask_rois.append(bbox_fg * im_scale) mask_rois.append(bbox_fg * im_scale)
mask_rois_num.append(len(bbox_fg))
rois_has_mask_int32.append(bbox_has_mask) rois_has_mask_int32.append(bbox_has_mask)
mask_int32.append(masks) mask_int32.append(masks)
mask_rois = np.concatenate(mask_rois, axis=0).astype(np.float32) mask_rois = np.concatenate(mask_rois, axis=0).astype(np.float32)
mask_rois_num = np.array(mask_rois_num).astype(np.int32)
rois_has_mask_int32 = np.concatenate( rois_has_mask_int32 = np.concatenate(
rois_has_mask_int32, axis=0).astype(np.int32) rois_has_mask_int32, axis=0).astype(np.int32)
mask_int32 = np.concatenate(mask_int32, axis=0).astype(np.int32) mask_int32 = np.concatenate(mask_int32, axis=0).astype(np.int32)
return mask_rois, rois_has_mask_int32, mask_int32 return mask_rois, mask_rois_num, rois_has_mask_int32, mask_int32
@jit @jit
def sample_mask( def sample_mask(boxes, gt_polys, label_int32, gt_classes, is_crowd, num_classes,
boxes, resolution):
gt_polys,
label_int32,
gt_classes,
is_crowd,
num_classes,
resolution, ):
gt_polys_inds = np.where((gt_classes > 0) & (is_crowd == 0))[0] gt_polys_inds = np.where((gt_classes > 0) & (is_crowd == 0))[0]
_gt_polys = [gt_polys[i] for i in gt_polys_inds] _gt_polys = [gt_polys[i] for i in gt_polys_inds]
...@@ -405,7 +400,5 @@ def sample_mask( ...@@ -405,7 +400,5 @@ def sample_mask(
masks_fg = -np.ones((1, resolution**2), dtype=np.int32) masks_fg = -np.ones((1, resolution**2), dtype=np.int32)
labels_fg = np.zeros((1, )) labels_fg = np.zeros((1, ))
bbox_has_mask = np.append(bbox_has_mask, 0) bbox_has_mask = np.append(bbox_has_mask, 0)
masks = expand_mask_targets(masks_fg, labels_fg, resolution, num_classes) masks = expand_mask_targets(masks_fg, labels_fg, resolution, num_classes)
return bbox_fg, bbox_has_mask, masks return bbox_fg, bbox_has_mask, masks
...@@ -45,37 +45,46 @@ def get_ckpt_path(path): ...@@ -45,37 +45,46 @@ def get_ckpt_path(path):
def load_dygraph_ckpt(model, def load_dygraph_ckpt(model,
optimizer, optimizer=None,
pretrain_ckpt=None, pretrain_ckpt=None,
ckpt=None, ckpt=None,
ckpt_type='pretrain', ckpt_type=None,
exclude_params=[], exclude_params=[],
open_debug=False): load_static_weights=False):
if ckpt_type == 'pretrain': assert ckpt_type in ['pretrain', 'resume', 'finetune', None]
if ckpt_type == 'pretrain' and ckpt is None:
ckpt = pretrain_ckpt ckpt = pretrain_ckpt
ckpt = get_ckpt_path(ckpt) ckpt = get_ckpt_path(ckpt)
if ckpt is not None and os.path.exists(ckpt): assert os.path.exists(ckpt), "Path {} does not exist.".format(ckpt)
param_state_dict, optim_state_dict = fluid.load_dygraph(ckpt) if load_static_weights:
if open_debug: pre_state_dict = fluid.load_program_state(ckpt)
print("Loading Weights: ", param_state_dict.keys()) param_state_dict = {}
model_dict = model.state_dict()
for key in model_dict.keys():
weight_name = model_dict[key].name
if weight_name in pre_state_dict.keys():
print('Load weight: {}, shape: {}'.format(
weight_name, pre_state_dict[weight_name].shape))
param_state_dict[key] = pre_state_dict[weight_name]
else:
param_state_dict[key] = model_dict[key]
model.set_dict(param_state_dict)
return model
param_state_dict, optim_state_dict = fluid.load_dygraph(ckpt)
if len(exclude_params) != 0: if len(exclude_params) != 0:
for k in exclude_params: for k in exclude_params:
param_state_dict.pop(k, None) param_state_dict.pop(k, None)
if ckpt_type == 'pretrain': if ckpt_type == 'pretrain':
model.backbone.set_dict(param_state_dict) model.backbone.set_dict(param_state_dict)
elif ckpt_type == 'finetune': else:
model.set_dict(param_state_dict, use_structured_name=True) model.set_dict(param_state_dict)
else:
model.set_dict(param_state_dict)
if ckpt_type == 'resume': if ckpt_type == 'resume':
if optim_state_dict is None: assert optim_state_dict, "Can't Resume Last Training's Optimizer State!!!"
print("Can't Resume Last Training's Optimizer State!!!") optimizer.set_dict(optim_state_dict)
else:
optimizer.set_dict(optim_state_dict)
return model return model
......
...@@ -28,10 +28,7 @@ def json_eval_results(metric, json_directory=None, dataset=None): ...@@ -28,10 +28,7 @@ def json_eval_results(metric, json_directory=None, dataset=None):
logger.info("{} not exists!".format(v_json)) logger.info("{} not exists!".format(v_json))
def coco_eval_results(outs_res=None, def coco_eval_results(outs_res=None, include_mask=False, dataset=None):
include_mask=False,
batch_size=1,
dataset=None):
print("start evaluate bbox using coco api") print("start evaluate bbox using coco api")
import io import io
import six import six
...@@ -49,14 +46,14 @@ def coco_eval_results(outs_res=None, ...@@ -49,14 +46,14 @@ def coco_eval_results(outs_res=None,
if outs_res is not None and len(outs_res) > 0: if outs_res is not None and len(outs_res) > 0:
det_res = [] det_res = []
for outs in outs_res: for outs in outs_res:
det_res += get_det_res(outs['bbox_nums'], outs['bbox'], det_res += get_det_res(outs['bbox'], outs['bbox_num'],
outs['im_id'], catid, batch_size) outs['im_id'], catid)
with io.open("bbox_eval.json", 'w') as outfile: with io.open("bbox.json", 'w') as outfile:
encode_func = unicode if six.PY2 else str encode_func = unicode if six.PY2 else str
outfile.write(encode_func(json.dumps(det_res))) outfile.write(encode_func(json.dumps(det_res)))
cocoDt = cocoGt.loadRes("bbox_eval.json") cocoDt = cocoGt.loadRes("bbox.json")
cocoEval = COCOeval(cocoGt, cocoDt, 'bbox') cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
cocoEval.evaluate() cocoEval.evaluate()
cocoEval.accumulate() cocoEval.accumulate()
...@@ -65,14 +62,15 @@ def coco_eval_results(outs_res=None, ...@@ -65,14 +62,15 @@ def coco_eval_results(outs_res=None,
if outs_res is not None and len(outs_res) > 0 and include_mask: if outs_res is not None and len(outs_res) > 0 and include_mask:
seg_res = [] seg_res = []
for outs in outs_res: for outs in outs_res:
seg_res += get_seg_res(outs['bbox_nums'], outs['mask'], seg_res += get_seg_res(outs['mask'], outs['bbox_num'],
outs['im_id'], catid, batch_size) outs['im_id'], catid)
with io.open("mask_eval.json", 'w') as outfile: with io.open("mask.json", 'w') as outfile:
encode_func = unicode if six.PY2 else str encode_func = unicode if six.PY2 else str
outfile.write(encode_func(json.dumps(seg_res))) outfile.write(encode_func(json.dumps(seg_res)))
cocoSg = cocoGt.loadRes("mask_eval.json") cocoSg = cocoGt.loadRes("mask.json")
cocoEval = COCOeval(cocoGt, cocoSg, 'bbox') cocoEval = COCOeval(cocoGt, cocoSg, 'segm')
cocoEval.evaluate() cocoEval.evaluate()
cocoEval.accumulate() cocoEval.accumulate()
cocoEval.summarize()
...@@ -47,7 +47,7 @@ class TrainingStats(object): ...@@ -47,7 +47,7 @@ class TrainingStats(object):
def update(self, stats): def update(self, stats):
for k, v in self.smoothed_losses_and_metrics.items(): for k, v in self.smoothed_losses_and_metrics.items():
v.add_value(stats[k]) v.add_value(stats[k].numpy())
def get(self, extras=None): def get(self, extras=None):
stats = collections.OrderedDict() stats = collections.OrderedDict()
......
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import os import os, sys
# add python path of PadleDetection to sys.path
parent_path = os.path.abspath(os.path.join(__file__, *(['..'] * 2)))
if parent_path not in sys.path:
sys.path.append(parent_path)
import time import time
# ignore numba warning # ignore numba warning
import warnings import warnings
...@@ -14,6 +19,7 @@ from ppdet.utils.check import check_gpu, check_version, check_config ...@@ -14,6 +19,7 @@ from ppdet.utils.check import check_gpu, check_version, check_config
from ppdet.utils.cli import ArgsParser from ppdet.utils.cli import ArgsParser
from ppdet.utils.eval_utils import coco_eval_results from ppdet.utils.eval_utils import coco_eval_results
from ppdet.data.reader import create_reader from ppdet.data.reader import create_reader
from ppdet.utils.checkpoint import load_dygraph_ckpt, save_dygraph_ckpt
def parse_args(): def parse_args():
...@@ -38,11 +44,10 @@ def run(FLAGS, cfg): ...@@ -38,11 +44,10 @@ def run(FLAGS, cfg):
# Model # Model
main_arch = cfg.architecture main_arch = cfg.architecture
model = create(cfg.architecture, mode='infer', open_debug=cfg.open_debug) model = create(cfg.architecture)
# Init Model # Init Model
param_state_dict = fluid.dygraph.load_dygraph(cfg.weights)[0] model = load_dygraph_ckpt(model, ckpt=cfg.weights)
model.set_dict(param_state_dict)
# Data Reader # Data Reader
if FLAGS.use_gpu: if FLAGS.use_gpu:
...@@ -58,7 +63,7 @@ def run(FLAGS, cfg): ...@@ -58,7 +63,7 @@ def run(FLAGS, cfg):
# forward # forward
model.eval() model.eval()
outs = model(data, cfg['EvalReader']['inputs_def']['fields']) outs = model(data, cfg['EvalReader']['inputs_def']['fields'], 'infer')
outs_res.append(outs) outs_res.append(outs)
# log # log
...@@ -68,7 +73,7 @@ def run(FLAGS, cfg): ...@@ -68,7 +73,7 @@ def run(FLAGS, cfg):
# Metric # Metric
coco_eval_results( coco_eval_results(
outs_res, outs_res,
include_mask=True if 'MaskHed' in cfg else False, include_mask=True if 'MaskHead' in cfg else False,
dataset=cfg['EvalReader']['dataset']) dataset=cfg['EvalReader']['dataset'])
......
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import os import os, sys
# add python path of PadleDetection to sys.path
parent_path = os.path.abspath(os.path.join(__file__, *(['..'] * 2)))
if parent_path not in sys.path:
sys.path.append(parent_path)
import time import time
# ignore numba warning # ignore numba warning
import warnings import warnings
warnings.filterwarnings('ignore') warnings.filterwarnings('ignore')
import random import random
import datetime
import numpy as np import numpy as np
from collections import deque
import paddle.fluid as fluid import paddle.fluid as fluid
from ppdet.core.workspace import load_config, merge_config, create from ppdet.core.workspace import load_config, merge_config, create
from ppdet.data.reader import create_reader from ppdet.data.reader import create_reader
from ppdet.utils.stats import TrainingStats
from ppdet.utils.check import check_gpu, check_version, check_config from ppdet.utils.check import check_gpu, check_version, check_config
from ppdet.utils.cli import ArgsParser from ppdet.utils.cli import ArgsParser
from ppdet.utils.checkpoint import load_dygraph_ckpt, save_dygraph_ckpt from ppdet.utils.checkpoint import load_dygraph_ckpt, save_dygraph_ckpt
from paddle.fluid.dygraph.parallel import ParallelEnv
import logging
FORMAT = '%(asctime)s-%(levelname)s: %(message)s'
logging.basicConfig(level=logging.INFO, format=FORMAT)
logger = logging.getLogger(__name__)
def parse_args(): def parse_args():
...@@ -24,7 +37,6 @@ def parse_args(): ...@@ -24,7 +37,6 @@ def parse_args():
type=str, type=str,
help="Loading Checkpoints only support 'pretrain', 'finetune', 'resume'." help="Loading Checkpoints only support 'pretrain', 'finetune', 'resume'."
) )
parser.add_argument( parser.add_argument(
"--fp16", "--fp16",
action='store_true', action='store_true',
...@@ -63,11 +75,6 @@ def parse_args(): ...@@ -63,11 +75,6 @@ def parse_args():
"This flag is only used for internal test.") "This flag is only used for internal test.")
parser.add_argument( parser.add_argument(
"--use_gpu", action='store_true', default=False, help="data parallel") "--use_gpu", action='store_true', default=False, help="data parallel")
parser.add_argument(
"--use_parallel",
action='store_true',
default=False,
help="data parallel")
parser.add_argument( parser.add_argument(
'--is_profiler', '--is_profiler',
...@@ -88,13 +95,13 @@ def run(FLAGS, cfg): ...@@ -88,13 +95,13 @@ def run(FLAGS, cfg):
random.seed(local_seed) random.seed(local_seed)
np.random.seed(local_seed) np.random.seed(local_seed)
if FLAGS.enable_ce or cfg.open_debug: if FLAGS.enable_ce:
random.seed(0) random.seed(0)
np.random.seed(0) np.random.seed(0)
# Model # Model
main_arch = cfg.architecture main_arch = cfg.architecture
model = create(cfg.architecture, mode='train', open_debug=cfg.open_debug) model = create(cfg.architecture)
# Optimizer # Optimizer
lr = create('LearningRate')() lr = create('LearningRate')()
...@@ -105,12 +112,11 @@ def run(FLAGS, cfg): ...@@ -105,12 +112,11 @@ def run(FLAGS, cfg):
model, model,
optimizer, optimizer,
cfg.pretrain_weights, cfg.pretrain_weights,
cfg.weights, ckpt_type=FLAGS.ckpt_type,
FLAGS.ckpt_type, load_static_weights=cfg.load_static_weights)
open_debug=cfg.open_debug)
# Parallel Model # Parallel Model
if FLAGS.use_parallel: if ParallelEnv().nranks > 1:
strategy = fluid.dygraph.parallel.prepare_context() strategy = fluid.dygraph.parallel.prepare_context()
model = fluid.dygraph.parallel.DataParallel(model, strategy) model = fluid.dygraph.parallel.DataParallel(model, strategy)
...@@ -122,21 +128,29 @@ def run(FLAGS, cfg): ...@@ -122,21 +128,29 @@ def run(FLAGS, cfg):
devices_num = int(os.environ.get('CPU_NUM', 1)) devices_num = int(os.environ.get('CPU_NUM', 1))
train_reader = create_reader( train_reader = create_reader(
cfg.TrainReader, (cfg.max_iters - start_iter) * devices_num, cfg.TrainReader, (cfg.max_iters - start_iter), cfg, devices_num=1)
cfg,
devices_num=devices_num)
time_stat = deque(maxlen=cfg.log_smooth_window)
start_time = time.time()
end_time = time.time()
# Run Train # Run Train
for iter_id, data in enumerate(train_reader()): for iter_id, data in enumerate(train_reader()):
start_time = time.time()
start_time = end_time
end_time = time.time()
time_stat.append(end_time - start_time)
time_cost = np.mean(time_stat)
eta_sec = (cfg.max_iters - iter_id) * time_cost
eta = str(datetime.timedelta(seconds=int(eta_sec)))
# Model Forward # Model Forward
model.train() model.train()
outputs = model(data, cfg['TrainReader']['inputs_def']['fields']) outputs = model(data, cfg['TrainReader']['inputs_def']['fields'],
'train')
# Model Backward # Model Backward
loss = outputs['loss'] loss = outputs['loss']
if FLAGS.use_parallel: if ParallelEnv().nranks > 1:
loss = model.scale_loss(loss) loss = model.scale_loss(loss)
loss.backward() loss.backward()
model.apply_collective_grads() model.apply_collective_grads()
...@@ -144,30 +158,27 @@ def run(FLAGS, cfg): ...@@ -144,30 +158,27 @@ def run(FLAGS, cfg):
loss.backward() loss.backward()
optimizer.minimize(loss) optimizer.minimize(loss)
model.clear_gradients() model.clear_gradients()
# Log state
cost_time = time.time() - start_time
# TODO: check this method
curr_lr = optimizer.current_step_lr() curr_lr = optimizer.current_step_lr()
log_info = "iter: {}, time: {:.4f}, lr: {:.6f}".format(
iter_id, cost_time, curr_lr) if ParallelEnv().nranks < 2 or ParallelEnv().local_rank == 0:
for k, v in outputs.items(): # Log state
log_info += ", {}: {:.6f}".format(k, v.numpy()[0]) if iter_id == 0:
print(log_info) train_stats = TrainingStats(cfg.log_smooth_window,
outputs.keys())
# Debug train_stats.update(outputs)
if cfg.open_debug and iter_id > 10: logs = train_stats.log()
break if iter_id % cfg.log_iter == 0:
strs = 'iter: {}, lr: {:.6f}, {}, time: {:.3f}, eta: {}'.format(
# Save Stage iter_id, curr_lr, logs, time_cost, eta)
if iter_id > 0 and iter_id % int( logger.info(strs)
cfg.snapshot_iter) == 0 and fluid.dygraph.parallel.Env( # Save Stage
).local_rank == 0: if iter_id > 0 and iter_id % int(
cfg_name = os.path.basename(FLAGS.config).split('.')[0] cfg.snapshot_iter) == 0 or iter_id == cfg.max_iters - 1:
save_name = str( cfg_name = os.path.basename(FLAGS.config).split('.')[0]
iter_id) if iter_id != cfg.max_iters - 1 else "model_final" save_name = str(
save_dir = os.path.join(cfg.save_dir, cfg_name, save_name) iter_id) if iter_id != cfg.max_iters - 1 else "model_final"
save_dygraph_ckpt(model, optimizer, save_dir) save_dir = os.path.join(cfg.save_dir, cfg_name, save_name)
save_dygraph_ckpt(model, optimizer, save_dir)
def main(): def main():
...@@ -179,7 +190,7 @@ def main(): ...@@ -179,7 +190,7 @@ def main():
check_gpu(cfg.use_gpu) check_gpu(cfg.use_gpu)
check_version() check_version()
place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \ place = fluid.CUDAPlace(ParallelEnv().dev_id) \
if cfg.use_gpu else fluid.CPUPlace() if cfg.use_gpu else fluid.CPUPlace()
with fluid.dygraph.guard(place): with fluid.dygraph.guard(place):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册