未验证 提交 0b4dd432 编写于 作者: G George Ni 提交者: GitHub

[MOT] JDE and DeepSORT: model (#2782)

* jde and deepsort: model
上级 30dda1ea
# OP docs may contain math formulas, which may cause a
# DeprecationWarning during string parsing
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import warnings
warnings.filterwarnings(
action='ignore', category=DeprecationWarning, module='ops')
......@@ -13,6 +25,8 @@ from . import losses
from . import architectures
from . import post_process
from . import layers
from . import reid
from . import mot
from .ops import *
from .backbones import *
......@@ -23,3 +37,5 @@ from .losses import *
from .architectures import *
from .post_process import *
from .layers import *
from .reid import *
from .mot import *
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
from . import meta_arch
from . import faster_rcnn
from . import mask_rcnn
......@@ -17,6 +17,8 @@ from . import ttfnet
from . import s2anet
from . import keypoint_hrhrnet
from . import keypoint_hrnet
from . import jde
from . import deepsort
from .meta_arch import *
from .faster_rcnn import *
......@@ -30,3 +32,5 @@ from .ttfnet import *
from .s2anet import *
from .keypoint_hrhrnet import *
from .keypoint_hrnet import *
from .jde import *
from .deepsort import *
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from ppdet.core.workspace import register, create
from .meta_arch import BaseArch
from ppdet.modeling.mot.utils import Detection, get_crops, scale_coords, clip_box
__all__ = ['DeepSORT']
@register
class DeepSORT(BaseArch):
    """
    DeepSORT network, see https://arxiv.org/abs/1703.07402

    Boxes and scores are produced by the wrapped detector, or read straight
    from the inputs when both 'pred_bboxes' and 'pred_scores' are supplied.
    Each box is cropped from the original image, embedded by the ReID model
    and the resulting detections are handed to the tracker.

    Args:
        detector (object): detector model instance
        reid (object): reid model instance
        tracker (object): tracker instance
    """
    __category__ = 'architecture'

    def __init__(self,
                 detector='YOLOv3',
                 reid='PCBPlusDropoutPyramid',
                 tracker='DeepSORTTracker'):
        super(DeepSORT, self).__init__()
        self.detector = detector
        self.reid = reid
        self.tracker = tracker

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """
        Create the sub-modules named in `cfg`.

        The literal string 'None' as `cfg['detector']` means that no
        detector is created.
        """
        detector = create(cfg['detector']) if cfg['detector'] != 'None' else None
        return {
            "detector": detector,
            "reid": create(cfg['reid']),
            "tracker": create(cfg['tracker']),
        }

    def _forward(self):
        """Collect detections, embed their crops and run one tracker step."""
        assert 'ori_image' in self.inputs
        has_loaded_dets = ('pred_bboxes' in self.inputs and
                           'pred_scores' in self.inputs)

        ori_image = self.inputs['ori_image']
        input_shape = self.inputs['image'].shape[2:]
        im_shape = self.inputs['im_shape']
        scale_factor = self.inputs['scale_factor']

        if self.detector and not has_loaded_dets:
            det_outs = self.detector(self.inputs)
            # Placeholders used when the detector reports no box.
            pred_bboxes, pred_scores = [], []
            if det_outs['bbox_num'] > 0:
                # assumes 'bbox' rows are [class, score, x1, y1, x2, y2]
                # -- TODO confirm against the detector's post-process
                pred_bboxes = scale_coords(det_outs['bbox'][:, 2:],
                                           input_shape, im_shape, scale_factor)
                pred_scores = det_outs['bbox'][:, 1:2]
        else:
            # Pre-computed detections are taken from the inputs as-is.
            pred_bboxes = self.inputs['pred_bboxes']
            pred_scores = self.inputs['pred_scores']

        detections = []
        if len(pred_bboxes) > 0:
            pred_bboxes = clip_box(pred_bboxes, input_shape, im_shape,
                                   scale_factor)
            # Corner boxes (x1, y1, x2, y2) -> (x, y, w, h) with w = x2 - x1 + 1.
            top_left = pred_bboxes[:, 0:2]
            box_wh = pred_bboxes[:, 2:4] - pred_bboxes[:, 0:2] + 1
            bbox_tlwh = paddle.concat((top_left, box_wh), axis=1)

            crops, pred_scores = get_crops(
                pred_bboxes, ori_image, pred_scores, w=64, h=192)
            if len(crops) > 0:
                features = self.reid(paddle.to_tensor(crops))
                for idx, score in enumerate(pred_scores):
                    detections.append(
                        Detection(bbox_tlwh[idx], score, features[idx]))

        # The tracker is advanced even when there is nothing to associate.
        self.tracker.predict()
        return self.tracker.update(detections)

    def get_pred(self):
        """Return the tracker output for the current inputs."""
        return self._forward()
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from ppdet.modeling.mot.utils import scale_coords
from ppdet.core.workspace import register, create
from .meta_arch import BaseArch
__all__ = ['JDE']
@register
class JDE(BaseArch):
    """
    JDE network, see https://arxiv.org/abs/1909.12605v1

    Args:
        detector (object): detector model instance
        reid (object): reid model instance
        tracker (object): tracker instance
        test_mode (str): 'detection', 'embedding' or 'tracking'
    """
    __category__ = 'architecture'

    def __init__(self,
                 detector='YOLOv3',
                 reid='JDEEmbeddingHead',
                 tracker='JDETracker',
                 test_mode='detection'):
        super(JDE, self).__init__()
        self.detector = detector
        self.reid = reid
        self.tracker = tracker
        self.test_mode = test_mode

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Create the sub-modules; the ReID head receives the output shape
        of the detector's neck as `input_shape`."""
        detector = create(cfg['detector'])
        reid = create(cfg['reid'], input_shape=detector.neck.out_shape)
        tracker = create(cfg['tracker'])
        return {
            "detector": detector,
            "reid": reid,
            "tracker": tracker,
        }

    def _forward(self):
        """
        Run the detector, then either compute the training losses or produce
        the output selected by `test_mode`.

        Raises:
            ValueError: in evaluation, when `test_mode` is not one of
                'detection', 'embedding' or 'tracking'.
        """
        det_outs = self.detector(self.inputs)

        if self.training:
            emb_feats = det_outs['emb_feats']
            det_losses = det_outs['det_losses']
            return self.reid(emb_feats, self.inputs, det_losses['loss_confs'],
                             det_losses['loss_boxes'])

        if self.test_mode == 'detection':
            return {
                'bbox': det_outs['bbox'],
                'bbox_num': det_outs['bbox_num'],
            }

        if self.test_mode == 'embedding':
            return self.reid(det_outs['emb_feats'], self.inputs, test_emb=True)

        if self.test_mode != 'tracking':
            raise ValueError("Unknown test_mode {}.".format(self.test_mode))

        # Tracking: pair every kept box with its embedding and update tracks.
        emb_outs = self.reid(det_outs['emb_feats'], self.inputs)
        boxes_idx = det_outs['boxes_idx']
        bbox = det_outs['bbox']

        input_shape = self.inputs['image'].shape[2:]
        im_shape = self.inputs['im_shape']
        scale_factor = self.inputs['scale_factor']
        # Written back in place, so the detector's 'bbox' tensor is modified.
        bbox[:, 2:] = scale_coords(bbox[:, 2:], input_shape, im_shape,
                                   scale_factor)

        nms_keep_idx = det_outs['nms_keep_idx']
        # Coordinates first, score last.
        pred_dets = paddle.concat((bbox[:, 2:], bbox[:, 1:2]), axis=1)

        # Select embeddings by `boxes_idx` first, then by `nms_keep_idx`.
        thresholded_embs = paddle.gather_nd(emb_outs, boxes_idx)
        pred_embs = paddle.gather_nd(thresholded_embs, nms_keep_idx)

        return self.tracker.update(pred_dets, pred_embs)

    def get_loss(self):
        """Return the training losses."""
        return self._forward()

    def get_pred(self):
        """Return the evaluation output selected by `test_mode`."""
        return self._forward()
......@@ -19,7 +19,8 @@ class YOLOv3(BaseArch):
neck='YOLOv3FPN',
yolo_head='YOLOv3Head',
post_process='BBoxPostProcess',
data_format='NCHW'):
data_format='NCHW',
for_mot=False):
"""
YOLOv3 network, see https://arxiv.org/abs/1804.02767
......@@ -29,12 +30,14 @@ class YOLOv3(BaseArch):
yolo_head (nn.Layer): anchor_head instance
bbox_post_process (object): `BBoxPostProcess` instance
data_format (str): data format, NCHW or NHWC
for_mot (bool): whether return other features used in tracking model
"""
super(YOLOv3, self).__init__(data_format=data_format)
self.backbone = backbone
self.neck = neck
self.yolo_head = yolo_head
self.post_process = post_process
self.for_mot = for_mot
@classmethod
def from_config(cls, cfg, *args, **kwargs):
......@@ -57,21 +60,44 @@ class YOLOv3(BaseArch):
def _forward(self):
body_feats = self.backbone(self.inputs)
body_feats = self.neck(body_feats)
neck_feats = self.neck(body_feats, self.for_mot)
if isinstance(neck_feats, dict):
assert self.for_mot == True
emb_feats = neck_feats['emb_feats']
neck_feats = neck_feats['yolo_feats']
if self.training:
return self.yolo_head(body_feats, self.inputs)
yolo_losses = self.yolo_head(neck_feats, self.inputs)
if self.for_mot:
return {'det_losses': yolo_losses, 'emb_feats': emb_feats}
else:
return yolo_losses
else:
yolo_head_outs = self.yolo_head(body_feats)
bbox, bbox_num = self.post_process(
yolo_head_outs, self.yolo_head.mask_anchors,
self.inputs['im_shape'], self.inputs['scale_factor'])
return bbox, bbox_num
yolo_head_outs = self.yolo_head(neck_feats)
if self.for_mot:
boxes_idx, bbox, bbox_num, nms_keep_idx = self.post_process(
yolo_head_outs, self.yolo_head.mask_anchors)
output = {
'bbox': bbox,
'bbox_num': bbox_num,
'boxes_idx': boxes_idx,
'nms_keep_idx': nms_keep_idx,
'emb_feats': emb_feats,
}
else:
bbox, bbox_num = self.post_process(
yolo_head_outs, self.yolo_head.mask_anchors,
self.inputs['im_shape'], self.inputs['scale_factor'])
output = {'bbox': bbox, 'bbox_num': bbox_num}
return output
def get_loss(self):
return self._forward()
def get_pred(self):
bbox_pred, bbox_num = self._forward()
output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
return output
return self._forward()
......@@ -35,6 +35,7 @@ class ConvBNLayer(nn.Layer):
norm_type='bn',
norm_decay=0.,
act="leaky",
freeze_norm=False,
data_format='NCHW',
name=''):
"""
......@@ -50,6 +51,7 @@ class ConvBNLayer(nn.Layer):
norm_type (str): batch norm type, default bn
norm_decay (str): decay for weight and bias of batch norm layer, default 0.
act (str): activation function type, default 'leaky', which means leaky_relu
freeze_norm (bool): whether to freeze norm, default False
data_format (str): data format, NCHW or NHWC
"""
super(ConvBNLayer, self).__init__()
......@@ -67,6 +69,7 @@ class ConvBNLayer(nn.Layer):
ch_out,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
data_format=data_format)
self.act = act
......@@ -89,6 +92,7 @@ class DownSample(nn.Layer):
padding=1,
norm_type='bn',
norm_decay=0.,
freeze_norm=False,
data_format='NCHW'):
"""
downsample layer
......@@ -101,6 +105,7 @@ class DownSample(nn.Layer):
padding (int): padding size, default 1
norm_type (str): batch norm type, default bn
norm_decay (str): decay for weight and bias of batch norm layer, default 0.
freeze_norm (bool): whether to freeze norm, default False
data_format (str): data format, NCHW or NHWC
"""
......@@ -114,6 +119,7 @@ class DownSample(nn.Layer):
padding=padding,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
data_format=data_format)
self.ch_out = ch_out
......@@ -128,6 +134,7 @@ class BasicBlock(nn.Layer):
ch_out,
norm_type='bn',
norm_decay=0.,
freeze_norm=False,
data_format='NCHW'):
"""
BasicBlock layer of DarkNet
......@@ -137,6 +144,7 @@ class BasicBlock(nn.Layer):
ch_out (int): output channel
norm_type (str): batch norm type, default bn
norm_decay (str): decay for weight and bias of batch norm layer, default 0.
freeze_norm (bool): whether to freeze norm, default False
data_format (str): data format, NCHW or NHWC
"""
......@@ -150,6 +158,7 @@ class BasicBlock(nn.Layer):
padding=0,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
data_format=data_format)
self.conv2 = ConvBNLayer(
ch_in=ch_out,
......@@ -159,6 +168,7 @@ class BasicBlock(nn.Layer):
padding=1,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
data_format=data_format)
def forward(self, inputs):
......@@ -175,6 +185,7 @@ class Blocks(nn.Layer):
count,
norm_type='bn',
norm_decay=0.,
freeze_norm=False,
name=None,
data_format='NCHW'):
"""
......@@ -186,6 +197,7 @@ class Blocks(nn.Layer):
count (int): number of BasicBlock layer
norm_type (str): batch norm type, default bn
norm_decay (str): decay for weight and bias of batch norm layer, default 0.
freeze_norm (bool): whether to freeze norm, default False
name (str): layer name
data_format (str): data format, NCHW or NHWC
"""
......@@ -196,6 +208,7 @@ class Blocks(nn.Layer):
ch_out,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
data_format=data_format)
self.res_out_list = []
for i in range(1, count):
......@@ -207,6 +220,7 @@ class Blocks(nn.Layer):
ch_out,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
data_format=data_format))
self.res_out_list.append(res_out)
self.ch_out = ch_out
......@@ -233,6 +247,7 @@ class DarkNet(nn.Layer):
num_stages=5,
norm_type='bn',
norm_decay=0.,
freeze_norm=False,
data_format='NCHW'):
"""
Darknet, see https://pjreddie.com/darknet/yolo/
......@@ -261,6 +276,7 @@ class DarkNet(nn.Layer):
padding=1,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
data_format=data_format)
self.downsample0 = DownSample(
......@@ -268,6 +284,7 @@ class DarkNet(nn.Layer):
ch_out=32 * 2,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
data_format=data_format)
self._out_channels = []
......@@ -284,6 +301,7 @@ class DarkNet(nn.Layer):
stage,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
data_format=data_format,
name=name))
self.darknet_conv_block_list.append(conv_block)
......@@ -298,6 +316,7 @@ class DarkNet(nn.Layer):
ch_out=32 * (2**(i + 2)),
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
data_format=data_format))
self.downsample_list.append(downsample)
......
......@@ -836,6 +836,111 @@ class TTFBox(object):
return results, paddle.shape(results)[0:1]
@register
@serializable
class JDEBox(object):
    """
    Decode the raw head outputs of a JDE model into boxes and foreground
    scores, keeping only candidates whose score exceeds `conf_thresh`.

    Args:
        num_classes (int): number of classes, default 1
        conf_thresh (float): score threshold for keeping a box, default 0.3
        downsample_ratio (int): stride used for the first head output; level
            `i` uses `downsample_ratio // 2**i`, default 32
    """
    __shared__ = ['num_classes']

    def __init__(self, num_classes=1, conf_thresh=0.3, downsample_ratio=32):
        self.num_classes = num_classes
        self.conf_thresh = conf_thresh
        self.downsample_ratio = downsample_ratio

    def generate_anchor(self, nGh, nGw, anchor_wh):
        """
        Build, for every anchor and grid cell, the vector
        (cell x, cell y, anchor w, anchor h).

        Args:
            nGh: grid height
            nGw: grid width
            anchor_wh (ndarray): one (w, h) row per anchor

        Returns:
            Tensor laid out as nA x nGh x nGw x 4.
        """
        num_anchors = len(anchor_wh)
        grid_y, grid_x = paddle.meshgrid(
            [paddle.arange(nGh), paddle.arange(nGw)])
        # 2 x nGh x nGw, x coordinate first
        xy_mesh = paddle.stack((grid_x, grid_y), axis=0).cast(dtype='float32')
        xy_meshes = paddle.tile(xy_mesh, [num_anchors, 1, 1, 1])

        # Repeat each anchor size over the whole grid (numpy side).
        wh_mesh = anchor_wh[:, :, None][:, :, :, None]
        wh_mesh = wh_mesh.repeat(int(nGh), axis=-2).repeat(int(nGw), axis=-1)
        wh_mesh = paddle.to_tensor(wh_mesh.astype(np.float32))

        # nA x 4 x nGh x nGw -> nA x nGh x nGw x 4
        anchor_mesh = paddle.concat([xy_meshes, wh_mesh], axis=1)
        return paddle.transpose(anchor_mesh, [0, 2, 3, 1])

    def decode_delta(self, delta, fg_anchor_list):
        """
        Apply (dx, dy, dw, dh) regression deltas to (x, y, w, h) anchors.

        Returns:
            Tensor with one (x1, y1, x2, y2) row per input row.
        """
        anc_x, anc_y = fg_anchor_list[:, 0], fg_anchor_list[:, 1]
        anc_w, anc_h = fg_anchor_list[:, 2], fg_anchor_list[:, 3]
        dx, dy, dw, dh = delta[:, 0], delta[:, 1], delta[:, 2], delta[:, 3]

        # Centre is shifted proportionally to the anchor size; the size is
        # scaled exponentially.
        ctr_x = anc_w * dx + anc_x
        ctr_y = anc_h * dy + anc_y
        box_w = anc_w * paddle.exp(dw)
        box_h = anc_h * paddle.exp(dh)

        x1 = ctr_x - box_w * 0.5
        y1 = ctr_y - box_h * 0.5
        x2 = ctr_x + box_w * 0.5
        y2 = ctr_y + box_h * 0.5
        return paddle.stack([x1, y1, x2, y2], axis=1)

    def decode_delta_map(self, delta_map, anchors):
        """
        Decode a whole map of deltas against the anchor mesh of its grid.

        Args:
            delta_map (Tensor): deltas with five axes; the grid height and
                width are read from the 3rd and 4th axis
            anchors (ndarray): one (w, h) row per anchor

        Returns:
            Tensor reshaped to [nB, -1, 4].
        """
        map_shape = paddle.shape(delta_map)
        map_shape.stop_gradient = True
        nB, _, nGh, nGw, _ = map_shape[:]

        anchor_mesh = self.generate_anchor(nGh, nGw, anchors)
        # only support bs=1
        anchor_mesh = paddle.unsqueeze(anchor_mesh, 0)

        flat_deltas = paddle.reshape(delta_map, shape=[-1, 4])
        flat_anchors = paddle.reshape(anchor_mesh, shape=[-1, 4])
        decoded = self.decode_delta(flat_deltas, flat_anchors)
        return paddle.reshape(decoded, shape=[nB, -1, 4])

    def __call__(self, yolo_head_out, anchors):
        """
        Decode every head level and keep the boxes above `conf_thresh`.

        Args:
            yolo_head_out (list[Tensor]): one raw output per head level
            anchors (list): per level, a flat sequence w0, h0, w1, h1, ...

        Returns:
            tuple: (boxes_idx, yolo_boxes_out, yolo_scores_out). When no box
            passes the threshold, a single all-zero box and score are
            returned together with the index [[0]].
        """
        level_preds = []
        for level, head_out in enumerate(yolo_head_out):
            stride = self.downsample_ratio // 2**level
            anc_w, anc_h = anchors[level][0::2], anchors[level][1::2]
            # Anchor sizes expressed in grid cells of this level.
            anchor_vec = np.stack((anc_w, anc_h), axis=1) / stride
            num_anchors = len(anc_w)

            out_shape = paddle.shape(head_out)
            out_shape.stop_gradient = True
            nB, nGh, nGw = out_shape[0], out_shape[-2], out_shape[-1]

            # Move the per-anchor channel axis last.
            p = head_out.reshape(
                (nB, num_anchors, self.num_classes + 5, nGh, nGw))
            p = paddle.transpose(p, perm=[0, 1, 3, 4, 2])

            # Channels 0..3 are the box deltas; decoded boxes are scaled
            # back by the stride.
            boxes = self.decode_delta_map(p[:, :, :, :, :4], anchor_vec)
            boxes = boxes * stride

            # Channels 4..5 are a two-way logit pair; softmax entry 1 is
            # used as the score.
            conf_logits = paddle.transpose(
                p[:, :, :, :, 4:6], perm=[0, 4, 1, 2, 3])
            fg_prob = F.softmax(conf_logits, axis=1)[:, 1, :, :, :]
            fg_prob = fg_prob.unsqueeze(-1)
            scores = paddle.reshape(fg_prob, shape=[nB, -1, 1])

            level_preds.append(paddle.concat([boxes, scores], axis=-1))

        yolo_boxes_pred = paddle.concat(level_preds, axis=1)
        boxes_idx = paddle.nonzero(
            yolo_boxes_pred[:, :, -1] > self.conf_thresh)
        boxes_idx.stop_gradient = True

        if boxes_idx.shape[0] == 0:  # TODO: deploy
            # Nothing above the threshold: emit one dummy zero box.
            boxes_idx = paddle.to_tensor(np.array([[0]], dtype='int64'))
            yolo_boxes_out = paddle.to_tensor(
                np.array(
                    [[[0.0, 0.0, 0.0, 0.0]]], dtype='float32'))
            yolo_scores_out = paddle.to_tensor(
                np.array(
                    [[[0.0]]], dtype='float32'))
            return boxes_idx, yolo_boxes_out, yolo_scores_out

        kept = paddle.gather_nd(yolo_boxes_pred, boxes_idx)
        # `nB` is the batch size read from the last head level.
        yolo_boxes_out = paddle.reshape(kept[:, :4], shape=[nB, -1, 4])
        yolo_scores_out = paddle.reshape(kept[:, 4:5], shape=[nB, 1, -1])
        # Drop the batch column of the indices.
        boxes_idx = boxes_idx[:, 1:]
        return boxes_idx, yolo_boxes_out, yolo_scores_out
@register
@serializable
class MaskMatrixNMS(object):
......
......@@ -20,6 +20,7 @@ from . import fcos_loss
from . import solov2_loss
from . import ctfocal_loss
from . import keypoint_loss
from . import jde_loss
from .yolo_loss import *
from .iou_aware_loss import *
......@@ -29,3 +30,4 @@ from .fcos_loss import *
from .solov2_loss import *
from .ctfocal_loss import *
from .keypoint_loss import *
from .jde_loss import *
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register
__all__ = ['JDEDetectionLoss', 'JDEEmbeddingLoss', 'JDELoss']
@register
class JDEDetectionLoss(nn.Layer):
    """
    Detection part of the JDE loss: a cross-entropy confidence loss and a
    smooth-L1 box loss, computed separately for every head level.

    Args:
        num_classes (int): number of classes, default 1
    """
    __shared__ = ['num_classes']

    def __init__(self, num_classes=1):
        super(JDEDetectionLoss, self).__init__()
        self.num_classes = num_classes

    def det_loss(self, p_det, anchor, t_conf, t_box):
        """
        Compute the confidence and box losses of one head level.

        Args:
            p_det (Tensor): raw head output of this level
            anchor (list): anchors of this level; only its length is used
            t_conf (Tensor): confidence targets; -1 entries are ignored and
                entries > 0 are treated as foreground
            t_box (Tensor): box regression targets

        Returns:
            tuple: (loss_conf, loss_box)
        """
        det_shape = paddle.shape(p_det)
        det_shape.stop_gradient = True
        nB, nGh, nGw = det_shape[0], det_shape[-2], det_shape[-1]
        num_anchors = len(anchor)

        # Split channels per anchor and move them to the last axis.
        p_det = paddle.reshape(
            p_det, [nB, num_anchors, self.num_classes + 5, nGh, nGw])
        p_det = p_det.transpose((0, 1, 3, 4, 2))

        # 1. loss_conf: cross_entropy over the two confidence channels
        conf_logits = paddle.reshape(p_det[:, :, :, :, 4:6], [-1, 2])
        conf_target = paddle.cast(t_conf.flatten(), dtype="int64")
        conf_target.stop_gradient = True
        loss_conf = F.cross_entropy(
            conf_logits, conf_target, ignore_index=-1, reduction='mean')
        loss_conf.stop_gradient = False

        # 2. loss_box: smooth_l1_loss restricted to foreground entries
        box_pred = paddle.reshape(p_det[:, :, :, :, :4], [-1, 4])
        box_target = paddle.reshape(t_box, [-1, 4])
        fg_inds = paddle.nonzero(conf_target > 0).flatten()
        if fg_inds.numel() > 0:
            reg_delta = paddle.gather(box_pred, fg_inds)
            reg_target = paddle.gather(box_target, fg_inds)
        else:
            # No foreground: use zero tensors so the loss is still defined.
            reg_delta = paddle.to_tensor([0, 0, 0, 0], dtype='float32')
            reg_delta.stop_gradient = False
            reg_target = paddle.to_tensor([0, 0, 0, 0], dtype='float32')
        reg_target.stop_gradient = True
        loss_box = F.smooth_l1_loss(
            reg_delta, reg_target, reduction='mean', delta=1.0)
        loss_box.stop_gradient = False

        return loss_conf, loss_box

    def forward(self, det_outs, targets, anchors):
        """
        Args:
            det_outs (list[Tensor]): output from detection head, each one
                is a 4-D Tensor with shape [N, C, H, W].
            targets (dict): contains 'im_id', 'gt_bbox', 'gt_ide', 'image',
                'im_shape', 'scale_factor' and 'tbox', 'tconf', 'tide' of
                each FPN level.
            anchors (list[list]): anchor setting of JDE model, N row M col, N is
                the anchor levels(FPN levels), M is the anchor scales each
                level.

        Returns:
            dict: 'loss_confs' and 'loss_boxes', each a list with one loss
            per level.
        """
        assert len(det_outs) == len(anchors)
        loss_confs, loss_boxes = [], []
        for level, (level_out, level_anchor) in enumerate(
                zip(det_outs, anchors)):
            level_conf = targets['tconf{}'.format(level)]
            level_box = targets['tbox{}'.format(level)]
            loss_conf, loss_box = self.det_loss(level_out, level_anchor,
                                                level_conf, level_box)
            loss_confs.append(loss_conf)
            loss_boxes.append(loss_box)
        return {'loss_confs': loss_confs, 'loss_boxes': loss_boxes}
@register
class JDEEmbeddingLoss(nn.Layer):
    """
    Identity (embedding) part of the JDE loss: embeddings at foreground
    locations are classified into identities with a cross-entropy loss.
    """

    def __init__(self):
        super(JDEEmbeddingLoss, self).__init__()
        # Dummy parameter used to build a zero loss when there is nothing to
        # supervise.
        self.phony = self.create_parameter(shape=[1], dtype="float32")

    def emb_loss(self, p_ide, t_conf, t_ide, emb_scale, classifier):
        """
        Compute the identity loss of one head level.

        Args:
            p_ide (Tensor): embedding map; axis 1 is the embedding dimension
            t_conf (Tensor): confidence targets, > 0 marks foreground
            t_ide (Tensor): identity targets, -1 marks "no identity"
            emb_scale: factor applied to the normalized embeddings
            classifier (callable): maps embeddings to identity logits

        Returns:
            Tensor: the identity loss of this level.
        """
        emb_dim = p_ide.shape[1]
        # Channel-last layout, then one embedding per row.
        p_ide_flatten = paddle.reshape(
            p_ide.transpose((0, 2, 3, 1)), [-1, emb_dim])

        fg_mask = paddle.cast(t_conf > 0, dtype="int64")
        fg_mask.stop_gradient = True
        emb_mask = fg_mask.max(1).flatten()
        emb_mask_inds = paddle.nonzero(emb_mask > 0).flatten()
        emb_mask_inds.stop_gradient = True

        # use max(1) to decide the id, TODO: more reasonable strategy
        t_ide_flatten = paddle.cast(t_ide.max(1).flatten(), dtype="int64")
        valid_inds = paddle.nonzero(t_ide_flatten != -1).flatten()

        if emb_mask_inds.numel() == 0 or valid_inds.numel() == 0:
            # A plain constant zero tensor breaks gradient backward, so the
            # zero is derived from a parameter instead.
            loss_ide = self.phony * 0  # todo
        else:
            embedding = paddle.gather(p_ide_flatten, emb_mask_inds)
            embedding = emb_scale * F.normalize(embedding)
            logits = classifier(embedding)
            ide_target = paddle.gather(t_ide_flatten, emb_mask_inds)
            loss_ide = F.cross_entropy(
                logits, ide_target, ignore_index=-1, reduction='mean')
        loss_ide.stop_gradient = False

        return loss_ide

    def forward(self, ide_outs, targets, emb_scale, classifier):
        """
        Compute the identity loss of every level.

        Args:
            ide_outs (list[Tensor]): one embedding map per level
            targets (dict): provides 'tconf{i}' and 'tide{i}' for level i
            emb_scale: factor applied to the normalized embeddings
            classifier (callable): maps embeddings to identity logits

        Returns:
            list: one loss per level.
        """
        loss_ides = []
        for level, level_emb in enumerate(ide_outs):
            level_conf = targets['tconf{}'.format(level)]
            level_ide = targets['tide{}'.format(level)]
            loss_ides.append(
                self.emb_loss(level_emb, level_conf, level_ide, emb_scale,
                              classifier))
        return loss_ides
@register
class JDELoss(nn.Layer):
    """
    Combine the per-level confidence, box and identity losses of JDE into a
    single training loss.
    """

    def __init__(self):
        super(JDELoss, self).__init__()

    def forward(self, loss_confs, loss_boxes, loss_ides, loss_params_cls,
                loss_params_reg, loss_params_ide, targets):
        """
        Args:
            loss_confs (list): confidence loss of each level
            loss_boxes (list): box loss of each level
            loss_ides (list): identity loss of each level
            loss_params_cls (list): per-level callables applied to the
                confidence loss
            loss_params_reg (list): per-level callables applied to the box
                loss
            loss_params_ide (list): per-level callables applied to the
                identity loss
            targets (dict): must provide 'gt_bbox'

        Returns:
            dict: 'loss_conf', 'loss_box' and 'loss_ide' (plain sums over
            levels), 'loss' (sum of the transformed per-level losses) and
            'nTargets'.
        """
        assert len(loss_confs) == len(loss_boxes) == len(loss_ides)
        assert len(loss_params_cls) == len(loss_params_reg) == len(
            loss_params_ide)
        assert len(loss_confs) == len(loss_params_cls)

        # Entries of gt_bbox whose sum over axis 2 is non-zero, divided by
        # the batch size.
        gt_bbox = targets['gt_bbox']
        batchsize = gt_bbox.shape[0]
        num_nonzero = paddle.nonzero(paddle.sum(gt_bbox, axis=2)).shape[0]
        nTargets = paddle.to_tensor(num_nonzero / batchsize, dtype='float32')
        nTargets.stop_gradient = True

        jde_losses = [
            conf_fn(conf) + box_fn(box) + ide_fn(ide)
            for conf, box, ide, conf_fn, box_fn, ide_fn in zip(
                loss_confs, loss_boxes, loss_ides, loss_params_cls,
                loss_params_reg, loss_params_ide)
        ]

        return {
            "loss_conf": sum(loss_confs),
            "loss_box": sum(loss_boxes),
            "loss_ide": sum(loss_ides),
            "loss": sum(jde_losses),
            "nTargets": nTargets,
        }
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import matching
from . import tracker
from . import motion
from . import visualization
from . import utils
from .matching import *
from .tracker import *
from .motion import *
from .visualization import *
from .utils import *
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import jde_matching
from . import deepsort_matching
from .jde_matching import *
from .deepsort_matching import *
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is borrowed from https://github.com/nwojke/deep_sort/tree/master/deep_sort
"""
import numpy as np
from scipy.optimize import linear_sum_assignment
from ..motion import kalman_filter
# Large cost constant.
# NOTE(review): presumably marks associations that must never be selected by
# the matching code -- confirm against its users in the rest of the file.
INFTY_COST = 1e+5
# Names exported by `from <this module> import *`.
__all__ = [
'iou_1toN',
'iou_cost',
'_nn_euclidean_distance',
'_nn_cosine_distance',
'NearestNeighborDistanceMetric',
'min_cost_matching',
'matching_cascade',
'gate_cost_matrix',
]
def iou_1toN(bbox, candidates):
    """
    Compute the intersection over union (IoU) of one box with N candidates.

    Args:
        bbox (ndarray): A bounding box in format
            `(top left x, top left y, width, height)`.
        candidates (ndarray): A matrix of candidate bounding boxes (one per
            row) in the same format as `bbox`.

    Returns:
        ious (ndarray): The IoU in [0, 1] between `bbox` and each candidate.
            A higher score means a larger fraction of `bbox` is occluded by
            the candidate.
    """
    box_tl = bbox[:2]
    box_br = bbox[:2] + bbox[2:]
    cand_tl = candidates[:, :2]
    cand_br = candidates[:, :2] + candidates[:, 2:]

    # Corners of the overlap rectangle, one row per candidate; the single
    # box broadcasts against every candidate row.
    inter_tl = np.maximum(box_tl, cand_tl)
    inter_br = np.minimum(box_br, cand_br)
    # Negative extents mean "no overlap" and are clamped to zero.
    inter_wh = np.maximum(0., inter_br - inter_tl)

    inter_area = inter_wh.prod(axis=1)
    box_area = bbox[2:].prod()
    cand_area = candidates[:, 2:].prod(axis=1)
    return inter_area / (box_area + cand_area - inter_area)
def iou_cost(tracks, detections, track_indices=None, detection_indices=None):
    """
    IoU distance metric.

    Args:
        tracks (list[Track]): A list of tracks.
        detections (list[Detection]): A list of detections.
        track_indices (Optional[list[int]]): A list of indices to tracks that
            should be matched. Defaults to all `tracks`.
        detection_indices (Optional[list[int]]): A list of indices to
            detections that should be matched. Defaults to all `detections`.

    Returns:
        cost_matrix (ndarray): A cost matrix of shape len(track_indices),
            len(detection_indices) where entry (i, j) is
            `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`.
            Rows of tracks whose `time_since_update` is greater than 1 are
            filled with 1e+5 instead.
    """
    if track_indices is None:
        track_indices = np.arange(len(tracks))
    if detection_indices is None:
        detection_indices = np.arange(len(detections))

    cost_matrix = np.zeros((len(track_indices), len(detection_indices)))
    # The candidate boxes are the same for every track, so they are built
    # once, and only when at least one track actually needs them.
    candidates = None
    for row, track_idx in enumerate(track_indices):
        track = tracks[track_idx]
        if track.time_since_update > 1:
            # Tracks that missed their latest update are priced out of
            # IoU matching.
            cost_matrix[row, :] = 1e+5
            continue

        bbox = track.to_tlwh()
        if candidates is None:
            candidates = np.asarray(
                [detections[i].tlwh for i in detection_indices])
        cost_matrix[row, :] = 1. - iou_1toN(bbox, candidates)
    return cost_matrix
def _nn_euclidean_distance(s, q):
"""
Compute pair-wise squared (Euclidean) distance between points in `s` and `q`.
Args:
s (ndarray): Sample points: an NxM matrix of N samples of dimensionality M.
q (ndarray): Query points: an LxM matrix of L samples of dimensionality M.
Returns:
distances (ndarray): A vector of length M that contains for each entry in `q` the
smallest Euclidean distance to a sample in `s`.
"""
s, q = np.asarray(s), np.asarray(q)
if len(s) == 0 or len(q) == 0:
return np.zeros((len(s), len(q)))
s2, q2 = np.square(s).sum(axis=1), np.square(q).sum(axis=1)
distances = -2. * np.dot(s, q.T) + s2[:, None] + q2[None, :]
distances = np.clip(distances, 0., float(np.inf))
return np.maximum(0.0, distances.min(axis=0))
def _nn_cosine_distance(s, q):
"""
Compute pair-wise cosine distance between points in `s` and `q`.
Args:
s (ndarray): Sample points: an NxM matrix of N samples of dimensionality M.
q (ndarray): Query points: an LxM matrix of L samples of dimensionality M.
Returns:
distances (ndarray): A vector of length M that contains for each entry in `q` the
smallest Euclidean distance to a sample in `s`.
"""
s = np.asarray(s) / np.linalg.norm(s, axis=1, keepdims=True)
q = np.asarray(q) / np.linalg.norm(q, axis=1, keepdims=True)
distances = 1. - np.dot(s, q.T)
return distances.min(axis=0)
class NearestNeighborDistanceMetric(object):
    """
    A nearest neighbor distance metric that, for each target, returns
    the closest distance to any sample that has been observed so far.

    Args:
        metric (str): Either "euclidean" or "cosine".
        matching_threshold (float): The matching threshold. Samples with
            larger distance are considered an invalid match.
        budget (Optional[int]): If not None, fix samples per class to at most
            this number. Removes the oldest samples when the budget is
            reached.

    Attributes:
        samples (Dict[int -> List[ndarray]]): A dictionary that maps from
            target identities to the list of samples that have been observed
            so far.
    """

    def __init__(self, metric, matching_threshold, budget=None):
        if metric not in ("euclidean", "cosine"):
            raise ValueError(
                "Invalid metric; must be either 'euclidean' or 'cosine'")
        if metric == "euclidean":
            self._metric = _nn_euclidean_distance
        else:
            self._metric = _nn_cosine_distance
        self.matching_threshold = matching_threshold
        self.budget = budget
        self.samples = {}

    def partial_fit(self, features, targets, active_targets):
        """
        Update the distance metric with new data.

        Args:
            features (ndarray): An NxM matrix of N features of
                dimensionality M.
            targets (ndarray): An integer array of associated target
                identities.
            active_targets (List[int]): A list of targets that are currently
                present in the scene.
        """
        for feature, target in zip(features, targets):
            history = self.samples.setdefault(target, [])
            history.append(feature)
            if self.budget is not None:
                # Keep only the most recent `budget` samples.
                self.samples[target] = history[-self.budget:]
        # Forget every target that is no longer active.
        self.samples = {
            target: self.samples[target]
            for target in active_targets
        }

    def distance(self, features, targets):
        """
        Compute distance between features and targets.

        Args:
            features (ndarray): An NxM matrix of N features of
                dimensionality M.
            targets (list[int]): A list of targets to match the given
                `features` against.

        Returns:
            cost_matrix (ndarray): a cost matrix of shape len(targets),
                len(features), where element (i, j) contains the closest
                distance (as defined by the configured metric) between
                `targets[i]` and `features[j]`.
        """
        cost_matrix = np.zeros((len(targets), len(features)))
        # Each row is a view into the matrix, filled in place per target.
        for row, target in zip(cost_matrix, targets):
            row[:] = self._metric(self.samples[target], features)
        return cost_matrix
def min_cost_matching(distance_metric,
                      max_distance,
                      tracks,
                      detections,
                      track_indices=None,
                      detection_indices=None):
    """
    Solve the track/detection linear assignment problem.

    Args:
        distance_metric :
            Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray
            Receives the tracks, the detections, N track indices and M
            detection indices and returns the NxM cost matrix, where entry
            (i, j) is the association cost between the i-th given track index
            and the j-th given detection index.
        max_distance (float): Gating threshold. Associations with a larger
            cost are disregarded.
        tracks (list[Track]): Predicted tracks at the current time step.
        detections (list[Detection]): Detections at the current time step.
        track_indices (list[int]): Maps rows of the cost matrix to entries of
            `tracks`. Defaults to all tracks.
        detection_indices (List[int]): Maps columns of the cost matrix to
            entries of `detections`. Defaults to all detections.

    Returns:
        A tuple (List[(int, int)], List[int], List[int]) with:
        * matched (track index, detection index) pairs,
        * unmatched track indices,
        * unmatched detection indices.
    """
    if track_indices is None:
        track_indices = np.arange(len(tracks))
    if detection_indices is None:
        detection_indices = np.arange(len(detections))

    if len(detection_indices) == 0 or len(track_indices) == 0:
        # Nothing to match.
        return [], track_indices, detection_indices

    cost_matrix = distance_metric(tracks, detections, track_indices,
                                  detection_indices)
    # Clamp over-threshold costs just above the gate so they are rejected
    # again after the assignment has been solved.
    cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5
    assigned_rows, assigned_cols = linear_sum_assignment(cost_matrix)

    unmatched_detections = [
        det_idx for col, det_idx in enumerate(detection_indices)
        if col not in assigned_cols
    ]
    unmatched_tracks = [
        trk_idx for row, trk_idx in enumerate(track_indices)
        if row not in assigned_rows
    ]

    matches = []
    for row, col in zip(assigned_rows, assigned_cols):
        trk_idx, det_idx = track_indices[row], detection_indices[col]
        if cost_matrix[row, col] > max_distance:
            unmatched_tracks.append(trk_idx)
            unmatched_detections.append(det_idx)
            continue
        matches.append((trk_idx, det_idx))
    return matches, unmatched_tracks, unmatched_detections
def matching_cascade(distance_metric,
                     max_distance,
                     cascade_depth,
                     tracks,
                     detections,
                     track_indices=None,
                     detection_indices=None):
    """
    Run the matching cascade: tracks are matched level by level, ordered by
    the number of frames since their last update.

    Args:
        distance_metric :
            Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray
            Receives the tracks, the detections, N track indices and M
            detection indices and returns the NxM cost matrix, where entry
            (i, j) is the association cost between the i-th given track index
            and the j-th given detection index.
        max_distance (float): Gating threshold. Associations with a larger
            cost are disregarded.
        cascade_depth (int): The cascade depth, should be set to the maximum
            track age.
        tracks (list[Track]): Predicted tracks at the current time step.
        detections (list[Detection]): Detections at the current time step.
        track_indices (list[int]): Track indices to consider. Defaults to all
            tracks.
        detection_indices (List[int]): Detection indices to consider.
            Defaults to all detections.

    Returns:
        A tuple (List[(int, int)], List[int], List[int]) with:
        * matched (track index, detection index) pairs,
        * unmatched track indices,
        * unmatched detection indices.
    """
    if track_indices is None:
        track_indices = list(range(len(tracks)))
    if detection_indices is None:
        detection_indices = list(range(len(detections)))

    remaining_detections = detection_indices
    matches = []
    for age in range(1, cascade_depth + 1):
        if len(remaining_detections) == 0:
            # No detections left
            break

        # Tracks whose last update happened exactly `age` frames ago.
        level_tracks = []
        for k in track_indices:
            if tracks[k].time_since_update == age:
                level_tracks.append(k)
        if len(level_tracks) == 0:
            # Nothing to match at this level
            continue

        level_matches, _, remaining_detections = min_cost_matching(
            distance_metric, max_distance, tracks, detections, level_tracks,
            remaining_detections)
        matches += level_matches

    matched_tracks = set(k for k, _ in matches)
    unmatched_tracks = list(set(track_indices) - matched_tracks)
    return matches, unmatched_tracks, remaining_detections
def gate_cost_matrix(kf,
                     cost_matrix,
                     tracks,
                     detections,
                     track_indices,
                     detection_indices,
                     gated_cost=INFTY_COST,
                     only_position=False):
    """
    Overwrite infeasible entries of the cost matrix, using the gating distance
    reported by the Kalman filter for each track's state distribution.

    Args:
        kf (object): The Kalman filter; must provide `gating_distance`.
        cost_matrix (ndarray): The NxM cost matrix, N being the number of
            track indices and M the number of detection indices, such that
            entry (i, j) is the cost between `tracks[track_indices[i]]` and
            `detections[detection_indices[j]]`. Modified in place.
        tracks (list[Track]): Predicted tracks at the current time step.
        detections (list[Detection]): Detections at the current time step.
        track_indices (List[int]): Maps rows of `cost_matrix` to `tracks`.
        detection_indices (List[int]): Maps columns of `cost_matrix` to
            `detections`.
        gated_cost (Optional[float]): Value written into infeasible entries.
            Defaults to INFTY_COST.
        only_position (Optional[bool]): If True, gating uses 2 degrees of
            freedom instead of 4 and the flag is forwarded to the filter.
            Default False.

    Returns:
        The same `cost_matrix` object with gated entries replaced.
    """
    if only_position:
        gating_dim = 2
    else:
        gating_dim = 4
    threshold = kalman_filter.chi2inv95[gating_dim]

    boxes = [detections[i].to_xyah() for i in detection_indices]
    measurements = np.asarray(boxes)

    for row, track_idx in enumerate(track_indices):
        track = tracks[track_idx]
        distances = kf.gating_distance(track.mean, track.covariance,
                                       measurements, only_position)
        infeasible = distances > threshold
        cost_matrix[row, infeasible] = gated_cost
    return cost_matrix
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is borrowed from https://github.com/Zhongdao/Towards-Realtime-MOT/blob/master/tracker/matching.py
"""
import scipy
import numpy as np
from scipy.spatial.distance import cdist
from ..motion import kalman_filter
__all__ = [
'merge_matches',
'linear_assignment',
'cython_bbox_ious',
'iou_distance',
'embedding_distance',
'fuse_motion',
]
def merge_matches(m1, m2, shape):
    """
    Compose two match lists that share a middle index set.

    A pair (i, k) is returned when some j exists with (i, j) in `m1` and
    (j, k) in `m2`.

    Args:
        m1 (array-like): Pairs (i, j) with i in range(O) and j in range(P).
            May be empty.
        m2 (array-like): Pairs (j, k) with j in range(P) and k in range(Q).
            May be empty.
        shape (tuple): The three sizes (O, P, Q).

    Returns:
        A tuple (match, unmatched_O, unmatched_Q): the composed pairs as a
        list of tuples, then tuples of the indices of range(O) and range(Q)
        that appear in no composed pair.
    """
    # Imported here so the submodule is loaded explicitly; a bare
    # `import scipy` does not guarantee `scipy.sparse` on every SciPy version.
    from scipy.sparse import coo_matrix

    O, P, Q = shape
    # reshape(-1, 2) keeps an empty match list two-dimensional, so the column
    # slicing below works instead of raising IndexError.
    m1 = np.asarray(m1, dtype=int).reshape(-1, 2)
    m2 = np.asarray(m2, dtype=int).reshape(-1, 2)

    M1 = coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P))
    M2 = coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q))

    # Sparse matrix product: entry (i, k) is non-zero iff a chain i -> j -> k
    # exists.
    mask = M1 @ M2
    rows, cols = mask.nonzero()
    match = list(zip(rows, cols))
    unmatched_O = tuple(set(range(O)) - {i for i, _ in match})
    unmatched_Q = tuple(set(range(Q)) - {j for _, j in match})
    return match, unmatched_O, unmatched_Q
def linear_assignment(cost_matrix, thresh):
    """
    Solve the assignment problem on `cost_matrix` with `lap.lapjv`.

    Args:
        cost_matrix (ndarray): Two-dimensional cost matrix.
        thresh (float): Passed to `lap.lapjv` as `cost_limit`.

    Returns:
        A tuple (matches, unmatched_a, unmatched_b). `matches` is an integer
        array of shape (K, 2) holding (row, column) pairs; it has shape (0, 2)
        when nothing is matched. `unmatched_a` and `unmatched_b` hold the row
        and column indices left without a partner.
    """
    if cost_matrix.size == 0:
        return np.empty(
            (0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(
                range(cost_matrix.shape[1]))

    # Imported lazily so the empty-matrix path works without `lap` installed.
    import lap
    _, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)
    x, y = np.asarray(x), np.asarray(y)

    # Entries below zero in x / y are treated as "no partner" for that row /
    # column. Stacking keeps `matches` two-dimensional even with no matches.
    matched_rows = np.where(x >= 0)[0]
    matches = np.stack([matched_rows, x[matched_rows]], axis=1)
    unmatched_a = np.where(x < 0)[0]
    unmatched_b = np.where(y < 0)[0]
    return matches, unmatched_a, unmatched_b
def cython_bbox_ious(atlbrs, btlbrs):
    """
    Compute pairwise box overlaps with `cython_bbox.bbox_overlaps`.

    Args:
        atlbrs (array-like): First collection of boxes.
        btlbrs (array-like): Second collection of boxes.

    Returns:
        ious (ndarray): Array of shape (len(atlbrs), len(btlbrs)). When either
            collection is empty, an all-zero float64 array of that shape is
            returned without importing `cython_bbox`.
    """
    # np.float64 replaces the `np.float` alias, which was removed in
    # NumPy 1.24 and raises AttributeError there.
    ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float64)
    if ious.size == 0:
        return ious

    # Imported lazily so the empty case needs no compiled extension.
    import cython_bbox
    ious = cython_bbox.bbox_overlaps(
        np.ascontiguousarray(
            atlbrs, dtype=np.float64),
        np.ascontiguousarray(
            btlbrs, dtype=np.float64))
    return ious
def iou_distance(atracks, btracks):
    """
    Compute the IoU-based cost (1 - IoU) between two collections.

    Each argument is either a list of tracks exposing a `tlbr` attribute or a
    sequence of boxes given directly as ndarrays. If the first element of
    either argument is an ndarray, both arguments are used as boxes as-is.

    Returns:
        cost_matrix (ndarray): `1 - ious`, of shape
            (len(atracks), len(btracks)).
    """
    a_is_boxes = len(atracks) > 0 and isinstance(atracks[0], np.ndarray)
    b_is_boxes = len(btracks) > 0 and isinstance(btracks[0], np.ndarray)

    if a_is_boxes or b_is_boxes:
        atlbrs, btlbrs = atracks, btracks
    else:
        atlbrs = [trk.tlbr for trk in atracks]
        btlbrs = [trk.tlbr for trk in btracks]

    return 1 - cython_bbox_ious(atlbrs, btlbrs)
def embedding_distance(tracks, detections, metric='euclidean'):
    """
    Compute the appearance cost between tracks and detections.

    Args:
        tracks (list): Objects exposing a `smooth_feat` feature vector.
        detections (list): Objects exposing a `curr_feat` feature vector.
        metric (str): Metric name forwarded to
            `scipy.spatial.distance.cdist`. Default 'euclidean'.

    Returns:
        cost_matrix (ndarray): Array of shape (len(tracks), len(detections))
            with the pairwise distances, clipped below at 0. An all-zero
            array of that shape is returned when either list is empty.
    """
    # np.float64 replaces the `np.float` alias, which was removed in
    # NumPy 1.24 and raises AttributeError there.
    cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float64)
    if cost_matrix.size == 0:
        return cost_matrix

    det_features = np.asarray(
        [det.curr_feat for det in detections], dtype=np.float64)
    track_features = np.asarray(
        [trk.smooth_feat for trk in tracks], dtype=np.float64)
    # Normalized features
    cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric))
    return cost_matrix
def fuse_motion(kf,
                cost_matrix,
                tracks,
                detections,
                only_position=False,
                lambda_=0.98):
    """
    Blend the given cost with the Kalman gating distance of each track.

    For every track row, entries whose gating distance exceeds the chi-square
    threshold are set to infinity, then the row becomes
    `lambda_ * cost + (1 - lambda_) * gating_distance`.

    Args:
        kf (object): Kalman filter providing `gating_distance`.
        cost_matrix (ndarray): Cost matrix with one row per track and one
            column per detection. Modified in place.
        tracks (list): Objects exposing `mean` and `covariance`.
        detections (list): Objects exposing `to_xyah()`.
        only_position (bool): If True, gating uses 2 degrees of freedom
            instead of 4 and the flag is forwarded to the filter.
        lambda_ (float): Weight of the original cost in the blend.

    Returns:
        The same `cost_matrix` object; returned untouched when it is empty.
    """
    if cost_matrix.size == 0:
        return cost_matrix

    if only_position:
        gating_dim = 2
    else:
        gating_dim = 4
    threshold = kalman_filter.chi2inv95[gating_dim]

    xyah_boxes = [det.to_xyah() for det in detections]
    measurements = np.asarray(xyah_boxes)

    for row, track in enumerate(tracks):
        distances = kf.gating_distance(
            track.mean,
            track.covariance,
            measurements,
            only_position,
            metric='maha')
        # Rule out associations outside the gate before blending.
        cost_matrix[row, distances > threshold] = np.inf
        cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_
                                                         ) * distances
    return cost_matrix
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import kalman_filter
from .kalman_filter import *
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is borrowed from https://github.com/nwojke/deep_sort/blob/master/deep_sort/kalman_filter.py
"""
import numpy as np
import scipy.linalg
from ppdet.core.workspace import register, serializable
__all__ = ['KalmanFilter']
"""
Table for the 0.95 quantile of the chi-square distribution with N degrees of
freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
function and used as Mahalanobis gating threshold.
"""
# Lookup table: degrees of freedom (1..9) -> 0.95 quantile of the chi-square
# distribution, used as the gating threshold on squared Mahalanobis distances.
chi2inv95 = {
    1: 3.8415,
    2: 5.9915,
    3: 7.8147,
    4: 9.4877,
    5: 11.070,
    6: 12.592,
    7: 14.067,
    8: 15.507,
    9: 16.919
}
@register
@serializable
class KalmanFilter(object):
    """
    Kalman filter for tracking bounding boxes in image space.

    The state has 8 dimensions,

        x, y, a, h, vx, vy, va, vh

    i.e. the box center (x, y), aspect ratio a, height h and their
    velocities. The motion model is constant velocity, and the box
    (x, y, a, h) is observed directly (linear observation model).
    """

    def __init__(self):
        ndim, dt = 4, 1.

        # Constant-velocity transition: identity plus `dt` coupling every
        # position component to its velocity.
        motion = np.eye(2 * ndim, 2 * ndim)
        for axis in range(ndim):
            motion[axis, ndim + axis] = dt
        self._motion_mat = motion
        # Observation matrix: selects the first `ndim` state components.
        self._update_mat = np.eye(ndim, 2 * ndim)

        # Noise levels are expressed relative to the current box height;
        # these weights scale the position and velocity uncertainty.
        self._std_weight_position = 1. / 20
        self._std_weight_velocity = 1. / 160

    def initiate(self, measurement):
        """
        Create a track state from an unassociated measurement.

        Args:
            measurement (ndarray): Bounding box (x, y, a, h) with center
                position (x, y), aspect ratio a, and height h.

        Returns:
            The mean vector (8 dimensional) and covariance matrix (8x8
            dimensional) of the new track. Velocities start at 0 mean.
        """
        mean_pos = measurement
        mean = np.r_[mean_pos, np.zeros_like(mean_pos)]

        height = measurement[3]
        pos_std = 2 * self._std_weight_position * height
        vel_std = 10 * self._std_weight_velocity * height
        std = [
            pos_std, pos_std, 1e-2, pos_std, vel_std, vel_std, 1e-5, vel_std
        ]
        covariance = np.diag(np.square(std))
        return mean, covariance

    def predict(self, mean, covariance):
        """
        Run the Kalman filter prediction step.

        Args:
            mean (ndarray): The 8 dimensional mean vector of the object state
                at the previous time step.
            covariance (ndarray): The 8x8 dimensional covariance matrix of the
                object state at the previous time step.

        Returns:
            The mean vector and covariance matrix of the predicted state.
        """
        height = mean[3]
        pos_std = self._std_weight_position * height
        vel_std = self._std_weight_velocity * height
        std_pos = [pos_std, pos_std, 1e-2, pos_std]
        std_vel = [vel_std, vel_std, 1e-5, vel_std]
        motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))

        transition = self._motion_mat
        mean = np.dot(mean, transition.T)
        covariance = np.linalg.multi_dot(
            (transition, covariance, transition.T)) + motion_cov
        return mean, covariance

    def project(self, mean, covariance):
        """
        Project the state distribution to measurement space.

        Args:
            mean (ndarray): The state's mean vector (8 dimensional array).
            covariance (ndarray): The state's covariance matrix (8x8
                dimensional).

        Returns:
            The projected mean and covariance matrix of the given state
            estimate, with the measurement noise added to the covariance.
        """
        pos_std = self._std_weight_position * mean[3]
        std = [pos_std, pos_std, 1e-1, pos_std]
        innovation_cov = np.diag(np.square(std))

        observation = self._update_mat
        projected_mean = np.dot(observation, mean)
        projected_cov = np.linalg.multi_dot((observation, covariance,
                                             observation.T))
        return projected_mean, projected_cov + innovation_cov

    def multi_predict(self, mean, covariance):
        """
        Run the Kalman filter prediction step (vectorized version).

        Args:
            mean (ndarray): The Nx8 dimensional mean matrix of the object
                states at the previous time step.
            covariance (ndarray): The Nx8x8 dimensional covariance matrices of
                the object states at the previous time step.

        Returns:
            The mean matrix and covariance matrices of the predicted states.
        """
        heights = mean[:, 3]
        pos_std = self._std_weight_position * heights
        vel_std = self._std_weight_velocity * heights
        std_pos = [pos_std, pos_std, 1e-2 * np.ones_like(heights), pos_std]
        std_vel = [vel_std, vel_std, 1e-5 * np.ones_like(heights), vel_std]
        # One row of 8 variances per object.
        sqr = np.square(np.r_[std_pos, std_vel]).T

        motion_cov = np.asarray(
            [np.diag(sqr[i]) for i in range(len(mean))])

        transition = self._motion_mat
        mean = np.dot(mean, transition.T)
        # np.dot puts the object axis in the middle; move it back to front.
        left = np.dot(transition, covariance).transpose((1, 0, 2))
        covariance = np.dot(left, transition.T) + motion_cov
        return mean, covariance

    def update(self, mean, covariance, measurement):
        """
        Run the Kalman filter correction step.

        Args:
            mean (ndarray): The predicted state's mean vector (8 dimensional).
            covariance (ndarray): The state's covariance matrix (8x8
                dimensional).
            measurement (ndarray): The 4 dimensional measurement vector
                (x, y, a, h), where (x, y) is the center position, a the
                aspect ratio, and h the height of the bounding box.

        Returns:
            The measurement-corrected state distribution.
        """
        projected_mean, projected_cov = self.project(mean, covariance)

        # Solve for the gain through a Cholesky factorization of the
        # projected covariance instead of inverting it.
        chol_factor, lower = scipy.linalg.cho_factor(
            projected_cov, lower=True, check_finite=False)
        rhs = np.dot(covariance, self._update_mat.T).T
        kalman_gain = scipy.linalg.cho_solve(
            (chol_factor, lower), rhs, check_finite=False).T

        innovation = measurement - projected_mean
        new_mean = mean + np.dot(innovation, kalman_gain.T)
        new_covariance = covariance - np.linalg.multi_dot(
            (kalman_gain, projected_cov, kalman_gain.T))
        return new_mean, new_covariance

    def gating_distance(self,
                        mean,
                        covariance,
                        measurements,
                        only_position=False,
                        metric='maha'):
        """
        Compute the gating distance between a state distribution and
        measurements.

        A suitable threshold can be taken from `chi2inv95`: with
        `only_position` False the chi-square distribution has 4 degrees of
        freedom, otherwise 2.

        Args:
            mean (ndarray): Mean vector over the state distribution (8
                dimensional).
            covariance (ndarray): Covariance of the state distribution (8x8
                dimensional).
            measurements (ndarray): An Nx4 dimensional matrix of N
                measurements, each in format (x, y, a, h) where (x, y) is the
                bounding box center position, a the aspect ratio, and h the
                height.
            only_position (Optional[bool]): If True, only the box center
                position enters the distance.
            metric (str): Metric type, 'gaussian' or 'maha'.

        Returns:
            An array of length N. For 'maha' the i-th element is the squared
            Mahalanobis distance between (mean, covariance) and
            `measurements[i]`; for 'gaussian' it is the squared Euclidean
            distance to the projected mean.

        Raises:
            ValueError: If `metric` is not 'gaussian' or 'maha'.
        """
        mean, covariance = self.project(mean, covariance)
        if only_position:
            mean, covariance = mean[:2], covariance[:2, :2]
            measurements = measurements[:, :2]

        d = measurements - mean
        if metric == 'gaussian':
            return np.sum(d * d, axis=1)
        if metric == 'maha':
            cholesky_factor = np.linalg.cholesky(covariance)
            z = scipy.linalg.solve_triangular(
                cholesky_factor,
                d.T,
                lower=True,
                check_finite=False,
                overwrite_b=True)
            return np.sum(z * z, axis=0)
        raise ValueError('invalid distance metric')
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import base_jde_tracker
from . import base_sde_tracker
from . import jde_tracker
from . import deepsort_tracker
from .base_jde_tracker import *
from .base_sde_tracker import *
from .jde_tracker import *
from .deepsort_tracker import *
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is borrowed from https://github.com/Zhongdao/Towards-Realtime-MOT/blob/master/tracker/multitracker.py
"""
import numpy as np
from collections import deque, OrderedDict
from ..matching import jde_matching as matching
from ppdet.core.workspace import register, serializable
__all__ = [
'TrackState',
'BaseTrack',
'STrack',
'joint_stracks',
'sub_stracks',
'remove_duplicate_stracks',
]
class TrackState(object):
    """Integer codes for the life-cycle state of a track."""
    # New = 0, Tracked = 1, Lost = 2, Removed = 3
    New, Tracked, Lost, Removed = range(4)
@register
@serializable
class BaseTrack(object):
    """
    Base class for tracks: class-level default attributes, a global id
    counter, and the hooks (`activate`, `predict`, `update`) that subclasses
    must implement.
    """
    # Global counter backing `next_id`; shared by all tracks.
    _count = 0

    # Defaults below are class attributes: they are shared by every instance
    # until an instance assigns its own value.
    track_id = 0
    is_activated = False
    state = TrackState.New

    history = OrderedDict()
    features = []
    curr_feature = None
    score = 0
    start_frame = 0
    frame_id = 0
    time_since_update = 0

    # multi-camera
    location = (np.inf, np.inf)

    @property
    def end_frame(self):
        """Frame id stored in `frame_id`."""
        return self.frame_id

    @staticmethod
    def next_id():
        """Increment the global counter and return its new value."""
        BaseTrack._count = BaseTrack._count + 1
        return BaseTrack._count

    def activate(self, *args):
        """To be implemented by subclasses."""
        raise NotImplementedError

    def predict(self):
        """To be implemented by subclasses."""
        raise NotImplementedError

    def update(self, *args, **kwargs):
        """To be implemented by subclasses."""
        raise NotImplementedError

    def mark_lost(self):
        """Set the state to `TrackState.Lost`."""
        self.state = TrackState.Lost

    def mark_removed(self):
        """Set the state to `TrackState.Removed`."""
        self.state = TrackState.Removed
@register
@serializable
class STrack(BaseTrack):
    """
    A single tracked target with a Kalman-filtered box state and an
    exponentially smoothed appearance feature.

    Args:
        tlwh (array-like): Initial box as (top left x, top left y, width,
            height).
        score (float): Detection score of the initial box.
        temp_feat (ndarray): Appearance feature of the initial detection.
        buffer_size (int): Maximum number of features kept in `features`.
    """

    def __init__(self, tlwh, score, temp_feat, buffer_size=30):
        # wait activate
        # np.float64 replaces the `np.float` alias removed in NumPy 1.24.
        self._tlwh = np.asarray(tlwh, dtype=np.float64)
        self.kalman_filter = None
        self.mean, self.covariance = None, None
        self.is_activated = False

        self.score = score
        self.tracklet_len = 0

        # The per-instance feature buffer and smoothing factor must exist
        # before the first update_features() call; otherwise the first
        # feature is appended to the list shared on the BaseTrack class and
        # is missing from this track's own buffer.
        self.smooth_feat = None
        self.features = deque([], maxlen=buffer_size)
        self.alpha = 0.9
        self.update_features(temp_feat)

    def update_features(self, feat):
        """
        Store `feat` (L2-normalized) as the current feature and fold it into
        the smoothed feature with weight `1 - alpha`.

        The caller's array is not modified.
        """
        # Out-of-place division: the caller's array stays untouched.
        feat = np.asarray(feat) / np.linalg.norm(feat)
        self.curr_feat = feat
        if self.smooth_feat is None:
            self.smooth_feat = feat
        else:
            self.smooth_feat = self.alpha * self.smooth_feat + (
                1 - self.alpha) * feat
        self.features.append(feat)
        # Out-of-place so that smooth_feat never aliases curr_feat.
        self.smooth_feat = self.smooth_feat / np.linalg.norm(self.smooth_feat)

    def predict(self):
        """Advance this track's state one step with its own Kalman filter."""
        mean_state = self.mean.copy()
        if self.state != TrackState.Tracked:
            # Zero the last state component (height velocity) for tracks
            # that are not currently tracked.
            mean_state[7] = 0
        self.mean, self.covariance = self.kalman_filter.predict(
            mean_state, self.covariance)

    @staticmethod
    def multi_predict(stracks, kalman_filter):
        """
        Advance the state of every track in `stracks` one step using the
        vectorized `kalman_filter.multi_predict`. Does nothing for an empty
        list.
        """
        if len(stracks) > 0:
            multi_mean = np.asarray([st.mean.copy() for st in stracks])
            multi_covariance = np.asarray([st.covariance for st in stracks])
            for i, st in enumerate(stracks):
                if st.state != TrackState.Tracked:
                    multi_mean[i][7] = 0
            multi_mean, multi_covariance = kalman_filter.multi_predict(
                multi_mean, multi_covariance)
            for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
                stracks[i].mean = mean
                stracks[i].covariance = cov

    def activate(self, kalman_filter, frame_id):
        """Start a new tracklet"""
        self.kalman_filter = kalman_filter
        self.track_id = self.next_id()
        self.mean, self.covariance = self.kalman_filter.initiate(
            self.tlwh_to_xyah(self._tlwh))

        self.tracklet_len = 0
        self.state = TrackState.Tracked
        # Only tracks started on frame 1 are flagged activated here; others
        # are flagged by a later update() or re_activate().
        if frame_id == 1:
            self.is_activated = True
        self.frame_id = frame_id
        self.start_frame = frame_id

    def re_activate(self, new_track, frame_id, new_id=False):
        """
        Resume this track from the matched `new_track`: correct the Kalman
        state with its box, refresh the appearance feature and mark the track
        as tracked. A fresh track id is drawn when `new_id` is True.
        """
        self.mean, self.covariance = self.kalman_filter.update(
            self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh))
        self.update_features(new_track.curr_feat)
        self.tracklet_len = 0
        self.state = TrackState.Tracked
        self.is_activated = True
        self.frame_id = frame_id
        if new_id:
            self.track_id = self.next_id()

    def update(self, new_track, frame_id, update_feature=True):
        """
        Update this track with the matched `new_track`: correct the Kalman
        state with its box, take over its score and, unless `update_feature`
        is False, fold in its appearance feature.
        """
        self.frame_id = frame_id
        self.tracklet_len += 1

        new_tlwh = new_track.tlwh
        self.mean, self.covariance = self.kalman_filter.update(
            self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh))
        self.state = TrackState.Tracked
        self.is_activated = True

        self.score = new_track.score
        if update_feature:
            self.update_features(new_track.curr_feat)

    @property
    def tlwh(self):
        """
        Get current position in bounding box format `(top left x, top left y,
        width, height)`.
        """
        if self.mean is None:
            return self._tlwh.copy()
        ret = self.mean[:4].copy()
        # (cx, cy, a, h) -> (cx, cy, w, h) -> (x1, y1, w, h)
        ret[2] *= ret[3]
        ret[:2] -= ret[2:] / 2
        return ret

    @property
    def tlbr(self):
        """
        Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
        `(top left, bottom right)`.
        """
        ret = self.tlwh.copy()
        ret[2:] += ret[:2]
        return ret

    @staticmethod
    def tlwh_to_xyah(tlwh):
        """
        Convert bounding box to format `(center x, center y, aspect ratio,
        height)`, where the aspect ratio is `width / height`.
        """
        ret = np.asarray(tlwh).copy()
        ret[:2] += ret[2:] / 2
        ret[2] /= ret[3]
        return ret

    def to_xyah(self):
        """Return the current box as `(center x, center y, aspect ratio, height)`."""
        return self.tlwh_to_xyah(self.tlwh)

    @staticmethod
    def tlbr_to_tlwh(tlbr):
        """Convert `(min x, min y, max x, max y)` to `(x, y, width, height)`."""
        ret = np.asarray(tlbr).copy()
        ret[2:] -= ret[:2]
        return ret

    @staticmethod
    def tlwh_to_tlbr(tlwh):
        """Convert `(x, y, width, height)` to `(min x, min y, max x, max y)`."""
        ret = np.asarray(tlwh).copy()
        ret[2:] += ret[:2]
        return ret

    def __repr__(self):
        return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame,
                                      self.end_frame)
def joint_stracks(tlista, tlistb):
    """
    Concatenate two track lists, skipping tracks of `tlistb` whose `track_id`
    was already seen.

    Every track of `tlista` is kept. A track of `tlistb` is appended only if
    its id occurs neither in `tlista` nor earlier in `tlistb`.
    """
    merged = list(tlista)
    seen_ids = {trk.track_id for trk in tlista}
    for trk in tlistb:
        if trk.track_id in seen_ids:
            continue
        seen_ids.add(trk.track_id)
        merged.append(trk)
    return merged
def sub_stracks(tlista, tlistb):
    """
    Return the tracks of `tlista` whose `track_id` does not occur in `tlistb`.

    Tracks are indexed by id, so for duplicate ids in `tlista` the last one
    wins.
    """
    remaining = {trk.track_id: trk for trk in tlista}
    for trk in tlistb:
        if remaining.get(trk.track_id, 0):
            del remaining[trk.track_id]
    return list(remaining.values())
def remove_duplicate_stracks(stracksa, stracksb):
    """
    Drop near-identical tracks across two lists.

    Pairs with an IoU distance below 0.15 count as duplicates. Of each pair,
    the track with the longer span (`frame_id - start_frame`) is kept; on a
    tie the track from `stracksb` is kept.

    Returns:
        A tuple (resa, resb) with the filtered versions of both lists.
    """
    pdist = matching.iou_distance(stracksa, stracksb)
    dupa, dupb = [], []
    for p, q in zip(*np.where(pdist < 0.15)):
        span_a = stracksa[p].frame_id - stracksa[p].start_frame
        span_b = stracksb[q].frame_id - stracksb[q].start_frame
        if span_a <= span_b:
            dupa.append(p)
        else:
            dupb.append(q)
    resa = [trk for idx, trk in enumerate(stracksa) if idx not in dupa]
    resb = [trk for idx, trk in enumerate(stracksb) if idx not in dupb]
    return resa, resb
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is borrowed from https://github.com/nwojke/deep_sort/blob/master/deep_sort/track.py
"""
import numpy as np
from ppdet.core.workspace import register, serializable
__all__ = ['TrackState', 'Track']
class TrackState(object):
    """
    Enumeration of the states a single target track can be in.

    A new track starts as `Tentative` and becomes `Confirmed` once enough
    evidence has been collected. A track that is no longer alive is marked
    `Deleted` so it can be removed from the set of active tracks.
    """
    # Tentative = 1, Confirmed = 2, Deleted = 3
    Tentative, Confirmed, Deleted = range(1, 4)
@register
@serializable
class Track(object):
    """
    A single target track with state space `(x, y, a, h)` plus velocities,
    where `(x, y)` is the box center, `a` the aspect ratio and `h` the height.

    Args:
        mean (ndarray): Mean vector of the initial state distribution.
        covariance (ndarray): Covariance matrix of the initial state
            distribution.
        track_id (int): A unique track identifier.
        n_init (int): Number of hits required before a tentative track is
            confirmed. A tentative track that is marked missed is deleted.
        max_age (int): Once `time_since_update` exceeds this value, marking
            the track missed deletes it.
        feature (Optional[ndarray]): Feature vector of the detection this
            track originates from. If not None, it is added to `features`.

    Attributes:
        hits (int): Total number of measurement updates.
        age (int): Total number of frames since first occurrence.
        time_since_update (int): Number of prediction steps since the last
            measurement update.
        state (TrackState): The current track state.
        features (List[ndarray]): A cache of features; each measurement
            update appends the associated feature vector.
    """

    def __init__(self,
                 mean,
                 covariance,
                 track_id,
                 n_init,
                 max_age,
                 feature=None):
        self.mean = mean
        self.covariance = covariance
        self.track_id = track_id
        self.hits = 1
        self.age = 1
        self.time_since_update = 0

        self.state = TrackState.Tentative
        self.features = [] if feature is None else [feature]

        self._n_init = n_init
        self._max_age = max_age

    def to_tlwh(self):
        """Get position in format `(top left x, top left y, width, height)`."""
        box = self.mean[:4].copy()
        # (cx, cy, a, h) -> (cx, cy, w, h) -> (x1, y1, w, h)
        box[2] *= box[3]
        box[:2] -= box[2:] / 2
        return box

    def to_tlbr(self):
        """Get position in bounding box format `(min x, min y, max x, max y)`."""
        box = self.to_tlwh()
        box[2:] = box[:2] + box[2:]
        return box

    def predict(self, kalman_filter):
        """
        Propagate the state distribution to the current time step using a
        Kalman filter prediction step, and advance the age counters.
        """
        predicted = kalman_filter.predict(self.mean, self.covariance)
        self.mean, self.covariance = predicted
        self.age += 1
        self.time_since_update += 1

    def update(self, kalman_filter, detection):
        """
        Perform the Kalman filter measurement update with `detection` and
        append its feature to the feature cache.
        """
        corrected = kalman_filter.update(self.mean, self.covariance,
                                         detection.to_xyah())
        self.mean, self.covariance = corrected
        self.features.append(detection.feature)

        self.hits += 1
        self.time_since_update = 0
        # Promote a tentative track once it has collected enough hits.
        if self.state == TrackState.Tentative and self.hits >= self._n_init:
            self.state = TrackState.Confirmed

    def mark_missed(self):
        """Mark this track as missed (no association at the current time step).
        """
        if (self.state == TrackState.Tentative or
                self.time_since_update > self._max_age):
            self.state = TrackState.Deleted

    def is_tentative(self):
        """Returns True if this track is tentative (unconfirmed)."""
        return self.state == TrackState.Tentative

    def is_confirmed(self):
        """Returns True if this track is confirmed."""
        return self.state == TrackState.Confirmed

    def is_deleted(self):
        """Returns True if this track is dead and should be deleted."""
        return self.state == TrackState.Deleted
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is borrowed from https://github.com/nwojke/deep_sort/blob/master/deep_sort/tracker.py
"""
import numpy as np
from ..matching.deepsort_matching import NearestNeighborDistanceMetric
from ..matching.deepsort_matching import iou_cost, min_cost_matching, matching_cascade, gate_cost_matrix
from .base_sde_tracker import Track
from ppdet.core.workspace import register, serializable
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
__all__ = ['DeepSORTTracker']
@register
@serializable
class DeepSORTTracker(object):
    """
    DeepSORT tracker

    Args:
        img_size (list): input image size, [h, w]
        budget (int): If not None, fix samples per class to at most this number.
            Removes the oldest samples when the budget is reached.
        max_age (int): maximum number of consecutive misses before a track
            is deleted
        n_init (int): Number of frames that a track remains in initialization
            phase. Number of consecutive detections before the track is confirmed.
            The track state is set to `Deleted` if a miss occurs within the first
            `n_init` frames.
        metric_type (str): either "euclidean" or "cosine", the distance metric
            used for measurement to track association.
        matching_threshold (float): samples with larger distance are
            considered an invalid match.
        max_iou_distance (float): max iou distance threshold
        motion (object): KalmanFilter instance
    """
    # The docstring must be the first statement of the class body, otherwise
    # it is a plain string expression and `DeepSORTTracker.__doc__` is None.
    __inject__ = ['motion']

    def __init__(self,
                 img_size=[608, 1088],
                 budget=100,
                 max_age=30,
                 n_init=3,
                 metric_type='cosine',
                 matching_threshold=0.2,
                 max_iou_distance=0.7,
                 motion='KalmanFilter'):
        self.img_size = img_size
        self.max_age = max_age
        self.n_init = n_init
        self.metric = NearestNeighborDistanceMetric(metric_type,
                                                    matching_threshold, budget)
        self.max_iou_distance = max_iou_distance
        self.motion = motion

        self.tracks = []
        # Id handed to the next newly initiated track; incremented per track.
        self._next_id = 1

    def predict(self):
        """
        Propagate track state distributions one time step forward.
        This function should be called once every time step, before `update`.
        """
        for track in self.tracks:
            track.predict(self.motion)

    def update(self, detections):
        """
        Perform measurement update and track management.

        Args:
            detections (list): List[ppdet.modeling.mot.utils.Detection]
                A list of detections at the current time step.

        Returns:
            list: `self.tracks`, i.e. every track that is not deleted after
                this step (tentative tracks included).
        """
        # Run matching cascade.
        matches, unmatched_tracks, unmatched_detections = \
            self._match(detections)

        # Update track set.
        for track_idx, detection_idx in matches:
            self.tracks[track_idx].update(self.motion,
                                          detections[detection_idx])
        for track_idx in unmatched_tracks:
            self.tracks[track_idx].mark_missed()
        for detection_idx in unmatched_detections:
            self._initiate_track(detections[detection_idx])
        self.tracks = [t for t in self.tracks if not t.is_deleted()]

        # Update distance metric with the features collected by the
        # confirmed tracks, then empty the per-track feature buffers.
        active_targets = [t.track_id for t in self.tracks if t.is_confirmed()]
        features, targets = [], []
        for track in self.tracks:
            if not track.is_confirmed():
                continue
            features += track.features
            targets += [track.track_id for _ in track.features]
            track.features = []
        self.metric.partial_fit(
            np.asarray(features), np.asarray(targets), active_targets)
        return self.tracks

    def _match(self, detections):
        """
        Associate detections with the current tracks.

        Confirmed tracks are matched first with the appearance metric through
        `matching_cascade`; unconfirmed tracks and confirmed tracks that were
        unmatched with `time_since_update == 1` are then matched by IOU.

        Returns:
            tuple: (matches, unmatched_tracks, unmatched_detections), where
                matches holds (track index, detection index) pairs.
        """

        def gated_metric(tracks, dets, track_indices, detection_indices):
            features = np.array([dets[i].feature for i in detection_indices])
            targets = np.array([tracks[i].track_id for i in track_indices])
            cost_matrix = self.metric.distance(features, targets)
            cost_matrix = gate_cost_matrix(self.motion, cost_matrix, tracks,
                                           dets, track_indices,
                                           detection_indices)
            return cost_matrix

        # Split track set into confirmed and unconfirmed tracks.
        confirmed_tracks = [
            i for i, t in enumerate(self.tracks) if t.is_confirmed()
        ]
        unconfirmed_tracks = [
            i for i, t in enumerate(self.tracks) if not t.is_confirmed()
        ]

        # Associate confirmed tracks using appearance features.
        matches_a, unmatched_tracks_a, unmatched_detections = \
            matching_cascade(
                gated_metric, self.metric.matching_threshold, self.max_age,
                self.tracks, detections, confirmed_tracks)

        # Associate remaining tracks together with unconfirmed tracks using IOU.
        iou_track_candidates = unconfirmed_tracks + [
            k for k in unmatched_tracks_a
            if self.tracks[k].time_since_update == 1
        ]
        unmatched_tracks_a = [
            k for k in unmatched_tracks_a
            if self.tracks[k].time_since_update != 1
        ]
        matches_b, unmatched_tracks_b, unmatched_detections = \
            min_cost_matching(
                iou_cost, self.max_iou_distance, self.tracks,
                detections, iou_track_candidates, unmatched_detections)

        matches = matches_a + matches_b
        unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b))
        return matches, unmatched_tracks, unmatched_detections

    def _initiate_track(self, detection):
        """Start a new track from `detection` and give it the next free id."""
        mean, covariance = self.motion.initiate(detection.to_xyah())
        self.tracks.append(
            Track(mean, covariance, self._next_id, self.n_init, self.max_age,
                  detection.feature))
        self._next_id += 1
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is borrowed from https://github.com/Zhongdao/Towards-Realtime-MOT/blob/master/tracker/multitracker.py
"""
import paddle
from ..matching import jde_matching as matching
from .base_jde_tracker import TrackState, BaseTrack, STrack
from .base_jde_tracker import joint_stracks, sub_stracks, remove_duplicate_stracks
from ppdet.core.workspace import register, serializable
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
__all__ = ['JDETracker']
@register
@serializable
class JDETracker(object):
    """
    JDE tracker

    Args:
        det_thresh (float): threshold of detection score
        track_buffer (int): buffer for tracker
        min_box_area (int): min box area to filter out low quality boxes
        tracked_thresh (float): linear assignment threshold of tracked
            stracks and detections
        r_tracked_thresh (float): linear assignment threshold of
            tracked stracks and unmatched detections
        unconfirmed_thresh (float): linear assignment threshold of
            unconfirmed stracks and unmatched detections
        motion (object): KalmanFilter instance
    """
    # The docstring must be the first statement of the class body, otherwise
    # it is a plain string expression and `JDETracker.__doc__` is None.
    __inject__ = ['motion']

    def __init__(self,
                 det_thresh=0.3,
                 track_buffer=30,
                 min_box_area=200,
                 tracked_thresh=0.7,
                 r_tracked_thresh=0.5,
                 unconfirmed_thresh=0.7,
                 motion='KalmanFilter'):
        self.det_thresh = det_thresh
        self.track_buffer = track_buffer
        self.min_box_area = min_box_area
        self.tracked_thresh = tracked_thresh
        self.r_tracked_thresh = r_tracked_thresh
        self.unconfirmed_thresh = unconfirmed_thresh
        self.motion = motion

        self.frame_id = 0
        self.tracked_stracks = []
        self.lost_stracks = []
        self.removed_stracks = []

        self.max_time_lost = 0
        # max_time_lost will be calculated: int(frame_rate / 30.0 * track_buffer)

    def update(self, pred_dets, pred_embs):
        """
        Associate the detections of the current frame with the existing
        tracklets and handle lost, removed, re-found and active tracklets.

        Args:
            pred_dets (Tensor): Detection results of the image, shape is [N, 5].
            pred_embs (Tensor): Embedding results of the image, shape is [N, 512].

        Return:
            output_stracks (list): The activated tracklets that are tracked
                after processing the received frame.
        """
        self.frame_id += 1
        # Tracks updated or newly started in the current frame.
        activated_stracks = []
        # Lost tracks whose detections are obtained in the current frame.
        refind_stracks = []
        # Tracks which are not obtained in the current frame but are not
        # removed (lost for less time than the removal threshold).
        lost_stracks = []
        removed_stracks = []

        # Filter out the image with box_num = 0, which is encoded as a single
        # all-zero row: pred_dets = [[0.0, 0.0, 0.0, 0.0]]
        empty_pred = bool(
            len(pred_dets) == 1 and paddle.sum(pred_dets) == 0.0)

        # Step 1: Network forward, get detections & embeddings
        if len(pred_dets) > 0 and not empty_pred:
            pred_dets = pred_dets.numpy()
            pred_embs = pred_embs.numpy()
            detections = [
                STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30)
                for (tlbrs, f) in zip(pred_dets, pred_embs)
            ]
        else:
            detections = []

        # Add newly detected tracklets to tracked_stracks
        unconfirmed = []
        tracked_stracks = []  # type: list[STrack]
        for track in self.tracked_stracks:
            if not track.is_activated:
                # Previous tracks which are not active in the current frame
                # are added to the unconfirmed list.
                unconfirmed.append(track)
            else:
                # Active tracks are added to the local list 'tracked_stracks'.
                tracked_stracks.append(track)

        # Step 2: First association, with embedding
        # Combining currently tracked_stracks and lost_stracks
        strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
        # Predict the current location with KF
        STrack.multi_predict(strack_pool, self.motion)

        dists = matching.embedding_distance(strack_pool, detections)
        dists = matching.fuse_motion(self.motion, dists, strack_pool,
                                     detections)
        # dists holds the distances between the tracks in strack_pool and
        # the detections.
        matches, u_track, u_detection = matching.linear_assignment(
            dists, thresh=self.tracked_thresh)
        # matches pairs an index into strack_pool with an index into detections.
        for itracked, idet in matches:
            track = strack_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                # If the track is active, add the detection to the track.
                track.update(det, self.frame_id)
                activated_stracks.append(track)
            else:
                # A detection was obtained for a track which is not active,
                # hence put the track in the refind_stracks list.
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)

        # None of the steps below happen if there are no undetected tracks.
        # Step 3: Second association, with IOU
        # detections is now the list of the unmatched detections.
        detections = [detections[i] for i in u_detection]
        # Container for stracks which were tracked till the previous frame
        # but got no detection in the current frame.
        r_tracked_stracks = []
        for i in u_track:
            if strack_pool[i].state == TrackState.Tracked:
                r_tracked_stracks.append(strack_pool[i])
        dists = matching.iou_distance(r_tracked_stracks, detections)
        matches, u_track, u_detection = matching.linear_assignment(
            dists, thresh=self.r_tracked_thresh)
        # matches now holds the pairs found by the IOU distance.
        for itracked, idet in matches:
            track = r_tracked_stracks[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_stracks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)

        # Tracks that are still unmatched are marked lost and collected.
        for it in u_track:
            track = r_tracked_stracks[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_stracks.append(track)

        # Deal with unconfirmed tracks, usually tracks with only one
        # beginning frame.
        detections = [detections[i] for i in u_detection]
        dists = matching.iou_distance(unconfirmed, detections)
        matches, u_unconfirmed, u_detection = matching.linear_assignment(
            dists, thresh=self.unconfirmed_thresh)
        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_stracks.append(unconfirmed[itracked])

        # The unconfirmed tracks which are still not matched are removed.
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_stracks.append(track)

        # Step 4: Init new stracks
        # Every remaining detection with a high enough score starts a track.
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.motion, self.frame_id)
            activated_stracks.append(track)

        # Step 5: Update state
        # Tracks lost for more frames than max_time_lost are removed.
        for track in self.lost_stracks:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_stracks.append(track)

        # Update self.tracked_stracks and self.lost_stracks using the updates
        # in this step. The statement order below is significant.
        self.tracked_stracks = [
            t for t in self.tracked_stracks if t.state == TrackState.Tracked
        ]
        self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                             activated_stracks)
        self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                             refind_stracks)

        self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
        self.lost_stracks.extend(lost_stracks)
        self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
        self.removed_stracks.extend(removed_stracks)
        self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
            self.tracked_stracks, self.lost_stracks)

        output_stracks = [
            track for track in self.tracked_stracks if track.is_activated
        ]

        logger.debug('===========Frame {}=========='.format(self.frame_id))
        logger.debug('Activated: {}'.format(
            [track.track_id for track in activated_stracks]))
        logger.debug('Refind: {}'.format(
            [track.track_id for track in refind_stracks]))
        logger.debug('Lost: {}'.format(
            [track.track_id for track in lost_stracks]))
        logger.debug('Removed: {}'.format(
            [track.track_id for track in removed_stracks]))

        return output_stracks
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import cv2
import time
import paddle
import numpy as np
__all__ = [
'Timer',
'Detection',
'load_det_results',
'preprocess_reid',
'get_crops',
'clip_box',
'scale_coords',
]
class Timer(object):
    """
    Small stopwatch used to compute and print the current FPS during
    evaluation. Call `tic` to start a measurement and `toc` to finish it.
    """

    def __init__(self):
        self.total_time = 0.
        self.calls = 0
        self.start_time = self.diff = self.average_time = self.duration = 0.

    def tic(self):
        # time.time is used instead of time.clock because time.clock
        # does not normalize for multithreading
        self.start_time = time.time()

    def toc(self, average=True):
        """Stop the measurement started by `tic`.

        Returns the running average over all calls when `average` is True,
        otherwise the duration of this single measurement.
        """
        elapsed = time.time() - self.start_time
        self.diff = elapsed
        self.total_time += elapsed
        self.calls += 1
        self.average_time = self.total_time / self.calls
        self.duration = self.average_time if average else self.diff
        return self.duration

    def clear(self):
        """Reset every counter to its initial value."""
        self.total_time = 0.
        self.calls = 0
        self.start_time = self.diff = self.average_time = self.duration = 0.
class Detection(object):
    """
    This class represents a bounding box detection in a single image.

    Args:
        tlwh (ndarray): Bounding box in format `(top left x, top left y,
            width, height)`.
        confidence (ndarray): Detector confidence score.
        feature (Tensor|ndarray): A feature vector that describes the object
            contained in this image. Objects exposing a `numpy()` method are
            converted through it; anything else goes through `np.asarray`.
    """

    def __init__(self, tlwh, confidence, feature):
        self.tlwh = np.asarray(tlwh, dtype=np.float32)
        self.confidence = np.asarray(confidence, dtype=np.float32)
        # Tensors are converted with their own `numpy()`; plain arrays and
        # sequences (e.g. features loaded from disk) are accepted as well.
        to_numpy = getattr(feature, 'numpy', None)
        if callable(to_numpy):
            self.feature = to_numpy()
        else:
            self.feature = np.asarray(feature)

    def to_tlbr(self):
        """
        Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
        `(top left, bottom right)`. Returns a new array; `tlwh` is untouched.
        """
        ret = self.tlwh.copy()
        ret[2:] += ret[:2]
        return ret

    def to_xyah(self):
        """
        Convert bounding box to format `(center x, center y, aspect ratio,
        height)`, where the aspect ratio is `width / height`. Returns a new
        array; `tlwh` is untouched.
        """
        ret = self.tlwh.copy()
        ret[:2] += ret[2:] / 2
        ret[2] /= ret[3]
        return ret
def load_det_results(det_file, num_frames):
    """
    Load per-frame detection results from a comma separated text file.

    Column 0 of each row is read as the 1-based frame id, columns 2..5 as
    the box and column 6 as the score.

    Args:
        det_file (str): path of the detection result file.
        num_frames (int): number of frames to build results for.

    Returns:
        list[dict]: one dict per frame (frame 1 first) with keys 'bbox' and
            'score'; both lists are empty for a frame without detections.
    """
    assert os.path.exists(det_file) and os.path.isfile(det_file), \
        'Error: det_file: {} not exist or not a file.'.format(det_file)
    # ndmin=2 keeps a file holding a single detection row two-dimensional,
    # so the column indexing below does not fail on it.
    labels = np.loadtxt(det_file, dtype='float32', delimiter=',', ndmin=2)
    results_list = []
    for frame_i in range(0, num_frames):
        results = {'bbox': [], 'score': []}
        labels_with_frame = labels[labels[:, 0] == frame_i + 1]
        for row in labels_with_frame:
            results['bbox'].append(row[2:6])
            results['score'].append(row[6])
        results_list.append(results)
    return results_list
def scale_coords(coords, input_shape, im_shape, scale_factor):
    """
    Map box coordinates from the padded network input back to the original
    image: remove the padding offsets, divide by the resize ratio, clip
    negative values to 0 and round. `coords` is modified in place.

    NOTE(review): presumably coords is an [N, 4] Tensor in x1, y1, x2, y2
    order and im_shape / scale_factor are batched Tensors - confirm with
    the caller.
    """
    resized_shape = im_shape.numpy()[0]
    ratio = scale_factor.numpy()[0][0]
    origin_h = int(resized_shape[0] / ratio)
    origin_w = int(resized_shape[1] / ratio)

    # Padding applied on each side when the image was fit into input_shape.
    pad_w = (input_shape[1] - origin_w * ratio) / 2
    pad_h = (input_shape[0] - origin_h * ratio) / 2

    # Even columns are shifted by pad_w, odd columns by pad_h.
    coords[:, 0::2] -= pad_w
    coords[:, 1::2] -= pad_h
    coords[:, 0:4] /= paddle.to_tensor(ratio)
    # The upper bound is the current maximum, so only negatives are clipped.
    coords[:, :4] = paddle.clip(coords[:, :4], min=0, max=coords[:, :4].max())
    return coords.round()
def clip_box(xyxy, input_shape, im_shape, scale_factor):
    """
    Clip boxes to the size of the original image, which is recovered by
    dividing `im_shape` by the resize ratio. `xyxy` is modified in place and
    returned; `input_shape` is not used.
    """
    resized_shape = im_shape.numpy()[0]
    ratio = scale_factor.numpy()[0][0]
    origin_h = int(resized_shape[0] / ratio)
    origin_w = int(resized_shape[1] / ratio)

    # Even columns are bounded by the width, odd columns by the height.
    xyxy[:, 0::2] = paddle.clip(xyxy[:, 0::2], min=0, max=origin_w)
    xyxy[:, 1::2] = paddle.clip(xyxy[:, 1::2], min=0, max=origin_h)
    return xyxy
def get_crops(xyxy, ori_img, pred_scores, w, h):
    """
    Cut the boxes out of the original image and preprocess the crops with
    `preprocess_reid`.

    Boxes whose right/bottom coordinate is not larger than the left/top one
    are dropped together with their scores.

    Returns:
        tuple: (crops, keep_scores); two empty lists when no box is kept.
    """
    boxes = xyxy.numpy().astype(np.int64)
    # Drop the leading axis and swap the first two image axes so that the
    # first index of the array follows box[0] / box[2].
    image = np.squeeze(ori_img.numpy(), axis=0).transpose(1, 0, 2)
    scores = pred_scores.numpy()

    crops, keep_scores = [], []
    for idx, box in enumerate(boxes):
        if box[2] > box[0] and box[3] > box[1]:
            crops.append(image[box[0]:box[2], box[1]:box[3], :])
            keep_scores.append(scores[idx])

    if not crops:
        return [], []
    return preprocess_reid(crops, w, h), keep_scores
def preprocess_reid(imgs,
                    w=64,
                    h=192,
                    mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225]):
    """
    Turn a list of image crops into one normalized batch.

    Each crop is resized with `cv2.resize(img, (w, h))`, its channel order
    is reversed, it is moved to channel-first layout, scaled by 1/255 and
    normalized with `mean` / `std`.

    Args:
        imgs (list): crops; presumably HWC arrays with 3 channels.
        w (int): target width passed to cv2.resize.
        h (int): target height passed to cv2.resize.
        mean (list): per-channel mean, 3 values.
        std (list): per-channel std, 3 values.

    Returns:
        ndarray: all crops concatenated along a new leading axis.
    """
    # The normalization constants do not depend on the crop: build them once.
    img_mean = np.array(mean).reshape((3, 1, 1))
    img_std = np.array(std).reshape((3, 1, 1))

    im_batch = []
    for img in imgs:
        img = cv2.resize(img, (w, h))
        img = img[:, :, ::-1].astype('float32').transpose((2, 0, 1)) / 255
        img -= img_mean
        img /= img_std
        im_batch.append(np.expand_dims(img, axis=0))
    return np.concatenate(im_batch, 0)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cv2
import numpy as np
def tlwhs_to_tlbrs(tlwhs):
    """
    Convert boxes from (top left x, top left y, width, height) to
    (top left x, top left y, bottom right x, bottom right y).

    Args:
        tlwhs (array-like): boxes, one per row; an ndarray or a nested
            sequence. The input is never modified.

    Returns:
        ndarray: converted copy; an empty input comes back as an empty array.
    """
    tlbrs = np.copy(tlwhs)
    if len(tlbrs) == 0:
        return tlbrs
    # Columns 0 and 1 of the copy are still x and y, so the copy alone is
    # enough and list input works as well as ndarray input.
    tlbrs[:, 2] += tlbrs[:, 0]
    tlbrs[:, 3] += tlbrs[:, 1]
    return tlbrs
def get_color(idx):
    """Derive a deterministic 3-channel color tuple from an integer id."""
    scaled = idx * 3
    return tuple((factor * scaled) % 255 for factor in (37, 17, 29))
def resize_image(image, max_size=800):
    """
    Shrink `image` so that the larger of its first two dimensions equals
    `max_size`; an image that already fits is returned unchanged.
    """
    longest_side = max(image.shape[:2])
    if longest_side <= max_size:
        return image
    scale = float(max_size) / longest_side
    return cv2.resize(image, None, fx=scale, fy=scale)
def plot_tracking(image,
                  tlwhs,
                  obj_ids,
                  scores=None,
                  frame_id=0,
                  fps=0.,
                  ids2=None):
    """
    Draw tracking results on a copy of `image`.

    A header line with frame id, fps and the number of boxes is written at
    the top; every box is drawn in a color derived from its id, with the id
    (and the matching `ids2` entry, if given) as text.

    Args:
        image (ndarray): image to draw on; it is not modified.
        tlwhs (list): boxes as (top left x, top left y, width, height).
        obj_ids (list): one id per box.
        scores: accepted for interface compatibility; not used.
        frame_id (int): frame index shown in the header.
        fps (float): fps value shown in the header.
        ids2 (list): optional second id per box, appended to the label.

    Returns:
        ndarray: the annotated copy.
    """
    im = np.ascontiguousarray(np.copy(image))

    text_scale = max(1, image.shape[1] / 1600.)
    text_thickness = 2
    line_thickness = max(1, int(image.shape[1] / 500.))

    cv2.putText(
        im,
        'frame: %d fps: %.2f num: %d' % (frame_id, fps, len(tlwhs)),
        (0, int(15 * text_scale)),
        cv2.FONT_HERSHEY_PLAIN,
        text_scale, (0, 0, 255),
        thickness=2)

    for i, tlwh in enumerate(tlwhs):
        x1, y1, w, h = tlwh
        intbox = tuple(map(int, (x1, y1, x1 + w, y1 + h)))
        obj_id = int(obj_ids[i])
        id_text = '{}'.format(int(obj_id))
        if ids2 is not None:
            id_text = id_text + ', {}'.format(int(ids2[i]))
        color = get_color(abs(obj_id))
        cv2.rectangle(
            im, intbox[0:2], intbox[2:4], color=color, thickness=line_thickness)
        cv2.putText(
            im,
            id_text, (intbox[0], intbox[1] + 30),
            cv2.FONT_HERSHEY_PLAIN,
            text_scale, (0, 0, 255),
            thickness=text_thickness)
    return im
def plot_trajectory(image, tlwhs, track_ids):
    """
    Draw the trajectory of every track on a copy of `image`: one small
    circle per box, placed at the middle of the box's bottom edge and
    colored by the track id.
    """
    canvas = image.copy()
    for track_boxes, track_id in zip(tlwhs, track_ids):
        color = get_color(int(track_id))
        for box in track_boxes:
            x1, y1, w, h = tuple(map(int, box))
            bottom_center = (int(x1 + 0.5 * w), int(y1 + h))
            cv2.circle(canvas, bottom_center, 2, color, thickness=2)
    return canvas
def plot_detections(image, tlbrs, scores=None, color=(255, 0, 0), ids=None):
    """
    Draw detection boxes on a copy of `image`.

    Args:
        image (ndarray): image to draw on; it is not modified.
        tlbrs (list): one row per box; the first 4 values are x1, y1, x2, y2.
            Rows with at least 7 values get a 'det'/'trk' label built from
            columns 5 and 6 when `ids` is given.
        scores (list): optional score per box, drawn as text.
        color (tuple): box color.
        ids (list): optional id per box, used in the label text.

    Returns:
        ndarray: the annotated copy.
    """
    im = np.copy(image)
    text_scale = max(1, image.shape[1] / 800.)
    thickness = 2 if text_scale > 1.3 else 1
    for i, det in enumerate(tlbrs):
        # Plain Python ints: the `np.int` alias was removed from NumPy and
        # OpenCV expects integer point coordinates.
        x1, y1, x2, y2 = (int(v) for v in det[:4])
        # Without ids no label is drawn for extended rows.
        if len(det) >= 7 and ids is not None:
            label = 'det' if det[5] > 0 else 'trk'
            text = '{}# {:.2f}: {:d}'.format(label, det[6], ids[i])
            cv2.putText(
                im,
                text, (x1, y1 + 30),
                cv2.FONT_HERSHEY_PLAIN,
                text_scale, (0, 255, 255),
                thickness=thickness)

        if scores is not None:
            text = '{:.2f}'.format(scores[i])
            cv2.putText(
                im,
                text, (x1, y1 + 30),
                cv2.FONT_HERSHEY_PLAIN,
                text_scale, (0, 255, 255),
                thickness=thickness)

        cv2.rectangle(im, (x1, y1), (x2, y2), color, 2)
    return im
......@@ -52,7 +52,13 @@ def add_coord(x, data_format):
class YoloDetBlock(nn.Layer):
def __init__(self, ch_in, channel, norm_type, name, data_format='NCHW'):
def __init__(self,
ch_in,
channel,
norm_type,
freeze_norm=False,
name='',
data_format='NCHW'):
"""
YOLODetBlock layer for yolov3, see https://arxiv.org/abs/1804.02767
......@@ -60,6 +66,7 @@ class YoloDetBlock(nn.Layer):
ch_in (int): input channel
channel (int): base channel
norm_type (str): batch norm type
freeze_norm (bool): whether to freeze norm, default False
name (str): layer name
data_format (str): data format, NCHW or NHWC
"""
......@@ -87,6 +94,7 @@ class YoloDetBlock(nn.Layer):
filter_size=filter_size,
padding=(filter_size - 1) // 2,
norm_type=norm_type,
freeze_norm=freeze_norm,
data_format=data_format,
name=name + post_name))
......@@ -96,6 +104,7 @@ class YoloDetBlock(nn.Layer):
filter_size=3,
padding=1,
norm_type=norm_type,
freeze_norm=freeze_norm,
data_format=data_format,
name=name + '.tip')
......@@ -112,7 +121,8 @@ class SPP(nn.Layer):
k,
pool_size,
norm_type,
name,
freeze_norm=False,
name='',
act='leaky',
data_format='NCHW'):
"""
......@@ -123,7 +133,9 @@ class SPP(nn.Layer):
ch_out (int): output channel of conv layer
k (int): kernel size of conv layer
norm_type (str): batch norm type
freeze_norm (bool): whether to freeze norm, default False
name (str): layer name
act (str): activation function
data_format (str): data format, NCHW or NHWC
"""
super(SPP, self).__init__()
......@@ -145,6 +157,7 @@ class SPP(nn.Layer):
k,
padding=k // 2,
norm_type=norm_type,
freeze_norm=freeze_norm,
name=name,
act=act,
data_format=data_format)
......@@ -210,7 +223,8 @@ class CoordConv(nn.Layer):
filter_size,
padding,
norm_type,
name,
freeze_norm=False,
name='',
data_format='NCHW'):
"""
CoordConv layer
......@@ -232,6 +246,7 @@ class CoordConv(nn.Layer):
filter_size=filter_size,
padding=padding,
norm_type=norm_type,
freeze_norm=freeze_norm,
data_format=data_format,
name=name)
self.data_format = data_format
......@@ -419,6 +434,7 @@ class YOLOv3FPN(nn.Layer):
def __init__(self,
in_channels=[256, 512, 1024],
norm_type='bn',
freeze_norm=False,
data_format='NCHW'):
"""
YOLOv3FPN layer
......@@ -449,6 +465,7 @@ class YOLOv3FPN(nn.Layer):
in_channel,
channel=512 // (2**i),
norm_type=norm_type,
freeze_norm=freeze_norm,
data_format=data_format,
name=name))
self.yolo_blocks.append(yolo_block)
......@@ -466,14 +483,17 @@ class YOLOv3FPN(nn.Layer):
stride=1,
padding=0,
norm_type=norm_type,
freeze_norm=freeze_norm,
data_format=data_format,
name=name))
self.routes.append(route)
def forward(self, blocks):
def forward(self, blocks, for_mot=False):
assert len(blocks) == self.num_blocks
blocks = blocks[::-1]
yolo_feats = []
if for_mot:
emb_feats = []
for i, block in enumerate(blocks):
if i > 0:
if self.data_format == 'NCHW':
......@@ -483,12 +503,19 @@ class YOLOv3FPN(nn.Layer):
route, tip = self.yolo_blocks[i](block)
yolo_feats.append(tip)
if for_mot:
# add emb_feats output
emb_feats.append(route)
if i < self.num_blocks - 1:
route = self.routes[i](route)
route = F.interpolate(
route, scale_factor=2., data_format=self.data_format)
return yolo_feats
if for_mot:
return {'yolo_feats': yolo_feats, 'emb_feats': emb_feats}
else:
return yolo_feats
@classmethod
def from_config(cls, cfg, input_shape):
......@@ -507,6 +534,7 @@ class PPYOLOFPN(nn.Layer):
def __init__(self,
in_channels=[512, 1024, 2048],
norm_type='bn',
freeze_norm=False,
data_format='NCHW',
coord_conv=False,
conv_block_num=2,
......@@ -568,22 +596,26 @@ class PPYOLOFPN(nn.Layer):
[
'conv{}'.format(2 * j), ConvLayer, [c_in, c_out, 1],
dict(
padding=0, norm_type=norm_type)
padding=0,
norm_type=norm_type,
freeze_norm=freeze_norm)
],
[
'conv{}'.format(2 * j + 1), ConvBNLayer,
[c_out, c_out * 2, 3], dict(
padding=1, norm_type=norm_type)
padding=1,
norm_type=norm_type,
freeze_norm=freeze_norm)
],
]
c_in, c_out = c_out * 2, c_out
base_cfg += [[
'route', ConvLayer, [c_in, c_out, 1], dict(
padding=0, norm_type=norm_type)
padding=0, norm_type=norm_type, freeze_norm=freeze_norm)
], [
'tip', ConvLayer, [c_out, c_out * 2, 3], dict(
padding=1, norm_type=norm_type)
padding=1, norm_type=norm_type, freeze_norm=freeze_norm)
]]
if self.conv_block_num == 2:
......@@ -591,7 +623,9 @@ class PPYOLOFPN(nn.Layer):
if self.spp:
spp_cfg = [[
'spp', SPP, [channel * 4, channel, 1], dict(
pool_size=[5, 9, 13], norm_type=norm_type)
pool_size=[5, 9, 13],
norm_type=norm_type,
freeze_norm=freeze_norm)
]]
else:
spp_cfg = []
......@@ -603,7 +637,9 @@ class PPYOLOFPN(nn.Layer):
if self.spp and i == 0:
spp_cfg = [[
'spp', SPP, [c_in * 4, c_in, 1], dict(
pool_size=[5, 9, 13], norm_type=norm_type)
pool_size=[5, 9, 13],
norm_type=norm_type,
freeze_norm=freeze_norm)
]]
else:
spp_cfg = []
......@@ -623,14 +659,17 @@ class PPYOLOFPN(nn.Layer):
stride=1,
padding=0,
norm_type=norm_type,
freeze_norm=freeze_norm,
data_format=data_format,
name=name))
self.routes.append(route)
def forward(self, blocks):
def forward(self, blocks, for_mot=False):
assert len(blocks) == self.num_blocks
blocks = blocks[::-1]
yolo_feats = []
if for_mot:
emb_feats = []
for i, block in enumerate(blocks):
if i > 0:
if self.data_format == 'NCHW':
......@@ -640,12 +679,19 @@ class PPYOLOFPN(nn.Layer):
route, tip = self.yolo_blocks[i](block)
yolo_feats.append(tip)
if for_mot:
# add emb_feats output
emb_feats.append(route)
if i < self.num_blocks - 1:
route = self.routes[i](route)
route = F.interpolate(
route, scale_factor=2., data_format=self.data_format)
return yolo_feats
if for_mot:
return {'yolo_feats': yolo_feats, 'emb_feats': emb_feats}
else:
return yolo_feats
@classmethod
def from_config(cls, cfg, input_shape):
......
......@@ -52,6 +52,7 @@ def mish(x):
def batch_norm(ch,
norm_type='bn',
norm_decay=0.,
freeze_norm=False,
initializer=None,
data_format='NCHW'):
if norm_type == 'sync_bn':
......@@ -59,13 +60,30 @@ def batch_norm(ch,
else:
batch_norm = nn.BatchNorm2D
return batch_norm(
norm_lr = 0. if freeze_norm else 1.
weight_attr = ParamAttr(
initializer=initializer,
learning_rate=norm_lr,
regularizer=L2Decay(norm_decay),
trainable=False if freeze_norm else True)
bias_attr = ParamAttr(
learning_rate=norm_lr,
regularizer=L2Decay(norm_decay),
trainable=False if freeze_norm else True)
norm_layer = batch_norm(
ch,
weight_attr=ParamAttr(
initializer=initializer, regularizer=L2Decay(norm_decay)),
bias_attr=ParamAttr(regularizer=L2Decay(norm_decay)),
weight_attr=weight_attr,
bias_attr=bias_attr,
data_format=data_format)
norm_params = norm_layer.parameters()
if freeze_norm:
for param in norm_params:
param.stop_gradient = True
return norm_layer
@paddle.jit.not_to_static
def roi_pool(input,
......
......@@ -18,13 +18,18 @@ import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register
from ppdet.modeling.bbox_utils import nonempty_bbox, rbox2poly, pd_rbox2poly
from . import ops
try:
from collections.abc import Sequence
except Exception:
from collections import Sequence
__all__ = ['BBoxPostProcess', 'MaskPostProcess', 'FCOSPostProcess']
__all__ = [
'BBoxPostProcess',
'MaskPostProcess',
'FCOSPostProcess',
'S2ANetBBoxPostProcess',
'JDEBBoxPostProcess',
]
@register
......@@ -307,3 +312,33 @@ class S2ANetBBoxPostProcess(object):
pred_bbox = paddle.stack([x1, y1, x2, y2, x3, y3, x4, y4], axis=-1)
pred_result = paddle.concat([pred_label_score, pred_bbox], axis=1)
return pred_result
@register
class JDEBBoxPostProcess(BBoxPostProcess):
    def __call__(self, head_out, anchors):
        """
        Decode the bbox and do NMS for JDE model.

        Args:
            head_out (list): Bbox_pred and cls_prob of bbox_head output.
            anchors (list): Anchors of JDE model.

        Returns:
            boxes_idx (Tensor): The index of kept bboxes after decode 'JDEBox'.
            bbox_pred (Tensor): The output is the prediction with shape [N, 6]
                including labels, scores and bboxes.
            bbox_num (Tensor): The number of prediction of each batch with shape [N].
            nms_keep_idx (Tensor): The index of kept bboxes after NMS.
        """
        boxes_idx, bboxes, score = self.decode(head_out, anchors)
        bbox_pred, bbox_num, nms_keep_idx = self.nms(bboxes, score,
                                                     self.num_classes)
        if bbox_pred.shape[0] != 0:
            return boxes_idx, bbox_pred, bbox_num, nms_keep_idx

        # Nothing survived NMS: hand back a single placeholder row with
        # label -1 so that the outputs are never empty.
        placeholder_pred = np.array(
            [[-1, 0.0, 0.0, 0.0, 0.0, 0.0]], dtype='float32')
        bbox_pred = paddle.to_tensor(placeholder_pred)
        bbox_num = paddle.to_tensor(np.array([1], dtype='int32'))
        nms_keep_idx = paddle.to_tensor(np.array([[0]], dtype='int32'))
        return boxes_idx, bbox_pred, bbox_num, nms_keep_idx
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import jde_embedding_head
from . import pyramidal_embedding
from . import resnet
from .jde_embedding_head import *
from .pyramidal_embedding import *
from .resnet import *
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.regularizer import L2Decay
from ppdet.core.workspace import register
from paddle.nn.initializer import Normal, Constant
__all__ = ['JDEEmbeddingHead']
class LossParam(nn.Layer):
    """
    Learnable scalar `s` that re-weights a loss value as
    `0.5 * (exp(-s) * loss + s)`.

    Args:
        init_value (float): initial value of the parameter.
        use_uncertainy (bool): accepted but not used by this layer.
    """

    def __init__(self, init_value=0., use_uncertainy=True):
        super(LossParam, self).__init__()
        param_attr = ParamAttr(initializer=Constant(value=init_value))
        self.loss_param = self.create_parameter(
            shape=[1], attr=param_attr, dtype="float32")

    def forward(self, inputs):
        scale = paddle.exp(-self.loss_param)
        weighted = scale * inputs + self.loss_param
        return weighted * 0.5
@register
class JDEEmbeddingHead(nn.Layer):
    """
    JDEEmbeddingHead

    Applies one 3x3 convolution per anchor level to produce identity
    embeddings, and owns the per-level learnable loss weights
    (``LossParam``) plus the identity classifier.

    Args:
        num_classes(int): Number of classes. Only support one class tracking.
        num_identifiers(int): Number of identifiers.
        anchor_levels(int): Number of anchor levels, same as FPN levels.
        anchor_scales(int): Number of anchor scales on each FPN level.
        embedding_dim(int): Embedding dimension. Default: 512.
        emb_loss(object): Instance of 'JDEEmbeddingLoss'
        jde_loss(object): Instance of 'JDELoss'
    """
    # NOTE: the docstring must be the first statement of the class body,
    # otherwise it is a plain string expression and `__doc__` stays None.
    __shared__ = ['num_classes']
    __inject__ = ['emb_loss', 'jde_loss']

    def __init__(
            self,
            num_classes=1,
            num_identifiers=1,  # defined by dataset.total_identities
            anchor_levels=3,
            anchor_scales=4,
            embedding_dim=512,
            emb_loss='JDEEmbeddingLoss',
            jde_loss='JDELoss'):
        super(JDEEmbeddingHead, self).__init__()
        self.num_classes = num_classes
        self.num_identifiers = num_identifiers
        self.anchor_levels = anchor_levels
        self.anchor_scales = anchor_scales
        self.embedding_dim = embedding_dim
        self.emb_loss = emb_loss
        self.jde_loss = jde_loss

        # Embedding scale: sqrt(2) * ln(num_identifiers - 1); falls back to 1
        # when there is at most one identity (the log would be undefined).
        if self.num_identifiers > 1:
            self.emb_scale = math.sqrt(2) * math.log(self.num_identifiers - 1)
        else:
            self.emb_scale = 1

        self.identify_outputs = []
        self.loss_params_cls = []
        self.loss_params_reg = []
        self.loss_params_ide = []
        for i in range(self.anchor_levels):
            name = 'identify_output.{}'.format(i)
            # Input channels halve with every level:
            # 64 * 2**anchor_levels, then /2, /4, ...
            identify_output = self.add_sublayer(
                name,
                nn.Conv2D(
                    in_channels=64 * (2**self.anchor_levels) // (2**i),
                    out_channels=self.embedding_dim,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    weight_attr=ParamAttr(name=name + '.conv.weights'),
                    bias_attr=ParamAttr(
                        name=name + '.conv.bias', regularizer=L2Decay(0.))))
            self.identify_outputs.append(identify_output)

            # Per-level learnable weights for the three loss terms.
            loss_p_cls = self.add_sublayer('cls.{}'.format(i), LossParam(-4.15))
            self.loss_params_cls.append(loss_p_cls)
            loss_p_reg = self.add_sublayer('reg.{}'.format(i), LossParam(-4.85))
            self.loss_params_reg.append(loss_p_reg)
            loss_p_ide = self.add_sublayer('ide.{}'.format(i), LossParam(-2.3))
            self.loss_params_ide.append(loss_p_ide)

        self.classifier = self.add_sublayer(
            'classifier',
            nn.Linear(
                self.embedding_dim,
                self.num_identifiers,
                weight_attr=ParamAttr(
                    learning_rate=1., initializer=Normal(
                        mean=0.0, std=0.01)),
                bias_attr=ParamAttr(
                    learning_rate=2., regularizer=L2Decay(0.))))

    def forward(self,
                identify_feats,
                targets=None,
                loss_confs=None,
                loss_boxes=None,
                test_emb=False):
        """Compute the JDE loss (training) or embeddings (evaluation).

        Args:
            identify_feats (list): One feature map per anchor level.
            targets (dict|None): Ground-truth targets; required in training
                and when ``test_emb`` is True.
            loss_confs (list|None): Per-level confidence losses (training).
            loss_boxes (list|None): Per-level box losses (training).
            test_emb (bool): In evaluation, return embeddings concatenated
                with their ground-truth ids instead of plain embeddings.

        Returns:
            In training, whatever ``self.jde_loss`` returns. In evaluation,
            the result of ``get_emb_and_gt_outs`` when ``test_emb`` is set,
            otherwise the result of ``get_emb_outs``.
        """
        assert len(identify_feats) == self.anchor_levels
        ide_outs = [
            ide_head(feat)
            for feat, ide_head in zip(identify_feats, self.identify_outputs)
        ]

        if self.training:
            # Identity check: `!= None` would go through `__ne__`.
            assert targets is not None
            assert len(loss_confs) == len(loss_boxes) == self.anchor_levels
            loss_ides = self.emb_loss(ide_outs, targets, self.emb_scale,
                                      self.classifier)
            return self.jde_loss(loss_confs, loss_boxes, loss_ides,
                                 self.loss_params_cls, self.loss_params_reg,
                                 self.loss_params_ide, targets)

        if test_emb:
            assert targets is not None
            return self.get_emb_and_gt_outs(ide_outs, targets)
        return self.get_emb_outs(ide_outs)

    def get_emb_and_gt_outs(self, ide_outs, targets):
        """Gather scaled, normalised embeddings at positive locations.

        For each level ``i`` the targets ``'tconf{i}'`` and ``'tide{i}'`` are
        read; locations whose ``tconf`` maximum over axis 1 is positive are
        kept. Each kept embedding is normalised, multiplied by
        ``self.emb_scale`` and concatenated with its target id as an extra
        column.

        Returns:
            Tensor with ``embedding_dim + 1`` columns; a single all-zero row
            when no location is selected on any level.
        """
        emb_and_gts = []
        for i, p_ide in enumerate(ide_outs):
            t_conf = targets['tconf{}'.format(i)]
            t_ide = targets['tide{}'.format(i)]
            # Move channels last so each row of the reshape is one embedding.
            p_ide = p_ide.transpose((0, 2, 3, 1))
            p_ide_flatten = paddle.reshape(p_ide, [-1, self.embedding_dim])

            mask = t_conf > 0
            mask = paddle.cast(mask, dtype="int64")
            # presumably axis 1 is the anchor axis -- TODO confirm
            emb_mask = mask.max(1).flatten()
            emb_mask_inds = paddle.nonzero(emb_mask > 0).flatten()
            if len(emb_mask_inds) > 0:
                t_ide_flatten = paddle.reshape(t_ide.max(1), [-1, 1])
                tids = paddle.gather(t_ide_flatten, emb_mask_inds)

                embedding = paddle.gather(p_ide_flatten, emb_mask_inds)
                embedding = self.emb_scale * F.normalize(embedding)
                emb_and_gt = paddle.concat([embedding, tids], axis=1)
                emb_and_gts.append(emb_and_gt)

        if emb_and_gts:
            return paddle.concat(emb_and_gts, axis=0)
        return paddle.zeros((1, self.embedding_dim + 1))

    def get_emb_outs(self, ide_outs):
        """Return normalised embeddings for every location of every level.

        Each level's output is moved to channels-last, repeated
        ``anchor_scales`` times, normalised along the last axis and flattened
        to rows of ``embedding_dim`` values. All levels are concatenated
        along axis 0; a single all-zero row is returned when ``ide_outs`` is
        empty.
        """
        emb_outs = []
        for p_ide in ide_outs:
            p_ide = p_ide.transpose((0, 2, 3, 1))
            # Tile along axis 1 of the unsqueezed tensor, once per scale.
            p_ide_repeat = paddle.tile(
                p_ide.unsqueeze(axis=0), [1, self.anchor_scales, 1, 1, 1])
            embedding = F.normalize(p_ide_repeat, axis=-1)
            emb = paddle.reshape(embedding, [-1, self.embedding_dim])
            emb_outs.append(emb)

        if emb_outs:
            return paddle.concat(emb_outs, axis=0)
        return paddle.zeros((1, self.embedding_dim))
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn.initializer import Normal, Constant
from paddle import ParamAttr
from .resnet import *
from ppdet.core.workspace import register
__all__ = ['PCBPlusDropoutPyramid']
@register
class PCBPlusDropoutPyramid(nn.Layer):
    """ResNet101-based pyramidal part embedding network.

    The backbone feature map is cut along axis 2 into ``num_stripes``
    stripes. Level ``l`` (0-based) uses windows of ``l + 1`` consecutive
    stripes slid with step one stripe, giving ``num_stripes - l`` branches.
    Each branch pools its window (average + max), applies a 1x1
    conv/BN/ReLU and a dropout + linear classifier.

    Args:
        input_ch (int): Channel count of the backbone output.
        num_stripes (int): Number of sub-parts (stripes).
        used_levels (sequence): One flag per level; a level whose flag is 0
            gets no branches.
        num_classes (int): Output size of every branch classifier.
        last_conv_stride (int): Forwarded to ``ResNet101``.
        last_conv_dilation (int): Forwarded to ``ResNet101``.
        num_conv_out_channels (int): Output channels of each branch conv.
    """

    def __init__(
            self,
            input_ch=2048,
            num_stripes=6,  # number of sub-parts
            used_levels=(1, 1, 1, 1, 1, 1),
            num_classes=751,
            last_conv_stride=1,
            last_conv_dilation=1,
            num_conv_out_channels=128):
        super(PCBPlusDropoutPyramid, self).__init__()
        self.num_stripes = num_stripes
        self.used_levels = used_levels
        self.num_classes = num_classes

        # Level l holds (num_stripes - l) branches: [S, S-1, ..., 1].
        self.num_in_each_level = list(range(self.num_stripes, 0, -1))
        self.num_branches = sum(self.num_in_each_level)

        self.base = ResNet101(
            lr_mult=0.1,
            last_conv_stride=last_conv_stride,
            last_conv_dilation=last_conv_dilation)
        self.dropout_layer = nn.Dropout(p=0.2)
        self.pyramid_conv_list0, self.pyramid_fc_list0 = self.basic_branch(
            num_conv_out_channels, input_ch)

    def _iter_used_branches(self):
        """Yield ``(idx_branches, idx_levels, idx_in_level)`` per used branch.

        ``idx_branches`` counts every branch of every level (used or not),
        so it stays stable when levels are disabled via ``used_levels``.
        """
        idx_branches = 0
        for idx_levels, num_in_level in enumerate(self.num_in_each_level):
            for idx_in_level in range(num_in_level):
                if self.used_levels[idx_levels] != 0:
                    yield idx_branches, idx_levels, idx_in_level
                idx_branches += 1

    def basic_branch(self, num_conv_out_channels, input_ch):
        """Build the per-branch conv blocks and classifiers.

        Returns:
            tuple: ``(pyramid_conv_list, pyramid_fc_list)``, two
            ``nn.LayerList`` objects with one entry per used branch.
        """
        # the level indexes are defined from fine to coarse,
        # the branch will contain one more part than that of its previous level
        # the sliding step is set to 1
        pyramid_conv_list = nn.LayerList()
        pyramid_fc_list = nn.LayerList()
        used_branches = list(self._iter_used_branches())

        # All conv blocks are created before any classifier, so the layer
        # creation order (and thus parameter initialisation order) is kept.
        for _ in used_branches:
            pyramid_conv_list.append(
                nn.Sequential(
                    nn.Conv2D(input_ch, num_conv_out_channels, 1),
                    nn.BatchNorm2D(num_conv_out_channels), nn.ReLU()))

        for idx_branches, _, _ in used_branches:
            name = "Linear_branch_id_{}".format(idx_branches)
            fc = nn.Linear(
                in_features=num_conv_out_channels,
                out_features=self.num_classes,
                weight_attr=ParamAttr(
                    name=name + "_weights",
                    initializer=Normal(
                        mean=0., std=0.001)),
                bias_attr=ParamAttr(
                    name=name + "_bias", initializer=Constant(value=0.)))
            pyramid_fc_list.append(fc)
        return pyramid_conv_list, pyramid_fc_list

    def pyramid_forward(self, feat):
        """Run every used branch on its window of ``feat``.

        Args:
            feat (Tensor): Backbone feature map; axis 2 is split into stripes.

        Returns:
            tuple: ``(feat_list, logits_list)`` with one flattened feature
            and one logits tensor per used branch.
        """
        each_stripe_size = feat.shape[2] // self.num_stripes
        k = feat.shape[-1]

        feat_list, logits_list = [], []
        for used_branches, (_, idx_levels, idx_in_each_level) in enumerate(
                self._iter_used_branches()):
            # A level-l window spans (l + 1) stripes and starts at the
            # stripe given by the branch's position inside its level.
            stripe_size_in_each_level = each_stripe_size * (idx_levels + 1)
            start = idx_in_each_level * each_stripe_size
            end = start + stripe_size_in_each_level

            window = feat[:, :, start:end, :]
            # The kernel covers the whole window.
            local_feat_avgpool = F.avg_pool2d(
                window, kernel_size=(stripe_size_in_each_level, k))
            local_feat_maxpool = F.max_pool2d(
                window, kernel_size=(stripe_size_in_each_level, k))
            local_feat = local_feat_avgpool + local_feat_maxpool

            local_feat = self.pyramid_conv_list0[used_branches](local_feat)
            local_feat = paddle.reshape(
                local_feat, shape=[local_feat.shape[0], -1])
            feat_list.append(local_feat)

            local_logits = self.pyramid_fc_list0[used_branches](
                self.dropout_layer(local_feat))
            logits_list.append(local_logits)
        return feat_list, logits_list

    def forward(self, x):
        """Return the branch features concatenated along the last axis.

        The branch logits are computed by ``pyramid_forward`` but are not
        part of the returned value.
        """
        feat = self.base(x)
        assert feat.shape[2] % self.num_stripes == 0
        feat_list, _ = self.pyramid_forward(feat)
        feat_out = paddle.concat(feat_list, axis=-1)
        return feat_out
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import math
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn.initializer import Normal
__all__ = ["ResNet18", "ResNet34", "ResNet50", "ResNet101", "ResNet152"]
class ConvBNLayer(nn.Layer):
    """Bias-free ``Conv2D`` followed by ``BatchNorm``.

    Convolution weights are drawn from a normal distribution with mean 0 and
    standard deviation ``sqrt(2 / (filter_size**2 * num_filters))``. Padding
    is ``(filter_size - 1) // 2``.

    Args:
        num_channels (int): Input channels.
        num_filters (int): Output channels.
        filter_size (int): Square kernel size.
        stride (int): Convolution stride.
        dilation (int): Convolution dilation.
        groups (int): Convolution groups.
        act (str|None): Activation passed to ``nn.BatchNorm``.
        lr_mult (float): Learning-rate multiplier of the conv weights.
        name (str|None): Base name for the parameters. The conv weight is
            named ``name + "_weights"``; batch-norm parameters are prefixed
            with ``"bn_" + name`` for ``"conv1"`` and with
            ``"bn" + name[3:]`` otherwise. When ``None`` (the default), no
            explicit parameter names are set.
        data_format (str): Data layout passed to both sub-layers.
    """

    def __init__(self,
                 num_channels,
                 num_filters,
                 filter_size,
                 stride=1,
                 dilation=1,
                 groups=1,
                 act=None,
                 lr_mult=1.0,
                 name=None,
                 data_format="NCHW"):
        super(ConvBNLayer, self).__init__()

        # `name` defaults to None; string concatenation on it would raise
        # TypeError, so derive every parameter name through guards.
        if name is None:
            weight_name = None
            bn_name = None
        else:
            weight_name = name + "_weights"
            if name == "conv1":
                bn_name = "bn_" + name
            else:
                # Drops the first three characters (e.g. a "res" prefix).
                bn_name = "bn" + name[3:]

        def bn_param_name(suffix):
            return None if bn_name is None else bn_name + suffix

        conv_stdv = filter_size * filter_size * num_filters
        self._conv = nn.Conv2D(
            in_channels=num_channels,
            out_channels=num_filters,
            kernel_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            dilation=dilation,
            groups=groups,
            weight_attr=ParamAttr(
                name=weight_name,
                learning_rate=lr_mult,
                initializer=Normal(0, math.sqrt(2. / conv_stdv))),
            bias_attr=False,
            data_format=data_format)

        self._batch_norm = nn.BatchNorm(
            num_filters,
            act=act,
            param_attr=ParamAttr(name=bn_param_name("_scale")),
            bias_attr=ParamAttr(bn_param_name("_offset")),
            moving_mean_name=bn_param_name("_mean"),
            moving_variance_name=bn_param_name("_variance"),
            data_layout=data_format)

    def forward(self, inputs):
        """Apply the convolution, then batch normalisation."""
        y = self._conv(inputs)
        y = self._batch_norm(y)
        return y
class BottleneckBlock(nn.Layer):
    """Residual bottleneck: 1x1 -> 3x3 -> 1x1 convolutions plus a skip path.

    The last convolution outputs ``num_filters * 4`` channels. When
    ``shortcut`` is False the skip path is a 1x1 ``ConvBNLayer`` projection
    (using ``stride``); otherwise the input is added unchanged.

    Args:
        num_channels (int): Input channels.
        num_filters (int): Channels of the first two convolutions.
        stride (int): Stride of the 3x3 convolution and of the projection.
        shortcut (bool): True to use the identity skip path.
        name (str): Name prefix; sub-layers append ``_branch2a``,
            ``_branch2b``, ``_branch2c`` and ``_branch1``.
        lr_mult (float): Learning-rate multiplier for all convolutions.
        dilation (int): Dilation passed to every convolution.
        data_format (str): Data layout passed to every sub-layer.
    """

    def __init__(self,
                 num_channels,
                 num_filters,
                 stride,
                 shortcut=True,
                 name=None,
                 lr_mult=1.0,
                 dilation=1,
                 data_format="NCHW"):
        super(BottleneckBlock, self).__init__()
        expanded = num_filters * 4
        # Arguments shared by every ConvBNLayer of this block.
        common = dict(
            dilation=dilation, lr_mult=lr_mult, data_format=data_format)

        self.conv0 = ConvBNLayer(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=1,
            act="relu",
            name=name + "_branch2a",
            **common)
        self.conv1 = ConvBNLayer(
            num_channels=num_filters,
            num_filters=num_filters,
            filter_size=3,
            stride=stride,
            act="relu",
            name=name + "_branch2b",
            **common)
        self.conv2 = ConvBNLayer(
            num_channels=num_filters,
            num_filters=expanded,
            filter_size=1,
            act=None,
            name=name + "_branch2c",
            **common)

        if not shortcut:
            # Projection so the skip path matches the main path's output.
            self.short = ConvBNLayer(
                num_channels=num_channels,
                num_filters=expanded,
                filter_size=1,
                stride=stride,
                name=name + "_branch1",
                **common)

        self.shortcut = shortcut
        self._num_channels_out = expanded

    def forward(self, inputs):
        """Return ``relu(skip(inputs) + convs(inputs))``."""
        residual = self.conv2(self.conv1(self.conv0(inputs)))
        identity = inputs if self.shortcut else self.short(inputs)
        return F.relu(paddle.add(x=identity, y=residual))
class BasicBlock(nn.Layer):
    """Residual block made of two 3x3 convolutions plus a skip path.

    When ``shortcut`` is False the skip path is a 1x1 ``ConvBNLayer``
    projection (using ``stride``); otherwise the input is added unchanged.

    Args:
        num_channels (int): Input channels.
        num_filters (int): Output channels of both convolutions.
        stride (int): Stride of the first convolution and of the projection.
        shortcut (bool): True to use the identity skip path.
        name (str): Name prefix; sub-layers append ``_branch2a``,
            ``_branch2b`` and ``_branch1``.
        data_format (str): Data layout passed to every sub-layer.
    """

    def __init__(self,
                 num_channels,
                 num_filters,
                 stride,
                 shortcut=True,
                 name=None,
                 data_format="NCHW"):
        super(BasicBlock, self).__init__()
        self.stride = stride

        self.conv0 = ConvBNLayer(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=3,
            stride=stride,
            act="relu",
            name=name + "_branch2a",
            data_format=data_format)
        self.conv1 = ConvBNLayer(
            num_channels=num_filters,
            num_filters=num_filters,
            filter_size=3,
            act=None,
            name=name + "_branch2b",
            data_format=data_format)

        if not shortcut:
            # Projection so the skip path matches the main path's output.
            self.short = ConvBNLayer(
                num_channels=num_channels,
                num_filters=num_filters,
                filter_size=1,
                stride=stride,
                name=name + "_branch1",
                data_format=data_format)

        self.shortcut = shortcut

    def forward(self, inputs):
        """Return ``relu(skip(inputs) + convs(inputs))``."""
        residual = self.conv1(self.conv0(inputs))
        identity = inputs if self.shortcut else self.short(inputs)
        return F.relu(paddle.add(x=identity, y=residual))
class ResNet(nn.Layer):
    """ResNet feature extractor (stem, max-pool and four residual stages).

    Depths 50/101/152 are built from ``BottleneckBlock``; depths 18/34 from
    ``BasicBlock``. ``last_conv_stride`` and ``last_conv_dilation`` are only
    applied on the bottleneck path, and ``lr_mult`` reaches the stem
    convolution and the bottleneck blocks only.

    Args:
        layers (int): Network depth, one of 18, 34, 50, 101, 152.
        lr_mult (float): Learning-rate multiplier for convolutions.
        last_conv_stride (int): Stride of the first block of the last stage.
        last_conv_dilation (int): Dilation used in the last stage.
    """

    def __init__(self,
                 layers=50,
                 lr_mult=1.0,
                 last_conv_stride=2,
                 last_conv_dilation=1):
        super(ResNet, self).__init__()
        self.layers = layers
        self.data_format = "NCHW"
        self.input_image_channel = 3

        supported_layers = [18, 34, 50, 101, 152]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(
                supported_layers, layers)

        # Number of residual blocks in each of the four stages.
        depth_table = {
            18: [2, 2, 2, 2],
            34: [3, 4, 6, 3],
            50: [3, 4, 6, 3],
            101: [3, 4, 23, 3],
            152: [3, 8, 36, 3],
        }
        depth = depth_table[layers]
        use_bottleneck = layers >= 50

        if use_bottleneck:
            num_channels = [64, 256, 512, 1024]
        else:
            num_channels = [64, 64, 128, 256]
        num_filters = [64, 128, 256, 512]
        last_stage = len(depth) - 1

        self.conv = ConvBNLayer(
            num_channels=self.input_image_channel,
            num_filters=64,
            filter_size=7,
            stride=2,
            act="relu",
            lr_mult=lr_mult,
            name="conv1",
            data_format=self.data_format)
        self.pool2d_max = nn.MaxPool2D(
            kernel_size=3, stride=2, padding=1, data_format=self.data_format)

        self.block_list = []
        for stage, num_blocks in enumerate(depth):
            stage_tag = "res" + str(stage + 2)
            for idx in range(num_blocks):
                is_first = idx == 0
                if use_bottleneck:
                    # 101/152 name the third stage "res4a", "res4b1", ...;
                    # every other stage uses letters a, b, c, ...
                    if layers in [101, 152] and stage == 2:
                        suffix = "a" if is_first else "b" + str(idx)
                    else:
                        suffix = chr(97 + idx)
                    conv_name = stage_tag + suffix

                    # Only the first block of stages 1..3 changes resolution.
                    if not is_first or stage == 0:
                        stride = 1
                    elif stage == last_stage:
                        stride = last_conv_stride
                    else:
                        stride = 2

                    if is_first:
                        in_channels = num_channels[stage]
                    else:
                        in_channels = num_filters[stage] * 4
                    if stage == last_stage:
                        block_dilation = last_conv_dilation
                    else:
                        block_dilation = 1

                    layer = BottleneckBlock(
                        num_channels=in_channels,
                        num_filters=num_filters[stage],
                        stride=stride,
                        shortcut=not is_first,
                        name=conv_name,
                        lr_mult=lr_mult,
                        dilation=block_dilation,
                        data_format=self.data_format)
                else:
                    conv_name = stage_tag + chr(97 + idx)
                    if is_first:
                        in_channels = num_channels[stage]
                    else:
                        in_channels = num_filters[stage]
                    layer = BasicBlock(
                        num_channels=in_channels,
                        num_filters=num_filters[stage],
                        stride=2 if is_first and stage != 0 else 1,
                        shortcut=not is_first,
                        name=conv_name,
                        data_format=self.data_format)

                # Register under the block name, then keep the call order.
                self.block_list.append(self.add_sublayer(conv_name, layer))

    def forward(self, inputs):
        """Run the stem, the max-pool and every residual block in order."""
        out = self.pool2d_max(self.conv(inputs))
        for layer in self.block_list:
            out = layer(out)
        return out
def ResNet18(**args):
    """Build an 18-layer ``ResNet``; ``args`` are forwarded unchanged."""
    return ResNet(layers=18, **args)
def ResNet34(**args):
    """Build a 34-layer ``ResNet``; ``args`` are forwarded unchanged."""
    return ResNet(layers=34, **args)
def ResNet50(pretrained=None, **args):
    """Build a 50-layer ``ResNet``, optionally loading pretrained weights.

    Args:
        pretrained (str|None): Checkpoint path without extension; the file
            ``pretrained + '.pdparams'`` is loaded when given.
        **args: Forwarded to ``ResNet``.

    Returns:
        ResNet: The constructed (and possibly initialised) model.

    Raises:
        ValueError: If ``pretrained`` is given but
            ``pretrained + '.pdparams'`` does not exist.
    """
    model = ResNet(layers=50, **args)
    if pretrained is not None:
        weights_path = pretrained + '.pdparams'
        # Validate exactly the file that is loaded below. Accepting a bare
        # directory would pass validation and then fail inside the loader.
        if not os.path.exists(weights_path):
            raise ValueError("Model pretrain path {} does not "
                             "exists.".format(pretrained))
        param_state_dict = paddle.load(weights_path)
        model.set_dict(param_state_dict)
    return model
def ResNet101(pretrained=None, **args):
    """Build a 101-layer ``ResNet``, optionally loading pretrained weights.

    Args:
        pretrained (str|None): Checkpoint path without extension; the file
            ``pretrained + '.pdparams'`` is loaded when given.
        **args: Forwarded to ``ResNet``.

    Returns:
        ResNet: The constructed (and possibly initialised) model.

    Raises:
        ValueError: If ``pretrained`` is given but
            ``pretrained + '.pdparams'`` does not exist.
    """
    model = ResNet(layers=101, **args)
    if pretrained is not None:
        weights_path = pretrained + '.pdparams'
        # Validate exactly the file that is loaded below. Accepting a bare
        # directory would pass validation and then fail inside the loader.
        if not os.path.exists(weights_path):
            raise ValueError("Model pretrain path {} does not "
                             "exists.".format(pretrained))
        param_state_dict = paddle.load(weights_path)
        model.set_dict(param_state_dict)
    return model
def ResNet152(**args):
    """Build a 152-layer ``ResNet``; ``args`` are forwarded unchanged."""
    return ResNet(layers=152, **args)
......@@ -10,3 +10,5 @@ Cython
pycocotools
#xtcocotools==1.6 #only for crowdpose
setuptools>=42.0.0
#lap #for mot
#motmetrics #for mot
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册