未验证 提交 b2f3ad7c 编写于 作者: F Feng Ni 提交者: GitHub

[MOT] refine deepsort, fix jde (#4490)

上级 d4a7c9e0
......@@ -17,19 +17,21 @@ import time
import yaml
import cv2
import numpy as np
import paddle
from benchmark_utils import PaddleInferBenchmark
from preprocess import preprocess
from tracker import DeepSORTTracker
from ppdet.modeling.mot import visualization as mot_vis
from ppdet.modeling.mot.utils import MOTTimer
from collections import defaultdict
import paddle
from paddle.inference import Config
from paddle.inference import create_predictor
from preprocess import preprocess
from utils import argsparser, Timer, get_current_memory_mb
from infer import get_test_images, print_arguments, PredictConfig, Detector
from mot_jde_infer import write_mot_results
from infer import Detector, get_test_images, print_arguments, PredictConfig
from infer import load_predictor
from benchmark_utils import PaddleInferBenchmark
from ppdet.modeling.mot.tracker import DeepSORTTracker
from ppdet.modeling.mot.visualization import plot_tracking
from ppdet.modeling.mot.utils import MOTTimer, write_mot_results
# Global dictionary
......@@ -362,7 +364,7 @@ def predict_image(detector, reid_model, image_list):
online_tlwhs, online_scores, online_ids = reid_model.predict(
crops, pred_dets)
online_im = mot_vis.plot_tracking(
online_im = plot_tracking(
frame, online_tlwhs, online_ids, online_scores, frame_id=i)
if FLAGS.save_images:
......@@ -396,7 +398,7 @@ def predict_video(detector, reid_model, camera_id):
writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
frame_id = 0
timer = MOTTimer()
results = []
results = defaultdict(list)
while (1):
ret, frame = capture.read()
if not ret:
......@@ -415,12 +417,12 @@ def predict_video(detector, reid_model, camera_id):
crops = reid_model.get_crops(pred_xyxys, frame)
online_tlwhs, online_scores, online_ids = reid_model.predict(
crops, pred_dets)
(frame_id + 1, online_tlwhs, online_scores, online_ids))
fps = 1. / timer.average_time
im = mot_vis.plot_tracking(
im = plot_tracking(
......@@ -437,23 +439,6 @@ def predict_video(detector, reid_model, camera_id):
if FLAGS.save_mot_txt_per_img:
save_dir = os.path.join(FLAGS.output_dir, video_name.split('.')[-2])
if not os.path.exists(save_dir):
result_filename = os.path.join(save_dir,
# In the first few frames the model may have detection results but no
# tracking results; use the detection results instead and set the id to -1.
if results[-1][2] == []:
tlwhs = [tlwh for tlwh in pred_dets[:, :4]]
scores = [score[0] for score in pred_dets[:, 4:5]]
ids = [-1] * len(tlwhs)
result = (frame_id + 1, tlwhs, scores, ids)
result = results[-1]
write_mot_results(result_filename, [result])
frame_id += 1
print('detect frame:%d' % (frame_id))
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# See the License for the specific language governing permissions and
# limitations under the License.
from . import deepsort_tracker
from .deepsort_tracker import *
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# See the License for the specific language governing permissions and
# limitations under the License.
This code is borrowed from https://github.com/nwojke/deep_sort/blob/master/deep_sort/tracker.py
import numpy as np
from ppdet.modeling.mot.motion import KalmanFilter
from ppdet.modeling.mot.matching.deepsort_matching import NearestNeighborDistanceMetric
from ppdet.modeling.mot.matching.deepsort_matching import iou_cost, min_cost_matching, matching_cascade, gate_cost_matrix
from ppdet.modeling.mot.tracker.base_sde_tracker import Track
from ppdet.modeling.mot.utils import Detection
__all__ = ['DeepSORTTracker']
class DeepSORTTracker(object):
DeepSORT tracker
input_size (list): input feature map size to reid model, [h, w] format,
[64, 192] as default.
min_box_area (int): min box area to filter out low quality boxes
vertical_ratio (float): w/h, the vertical ratio of the bbox used to filter
bad results, set to 1.6 by default for pedestrian tracking. If set to <= 0,
bboxes are not filtered.
budget (int): If not None, fix samples per class to at most this number.
Removes the oldest samples when the budget is reached.
max_age (int): maximum number of consecutive misses before a track is deleted
n_init (float): Number of frames that a track remains in initialization
phase. Number of consecutive detections before the track is confirmed.
The track state is set to `Deleted` if a miss occurs within the first
`n_init` frames.
metric_type (str): either "euclidean" or "cosine", the distance metric
used for measurement to track association.
matching_threshold (float): samples with larger distance are
considered an invalid match.
max_iou_distance (float): max iou distance threshold
motion (object): KalmanFilter instance
def __init__(self,
input_size=[64, 192],
self.input_size = input_size
self.min_box_area = min_box_area
self.vertical_ratio = vertical_ratio
self.max_age = max_age
self.n_init = n_init
self.metric = NearestNeighborDistanceMetric(metric_type,
matching_threshold, budget)
self.max_iou_distance = max_iou_distance
self.motion = KalmanFilter()
self.tracks = []
self._next_id = 1
def predict(self):
Propagate track state distributions one time step forward.
This function should be called once every time step, before `update`.
for track in self.tracks:
def update(self, pred_dets, pred_embs):
pred_dets (Tensor): Detection results of the image, shape is [N, 6].
pred_embs (Tensor): Embedding results of the image, shape is [N, 128],
usually pred_embs.shape[1] can be a multiple of 128, in PCB
Pyramidal model is 128*21.
pred_tlwhs = pred_dets[:, :4]
pred_scores = pred_dets[:, 4:5]
pred_cls_ids = pred_dets[:, 5:]
detections = [
Detection(tlwh, score, feat, cls_id)
for tlwh, score, feat, cls_id in zip(pred_tlwhs, pred_scores,
pred_embs, pred_cls_ids)
# Run matching cascade.
matches, unmatched_tracks, unmatched_detections = \
# Update track set.
for track_idx, detection_idx in matches:
for track_idx in unmatched_tracks:
for detection_idx in unmatched_detections:
self.tracks = [t for t in self.tracks if not t.is_deleted()]
# Update distance metric.
active_targets = [t.track_id for t in self.tracks if t.is_confirmed()]
features, targets = [], []
for track in self.tracks:
if not track.is_confirmed():
features += track.features
targets += [track.track_id for _ in track.features]
track.features = []
np.asarray(features), np.asarray(targets), active_targets)
output_stracks = self.tracks
return output_stracks
def _match(self, detections):
def gated_metric(tracks, dets, track_indices, detection_indices):
features = np.array([dets[i].feature for i in detection_indices])
targets = np.array([tracks[i].track_id for i in track_indices])
cost_matrix = self.metric.distance(features, targets)
cost_matrix = gate_cost_matrix(self.motion, cost_matrix, tracks,
dets, track_indices,
return cost_matrix
# Split track set into confirmed and unconfirmed tracks.
confirmed_tracks = [
i for i, t in enumerate(self.tracks) if t.is_confirmed()
unconfirmed_tracks = [
i for i, t in enumerate(self.tracks) if not t.is_confirmed()
# Associate confirmed tracks using appearance features.
matches_a, unmatched_tracks_a, unmatched_detections = \
gated_metric, self.metric.matching_threshold, self.max_age,
self.tracks, detections, confirmed_tracks)
# Associate remaining tracks together with unconfirmed tracks using IOU.
iou_track_candidates = unconfirmed_tracks + [
k for k in unmatched_tracks_a
if self.tracks[k].time_since_update == 1
unmatched_tracks_a = [
k for k in unmatched_tracks_a
if self.tracks[k].time_since_update != 1
matches_b, unmatched_tracks_b, unmatched_detections = \
iou_cost, self.max_iou_distance, self.tracks,
detections, iou_track_candidates, unmatched_detections)
matches = matches_a + matches_b
unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b))
return matches, unmatched_tracks, unmatched_detections
def _initiate_track(self, detection):
mean, covariance = self.motion.initiate(detection.to_xyah())
Track(mean, covariance, self._next_id, self.n_init, self.max_age,
detection.cls_id, detection.score, detection.feature))
self._next_id += 1
......@@ -184,7 +184,7 @@ class Tracker(object):
use_detector = False if not self.model.detector else True
timer = MOTTimer()
results = []
results = defaultdict(list)
frame_id = 0
self.status['mode'] = 'track'
......@@ -269,6 +269,7 @@ class Tracker(object):
data.update({'crops': crops})
pred_embs = self.model(data)
pred_dets, pred_embs = pred_dets.numpy(), pred_embs.numpy()
online_targets = tracker.update(pred_dets, pred_embs)
......@@ -291,7 +292,7 @@ class Tracker(object):
# save results
(frame_id + 1, online_tlwhs, online_scores, online_ids))
save_vis_results(data, frame_id, online_ids, online_tlwhs,
online_scores, timer.average_time, show_image,
......@@ -105,7 +105,7 @@ class JDE(BaseArch):
nms_keep_idx = det_outs['nms_keep_idx']
pred_dets = paddle.concat((bbox[:, 2:], bbox[:, 1:2]), axis=1)
pred_dets = paddle.concat((bbox[:, 2:], bbox[:, 1:2], bbox[:, 0:1]), axis=1)
emb_valid = paddle.gather_nd(emb_outs, boxes_idx)
pred_embs = paddle.gather_nd(emb_valid, nms_keep_idx)
......@@ -17,6 +17,7 @@ This code is borrow from https://github.com/nwojke/deep_sort/blob/master/deep_so
import numpy as np
from ..motion import KalmanFilter
from ..matching.deepsort_matching import NearestNeighborDistanceMetric
from ..matching.deepsort_matching import iou_cost, min_cost_matching, matching_cascade, gate_cost_matrix
from .base_sde_tracker import Track
......@@ -32,7 +33,6 @@ __all__ = ['DeepSORTTracker']
class DeepSORTTracker(object):
__inject__ = ['motion']
DeepSORT tracker
......@@ -77,7 +77,8 @@ class DeepSORTTracker(object):
self.metric = NearestNeighborDistanceMetric(metric_type,
matching_threshold, budget)
self.max_iou_distance = max_iou_distance
self.motion = motion
if motion == 'KalmanFilter':
self.motion = KalmanFilter()
self.tracks = []
self._next_id = 1
......@@ -94,14 +95,14 @@ class DeepSORTTracker(object):
Perform measurement update and track management.
pred_dets (Tensor): Detection results of the image, shape is [N, 6].
pred_embs (Tensor): Embedding results of the image, shape is [N, 128],
usually pred_embs.shape[1] can be a multiple of 128, in PCB
Pyramidal model is 128*21.
pred_dets (np.array): Detection results of the image, the shape is
[N, 6], means 'x0, y0, x1, y1, score, cls_id'.
pred_embs (np.array): Embedding results of the image, the shape is
[N, 128], usually pred_embs.shape[1] is a multiple of 128.
pred_tlwhs = pred_dets[:, :4]
pred_scores = pred_dets[:, 4:5].squeeze(1)
pred_cls_ids = pred_dets[:, 5:].squeeze(1)
pred_scores = pred_dets[:, 4:5]
pred_cls_ids = pred_dets[:, 5:]
detections = [
Detection(tlwh, score, feat, cls_id)
......@@ -21,9 +21,9 @@ import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn.initializer import Normal, Constant
from paddle import ParamAttr
from paddle.nn import AdaptiveAvgPool2D, BatchNorm, Conv2D, Dropout, Linear
from paddle.nn import AdaptiveAvgPool2D, BatchNorm, Conv2D, Linear
from paddle.regularizer import L2Decay
from paddle.nn.initializer import KaimingNormal
from paddle.nn.initializer import KaimingNormal, XavierNormal
from ppdet.core.workspace import register
__all__ = ['PPLCNetEmbedding']
......@@ -250,6 +250,17 @@ class PPLCNet(nn.Layer):
return x
class FC(nn.Layer):
    """Single linear layer whose weight is initialized with XavierNormal.

    Args:
        input_ch: Input feature size passed to ``paddle.nn.Linear``.
        output_ch: Output feature size passed to ``paddle.nn.Linear``.
    """

    def __init__(self, input_ch, output_ch):
        super(FC, self).__init__()
        # Only the weight gets the Xavier-normal initializer; the bias keeps
        # whatever default paddle.nn.Linear applies.
        xavier_attr = ParamAttr(initializer=XavierNormal())
        self.fc = paddle.nn.Linear(
            input_ch, output_ch, weight_attr=xavier_attr)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        return self.fc(x)
class PPLCNetEmbedding(nn.Layer):
......@@ -262,7 +273,7 @@ class PPLCNetEmbedding(nn.Layer):
def __init__(self, scale=2.5, input_ch=1280, output_ch=512):
super(PPLCNetEmbedding, self).__init__()
self.backbone = PPLCNet(scale=scale)
self.neck = nn.Linear(input_ch, output_ch)
self.neck = FC(input_ch, output_ch)
def forward(self, x):
feat = self.backbone(x)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
想要评论请 注册