From 56d22694f75b00e89cf694ccc069e4a027b2b6fb Mon Sep 17 00:00:00 2001 From: Feng Ni Date: Tue, 8 Mar 2022 12:03:05 +0800 Subject: [PATCH] [MOT] unify mot and det output format (#5320) --- .../python/mot/tracker/deepsort_tracker.py | 8 +-- .../python/mot/tracker/jde_tracker.py | 51 ++++++++++++------- deploy/python/mot_jde_infer.py | 4 +- deploy/python/mot_sde_infer.py | 5 +- ppdet/engine/tracker.py | 4 +- .../modeling/mot/tracker/deepsort_tracker.py | 8 +-- ppdet/modeling/mot/tracker/jde_tracker.py | 51 ++++++++++++------- ppdet/modeling/post_process.py | 3 +- ppdet/modeling/reid/jde_embedding_head.py | 5 +- 9 files changed, 82 insertions(+), 57 deletions(-) diff --git a/deploy/pptracking/python/mot/tracker/deepsort_tracker.py b/deploy/pptracking/python/mot/tracker/deepsort_tracker.py index 045cfba7f..d42447580 100644 --- a/deploy/pptracking/python/mot/tracker/deepsort_tracker.py +++ b/deploy/pptracking/python/mot/tracker/deepsort_tracker.py @@ -90,13 +90,13 @@ class DeepSORTTracker(object): Perform measurement update and track management. Args: pred_dets (np.array): Detection results of the image, the shape is - [N, 6], means 'x0, y0, x1, y1, score, cls_id'. + [N, 6], means 'cls_id, score, x0, y0, x1, y1'. pred_embs (np.array): Embedding results of the image, the shape is [N, 128], usually pred_embs.shape[1] is a multiple of 128. 
""" - pred_tlwhs = pred_dets[:, :4] - pred_scores = pred_dets[:, 4:5] - pred_cls_ids = pred_dets[:, 5:] + pred_cls_ids = pred_dets[:, 0:1] + pred_scores = pred_dets[:, 1:2] + pred_tlwhs = pred_dets[:, 2:6] detections = [ Detection(tlwh, score, feat, cls_id) diff --git a/deploy/pptracking/python/mot/tracker/jde_tracker.py b/deploy/pptracking/python/mot/tracker/jde_tracker.py index 11bac733a..45d4965b4 100644 --- a/deploy/pptracking/python/mot/tracker/jde_tracker.py +++ b/deploy/pptracking/python/mot/tracker/jde_tracker.py @@ -100,7 +100,7 @@ class JDETracker(object): Args: pred_dets (np.array): Detection results of the image, the shape is - [N, 6], means 'x0, y0, x1, y1, score, cls_id'. + [N, 6], means 'cls_id, score, x0, y0, x1, y1'. pred_embs (np.array): Embedding results of the image, the shape is [N, 128] or [N, 512]. @@ -122,7 +122,7 @@ class JDETracker(object): # unify single and multi classes detection and embedding results for cls_id in range(self.num_classes): - cls_idx = (pred_dets[:, 5:] == cls_id).squeeze(-1) + cls_idx = (pred_dets[:, 0:1] == cls_id).squeeze(-1) pred_dets_dict[cls_id] = pred_dets[cls_idx] if pred_embs is not None: pred_embs_dict[cls_id] = pred_embs[cls_idx] @@ -133,21 +133,26 @@ class JDETracker(object): """ Step 1: Get detections by class""" pred_dets_cls = pred_dets_dict[cls_id] pred_embs_cls = pred_embs_dict[cls_id] - remain_inds = (pred_dets_cls[:, 4:5] > self.conf_thres).squeeze(-1) + remain_inds = (pred_dets_cls[:, 1:2] > self.conf_thres).squeeze(-1) if remain_inds.sum() > 0: pred_dets_cls = pred_dets_cls[remain_inds] if self.use_byte: detections = [ STrack( - STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], cls_id, 30, temp_feat=None) - for tlbrs in pred_dets_cls + STrack.tlbr_to_tlwh(tlbrs[2:6]), + tlbrs[1], + cls_id, + 30, + temp_feat=None) for tlbrs in pred_dets_cls ] else: pred_embs_cls = pred_embs_cls[remain_inds] detections = [ STrack( - STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], cls_id, 30, temp_feat) - for (tlbrs, temp_feat) in 
zip(pred_dets_cls, pred_embs_cls) + STrack.tlbr_to_tlwh(tlbrs[2:6]), tlbrs[1], cls_id, + 30, temp_feat) + for (tlbrs, temp_feat + ) in zip(pred_dets_cls, pred_embs_cls) ] else: detections = [] @@ -171,14 +176,17 @@ class JDETracker(object): STrack.multi_predict(track_pool_dict[cls_id], self.motion) if self.use_byte: - dists = matching.iou_distance(track_pool_dict[cls_id], detections) + dists = matching.iou_distance(track_pool_dict[cls_id], + detections) matches, u_track, u_detection = matching.linear_assignment( - dists, thresh=self.match_thres) # not self.tracked_thresh + dists, thresh=self.match_thres) # not self.tracked_thresh else: dists = matching.embedding_distance( - track_pool_dict[cls_id], detections, metric=self.metric_type) - dists = matching.fuse_motion(self.motion, dists, - track_pool_dict[cls_id], detections) + track_pool_dict[cls_id], + detections, + metric=self.metric_type) + dists = matching.fuse_motion( + self.motion, dists, track_pool_dict[cls_id], detections) matches, u_track, u_detection = matching.linear_assignment( dists, thresh=self.tracked_thresh) @@ -199,15 +207,20 @@ class JDETracker(object): # None of the steps below happen if there are no undetected tracks. 
""" Step 3: Second association, with IOU""" if self.use_byte: - inds_low = pred_dets_dict[cls_id][:, 4:5] > self.low_conf_thres - inds_high = pred_dets_dict[cls_id][:, 4:5] < self.conf_thres + inds_low = pred_dets_dict[cls_id][:, 1:2] > self.low_conf_thres + inds_high = pred_dets_dict[cls_id][:, 1:2] < self.conf_thres inds_second = np.logical_and(inds_low, inds_high).squeeze(-1) pred_dets_cls_second = pred_dets_dict[cls_id][inds_second] # association the untrack to the low score detections if len(pred_dets_cls_second) > 0: detections_second = [ - STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], cls_id, 30, temp_feat=None) + STrack( + STrack.tlbr_to_tlwh(tlbrs[:4]), + tlbrs[4], + cls_id, + 30, + temp_feat=None) for tlbrs in pred_dets_cls_second[:, :5] ] else: @@ -216,9 +229,10 @@ class JDETracker(object): track_pool_dict[cls_id][i] for i in u_track if track_pool_dict[cls_id][i].state == TrackState.Tracked ] - dists = matching.iou_distance(r_tracked_stracks, detections_second) + dists = matching.iou_distance(r_tracked_stracks, + detections_second) matches, u_track, u_detection_second = matching.linear_assignment( - dists, thresh=0.4) # not r_tracked_thresh + dists, thresh=0.4) # not r_tracked_thresh else: detections = [detections[i] for i in u_detection] r_tracked_stracks = [] @@ -232,7 +246,8 @@ class JDETracker(object): for i_tracked, idet in matches: track = r_tracked_stracks[i_tracked] - det = detections[idet] if not self.use_byte else detections_second[idet] + det = detections[ + idet] if not self.use_byte else detections_second[idet] if track.state == TrackState.Tracked: track.update(det, self.frame_id) activated_tracks_dict[cls_id].append(track) diff --git a/deploy/python/mot_jde_infer.py b/deploy/python/mot_jde_infer.py index 04603b446..99033caba 100644 --- a/deploy/python/mot_jde_infer.py +++ b/deploy/python/mot_jde_infer.py @@ -115,7 +115,7 @@ class JDE_Detector(Detector): return result def tracking(self, det_results): - pred_dets = det_results['pred_dets'] 
+ pred_dets = det_results['pred_dets'] # 'cls_id, score, x0, y0, x1, y1' pred_embs = det_results['pred_embs'] online_targets_dict = self.tracker.update(pred_dets, pred_embs) @@ -143,7 +143,7 @@ class JDE_Detector(Detector): repeats (int): repeats number for prediction Returns: result (dict): include 'pred_dets': np.ndarray: shape:[N,6], N: number of box, - matix element:[x_min, y_min, x_max, y_max, score, class] + matrix element:[class, score, x_min, y_min, x_max, y_max] FairMOT(JDE)'s result include 'pred_embs': np.ndarray: shape: [N, 128] ''' diff --git a/deploy/python/mot_sde_infer.py b/deploy/python/mot_sde_infer.py index f54c8c312..37c4cdae0 100644 --- a/deploy/python/mot_sde_infer.py +++ b/deploy/python/mot_sde_infer.py @@ -111,11 +111,8 @@ class SDE_Detector(Detector): low_conf_thres=low_conf_thres) def tracking(self, det_results): - pred_dets = det_results['boxes'] + pred_dets = det_results['boxes'] # 'cls_id, score, x0, y0, x1, y1' pred_embs = None - pred_dets = np.concatenate( - (pred_dets[:, 2:], pred_dets[:, 1:2], pred_dets[:, 0:1]), 1) - # pred_dets should be 'x0, y0, x1, y1, score, cls_id' online_targets_dict = self.tracker.update(pred_dets, pred_embs) online_tlwhs = defaultdict(list) diff --git a/ppdet/engine/tracker.py b/ppdet/engine/tracker.py index 794e00045..daa7a47af 100644 --- a/ppdet/engine/tracker.py +++ b/ppdet/engine/tracker.py @@ -282,14 +282,14 @@ class Tracker(object): # thus will not inference reid model continue - pred_scores = pred_scores[keep_idx[0]] pred_cls_ids = pred_cls_ids[keep_idx[0]] + pred_scores = pred_scores[keep_idx[0]] pred_tlwhs = np.concatenate( (pred_xyxys[:, 0:2], pred_xyxys[:, 2:4] - pred_xyxys[:, 0:2] + 1), axis=1) pred_dets = np.concatenate( - (pred_tlwhs, pred_scores, pred_cls_ids), axis=1) + (pred_cls_ids, pred_scores, pred_tlwhs), axis=1) tracker = self.model.tracker crops = get_crops( diff --git a/ppdet/modeling/mot/tracker/deepsort_tracker.py b/ppdet/modeling/mot/tracker/deepsort_tracker.py index 
ef38a67f9..fe5aa25b3 100644 --- a/ppdet/modeling/mot/tracker/deepsort_tracker.py +++ b/ppdet/modeling/mot/tracker/deepsort_tracker.py @@ -96,13 +96,13 @@ class DeepSORTTracker(object): Perform measurement update and track management. Args: pred_dets (np.array): Detection results of the image, the shape is - [N, 6], means 'x0, y0, x1, y1, score, cls_id'. + [N, 6], means 'cls_id, score, x0, y0, x1, y1'. pred_embs (np.array): Embedding results of the image, the shape is [N, 128], usually pred_embs.shape[1] is a multiple of 128. """ - pred_tlwhs = pred_dets[:, :4] - pred_scores = pred_dets[:, 4:5] - pred_cls_ids = pred_dets[:, 5:] + pred_cls_ids = pred_dets[:, 0:1] + pred_scores = pred_dets[:, 1:2] + pred_tlwhs = pred_dets[:, 2:6] detections = [ Detection(tlwh, score, feat, cls_id) diff --git a/ppdet/modeling/mot/tracker/jde_tracker.py b/ppdet/modeling/mot/tracker/jde_tracker.py index cc4b7316a..c5ba97a34 100644 --- a/ppdet/modeling/mot/tracker/jde_tracker.py +++ b/ppdet/modeling/mot/tracker/jde_tracker.py @@ -106,7 +106,7 @@ class JDETracker(object): Args: pred_dets (np.array): Detection results of the image, the shape is - [N, 6], means 'x0, y0, x1, y1, score, cls_id'. + [N, 6], means 'cls_id, score, x0, y0, x1, y1'. pred_embs (np.array): Embedding results of the image, the shape is [N, 128] or [N, 512]. 
@@ -128,7 +128,7 @@ class JDETracker(object): # unify single and multi classes detection and embedding results for cls_id in range(self.num_classes): - cls_idx = (pred_dets[:, 5:] == cls_id).squeeze(-1) + cls_idx = (pred_dets[:, 0:1] == cls_id).squeeze(-1) pred_dets_dict[cls_id] = pred_dets[cls_idx] if pred_embs is not None: pred_embs_dict[cls_id] = pred_embs[cls_idx] @@ -139,21 +139,26 @@ class JDETracker(object): """ Step 1: Get detections by class""" pred_dets_cls = pred_dets_dict[cls_id] pred_embs_cls = pred_embs_dict[cls_id] - remain_inds = (pred_dets_cls[:, 4:5] > self.conf_thres).squeeze(-1) + remain_inds = (pred_dets_cls[:, 1:2] > self.conf_thres).squeeze(-1) if remain_inds.sum() > 0: pred_dets_cls = pred_dets_cls[remain_inds] if self.use_byte: detections = [ STrack( - STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], cls_id, 30, temp_feat=None) - for tlbrs in pred_dets_cls + STrack.tlbr_to_tlwh(tlbrs[2:6]), + tlbrs[1], + cls_id, + 30, + temp_feat=None) for tlbrs in pred_dets_cls ] else: pred_embs_cls = pred_embs_cls[remain_inds] detections = [ STrack( - STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], cls_id, 30, temp_feat) - for (tlbrs, temp_feat) in zip(pred_dets_cls, pred_embs_cls) + STrack.tlbr_to_tlwh(tlbrs[2:6]), tlbrs[1], cls_id, + 30, temp_feat) + for (tlbrs, temp_feat + ) in zip(pred_dets_cls, pred_embs_cls) ] else: detections = [] @@ -177,14 +182,17 @@ class JDETracker(object): STrack.multi_predict(track_pool_dict[cls_id], self.motion) if self.use_byte: - dists = matching.iou_distance(track_pool_dict[cls_id], detections) + dists = matching.iou_distance(track_pool_dict[cls_id], + detections) matches, u_track, u_detection = matching.linear_assignment( - dists, thresh=self.match_thres) # + dists, thresh=self.match_thres) # not self.tracked_thresh else: dists = matching.embedding_distance( - track_pool_dict[cls_id], detections, metric=self.metric_type) - dists = matching.fuse_motion(self.motion, dists, - track_pool_dict[cls_id], detections) + 
track_pool_dict[cls_id], + detections, + metric=self.metric_type) + dists = matching.fuse_motion( + self.motion, dists, track_pool_dict[cls_id], detections) matches, u_track, u_detection = matching.linear_assignment( dists, thresh=self.tracked_thresh) @@ -205,15 +213,20 @@ class JDETracker(object): # None of the steps below happen if there are no undetected tracks. """ Step 3: Second association, with IOU""" if self.use_byte: - inds_low = pred_dets_dict[cls_id][:, 4:5] > self.low_conf_thres - inds_high = pred_dets_dict[cls_id][:, 4:5] < self.conf_thres + inds_low = pred_dets_dict[cls_id][:, 1:2] > self.low_conf_thres + inds_high = pred_dets_dict[cls_id][:, 1:2] < self.conf_thres inds_second = np.logical_and(inds_low, inds_high).squeeze(-1) pred_dets_cls_second = pred_dets_dict[cls_id][inds_second] # association the untrack to the low score detections if len(pred_dets_cls_second) > 0: detections_second = [ - STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], cls_id, 30, temp_feat=None) + STrack( + STrack.tlbr_to_tlwh(tlbrs[:4]), + tlbrs[4], + cls_id, + 30, + temp_feat=None) for tlbrs in pred_dets_cls_second[:, :5] ] else: @@ -222,9 +235,10 @@ class JDETracker(object): track_pool_dict[cls_id][i] for i in u_track if track_pool_dict[cls_id][i].state == TrackState.Tracked ] - dists = matching.iou_distance(r_tracked_stracks, detections_second) + dists = matching.iou_distance(r_tracked_stracks, + detections_second) matches, u_track, u_detection_second = matching.linear_assignment( - dists, thresh=0.4) # not r_tracked_thresh + dists, thresh=0.4) # not r_tracked_thresh else: detections = [detections[i] for i in u_detection] r_tracked_stracks = [] @@ -238,7 +252,8 @@ class JDETracker(object): for i_tracked, idet in matches: track = r_tracked_stracks[i_tracked] - det = detections[idet] if not self.use_byte else detections_second[idet] + det = detections[ + idet] if not self.use_byte else detections_second[idet] if track.state == TrackState.Tracked: track.update(det, 
self.frame_id) activated_tracks_dict[cls_id].append(track) diff --git a/ppdet/modeling/post_process.py b/ppdet/modeling/post_process.py index 967d43db0..8a78bf3f0 100644 --- a/ppdet/modeling/post_process.py +++ b/ppdet/modeling/post_process.py @@ -504,11 +504,10 @@ class CenterNetPostProcess(TTFBox): boxes_shape = bboxes.shape[:] scale_expand = paddle.expand(scale_expand, shape=boxes_shape) bboxes = paddle.divide(bboxes, scale_expand) + results = paddle.concat([clses, scores, bboxes], axis=1) if self.for_mot: - results = paddle.concat([bboxes, scores, clses], axis=1) return results, inds, topk_clses else: - results = paddle.concat([clses, scores, bboxes], axis=1) return results, paddle.shape(results)[0:1], topk_clses diff --git a/ppdet/modeling/reid/jde_embedding_head.py b/ppdet/modeling/reid/jde_embedding_head.py index c35f8cfb0..1d1e60f3c 100644 --- a/ppdet/modeling/reid/jde_embedding_head.py +++ b/ppdet/modeling/reid/jde_embedding_head.py @@ -152,9 +152,8 @@ class JDEEmbeddingHead(nn.Layer): scale_factor = targets['scale_factor'][0].numpy() bboxes[:, 2:] = self.scale_coords(bboxes[:, 2:], input_shape, im_shape, scale_factor) - # tlwhs, scores, cls_ids - pred_dets = paddle.concat( - (bboxes[:, 2:], bboxes[:, 1:2], bboxes[:, 0:1]), axis=1) + # cls_ids, scores, tlwhs + pred_dets = bboxes return pred_dets, pred_embs def scale_coords(self, coords, input_shape, im_shape, scale_factor): -- GitLab