Unverified · Commit 51d311d3 authored by George Ni, committed by GitHub

[MOT][dev] JDE FairMOT deploy (#3226)

* jde fairmot export and video python infer
Parent 9d942c50
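This commit wires JDE and FairMOT into PaddleDetection's export and Python deployment path: export_model.py now dumps the TestMOTReader/TestMOTDataset config for MOT architectures, and a standalone deploy script runs tracking on video. A hedged sketch of the resulting workflow (config path, deploy script name and output directory are illustrative, not taken from this diff; the weights URL appears in the configs below):

python tools/export_model.py -c configs/mot/jde/jde_darknet53_30e_1088x608.yml -o weights=https://paddledet.bj.bcebos.com/models/mot/jde_yolov3_darknet53_30e_1088x608.pdparams
python deploy/python/mot_infer.py --model_dir=output_inference/jde_darknet53_30e_1088x608 --video_file=test.mp4 --use_gpu=True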
@@ -27,24 +27,10 @@ EvalMOTDataset:
    task: MOT16_train
    dataset_dir: dataset/mot
    data_root: MOT16/images/train
-    keep_ori_im: False # set True if save visualization images or video
+    keep_ori_im: False # set True if save visualization images or video, or used in DeepSORT

# for MOT video inference
TestMOTDataset:
  !MOTVideoDataset
    dataset_dir: dataset/mot
    keep_ori_im: True # set True if save visualization images or video
-# for detection or reid evaluation, following the JDE paper, but no use in MOT evaluation
-EvalDataset:
-  !MOTDataSet
-    dataset_dir: dataset/mot
-    image_lists: ['citypersons.val', 'caltech.val'] # for detection evaluation
-    # image_lists: ['caltech.10k.val', 'cuhksysu.val', 'prw.val'] # for reid evaluation
-    data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_ide']
-
-# for detection inference, no use in MOT inference
-TestDataset:
-  !ImageFolder
-    dataset_dir: dataset/mot
-
-TestReader:
-  inputs_def:
-    image_shape: [3, 608, 1088]
-  sample_transforms:
-    - Decode: {}
-    - LetterBoxResize: {target_size: [608, 1088]}
-    - NormalizeImage: {mean: [0, 0, 0], std: [1, 1, 1], is_scale: True}
-    - Permute: {}
-  batch_size: 1
EvalMOTReader:
  sample_transforms:
    - Decode: {}
@@ -22,7 +11,6 @@ TestMOTReader:
  inputs_def:
    image_shape: [3, 608, 1088]
  sample_transforms:
-    - Decode: {}
    - LetterBoxResize: {target_size: [608, 1088]}
    - NormalizeImage: {mean: [0, 0, 0], std: [1, 1, 1], is_scale: True}
    - Permute: {}
......
architecture: DeepSORT
-pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/DarkNet53_pretrained.pdparams
+pretrain_weights: None

DeepSORT:
-  detector: YOLOv3
+  detector: YOLOv3 # JDE version
  reid: PCBPyramid
  tracker: DeepSORTTracker

+# JDE version for MOT dataset
YOLOv3:
  backbone: DarkNet
  neck: YOLOv3FPN
  yolo_head: YOLOv3Head
-  post_process: BBoxPostProcess
+  post_process: JDEBBoxPostProcess

DarkNet:
  depth: 53
  return_idx: [2, 3, 4]
-  freeze_norm: True

-# use default config
-# YOLOv3FPN:
+YOLOv3FPN:
+  freeze_norm: True

YOLOv3Head:
-  anchors: [[10, 13], [16, 30], [33, 23],
-            [30, 61], [62, 45], [59, 119],
-            [116, 90], [156, 198], [373, 326]]
-  anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
-  loss: YOLOv3Loss
+  anchors: [[128,384], [180,540], [256,640], [512,640],
+            [32,96], [45,135], [64,192], [90,271],
+            [8,24], [11,34], [16,48], [23,68]]
+  anchor_masks: [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]]
+  loss: JDEDetectionLoss

-YOLOv3Loss:
-  ignore_thresh: 0.7
-  downsample: [32, 16, 8]
-  label_smooth: false

-BBoxPostProcess:
+JDEBBoxPostProcess:
  decode:
-    name: YOLOBox
-    conf_thresh: 0.2
+    name: JDEBox
+    conf_thresh: 0.3
    downsample_ratio: 32
-    clip_bbox: true
  nms:
    name: MultiClassNMS
-    keep_top_k: 100
+    keep_top_k: 500
    score_threshold: 0.01
-    nms_threshold: 0.45
-    nms_top_k: 1000
+    nms_threshold: 0.5
+    nms_top_k: 2000
+    normalized: true
+  return_idx: false

PCBPyramid:
  num_conv_out_channels: 128

@@ -52,7 +50,7 @@ DeepSORTTracker:
  budget: 100
  max_age: 70
  n_init: 3
-  metric_type: 'cosine'
+  metric_type: cosine
  matching_threshold: 0.2
  max_iou_distance: 0.9
-  motion: 'KalmanFilter'
+  motion: KalmanFilter

@@ -5,14 +5,12 @@ _BASE_: [
  '_base_/deepsort_reader_1088x608.yml',
]

-metric: MOT

EvalMOTDataset:
  !MOTImageFolder
    task: MOT16_train
    dataset_dir: dataset/mot
    data_root: MOT16/images/train
-    keep_ori_im: True # set True if used in DeepSORT
+    keep_ori_im: True # set as True in DeepSORT

+det_weights: None
reid_weights: https://paddledet.bj.bcebos.com/models/mot/deepsort_pcb_pyramid_r101.pdparams
......
_BASE_: [
  '../../datasets/mot.yml',
  '../../runtime.yml',
  '_base_/deepsort_yolov3_darknet53_pcb_pyramid_r101.yml',
  '_base_/deepsort_reader_1088x608.yml',
]

EvalMOTDataset:
  !MOTImageFolder
    task: MOT16_train
    dataset_dir: dataset/mot
    data_root: MOT16/images/train
    keep_ori_im: True # set as True in DeepSORT

det_weights: https://paddledet.bj.bcebos.com/models/mot/jde_yolov3_darknet53_30e_1088x608.pdparams
reid_weights: https://paddledet.bj.bcebos.com/models/mot/deepsort_pcb_pyramid_r101.pdparams

DeepSORT:
  detector: YOLOv3
  reid: PCBPyramid
  tracker: DeepSORTTracker

# JDE version for MOT dataset
YOLOv3:
  backbone: DarkNet
  neck: YOLOv3FPN
  yolo_head: YOLOv3Head
  post_process: JDEBBoxPostProcess
@@ -28,38 +28,6 @@ TrainReader:
  use_shared_memory: true

-EvalReader:
-  sample_transforms:
-    - Decode: {}
-    - LetterBoxResize: {target_size: [608, 1088]}
-    - BboxXYXY2XYWH: {}
-    - NormalizeBox: {}
-    - NormalizeImage: {mean: [0, 0, 0], std: [1, 1, 1], is_scale: True}
-    - Permute: {}
-  batch_transforms:
-    - Gt2JDETargetMax:
-        anchor_masks: [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]]
-        anchors: [[[128,384], [180,540], [256,640], [512,640]],
-                  [[32,96], [45,135], [64,192], [90,271]],
-                  [[8,24], [11,34], [16,48], [23,68]]]
-        downsample_ratios: [32, 16, 8]
-        max_iou_thresh: 0.60
-    - BboxCXCYWH2XYXY: {}
-    - Norm2PixelBbox: {}
-  batch_size: 1
-
-TestReader:
-  inputs_def:
-    image_shape: [3, 608, 1088]
-  sample_transforms:
-    - Decode: {}
-    - LetterBoxResize: {target_size: [608, 1088]}
-    - NormalizeImage: {mean: [0, 0, 0], std: [1, 1, 1], is_scale: True}
-    - Permute: {}
-  batch_size: 1

EvalMOTReader:
  sample_transforms:
    - Decode: {}
......
@@ -28,38 +28,6 @@ TrainReader:
  use_shared_memory: true

-EvalReader:
-  sample_transforms:
-    - Decode: {}
-    - LetterBoxResize: {target_size: [320, 576]}
-    - BboxXYXY2XYWH: {}
-    - NormalizeBox: {}
-    - NormalizeImage: {mean: [0, 0, 0], std: [1, 1, 1], is_scale: True}
-    - Permute: {}
-  batch_transforms:
-    - Gt2JDETargetMax:
-        anchor_masks: [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]]
-        anchors: [[[85,255], [120,320], [170,320], [340,320]],
-                  [[21,64], [30,90], [43,128], [60,180]],
-                  [[6,16], [8,23], [11,32], [16,45]]]
-        downsample_ratios: [32, 16, 8]
-        max_iou_thresh: 0.60
-    - BboxCXCYWH2XYXY: {}
-    - Norm2PixelBbox: {}
-  batch_size: 1
-
-TestReader:
-  inputs_def:
-    image_shape: [3, 320, 576]
-  sample_transforms:
-    - Decode: {}
-    - LetterBoxResize: {target_size: [320, 576]}
-    - NormalizeImage: {mean: [0, 0, 0], std: [1, 1, 1], is_scale: True}
-    - Permute: {}
-  batch_size: 1

EvalMOTReader:
  sample_transforms:
    - Decode: {}
......
@@ -28,38 +28,6 @@ TrainReader:
  use_shared_memory: true

-EvalReader:
-  sample_transforms:
-    - Decode: {}
-    - LetterBoxResize: {target_size: [480, 864]}
-    - BboxXYXY2XYWH: {}
-    - NormalizeBox: {}
-    - NormalizeImage: {mean: [0, 0, 0], std: [1, 1, 1], is_scale: True}
-    - Permute: {}
-  batch_transforms:
-    - Gt2JDETargetMax:
-        anchor_masks: [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]]
-        anchors: [[[102,305], [143, 429], [203,508], [407,508]],
-                  [[25,76], [36,107], [51,152], [71,215]],
-                  [[6,19], [9,27], [13,38], [18,54]]]
-        downsample_ratios: [32, 16, 8]
-        max_iou_thresh: 0.60
-    - BboxCXCYWH2XYXY: {}
-    - Norm2PixelBbox: {}
-  batch_size: 1
-
-TestReader:
-  inputs_def:
-    image_shape: [3, 480, 864]
-  sample_transforms:
-    - Decode: {}
-    - LetterBoxResize: {target_size: [480, 864]}
-    - NormalizeImage: {mean: [0, 0, 0], std: [1, 1, 1], is_scale: True}
-    - Permute: {}
-  batch_size: 1

EvalMOTReader:
  sample_transforms:
    - Decode: {}
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import time
import yaml
import cv2
import numpy as np
import paddle
from benchmark_utils import PaddleInferBenchmark
from preprocess import preprocess, NormalizeImage, Permute
from mot_preprocess import LetterBoxResize
from tracker import JDETracker
from ppdet.modeling.mot import visualization as mot_vis
from ppdet.modeling.mot.utils import Timer as MOTTimer
from paddle.inference import Config
from paddle.inference import create_predictor
from utils import argsparser, Timer, get_current_memory_mb
from infer import get_test_images, print_arguments
# Global dictionary
MOT_SUPPORT_MODELS = {
    'JDE',
    'FairMOT',
}
class MOT_Detector(object):
    """
    Args:
        pred_config (object): config of model, defined by `Config(model_dir)`
        model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
        use_gpu (bool): whether to use gpu
        run_mode (str): mode of running (fluid/trt_fp32/trt_fp16)
        batch_size (int): size of each batch in inference
        trt_min_shape (int): min shape for dynamic shape in trt
        trt_max_shape (int): max shape for dynamic shape in trt
        trt_opt_shape (int): opt shape for dynamic shape in trt
        trt_calib_mode (bool): if the model is produced by TRT offline quantitative
            calibration, trt_calib_mode needs to be set True
        cpu_threads (int): number of cpu threads
        enable_mkldnn (bool): whether to enable MKLDNN
    """

    def __init__(self,
                 pred_config,
                 model_dir,
                 use_gpu=False,
                 run_mode='fluid',
                 batch_size=1,
                 trt_min_shape=1,
                 trt_max_shape=1088,
                 trt_opt_shape=608,
                 trt_calib_mode=False,
                 cpu_threads=1,
                 enable_mkldnn=False):
        self.pred_config = pred_config
        self.predictor, self.config = load_predictor(
            model_dir,
            run_mode=run_mode,
            batch_size=batch_size,
            min_subgraph_size=self.pred_config.min_subgraph_size,
            use_gpu=use_gpu,
            use_dynamic_shape=self.pred_config.use_dynamic_shape,
            trt_min_shape=trt_min_shape,
            trt_max_shape=trt_max_shape,
            trt_opt_shape=trt_opt_shape,
            trt_calib_mode=trt_calib_mode,
            cpu_threads=cpu_threads,
            enable_mkldnn=enable_mkldnn)
        self.det_times = Timer()
        self.cpu_mem, self.gpu_mem, self.gpu_util = 0, 0, 0
        self.tracker = JDETracker()

    def preprocess(self, im):
        preprocess_ops = []
        for op_info in self.pred_config.preprocess_infos:
            new_op_info = op_info.copy()
            op_type = new_op_info.pop('type')
            preprocess_ops.append(eval(op_type)(**new_op_info))
        im, im_info = preprocess(im, preprocess_ops)
        inputs = create_inputs(im, im_info)
        return inputs

    def postprocess(self, pred_dets, pred_embs):
        online_targets = self.tracker.update(pred_dets, pred_embs)
        online_tlwhs, online_ids = [], []
        for t in online_targets:
            tlwh = t.tlwh
            tid = t.track_id
            vertical = tlwh[2] / tlwh[3] > 1.6
            if tlwh[2] * tlwh[3] > self.tracker.min_box_area and not vertical:
                online_tlwhs.append(tlwh)
                online_ids.append(tid)
        return online_tlwhs, online_ids
    def predict(self, image, threshold=0.5, repeats=1):
        '''
        Args:
            image (np.ndarray): frame to be tracked; self.preprocess turns it
                into dict(['image', 'im_shape', 'scale_factor'])
            threshold (float): threshold of the predicted box score
        Returns:
            online_tlwhs, online_ids (np.ndarray)
        '''
        self.det_times.preprocess_time_s.start()
        inputs = self.preprocess(image)
        self.det_times.preprocess_time_s.end()
        pred_dets, pred_embs = None, None
        input_names = self.predictor.get_input_names()
        for i in range(len(input_names)):
            input_tensor = self.predictor.get_input_handle(input_names[i])
            input_tensor.copy_from_cpu(inputs[input_names[i]])

        self.det_times.inference_time_s.start()
        for i in range(repeats):
            self.predictor.run()
            output_names = self.predictor.get_output_names()
            boxes_tensor = self.predictor.get_output_handle(output_names[0])
            pred_dets = boxes_tensor.copy_to_cpu()
            embs_tensor = self.predictor.get_output_handle(output_names[1])
            pred_embs = embs_tensor.copy_to_cpu()
        self.det_times.inference_time_s.end(repeats=repeats)

        self.det_times.postprocess_time_s.start()
        online_tlwhs, online_ids = self.postprocess(pred_dets, pred_embs)
        self.det_times.postprocess_time_s.end()
        self.det_times.img_num += 1
        return online_tlwhs, online_ids
def create_inputs(im, im_info):
    """generate input for the model
    Args:
        im (np.ndarray): image (np.ndarray)
        im_info (dict): info of image
    Returns:
        inputs (dict): input of model
    """
    inputs = {}
    inputs['image'] = np.array((im, )).astype('float32')
    inputs['im_shape'] = np.array((im_info['im_shape'], )).astype('float32')
    inputs['scale_factor'] = np.array(
        (im_info['scale_factor'], )).astype('float32')
    return inputs
class PredictConfig_MOT():
    """set config of preprocess, postprocess and visualize
    Args:
        model_dir (str): root path of model.yml
    """

    def __init__(self, model_dir):
        # parsing Yaml config for Preprocess
        deploy_file = os.path.join(model_dir, 'infer_cfg.yml')
        with open(deploy_file) as f:
            yml_conf = yaml.safe_load(f)
        self.check_model(yml_conf)
        self.arch = yml_conf['arch']
        self.preprocess_infos = yml_conf['Preprocess']
        self.min_subgraph_size = yml_conf['min_subgraph_size']
        self.labels = yml_conf['label_list']
        self.mask = False
        self.use_dynamic_shape = yml_conf['use_dynamic_shape']
        if 'mask' in yml_conf:
            self.mask = yml_conf['mask']
        self.print_config()

    def check_model(self, yml_conf):
        """
        Raises:
            ValueError: loaded model not in supported model type
        """
        for support_model in MOT_SUPPORT_MODELS:
            if support_model in yml_conf['arch']:
                return True
        raise ValueError("Unsupported arch: {}, expect {}".format(yml_conf[
            'arch'], MOT_SUPPORT_MODELS))

    def print_config(self):
        print('-----------  Model Configuration -----------')
        print('%s: %s' % ('Model Arch', self.arch))
        print('%s: ' % ('Transform Order'))
        for op_info in self.preprocess_infos:
            print('--%s: %s' % ('transform op', op_info['type']))
        print('--------------------------------------------')
def load_predictor(model_dir,
                   run_mode='fluid',
                   batch_size=1,
                   use_gpu=False,
                   min_subgraph_size=3,
                   use_dynamic_shape=False,
                   trt_min_shape=1,
                   trt_max_shape=1088,
                   trt_opt_shape=608,
                   trt_calib_mode=False,
                   cpu_threads=1,
                   enable_mkldnn=False):
    """set AnalysisConfig, generate AnalysisPredictor
    Args:
        model_dir (str): root path of __model__ and __params__
        run_mode (str): mode of running (fluid/trt_fp32/trt_fp16/trt_int8)
        batch_size (int): size of each batch in inference
        use_gpu (bool): whether to use gpu
        use_dynamic_shape (bool): use dynamic shape or not
        trt_min_shape (int): min shape for dynamic shape in trt
        trt_max_shape (int): max shape for dynamic shape in trt
        trt_opt_shape (int): opt shape for dynamic shape in trt
        trt_calib_mode (bool): if the model is produced by TRT offline quantitative
            calibration, trt_calib_mode needs to be set True
        cpu_threads (int): number of cpu threads
        enable_mkldnn (bool): whether to enable MKLDNN
    Returns:
        predictor (PaddlePredictor): AnalysisPredictor
    Raises:
        ValueError: predicting by TensorRT requires use_gpu == True
    """
    if not use_gpu and not run_mode == 'fluid':
        raise ValueError(
            "Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}"
            .format(run_mode, use_gpu))
    config = Config(
        os.path.join(model_dir, 'model.pdmodel'),
        os.path.join(model_dir, 'model.pdiparams'))
    precision_map = {
        'trt_int8': Config.Precision.Int8,
        'trt_fp32': Config.Precision.Float32,
        'trt_fp16': Config.Precision.Half
    }
    if use_gpu:
        # initial GPU memory(M), device ID
        config.enable_use_gpu(200, 0)
        # optimize graph and fuse op
        config.switch_ir_optim(True)
    else:
        config.disable_gpu()
        config.set_cpu_math_library_num_threads(cpu_threads)
        if enable_mkldnn:
            try:
                # cache 10 different shapes for mkldnn to avoid memory leak
                config.set_mkldnn_cache_capacity(10)
                config.enable_mkldnn()
            except Exception as e:
                print(
                    "The current environment does not support `mkldnn`, so disable mkldnn."
                )
                pass

    if run_mode in precision_map.keys():
        config.enable_tensorrt_engine(
            workspace_size=1 << 10,
            max_batch_size=batch_size,
            min_subgraph_size=min_subgraph_size,
            precision_mode=precision_map[run_mode],
            use_static=False,
            use_calib_mode=trt_calib_mode)
        if use_dynamic_shape:
            min_input_shape = {'image': [1, 3, trt_min_shape, trt_min_shape]}
            max_input_shape = {'image': [1, 3, trt_max_shape, trt_max_shape]}
            opt_input_shape = {'image': [1, 3, trt_opt_shape, trt_opt_shape]}
            config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape,
                                              opt_input_shape)
            print('trt set dynamic shape done!')

    # disable print log when predict
    config.disable_glog_info()
    # enable shared memory
    config.enable_memory_optim()
    # disable feed, fetch OP, needed by zero_copy_run
    config.switch_use_feed_fetch_ops(False)
    predictor = create_predictor(config)
    return predictor, config
def predict_video(detector, camera_id):
    if camera_id != -1:
        capture = cv2.VideoCapture(camera_id)
        video_name = 'mot_output.mp4'
    else:
        capture = cv2.VideoCapture(FLAGS.video_file)
        video_name = os.path.split(FLAGS.video_file)[-1]
    fps = 30
    frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
    print('frame_count', frame_count)
    width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # yapf: disable
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    # yapf: enable
    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)
    out_path = os.path.join(FLAGS.output_dir, video_name)
    writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
    frame_id = 0
    timer = MOTTimer()
    while (1):
        ret, frame = capture.read()
        if not ret:
            break
        timer.tic()
        online_tlwhs, online_ids = detector.predict(frame, FLAGS.threshold)
        timer.toc()
        online_im = mot_vis.plot_tracking(
            frame,
            online_tlwhs,
            online_ids,
            frame_id=frame_id,
            fps=1. / timer.average_time)
        frame_id += 1
        print('detect frame:%d' % (frame_id))
        im = np.array(online_im)
        writer.write(im)
        if camera_id != -1:
            cv2.imshow('Tracking Detection', im)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    writer.release()
def main():
    pred_config = PredictConfig_MOT(FLAGS.model_dir)
    detector = MOT_Detector(
        pred_config,
        FLAGS.model_dir,
        use_gpu=FLAGS.use_gpu,
        run_mode=FLAGS.run_mode,
        trt_min_shape=FLAGS.trt_min_shape,
        trt_max_shape=FLAGS.trt_max_shape,
        trt_opt_shape=FLAGS.trt_opt_shape,
        trt_calib_mode=FLAGS.trt_calib_mode,
        cpu_threads=FLAGS.cpu_threads,
        enable_mkldnn=FLAGS.enable_mkldnn)

    # predict from video file or camera video stream
    if FLAGS.video_file is not None or FLAGS.camera_id != -1:
        predict_video(detector, FLAGS.camera_id)
    else:
        print('MOT models do not support predicting a single image.')


if __name__ == '__main__':
    paddle.enable_static()
    parser = argsparser()
    FLAGS = parser.parse_args()
    print_arguments(FLAGS)
    main()
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cv2
import numpy as np
class LetterBoxResize(object):
    def __init__(self, target_size):
        """
        Resize image to target size, convert normalized xywh to pixel xyxy
        format ([x_center, y_center, width, height] -> [x0, y0, x1, y1]).
        Args:
            target_size (int|list): image target size.
        """
        super(LetterBoxResize, self).__init__()
        if isinstance(target_size, int):
            target_size = [target_size, target_size]
        self.target_size = target_size

    def letterbox(self, img, height, width, color=(127.5, 127.5, 127.5)):
        # letterbox: resize a rectangular image to a padded rectangular
        shape = img.shape[:2]  # [height, width]
        ratio_h = float(height) / shape[0]
        ratio_w = float(width) / shape[1]
        ratio = min(ratio_h, ratio_w)
        new_shape = (round(shape[1] * ratio),
                     round(shape[0] * ratio))  # [width, height]
        padw = (width - new_shape[0]) / 2
        padh = (height - new_shape[1]) / 2
        top, bottom = round(padh - 0.1), round(padh + 0.1)
        left, right = round(padw - 0.1), round(padw + 0.1)

        img = cv2.resize(
            img, new_shape, interpolation=cv2.INTER_AREA)  # resized, no border
        img = cv2.copyMakeBorder(
            img, top, bottom, left, right, cv2.BORDER_CONSTANT,
            value=color)  # padded rectangular
        return img, ratio, padw, padh

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): image (np.ndarray)
            im_info (dict): info of image
        Returns:
            im (np.ndarray): processed image (np.ndarray)
            im_info (dict): info of processed image
        """
        assert len(self.target_size) == 2
        assert self.target_size[0] > 0 and self.target_size[1] > 0
        height, width = self.target_size
        h, w = im.shape[:2]
        im, ratio, padw, padh = self.letterbox(im, height=height, width=width)

        new_shape = [round(h * ratio), round(w * ratio)]
        im_info['im_shape'] = np.array(new_shape, dtype=np.float32)
        im_info['scale_factor'] = np.array([ratio, ratio], dtype=np.float32)
        return im, im_info
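A quick sanity check of the letterbox math above, assuming deploy/python is on the import path: a 1920x1080 frame fit into a 1088x608 canvas keeps the smaller ratio min(608/1080, 1088/1920) ≈ 0.563 on both axes and pads the 7 leftover columns.

import numpy as np
from mot_preprocess import LetterBoxResize  # module added in this commit

op = LetterBoxResize(target_size=[608, 1088])
frame = np.zeros((1080, 1920, 3), dtype=np.uint8)
frame, im_info = op(frame, im_info={})
print(frame.shape)               # (608, 1088, 3): 1081 resized columns + 3/4 px border
print(im_info['im_shape'])       # [ 608. 1081.]: resized shape before padding
print(im_info['scale_factor'])   # ~[0.563 0.563], the same ratio on both axes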
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import jde_tracker
from . import deepsort_tracker
from .jde_tracker import *
from .deepsort_tracker import *
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is borrowed from https://github.com/nwojke/deep_sort/blob/master/deep_sort/tracker.py
"""
import numpy as np
from ppdet.modeling.mot.matching.deepsort_matching import NearestNeighborDistanceMetric
from ppdet.modeling.mot.matching.deepsort_matching import iou_cost, min_cost_matching, matching_cascade, gate_cost_matrix
from ppdet.modeling.mot.tracker.base_sde_tracker import Track
__all__ = ['DeepSORTTracker']
class DeepSORTTracker(object):
    __inject__ = ['motion']
    """
    DeepSORT tracker
    Args:
        img_size (list): input image size, [h, w]
        budget (int): If not None, fix samples per class to at most this number.
            Removes the oldest samples when the budget is reached.
        max_age (int): maximum number of missed misses before a track is deleted
        n_init (int): Number of frames that a track remains in initialization
            phase. Number of consecutive detections before the track is confirmed.
            The track state is set to `Deleted` if a miss occurs within the first
            `n_init` frames.
        metric_type (str): either "euclidean" or "cosine", the distance metric
            used for measurement to track association.
        matching_threshold (float): samples with larger distance are
            considered an invalid match.
        max_iou_distance (float): max iou distance threshold
        motion (object): KalmanFilter instance
    """

    def __init__(self,
                 img_size=[608, 1088],
                 budget=100,
                 max_age=30,
                 n_init=3,
                 metric_type='cosine',
                 matching_threshold=0.2,
                 max_iou_distance=0.7,
                 motion='KalmanFilter'):
        self.img_size = img_size
        self.max_age = max_age
        self.n_init = n_init
        self.metric = NearestNeighborDistanceMetric(metric_type,
                                                    matching_threshold, budget)
        self.max_iou_distance = max_iou_distance
        self.motion = motion

        self.tracks = []
        self._next_id = 1

    def predict(self):
        """
        Propagate track state distributions one time step forward.
        This function should be called once every time step, before `update`.
        """
        for track in self.tracks:
            track.predict(self.motion)

    def update(self, detections):
        """
        Perform measurement update and track management.
        Args:
            detections (list): List[ppdet.modeling.mot.utils.Detection]
                A list of detections at the current time step.
        """
        # Run matching cascade.
        matches, unmatched_tracks, unmatched_detections = \
            self._match(detections)

        # Update track set.
        for track_idx, detection_idx in matches:
            self.tracks[track_idx].update(self.motion,
                                          detections[detection_idx])
        for track_idx in unmatched_tracks:
            self.tracks[track_idx].mark_missed()
        for detection_idx in unmatched_detections:
            self._initiate_track(detections[detection_idx])
        self.tracks = [t for t in self.tracks if not t.is_deleted()]

        # Update distance metric.
        active_targets = [t.track_id for t in self.tracks if t.is_confirmed()]
        features, targets = [], []
        for track in self.tracks:
            if not track.is_confirmed():
                continue
            features += track.features
            targets += [track.track_id for _ in track.features]
            track.features = []
        self.metric.partial_fit(
            np.asarray(features), np.asarray(targets), active_targets)
        output_stracks = self.tracks
        return output_stracks

    def _match(self, detections):
        def gated_metric(tracks, dets, track_indices, detection_indices):
            features = np.array([dets[i].feature for i in detection_indices])
            targets = np.array([tracks[i].track_id for i in track_indices])
            cost_matrix = self.metric.distance(features, targets)
            cost_matrix = gate_cost_matrix(self.motion, cost_matrix, tracks,
                                           dets, track_indices,
                                           detection_indices)
            return cost_matrix

        # Split track set into confirmed and unconfirmed tracks.
        confirmed_tracks = [
            i for i, t in enumerate(self.tracks) if t.is_confirmed()
        ]
        unconfirmed_tracks = [
            i for i, t in enumerate(self.tracks) if not t.is_confirmed()
        ]

        # Associate confirmed tracks using appearance features.
        matches_a, unmatched_tracks_a, unmatched_detections = \
            matching_cascade(
                gated_metric, self.metric.matching_threshold, self.max_age,
                self.tracks, detections, confirmed_tracks)

        # Associate remaining tracks together with unconfirmed tracks using IOU.
        iou_track_candidates = unconfirmed_tracks + [
            k for k in unmatched_tracks_a
            if self.tracks[k].time_since_update == 1
        ]
        unmatched_tracks_a = [
            k for k in unmatched_tracks_a
            if self.tracks[k].time_since_update != 1
        ]
        matches_b, unmatched_tracks_b, unmatched_detections = \
            min_cost_matching(
                iou_cost, self.max_iou_distance, self.tracks,
                detections, iou_track_candidates, unmatched_detections)

        matches = matches_a + matches_b
        unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b))
        return matches, unmatched_tracks, unmatched_detections

    def _initiate_track(self, detection):
        mean, covariance = self.motion.initiate(detection.to_xyah())
        self.tracks.append(
            Track(mean, covariance, self._next_id, self.n_init, self.max_age,
                  detection.feature))
        self._next_id += 1
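For orientation, a minimal per-frame driving loop, mirroring the new Tracker._eval_seq_sde flow in ppdet/engine/tracker.py (predict() then update()). Constructing the tracker directly with a KalmanFilter instance bypasses ppdet's config injection, and the empty frames list is a placeholder:

from ppdet.modeling.mot.motion import KalmanFilter
from deepsort_tracker import DeepSORTTracker

tracker = DeepSORTTracker(motion=KalmanFilter())  # motion passed explicitly, not injected
frames = []  # placeholder: List[List[Detection]], one list per decoded video frame
for detections in frames:
    tracker.predict()                             # propagate each track's Kalman state one step
    online_targets = tracker.update(detections)   # associate detections and manage tracks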
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is borrowed from https://github.com/Zhongdao/Towards-Realtime-MOT/blob/master/tracker/multitracker.py
"""
import numpy as np
from ppdet.modeling.mot.matching import jde_matching as matching
from ppdet.modeling.mot.motion import KalmanFilter
from ppdet.modeling.mot.tracker.base_jde_tracker import TrackState, BaseTrack, STrack
from ppdet.modeling.mot.tracker.base_jde_tracker import joint_stracks, sub_stracks, remove_duplicate_stracks
__all__ = ['JDETracker']
class JDETracker(object):
    __inject__ = ['motion']
    """
    JDE tracker
    Args:
        det_thresh (float): threshold of detection score
        track_buffer (int): buffer for tracker
        min_box_area (int): min box area to filter out low quality boxes
        tracked_thresh (float): linear assignment threshold of tracked
            stracks and detections
        r_tracked_thresh (float): linear assignment threshold of
            tracked stracks and unmatched detections
        unconfirmed_thresh (float): linear assignment threshold of
            unconfirmed stracks and unmatched detections
        motion (object): KalmanFilter instance
        conf_thres (float): confidence threshold for tracking
        metric_type (str): either "euclidean" or "cosine", the distance metric
            used for measurement to track association.
    """

    def __init__(self,
                 det_thresh=0.3,
                 track_buffer=30,
                 min_box_area=200,
                 tracked_thresh=0.7,
                 r_tracked_thresh=0.5,
                 unconfirmed_thresh=0.7,
                 motion='KalmanFilter',
                 conf_thres=0,
                 metric_type='euclidean'):
        self.det_thresh = det_thresh
        self.track_buffer = track_buffer
        self.min_box_area = min_box_area
        self.tracked_thresh = tracked_thresh
        self.r_tracked_thresh = r_tracked_thresh
        self.unconfirmed_thresh = unconfirmed_thresh
        self.motion = KalmanFilter()
        self.conf_thres = conf_thres
        self.metric_type = metric_type

        self.frame_id = 0
        self.tracked_stracks = []
        self.lost_stracks = []
        self.removed_stracks = []

        self.max_time_lost = 0
        # max_time_lost will be calculated: int(frame_rate / 30.0 * track_buffer)
    def update(self, pred_dets, pred_embs):
        """
        Processes the image frame and finds bounding box (detections).
        Associates the detections with corresponding tracklets and also handles
        lost, removed, refound and active tracklets.
        Args:
            pred_dets (Tensor): Detection results of the image, shape is [N, 5].
            pred_embs (Tensor): Embedding results of the image, shape is [N, 512].
        Return:
            output_stracks (list): The list contains information regarding the
                online_tracklets for the received image tensor.
        """
        self.frame_id += 1
        activated_starcks = []
        # for storing active tracks, for the current frame
        refind_stracks = []
        # Lost Tracks whose detections are obtained in the current frame
        lost_stracks = []
        # The tracks which are not obtained in the current frame but are not
        # removed. (Lost for some time less than the threshold for removing)
        removed_stracks = []

        # np.nonzero returns a tuple of index arrays, so take the first axis
        remain_inds = np.nonzero(pred_dets[:, 4] > self.conf_thres)[0]
        if len(remain_inds) == 0:
            pred_dets = np.zeros([0, 1])
            pred_embs = np.zeros([0, 1])
        else:
            pred_dets = pred_dets[remain_inds]
            pred_embs = pred_embs[remain_inds]

        # Filter out the image with box_num = 0. pred_dets = [[0.0, 0.0, 0.0, 0.0]]
        empty_pred = True if len(pred_dets) == 1 and np.sum(
            pred_dets) == 0.0 else False
        """ Step 1: Network forward, get detections & embeddings"""
        if len(pred_dets) > 0 and not empty_pred:
            detections = [
                STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30)
                for (tlbrs, f) in zip(pred_dets, pred_embs)
            ]
        else:
            detections = []
        ''' Add newly detected tracklets to tracked_stracks'''
        unconfirmed = []
        tracked_stracks = []  # type: list[STrack]
        for track in self.tracked_stracks:
            if not track.is_activated:
                # previous tracks which are not active in the current frame are added in unconfirmed list
                unconfirmed.append(track)
            else:
                # Active tracks are added to the local list 'tracked_stracks'
                tracked_stracks.append(track)
        """ Step 2: First association, with embedding"""
        # Combining currently tracked_stracks and lost_stracks
        strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
        # Predict the current location with KF
        STrack.multi_predict(strack_pool, self.motion)

        dists = matching.embedding_distance(
            strack_pool, detections, metric=self.metric_type)
        dists = matching.fuse_motion(self.motion, dists, strack_pool,
                                     detections)
        # dists is the list of distances of the detections with the tracks in strack_pool
        matches, u_track, u_detection = matching.linear_assignment(
            dists, thresh=self.tracked_thresh)
        # matches is the array of corresponding matches of the detections with the corresponding strack_pool

        for itracked, idet in matches:
            # itracked is the id of the track and idet is the id of the detection
            track = strack_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                # If the track is active, add the detection to the track
                track.update(detections[idet], self.frame_id)
                activated_starcks.append(track)
            else:
                # We have obtained a detection from a track which is not active,
                # hence put the track in refind_stracks list
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)

        # None of the steps below happen if there are no undetected tracks.
        """ Step 3: Second association, with IOU"""
        detections = [detections[i] for i in u_detection]
        # detections is now a list of the unmatched detections
        r_tracked_stracks = []
        # This is a container for stracks which were tracked till the previous
        # frame but no detection was found for them in the current frame.
        for i in u_track:
            if strack_pool[i].state == TrackState.Tracked:
                r_tracked_stracks.append(strack_pool[i])
        dists = matching.iou_distance(r_tracked_stracks, detections)
        matches, u_track, u_detection = matching.linear_assignment(
            dists, thresh=self.r_tracked_thresh)
        # matches is the list of detections which matched with corresponding
        # tracks by the IOU distance method.
        for itracked, idet in matches:
            track = r_tracked_stracks[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_starcks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)
        # Same process done for some unmatched detections, but now considering IOU_distance as measure

        for it in u_track:
            track = r_tracked_stracks[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_stracks.append(track)
        # If no detections are obtained for tracks (u_track), the tracks are added to lost_tracks list and are marked lost
        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        detections = [detections[i] for i in u_detection]
        dists = matching.iou_distance(unconfirmed, detections)
        matches, u_unconfirmed, u_detection = matching.linear_assignment(
            dists, thresh=self.unconfirmed_thresh)
        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_starcks.append(unconfirmed[itracked])

        # The tracks which are yet not matched
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_stracks.append(track)

        # after all these confirmation steps, if a new detection is found, it is initialized for a new track
        """ Step 4: Init new stracks"""
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.motion, self.frame_id)
            activated_starcks.append(track)
        """ Step 5: Update state"""
        # If the tracks are lost for more frames than the threshold number, the tracks are removed.
        for track in self.lost_stracks:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_stracks.append(track)

        # Update the self.tracked_stracks and self.lost_stracks using the updates in this step.
        self.tracked_stracks = [
            t for t in self.tracked_stracks if t.state == TrackState.Tracked
        ]
        self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                             activated_starcks)
        self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                             refind_stracks)
        self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
        self.lost_stracks.extend(lost_stracks)
        self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
        self.removed_stracks.extend(removed_stracks)
        self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
            self.tracked_stracks, self.lost_stracks)
        # get scores of lost tracks
        output_stracks = [
            track for track in self.tracked_stracks if track.is_activated
        ]

        return output_stracks
...@@ -39,9 +39,13 @@ TRT_MIN_SUBGRAPH = { ...@@ -39,9 +39,13 @@ TRT_MIN_SUBGRAPH = {
'SOLOv2': 60, 'SOLOv2': 60,
'HigherHRNet': 3, 'HigherHRNet': 3,
'HRNet': 3, 'HRNet': 3,
'DeepSORT': 3,
'JDE': 3,
'FairMOT': 3,
} }
KEYPOINT_ARCH = ['HigherHRNet', 'TopDownHRNet'] KEYPOINT_ARCH = ['HigherHRNet', 'TopDownHRNet']
MOT_ARCH = ['DeepSORT', 'JDE', 'FairMOT']
def _parse_reader(reader_cfg, dataset_cfg, metric, arch, image_shape):

@@ -54,7 +58,9 @@ def _parse_reader(reader_cfg, dataset_cfg, metric, arch, image_shape):
    label_list = [str(cat) for cat in catid2name.values()]

    sample_transforms = reader_cfg['sample_transforms']
-    for st in sample_transforms[1:]:
+    if arch != 'mot_arch':
+        sample_transforms = sample_transforms[1:]
+    for st in sample_transforms:
        for key, value in st.items():
            p = {'type': key}
            if key == 'Resize':
@@ -106,9 +112,17 @@ def _dump_infer_config(config, path, image_shape, model):
    label_arch = 'detection_arch'
    if infer_arch in KEYPOINT_ARCH:
        label_arch = 'keypoint_arch'
+    if infer_arch in MOT_ARCH:
+        label_arch = 'mot_arch'
+        reader_cfg = config['TestMOTReader']
+        dataset_cfg = config['TestMOTDataset']
+    else:
+        reader_cfg = config['TestReader']
+        dataset_cfg = config['TestDataset']
    infer_cfg['Preprocess'], infer_cfg['label_list'] = _parse_reader(
-        config['TestReader'], config['TestDataset'], config['metric'],
-        label_arch, image_shape)
+        reader_cfg, dataset_cfg, config['metric'], label_arch, image_shape)

    if infer_arch == 'S2ANet':
        # TODO: move background to num_classes
......
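So for a MOT export, the Preprocess section of infer_cfg.yml is built from TestMOTReader with the full transform list kept (the reader no longer carries Decode, so nothing is stripped). A sketch of the resulting entries for the 1088x608 reader above; this is the structure the new deploy script rebuilds by popping 'type' and calling eval(op_type)(**kwargs):

preprocess_infos = [
    {'type': 'LetterBoxResize', 'target_size': [608, 1088]},
    {'type': 'NormalizeImage', 'mean': [0, 0, 0], 'std': [1, 1, 1], 'is_scale': True},
    {'type': 'Permute'},
]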
@@ -32,7 +32,6 @@ from ppdet.metrics import Metric, MOTMetric
import ppdet.utils.stats as stats
from .callbacks import Callback, ComposeCallback
-from .export_utils import _dump_infer_config
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)

@@ -104,7 +103,9 @@ class Tracker(object):
    def load_weights_sde(self, det_weights, reid_weights):
        if self.model.detector:
-            load_weight(self.model.detector, det_weights, self.optimizer)
+            load_weight(self.model.detector, det_weights)
+            load_weight(self.model.reid, reid_weights)
+        else:
            load_weight(self.model.reid, reid_weights, self.optimizer)

    def _eval_seq_jde(self,

@@ -130,7 +131,8 @@ class Tracker(object):
            # forward
            timer.tic()
-            online_targets = self.model(data)
+            pred_dets, pred_embs = self.model(data)
+            online_targets = self.model.tracker.update(pred_dets, pred_embs)

            online_tlwhs, online_ids = [], []
            for t in online_targets:

@@ -199,7 +201,9 @@ class Tracker(object):
            # forward
            timer.tic()
-            online_targets = self.model(data)
+            detections = self.model(data)
+            self.model.tracker.predict()
+            online_targets = self.model.tracker.update(detections)

            online_tlwhs = []
            online_ids = []

@@ -239,6 +243,7 @@ class Tracker(object):
            "model_type should be 'JDE', 'DeepSORT' or 'FairMOT'"

        # run tracking
        n_frame = 0
        timer_avgs, timer_calls = [], []
        for seq in seqs:
......
@@ -17,6 +17,7 @@ from __future__ import division
from __future__ import print_function

import os
+import sys
import time
import random
import datetime

@@ -46,6 +47,8 @@ logger = setup_logger('ppdet.engine')

__all__ = ['Trainer']

+MOT_ARCH = ['DeepSORT', 'JDE', 'FairMOT']

class Trainer(object):
    def __init__(self, cfg, mode='train'):

@@ -57,7 +60,15 @@ class Trainer(object):
        self.is_loaded_weights = False

        # build data loader
+        if cfg.architecture in MOT_ARCH and self.mode in ['eval', 'test']:
+            self.dataset = cfg['{}MOTDataset'.format(self.mode.capitalize())]
+        else:
            self.dataset = cfg['{}Dataset'.format(self.mode.capitalize())]
+
+        if cfg.architecture == 'DeepSORT' and self.mode == 'train':
+            logger.error('DeepSORT has no need of training on mot dataset.')
+            sys.exit(1)
+
        if self.mode == 'train':
            self.loader = create('{}Reader'.format(self.mode.capitalize()))(
                self.dataset, cfg.worker_num)

@@ -225,15 +236,16 @@ class Trainer(object):
        if self.is_loaded_weights:
            return
        self.start_epoch = 0
+        if hasattr(self.model, 'detector'):
+            if self.model.__class__.__name__ == 'FairMOT':
+                load_pretrain_weight(self.model, weights)
+            else:
+                load_pretrain_weight(self.model.detector, weights)
+        else:
            load_pretrain_weight(self.model, weights)
        logger.debug("Load weights {} to start training".format(weights))

+    def load_weights_sde(self, det_weights, reid_weights):
+        if self.model.detector:
+            load_weight(self.model.detector, det_weights)
+            load_weight(self.model.reid, reid_weights)
+        else:
+            load_weight(self.model.reid, reid_weights)

    def resume_weights(self, weights):
        # support Distill resume weights
        if hasattr(self.model, 'student_model'):

@@ -472,8 +484,12 @@ class Trainer(object):
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        image_shape = None
-        if 'inputs_def' in self.cfg['TestReader']:
-            inputs_def = self.cfg['TestReader']['inputs_def']
+        if self.cfg.architecture in MOT_ARCH:
+            test_reader_name = 'TestMOTReader'
+        else:
+            test_reader_name = 'TestReader'
+        if 'inputs_def' in self.cfg[test_reader_name]:
+            inputs_def = self.cfg[test_reader_name]['inputs_def']
            image_shape = inputs_def.get('image_shape', None)
        # set image_shape=[3, -1, -1] as default
        if image_shape is None:
......
@@ -61,7 +61,6 @@ class DeepSORT(BaseArch):
        }

    def _forward(self):
-        assert 'ori_image' in self.inputs
        load_dets = 'pred_bboxes' in self.inputs and 'pred_scores' in self.inputs
        ori_image = self.inputs['ori_image']

@@ -102,10 +101,7 @@ class DeepSORT(BaseArch):
        else:
            detections = []

-        self.tracker.predict()
-        online_targets = self.tracker.update(detections)
-        return online_targets
+        return detections

    def get_pred(self):
        return self._forward()
@@ -91,12 +91,9 @@ class FairMOT(BaseArch):
            embedding = paddle.transpose(embedding, [0, 2, 3, 1])
            embedding = paddle.reshape(embedding,
                                       [-1, paddle.shape(embedding)[-1]])
-            id_feature = paddle.gather(embedding, bbox_inds)
-            dets = det_outs['bbox']
-            id_feature = id_feature
-            # Note: the tracker only considers batch_size=1 and num_classses=1
-            online_targets = self.tracker.update(dets, id_feature)
-            return online_targets
+            pred_embs = paddle.gather(embedding, bbox_inds)
+            pred_dets = det_outs['bbox']
+            return pred_dets, pred_embs

    def get_pred(self):
        output = self._forward()
......
@@ -37,7 +37,7 @@ class JDE(BaseArch):
        tracker (object): tracker instance
        metric (str): 'MOTDet' for training and detection evaluation, 'ReID'
            for ReID embedding evaluation, or 'MOT' for multi object tracking
-            evaluation
+            evaluation.
    """

    def __init__(self,

@@ -107,11 +107,11 @@ class JDE(BaseArch):
            pred_dets = paddle.concat((bbox[:, 2:], bbox[:, 1:2]), axis=1)

+            boxes_idx = paddle.cast(boxes_idx, 'int64')
            emb_valid = paddle.gather_nd(emb_outs, boxes_idx)
            pred_embs = paddle.gather_nd(emb_valid, nms_keep_idx)

-            online_targets = self.tracker.update(pred_dets, pred_embs)
-            return online_targets
+            return pred_dets, pred_embs
        else:
            raise ValueError("Unknown metric {} for multi object tracking.".
......
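The concat above reorders the NMS output for the tracker: bbox rows come out of NMS as [class, score, x1, y1, x2, y2] (matching fake_bbox_pred later in this diff), while JDETracker.update earlier in this diff reads pred_dets as [x1, y1, x2, y2, score] (tlbrs[:4] plus tlbrs[4]). A numpy illustration of the same reordering:

import numpy as np

bbox = np.array([[0, 0.9, 10, 20, 50, 120]])        # [class, score, x1, y1, x2, y2]
pred_dets = np.concatenate((bbox[:, 2:], bbox[:, 1:2]), axis=1)
print(pred_dets)                                     # [[ 10.  20.  50. 120.   0.9]]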
@@ -863,9 +863,7 @@ class JDEBox(object):
        return paddle.stack([gx1, gy1, gx2, gy2], axis=1)

    def decode_delta_map(self, delta_map, anchors):
-        delta_map_shape = paddle.shape(delta_map)
-        delta_map_shape.stop_gradient = True
-        nB, nA, nGh, nGw, _ = delta_map_shape[:]
+        nB, nA, nGh, nGw, _ = delta_map.shape[:]
        anchor_mesh = self.generate_anchor(nGh, nGw, anchors)
        # only support bs=1
        anchor_mesh = paddle.unsqueeze(anchor_mesh, 0)

@@ -875,9 +873,27 @@ class JDEBox(object):
                delta_map, shape=[-1, 4]),
            paddle.reshape(
                anchor_mesh, shape=[-1, 4]))
-        pred_map = paddle.reshape(pred_list, shape=[nB, -1, 4])
+        pred_map = paddle.reshape(pred_list, shape=[nB, nA * nGh * nGw, 4])
        return pred_map
+    def _postprocessing_by_level(self, nA, stride, head_out, anchor_vec):
+        boxes_shape = head_out.shape
+        nB, nGh, nGw = 1, boxes_shape[-2], boxes_shape[-1]
+        # only support bs=1
+        p = paddle.reshape(
+            head_out, shape=[nB, nA, self.num_classes + 5, nGh, nGw])
+        p = paddle.transpose(p, perm=[0, 1, 3, 4, 2])  # [nB, 4, nGh, nGw, 6]
+        p_box = p[:, :, :, :, :4]
+        boxes = self.decode_delta_map(p_box, anchor_vec)  # [nB, 4*nGh*nGw, 4]
+        boxes = boxes * stride
+        p_conf = paddle.transpose(
+            p[:, :, :, :, 4:6], perm=[0, 4, 1, 2, 3])  # [nB, 2, 4, 19, 34]
+        p_conf = F.softmax(
+            p_conf, axis=1)[:, 1, :, :, :].unsqueeze(-1)  # [nB, 4, 19, 34, 1]
+        scores = paddle.reshape(p_conf, shape=[nB, nA * nGh * nGw, 1])
+        return boxes, scores
    def __call__(self, yolo_head_out, anchors):
        bbox_pred_list = []
        for i, head_out in enumerate(yolo_head_out):

@@ -885,43 +901,16 @@ class JDEBox(object):
            anc_w, anc_h = anchors[i][0::2], anchors[i][1::2]
            anchor_vec = np.stack((anc_w, anc_h), axis=1) / stride
            nA = len(anc_w)
-            boxes_shape = paddle.shape(head_out)
-            boxes_shape.stop_gradient = True
-            nB, nGh, nGw = boxes_shape[0], boxes_shape[-2], boxes_shape[-1]
-            p = head_out.reshape((nB, nA, self.num_classes + 5, nGh, nGw))
-            p = paddle.transpose(p, perm=[0, 1, 3, 4, 2])  # [nB, 4, 19, 34, 6]
-            p_box = p[:, :, :, :, :4]  # [nB, 4, 19, 34, 4]
-            boxes = self.decode_delta_map(p_box, anchor_vec)  # [nB, 4*19*34, 4]
-            boxes = boxes * stride
-            p_conf = paddle.transpose(
-                p[:, :, :, :, 4:6], perm=[0, 4, 1, 2, 3])  # [nB, 2, 4, 19, 34]
-            p_conf = F.softmax(
-                p_conf,
-                axis=1)[:, 1, :, :, :].unsqueeze(-1)  # [nB, 4, 19, 34, 1]
-            scores = paddle.reshape(p_conf, shape=[nB, -1, 1])
+            boxes, scores = self._postprocessing_by_level(nA, stride, head_out,
+                                                          anchor_vec)
            bbox_pred_list.append(paddle.concat([boxes, scores], axis=-1))

-        yolo_boxes_pred = paddle.concat(bbox_pred_list, axis=1)
-        boxes_idx = paddle.nonzero(yolo_boxes_pred[:, :, -1] > self.conf_thresh)
-        boxes_idx.stop_gradient = True
-        if boxes_idx.shape[0] == 0:  # TODO: deploy
-            boxes_idx = paddle.to_tensor(np.array([[0]], dtype='int64'))
-            yolo_boxes_out = paddle.to_tensor(
-                np.array(
-                    [[[0.0, 0.0, 0.0, 0.0]]], dtype='float32'))
-            yolo_scores_out = paddle.to_tensor(
-                np.array(
-                    [[[0.0]]], dtype='float32'))
-            return boxes_idx, yolo_boxes_out, yolo_scores_out
-        yolo_boxes = paddle.gather_nd(yolo_boxes_pred, boxes_idx)
-        yolo_boxes_out = paddle.reshape(yolo_boxes[:, :4], shape=[nB, -1, 4])
-        yolo_scores_out = paddle.reshape(yolo_boxes[:, 4:5], shape=[nB, 1, -1])
-        boxes_idx = boxes_idx[:, 1:]
-        return boxes_idx, yolo_boxes_out, yolo_scores_out  # [163], [1, 163, 4], [1, 1, 163]
+        yolo_boxes_scores = paddle.concat(bbox_pred_list, axis=1)
+        boxes_idx_over_conf_thr = paddle.nonzero(
+            yolo_boxes_scores[:, :, -1] > self.conf_thresh)
+        boxes_idx_over_conf_thr.stop_gradient = True
+        return boxes_idx_over_conf_thr, yolo_boxes_scores

@register
......
@@ -121,14 +121,13 @@ def load_det_results(det_file, num_frames):

def scale_coords(coords, input_shape, im_shape, scale_factor):
    im_shape = im_shape.numpy()[0]
-    ratio = scale_factor.numpy()[0][0]
-    img0_shape = [int(im_shape[0] / ratio), int(im_shape[1] / ratio)]
-    pad_w = (input_shape[1] - round(img0_shape[1] * ratio)) / 2
-    pad_h = (input_shape[0] - round(img0_shape[0] * ratio)) / 2
+    ratio = scale_factor[0][0]
+    pad_w = (input_shape[1] - int(im_shape[1])) / 2
+    pad_h = (input_shape[0] - int(im_shape[0])) / 2
+    coords = paddle.cast(coords, 'float32')
    coords[:, 0::2] -= pad_w
    coords[:, 1::2] -= pad_h
-    coords[:, 0:4] /= paddle.to_tensor(ratio)
+    coords[:, 0:4] /= ratio
    coords[:, :4] = paddle.clip(coords[:, :4], min=0, max=coords[:, :4].max())
    return coords.round()
......
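A worked example of the new padding math, reusing the LetterBoxResize numbers from earlier (a 1920x1080 source letterboxed into 1088x608, so im_shape is the pre-padding resized shape [608, 1081]); the box coordinate is hypothetical:

ratio = 0.563                      # min(608/1080, 1088/1920), as stored in scale_factor
im_shape = [608, 1081]             # resized shape before padding
pad_w = (1088 - im_shape[1]) / 2   # 3.5 px of horizontal padding
pad_h = (608 - im_shape[0]) / 2    # 0.0
x0 = (566 - pad_w) / ratio         # letterboxed x0 -> ~999.1 px in the original frame
print(round(x0, 1))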
@@ -329,8 +329,38 @@ class S2ANetBBoxPostProcess(nn.Layer):

@register
-class JDEBBoxPostProcess(BBoxPostProcess):
-    def __call__(self, head_out, anchors):
+class JDEBBoxPostProcess(nn.Layer):
+    __shared__ = ['num_classes']
+    __inject__ = ['decode', 'nms']
+
+    def __init__(self, num_classes=1, decode=None, nms=None, return_idx=True):
+        super(JDEBBoxPostProcess, self).__init__()
+        self.num_classes = num_classes
+        self.decode = decode
+        self.nms = nms
+        self.return_idx = return_idx
+
+        self.fake_bbox_pred = paddle.to_tensor(
+            np.array(
+                [[-1, 0.0, 0.0, 0.0, 0.0, 0.0]], dtype='float32'))
+        self.fake_bbox_num = paddle.to_tensor(np.array([1], dtype='int32'))
+        self.fake_nms_keep_idx = paddle.to_tensor(
+            np.array(
+                [[0]], dtype='int32'))
+
+        self.fake_yolo_boxes_out = paddle.to_tensor(
+            np.array(
+                [[[0.0, 0.0, 0.0, 0.0]]], dtype='float32'))
+        self.fake_yolo_scores_out = paddle.to_tensor(
+            np.array(
+                [[[0.0]]], dtype='float32'))
+        self.fake_boxes_idx = paddle.to_tensor(np.array([[0]], dtype='int64'))
+
+    def forward(self,
+                head_out,
+                anchors,
+                im_shape=[[608, 1088]],
+                scale_factor=[[1.0, 1.0]]):
        """
        Decode the bbox and do NMS for JDE model.

@@ -345,17 +375,31 @@ class JDEBBoxPostProcess(nn.Layer):
            bbox_num (Tensor): The number of prediction of each batch with shape [N].
            nms_keep_idx (Tensor): The index of kept bboxes after NMS.
        """
-        boxes_idx, bboxes, score = self.decode(head_out, anchors)
-        bbox_pred, bbox_num, nms_keep_idx = self.nms(bboxes, score,
-                                                     self.num_classes)
-        if bbox_pred.shape[0] == 0:
-            bbox_pred = paddle.to_tensor(
-                np.array(
-                    [[-1, 0.0, 0.0, 0.0, 0.0, 0.0]], dtype='float32'))
-            bbox_num = paddle.to_tensor(np.array([1], dtype='int32'))
-            nms_keep_idx = paddle.to_tensor(np.array([[0]], dtype='int32'))
-        return boxes_idx, bbox_pred, bbox_num, nms_keep_idx
+        boxes_idx, yolo_boxes_scores = self.decode(head_out, anchors)
+
+        if len(boxes_idx) == 0:
+            boxes_idx = self.fake_boxes_idx
+            yolo_boxes_out = self.fake_yolo_boxes_out
+            yolo_scores_out = self.fake_yolo_scores_out
+        else:
+            yolo_boxes = paddle.gather_nd(yolo_boxes_scores, boxes_idx)
+            # TODO: only support bs=1 now
+            yolo_boxes_out = paddle.reshape(
+                yolo_boxes[:, :4], shape=[1, len(boxes_idx), 4])
+            yolo_scores_out = paddle.reshape(
+                yolo_boxes[:, 4:5], shape=[1, 1, len(boxes_idx)])
+            boxes_idx = boxes_idx[:, 1:]
+
+        bbox_pred, bbox_num, nms_keep_idx = self.nms(
+            yolo_boxes_out, yolo_scores_out, self.num_classes)
+        if bbox_pred.shape[0] == 0:
+            bbox_pred = self.fake_bbox_pred
+            bbox_num = self.fake_bbox_num
+            nms_keep_idx = self.fake_nms_keep_idx
+        if self.return_idx:
+            return boxes_idx, bbox_pred, bbox_num, nms_keep_idx
+        else:
+            return bbox_pred, bbox_num
@register

@@ -420,7 +464,7 @@ class CenterNetPostProcess(TTFBox):
        x2 = xs + wh[:, 0:1] / 2
        y2 = ys + wh[:, 1:2] / 2

-        n, c, feat_h, feat_w = paddle.shape(hm)
+        n, c, feat_h, feat_w = hm.shape[:]
        padw = (feat_w * self.down_ratio - im_shape[0, 1]) / 2
        padh = (feat_h * self.down_ratio - im_shape[0, 0]) / 2
        x1 = x1 * self.down_ratio
......
@@ -60,7 +60,7 @@ class JDEEmbeddingHead(nn.Layer):
    def __init__(
            self,
            num_classes=1,
-            num_identifiers=1,  # defined by dataset.total_identities
+            num_identifiers=14455,  # defined by dataset.total_identities when training
            anchor_levels=3,
            anchor_scales=4,
            embedding_dim=512,
......
@@ -47,7 +47,7 @@ def parse_args():
    parser.add_argument(
        "--det_results_dir",
        type=str,
-        default=None,
+        default='',
        help="Directory name for detection results.")
    parser.add_argument(
        '--output_dir',
......
@@ -63,6 +63,12 @@ def run(FLAGS, cfg):
    trainer = Trainer(cfg, mode='test')

    # load weights
+    if cfg.architecture in ['DeepSORT']:
+        if cfg.det_weights != 'None':
+            trainer.load_weights_sde(cfg.det_weights, cfg.reid_weights)
+        else:
+            trainer.load_weights_sde(None, cfg.reid_weights)
+    else:
        trainer.load_weights(cfg.weights)

    # export model
......
@@ -49,7 +49,7 @@ def parse_args():
    parser.add_argument(
        "--det_results_dir",
        type=str,
-        default=None,
+        default='',
        help="Directory name for detection results.")
    parser.add_argument(
        '--output_dir',
......