diff --git a/deploy/pptracking/python/README.md b/deploy/pptracking/python/README.md
index d68d2c1742ef453f490e574180e28d4bb74b69d5..f48459058a7e8e55939633fc90397f971ffe18c5 100644
--- a/deploy/pptracking/python/README.md
+++ b/deploy/pptracking/python/README.md
@@ -58,11 +58,12 @@ python deploy/pptracking/python/mot_jde_infer.py --model_dir=output_inference/fa
### 2.1 Export the prediction model
Step 1: export the detection model
```bash
-# Export the JDE YOLOv3 pedestrian detection model
-CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c configs/mot/deepsort/detector/jde_yolov3_darknet53_30e_1088x608_mix.yml -o weights=https://paddledet.bj.bcebos.com/models/mot/deepsort/jde_yolov3_darknet53_30e_1088x608_mix.pdparams
-# Or export the PPYOLOv2 pedestrian detection model
+# Export the PPYOLOv2 pedestrian detection model
 CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c configs/mot/deepsort/detector/ppyolov2_r50vd_dcn_365e_640x640_mot17half.yml -o weights=https://paddledet.bj.bcebos.com/models/mot/deepsort/ppyolov2_r50vd_dcn_365e_640x640_mot17half.pdparams
+# Or export the PPYOLOe pedestrian detection model
+CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c configs/mot/deepsort/detector/ppyoloe_crn_l_36e_640x640_mot17half.yml -o weights=https://paddledet.bj.bcebos.com/models/mot/deepsort/ppyoloe_crn_l_36e_640x640_mot17half.pdparams
```
+
Step 2: export the pedestrian ReID model
```bash
# Export the PCB Pyramid ReID model
@@ -76,11 +77,10 @@ CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c configs/mot/deepsort/reid
# Download the pedestrian tracking demo video:
wget https://bj.bcebos.com/v1/paddledet/data/mot/demo/mot17_demo.mp4

-# Use the exported JDE YOLOv3 pedestrian detection model and PCB Pyramid ReID model
-python deploy/pptracking/python/mot_sde_infer.py --model_dir=output_inference/jde_yolov3_darknet53_30e_1088x608_mix/ --reid_model_dir=output_inference/deepsort_pcb_pyramid_r101/ --video_file=mot17_demo.mp4 --device=GPU --threshold=0.5 --save_mot_txts --save_images
-
-# Or use the exported PPYOLOv2 pedestrian detection model and PPLCNet ReID model
-python deploy/pptracking/python/mot_sde_infer.py --model_dir=output_inference/ppyolov2_r50vd_dcn_365e_640x640_mot17half/ --reid_model_dir=output_inference/deepsort_pplcnet/ --video_file=mot17_demo.mp4 --device=GPU --threshold=0.5 --scaled=True --save_mot_txts --save_images
+# Use the exported PPYOLOv2 pedestrian detection model and PPLCNet ReID model
+python deploy/pptracking/python/mot_sde_infer.py --model_dir=output_inference/ppyolov2_r50vd_dcn_365e_640x640_mot17half/ --reid_model_dir=output_inference/deepsort_pplcnet/ --video_file=mot17_demo.mp4 --device=GPU --threshold=0.5 --save_mot_txts --save_images
+# Or use the exported PPYOLOe pedestrian detection model and PPLCNet ReID model
+python deploy/pptracking/python/mot_sde_infer.py --model_dir=output_inference/ppyoloe_crn_l_36e_640x640_mot17half/ --reid_model_dir=output_inference/deepsort_pplcnet/ --video_file=mot17_demo.mp4 --device=GPU --threshold=0.5 --save_mot_txts --save_images
```

### 2.3 Predict vehicle tracking in Python with the exported models

@@ -97,17 +97,16 @@ wget https://paddledet.bj.bcebos.com/models/mot/deepsort/deepsort_pplcnet_vehicle.tar
tar -xvf deepsort_pplcnet_vehicle.tar

# Use the exported PicoDet vehicle detection model and PPLCNet vehicle ReID model
-python deploy/pptracking/python/mot_sde_infer.py --model_dir=picodet_l_640_aic21mtmct_vehicle/ --reid_model_dir=deepsort_pplcnet_vehicle/ --device=GPU --scaled=True --threshold=0.5 --video_file={your video}.mp4 --save_mot_txts --save_images
+python deploy/pptracking/python/mot_sde_infer.py --model_dir=picodet_l_640_aic21mtmct_vehicle/ --reid_model_dir=deepsort_pplcnet_vehicle/ --device=GPU --threshold=0.5 --video_file={your video}.mp4 --save_mot_txts --save_images

# Use the exported PP-YOLOv2 vehicle detection model and PPLCNet vehicle ReID model
-python deploy/pptracking/python/mot_sde_infer.py --model_dir=ppyolov2_r50vd_dcn_365e_aic21mtmct_vehicle/ --reid_model_dir=deepsort_pplcnet_vehicle/ --device=GPU --scaled=True --threshold=0.5 --video_file={your video}.mp4 --save_mot_txts --save_images
+python deploy/pptracking/python/mot_sde_infer.py --model_dir=ppyolov2_r50vd_dcn_365e_aic21mtmct_vehicle/ --reid_model_dir=deepsort_pplcnet_vehicle/ --device=GPU --threshold=0.5 --video_file={your video}.mp4 --save_mot_txts --save_images
```

**Notes:**
 - The tracking models predict on whole videos and do not support single-image prediction. By default, the video with the visualized tracking results is saved. Add `--save_mot_txts` to save one txt file per video, or `--save_images` to save the visualized tracking result images.
 - Each line of the tracking result txt file is `frame,id,x1,y1,w,h,score,-1,-1,-1`.
 - `--threshold` is the confidence threshold for visualization, 0.5 by default; results below it are filtered out. You can adjust it as needed for better visualization.
- - `--scaled` indicates whether the coordinates output by the model have already been scaled back to the original image; it is False for the JDE YOLOv3 detection model and True for general detection models.
 - The DeepSORT algorithm only supports single-class tracking, not multi-class tracking, and the ReID model should preferably be trained on the same object category as the detection model, e.g. a pedestrian ReID model for pedestrian tracking and a vehicle ReID model for vehicle tracking.

@@ -135,94 +134,22 @@ wget https://paddledet.bj.bcebos.com/data/mot/demo/mtmct-demo.tar
tar -xvf mtmct-demo.tar

# Use the exported PicoDet vehicle detection model and PPLCNet vehicle ReID model
-python deploy/pptracking/python/mot_sde_infer.py --model_dir=picodet_l_640_aic21mtmct_vehicle/ --reid_model_dir=deepsort_pplcnet_vehicle/ --mtmct_dir=mtmct-demo --mtmct_cfg=mtmct_cfg --device=GPU --scaled=True --threshold=0.5 --save_mot_txts --save_images
+python deploy/pptracking/python/mot_sde_infer.py --model_dir=picodet_l_640_aic21mtmct_vehicle/ --reid_model_dir=deepsort_pplcnet_vehicle/ --mtmct_dir=mtmct-demo --mtmct_cfg=mtmct_cfg.yml --device=GPU --threshold=0.5 --save_mot_txts --save_images

# Use the exported PP-YOLOv2 vehicle detection model and PPLCNet vehicle ReID model
-python deploy/pptracking/python/mot_sde_infer.py --model_dir=ppyolov2_r50vd_dcn_365e_aic21mtmct_vehicle/ --reid_model_dir=deepsort_pplcnet_vehicle/ --mtmct_dir=mtmct-demo --mtmct_cfg=mtmct_cfg --device=GPU --scaled=True --threshold=0.5 --save_mot_txts --save_images
+python deploy/pptracking/python/mot_sde_infer.py --model_dir=ppyolov2_r50vd_dcn_365e_aic21mtmct_vehicle/ --reid_model_dir=deepsort_pplcnet_vehicle/ --mtmct_dir=mtmct-demo --mtmct_cfg=mtmct_cfg.yml --device=GPU --threshold=0.5 --save_mot_txts --save_images
```

**Notes:**
 - The tracking models predict on whole videos and do not support single-image prediction. By default, the video with the visualized tracking results is saved. Add `--save_mot_txts` to save one txt file per video, or `--save_images` to save the visualized tracking result images.
 - Each line of the cross-camera tracking result txt file is `camera_id,frame,id,x1,y1,w,h,-1,-1`.
 - `--threshold` is the confidence threshold for visualization, 0.5 by default; results below it are filtered out. You can adjust it as needed for better visualization.
- - `--scaled` indicates whether the coordinates output by the model have already been scaled back to the original image; it is False for the JDE YOLOv3 detection model and True for general detection models.
 - The DeepSORT algorithm only supports single-class tracking, not multi-class tracking, and the ReID model should preferably be trained on the same object category as the detection model, e.g. a pedestrian ReID model for pedestrian tracking and a vehicle ReID model for vehicle tracking.
 - `--mtmct_dir` is the folder of one scene for MTMCT prediction; it contains the image folders extracted from the videos of the different cameras in that scene, at least two of them.
 - `--mtmct_cfg` is the config file of one scene for MTMCT prediction; it contains switches for some trick operations and the file paths of the camera settings of that scene. You can change the paths and enable or disable certain operations as needed.

-## 4. API usage:
-
-### 4.1 FairMOT model API call
-```
-import mot_jde_infer
-
-# 1.model config and weights
-model_dir = 'fairmot_hrnetv2_w18_dlafpn_30e_576x320/'
-
-# 2.inference data
-video_file = 'test.mp4'
-image_dir = None
-
-# 3.other settings
-device = 'CPU' # device should be CPU, GPU or XPU
-threshold = 0.3
-output_dir = 'output'
-
-# mot predict
-mot_jde_infer.predict_naive(model_dir, video_file, image_dir, device, threshold, output_dir)
-```
-**Notes:**
-  - The code above must be run from the directory `PaddleDetection/deploy/pptracking/python`.
-  - Prediction on videos and image folders is supported, single-image prediction is not. `video_file` and `image_dir` must not both be None; `video_file` is recommended, and `image_dir` must directly contain images named in a proper frame order.
-  - By default the visualized tracking result images and video are saved together with the tracking result txt files; trajectory visualization and flow statistics are not performed by default.
-
-
-### 4.2 DeepSORT model API call
-```
-import mot_sde_infer
-
-# 1.model config and weights
-model_dir = 'ppyolov2_r50vd_dcn_365e_aic21mtmct_vehicle/'
-reid_model_dir = 'deepsort_pplcnet_vehicle/'
-
-# 2.inference data
-video_file = 'test.mp4'
-image_dir = None
-
-# 3.other settings
-scaled = True # set False only when use JDE YOLOv3
-device = 'CPU' # device should be CPU, GPU or XPU
-threshold = 0.3
-output_dir = 'output'
-
-# 4. MTMCT settings, default None
-mtmct_dir = None
-mtmct_cfg = None
-
-# mot predict
-mot_sde_infer.predict_naive(model_dir,
-                            reid_model_dir,
-                            video_file,
-                            image_dir,
-                            mtmct_dir,
-                            mtmct_cfg,
-                            scaled,
-                            device,
-                            threshold,
-                            output_dir)
-```
-**Notes:**
-  - The code above must be run from the directory `PaddleDetection/deploy/pptracking/python`.
-  - Prediction on videos and image folders is supported, single-image prediction is not. `video_file`, `image_dir` and `--mtmct_dir` must not all be None; `video_file` is recommended, `image_dir` must directly contain images named in a proper frame order, and a non-None `--mtmct_dir` means an MTMCT cross-camera tracking task is performed.
-  - By default the visualized tracking result images and video are saved together with the tracking result txt files; trajectory visualization and flow statistics are not performed by default.
-  - `--scaled` indicates whether the coordinates output by the model have already been scaled back to the original image; it is False for the JDE YOLOv3 detection model and True for general detection models.
-  - `--mtmct_dir` is the folder of one scene for MTMCT prediction; it contains the image folders extracted from the videos of the different cameras in that scene, at least two of them.
-  - `--mtmct_cfg` is the config file of one scene for MTMCT prediction; it contains switches for some trick operations and the file paths of the camera settings of that scene. You can change the paths and enable or disable certain operations as needed.
-  - To enable MTMCT prediction, `video_file` and `image_dir` must both be set to None, and neither `--mtmct_dir` nor `--mtmct_cfg` may be None.
-
-
-## 5. Parameter description:
+## 4. Parameter description:

| Parameter | Required | Description |
|-------|-------|----------|

diff --git a/deploy/pptracking/python/benchmark_utils.py b/deploy/pptracking/python/benchmark_utils.py
index af7637288dff517314d95e5404dcd88b8db956c7..adf36217955ed71103ad46a7e7ae5cb488e93d96 100644
--- a/deploy/pptracking/python/benchmark_utils.py
+++ b/deploy/pptracking/python/benchmark_utils.py
@@ -89,6 +89,8 @@ class PaddleInferBenchmark(object):
         self.preprocess_time_s = perf_info.get('preprocess_time_s', 0)
         self.postprocess_time_s = perf_info.get('postprocess_time_s', 0)
+        self.with_tracker = True if 'tracking_time_s' in perf_info else False
+        self.tracking_time_s = perf_info.get('tracking_time_s', 0)
         self.total_time_s = perf_info.get('total_time_s', 0)

         self.inference_time_s_90 = perf_info.get("inference_time_s_90", "")
@@ -235,9 +237,19 @@ class PaddleInferBenchmark(object):
         )
         self.logger.info(
             f"{identifier} total time spent(s): {self.total_time_s}")
-        self.logger.info(
-            f"{identifier} preprocess_time(ms): {round(self.preprocess_time_s*1000, 1)}, inference_time(ms): {round(self.inference_time_s*1000, 1)}, postprocess_time(ms): {round(self.postprocess_time_s*1000, 1)}"
-        )
+
+        if self.with_tracker:
+            self.logger.info(
+                f"{identifier} preprocess_time(ms): {round(self.preprocess_time_s*1000, 1)}, "
+                f"inference_time(ms): {round(self.inference_time_s*1000, 1)}, "
+                f"postprocess_time(ms): {round(self.postprocess_time_s*1000, 1)}, "
+                f"tracking_time(ms): {round(self.tracking_time_s*1000, 1)}")
+        else:
+            self.logger.info(
+                f"{identifier} preprocess_time(ms): {round(self.preprocess_time_s*1000, 1)}, "
+                f"inference_time(ms): {round(self.inference_time_s*1000, 1)}, "
+                f"postprocess_time(ms): {round(self.postprocess_time_s*1000, 1)}"
+            )
         if self.inference_time_s_90:
             self.looger.info(
                 f"{identifier} 90%_cost: {self.inference_time_s_90}, 99%_cost: {self.inference_time_s_99}, succ_rate: {self.succ_rate}"
diff --git a/deploy/pptracking/python/det_infer.py b/deploy/pptracking/python/det_infer.py
index e40d8d9f17feb33d2a6edd12e6971b9cab1a5c18..b586621f961e29093e0ec0c11ae1f9fddf6210a1 100644
--- a/deploy/pptracking/python/det_infer.py
+++ b/deploy/pptracking/python/det_infer.py
@@ -25,9 +25,14 @@ import paddle
 from paddle.inference import Config
 from paddle.inference import create_predictor

+import sys
+# add deploy path of PaddleDetection to sys.path
+parent_path = os.path.abspath(os.path.join(__file__, *(['..'])))
+sys.path.insert(0, parent_path)
+
 from benchmark_utils import PaddleInferBenchmark
 from picodet_postprocess import PicoDetPostProcess
-from preprocess import preprocess, Resize, NormalizeImage, Permute, PadStride, LetterBoxResize
+from preprocess import preprocess, Resize, NormalizeImage, Permute, PadStride, LetterBoxResize, decode_image
 from visualize import visualize_box_mask
 from utils import argsparser, Timer, get_current_memory_mb

@@ -38,9 +43,27 @@ SUPPORT_MODELS = {
     'JDE',
     'FairMOT',
     'DeepSORT',
+    'StrongBaseline',
 }


+def bench_log(detector, img_list, model_info, batch_size=1, name=None):
+    mems = {
+        'cpu_rss_mb': detector.cpu_mem / len(img_list),
+        'gpu_rss_mb': detector.gpu_mem / len(img_list),
+        'gpu_util': detector.gpu_util * 100 / len(img_list)
+    }
+    perf_info = detector.det_times.report(average=True)
+    data_info = {
+        'batch_size': batch_size,
+        'shape': "dynamic_shape",
+        'data_num': perf_info['img_num']
+    }
+    log = PaddleInferBenchmark(detector.config, model_info, data_info,
+                               perf_info, mems)
+    log(name)
+
+
 class Detector(object):
     """
     Args:
@@ -56,21 +79,25 @@
         calibration, trt_calib_mode need to set True
         cpu_threads (int): cpu threads
         enable_mkldnn (bool): whether to open MKLDNN
+        output_dir (str): The path of output
+        threshold (float): The threshold of score for visualization
     """

-    def __init__(self,
-                 pred_config,
-                 model_dir,
-                 device='CPU',
-                 run_mode='paddle',
-                 batch_size=1,
-                 trt_min_shape=1,
-                 trt_max_shape=1280,
-                 trt_opt_shape=640,
-                 trt_calib_mode=False,
-                 cpu_threads=1,
-                 enable_mkldnn=False):
-        self.pred_config = pred_config
+    def __init__(
+            self,
+            model_dir,
+            device='CPU',
+            run_mode='paddle',
+            batch_size=1,
+            trt_min_shape=1,
+            trt_max_shape=1280,
+            trt_opt_shape=640,
+            trt_calib_mode=False,
+            cpu_threads=1,
+            enable_mkldnn=False,
+            output_dir='output',
+            threshold=0.5, ):
+        self.pred_config = self.set_config(model_dir)
         self.predictor, self.config = load_predictor(
             model_dir,
             run_mode=run_mode,
@@ -86,6 +113,12 @@
             enable_mkldnn=enable_mkldnn)
         self.det_times = Timer()
         self.cpu_mem, self.gpu_mem, self.gpu_util = 0, 0, 0
+        self.batch_size = batch_size
+        self.output_dir = output_dir
+        self.threshold = threshold
+
+    def set_config(self, model_dir):
+        return PredictConfig(model_dir)

     def preprocess(self, image_list):
         preprocess_ops = []
@@ -101,51 +134,32 @@
             input_im_lst.append(im)
             input_im_info_lst.append(im_info)
         inputs = create_inputs(input_im_lst, input_im_info_lst)
+        input_names = self.predictor.get_input_names()
+        for i in range(len(input_names)):
+            input_tensor = self.predictor.get_input_handle(input_names[i])
+            input_tensor.copy_from_cpu(inputs[input_names[i]])
+
         return inputs

-    def postprocess(self,
-                    np_boxes,
-                    np_masks,
-                    inputs,
-                    np_boxes_num,
-                    threshold=0.5):
+    def postprocess(self, inputs, result):
         # postprocess output of predictor
-        results = {}
-        results['boxes'] = np_boxes
-        results['boxes_num'] = np_boxes_num
-        if np_masks is not None:
-            results['masks'] = np_masks
-        return results
+        np_boxes_num = result['boxes_num']
+        if np_boxes_num[0] <= 0:
+            print('[WARNING] No object detected.')
+            result = {'boxes': np.zeros([0, 6]), 'boxes_num': [0]}
+        result = {k: v for k, v in result.items() if v is not None}
+        return result

-    def predict(self, image_list, threshold=0.5, warmup=0, repeats=1):
+    def predict(self, repeats=1):
         '''
         Args:
-            image_list (list): list of image
-            threshold (float): threshold of predicted box' score
+            repeats (int): repeat number for prediction
         Returns:
-            results (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of box,
+            result (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of box,
                             matix element:[class, score, x_min, y_min, x_max, y_max]
-                            MaskRCNN's results include 'masks': np.ndarray:
-                            shape: [N, im_h, im_w]
         '''
-        self.det_times.preprocess_time_s.start()
-        inputs = self.preprocess(image_list)
-        self.det_times.preprocess_time_s.end()
-        np_boxes, np_masks = None, None
-        input_names = self.predictor.get_input_names()
-        for i in range(len(input_names)):
-            input_tensor = self.predictor.get_input_handle(input_names[i])
-            input_tensor.copy_from_cpu(inputs[input_names[i]])
-        for i in range(warmup):
-            self.predictor.run()
-            output_names = self.predictor.get_output_names()
-            boxes_tensor = self.predictor.get_output_handle(output_names[0])
-            np_boxes = boxes_tensor.copy_to_cpu()
-            if self.pred_config.mask:
-                masks_tensor = self.predictor.get_output_handle(output_names[2])
-                np_masks = masks_tensor.copy_to_cpu()
-
-        self.det_times.inference_time_s.start()
+        # model prediction
+        np_boxes, np_boxes_num = None, None
         for i in range(repeats):
             self.predictor.run()
output_names = self.predictor.get_output_names() @@ -153,130 +167,131 @@ class Detector(object): np_boxes = boxes_tensor.copy_to_cpu() boxes_num = self.predictor.get_output_handle(output_names[1]) np_boxes_num = boxes_num.copy_to_cpu() - if self.pred_config.mask: - masks_tensor = self.predictor.get_output_handle(output_names[2]) - np_masks = masks_tensor.copy_to_cpu() - self.det_times.inference_time_s.end(repeats=repeats) - - self.det_times.postprocess_time_s.start() - results = [] - if reduce(lambda x, y: x * y, np_boxes.shape) < 6: - print('[WARNNING] No object detected.') - results = {'boxes': np.zeros([0, 6]), 'boxes_num': [0]} - else: - results = self.postprocess( - np_boxes, np_masks, inputs, np_boxes_num, threshold=threshold) - self.det_times.postprocess_time_s.end() - self.det_times.img_num += len(image_list) + result = dict(boxes=np_boxes, boxes_num=np_boxes_num) + return result + + def merge_batch_result(self, batch_result): + if len(batch_result) == 1: + return batch_result[0] + res_key = batch_result[0].keys() + results = {k: [] for k in res_key} + for res in batch_result: + for k, v in res.items(): + results[k].append(v) + for k, v in results.items(): + results[k] = np.concatenate(v) return results def get_timer(self): return self.det_times + def predict_image(self, + image_list, + run_benchmark=False, + repeats=1, + visual=True): + batch_loop_cnt = math.ceil(float(len(image_list)) / self.batch_size) + results = [] + for i in range(batch_loop_cnt): + start_index = i * self.batch_size + end_index = min((i + 1) * self.batch_size, len(image_list)) + batch_image_list = image_list[start_index:end_index] + if run_benchmark: + # preprocess + inputs = self.preprocess(batch_image_list) # warmup + self.det_times.preprocess_time_s.start() + inputs = self.preprocess(batch_image_list) + self.det_times.preprocess_time_s.end() + + # model prediction + result = self.predict(repeats=repeats) # warmup + self.det_times.inference_time_s.start() + result = self.predict(repeats=repeats) + self.det_times.inference_time_s.end(repeats=repeats) + + # postprocess + result_warmup = self.postprocess(inputs, result) # warmup + self.det_times.postprocess_time_s.start() + result = self.postprocess(inputs, result) + self.det_times.postprocess_time_s.end() + self.det_times.img_num += len(batch_image_list) + + cm, gm, gu = get_current_memory_mb() + self.cpu_mem += cm + self.gpu_mem += gm + self.gpu_util += gu + else: + # preprocess + self.det_times.preprocess_time_s.start() + inputs = self.preprocess(batch_image_list) + self.det_times.preprocess_time_s.end() + + # model prediction + self.det_times.inference_time_s.start() + result = self.predict() + self.det_times.inference_time_s.end() + + # postprocess + self.det_times.postprocess_time_s.start() + result = self.postprocess(inputs, result) + self.det_times.postprocess_time_s.end() + self.det_times.img_num += len(batch_image_list) + + if visual: + visualize( + batch_image_list, + result, + self.pred_config.labels, + output_dir=self.output_dir, + threshold=self.threshold) + + results.append(result) + if visual: + print('Test iter {}'.format(i)) + + results = self.merge_batch_result(results) + return results -class DetectorPicoDet(Detector): - """ - Args: - config (object): config of model, defined by `Config(model_dir)` - model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml - device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU - run_mode (str): mode of running(paddle/trt_fp32/trt_fp16) - batch_size 
(int): size of pre batch in inference - trt_min_shape (int): min shape for dynamic shape in trt - trt_max_shape (int): max shape for dynamic shape in trt - trt_opt_shape (int): opt shape for dynamic shape in trt - trt_calib_mode (bool): If the model is produced by TRT offline quantitative - calibration, trt_calib_mode need to set True - cpu_threads (int): cpu threads - enable_mkldnn (bool): whether to open MKLDNN - """ - - def __init__(self, - pred_config, - model_dir, - device='CPU', - run_mode='paddle', - batch_size=1, - trt_min_shape=1, - trt_max_shape=1280, - trt_opt_shape=640, - trt_calib_mode=False, - cpu_threads=1, - enable_mkldnn=False): - self.pred_config = pred_config - self.predictor, self.config = load_predictor( - model_dir, - run_mode=run_mode, - batch_size=batch_size, - min_subgraph_size=self.pred_config.min_subgraph_size, - device=device, - use_dynamic_shape=self.pred_config.use_dynamic_shape, - trt_min_shape=trt_min_shape, - trt_max_shape=trt_max_shape, - trt_opt_shape=trt_opt_shape, - trt_calib_mode=trt_calib_mode, - cpu_threads=cpu_threads, - enable_mkldnn=enable_mkldnn) - self.det_times = Timer() - self.cpu_mem, self.gpu_mem, self.gpu_util = 0, 0, 0 + def predict_video(self, video_file, camera_id): + video_out_name = 'output.mp4' + if camera_id != -1: + capture = cv2.VideoCapture(camera_id) + else: + capture = cv2.VideoCapture(video_file) + video_out_name = os.path.split(video_file)[-1] + # Get Video info : resolution, fps, frame count + width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)) + fps = int(capture.get(cv2.CAP_PROP_FPS)) + frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT)) + print("fps: %d, frame_count: %d" % (fps, frame_count)) + + if not os.path.exists(self.output_dir): + os.makedirs(self.output_dir) + out_path = os.path.join(self.output_dir, video_out_name) + fourcc = cv2.VideoWriter_fourcc(*'mp4v') + writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height)) + index = 1 + while (1): + ret, frame = capture.read() + if not ret: + break + print('detect frame: %d' % (index)) + index += 1 + results = self.predict_image([frame], visual=False) - def predict(self, image, threshold=0.5, warmup=0, repeats=1): - ''' - Args: - image (str/np.ndarray): path of image/ np.ndarray read by cv2 - threshold (float): threshold of predicted box' score - Returns: - results (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of box, - matix element:[class, score, x_min, y_min, x_max, y_max] - ''' - self.det_times.preprocess_time_s.start() - inputs = self.preprocess(image) - self.det_times.preprocess_time_s.end() - input_names = self.predictor.get_input_names() - for i in range(len(input_names)): - input_tensor = self.predictor.get_input_handle(input_names[i]) - input_tensor.copy_from_cpu(inputs[input_names[i]]) - np_score_list, np_boxes_list = [], [] - for i in range(warmup): - self.predictor.run() - np_score_list.clear() - np_boxes_list.clear() - output_names = self.predictor.get_output_names() - num_outs = int(len(output_names) / 2) - for out_idx in range(num_outs): - np_score_list.append( - self.predictor.get_output_handle(output_names[out_idx]) - .copy_to_cpu()) - np_boxes_list.append( - self.predictor.get_output_handle(output_names[ - out_idx + num_outs]).copy_to_cpu()) - - self.det_times.inference_time_s.start() - for i in range(repeats): - self.predictor.run() - np_score_list.clear() - np_boxes_list.clear() - output_names = self.predictor.get_output_names() - num_outs = int(len(output_names) / 2) - 
for out_idx in range(num_outs): - np_score_list.append( - self.predictor.get_output_handle(output_names[out_idx]) - .copy_to_cpu()) - np_boxes_list.append( - self.predictor.get_output_handle(output_names[ - out_idx + num_outs]).copy_to_cpu()) - self.det_times.inference_time_s.end(repeats=repeats) - self.det_times.img_num += 1 - self.det_times.postprocess_time_s.start() - self.postprocess = PicoDetPostProcess( - inputs['image'].shape[2:], - inputs['im_shape'], - inputs['scale_factor'], - strides=self.pred_config.fpn_stride, - nms_threshold=self.pred_config.nms['nms_threshold']) - np_boxes, np_boxes_num = self.postprocess(np_score_list, np_boxes_list) - self.det_times.postprocess_time_s.end() - return dict(boxes=np_boxes, boxes_num=np_boxes_num) + im = visualize_box_mask( + frame, + results, + self.pred_config.labels, + threshold=self.threshold) + im = np.array(im) + writer.write(im) + if camera_id != -1: + cv2.imshow('Mask Detection', im) + if cv2.waitKey(1) & 0xFF == ord('q'): + break + writer.release() def create_inputs(imgs, im_info): @@ -433,7 +448,7 @@ def load_predictor(model_dir, } if run_mode in precision_map.keys(): config.enable_tensorrt_engine( - workspace_size=1 << 10, + workspace_size=1 << 25, max_batch_size=batch_size, min_subgraph_size=min_subgraph_size, precision_mode=precision_map[run_mode], @@ -495,22 +510,15 @@ def get_test_images(infer_dir, infer_img): return images -def visualize(image_list, results, labels, output_dir='output/', threshold=0.5): +def visualize(image_list, result, labels, output_dir='output/', threshold=0.5): # visualize the predict result start_idx = 0 for idx, image_file in enumerate(image_list): - im_bboxes_num = results['boxes_num'][idx] + im_bboxes_num = result['boxes_num'][idx] im_results = {} - if 'boxes' in results: - im_results['boxes'] = results['boxes'][start_idx:start_idx + - im_bboxes_num, :] - if 'label' in results: - im_results['label'] = results['label'][start_idx:start_idx + - im_bboxes_num] - if 'score' in results: - im_results['score'] = results['score'][start_idx:start_idx + - im_bboxes_num] - + if 'boxes' in result: + im_results['boxes'] = result['boxes'][start_idx:start_idx + + im_bboxes_num, :] start_idx += im_bboxes_num im = visualize_box_mask( image_file, im_results, labels, threshold=threshold) @@ -529,79 +537,13 @@ def print_arguments(args): print('------------------------------------------') -def predict_image(detector, image_list, batch_size=1): - batch_loop_cnt = math.ceil(float(len(image_list)) / batch_size) - for i in range(batch_loop_cnt): - start_index = i * batch_size - end_index = min((i + 1) * batch_size, len(image_list)) - batch_image_list = image_list[start_index:end_index] - if FLAGS.run_benchmark: - detector.predict( - batch_image_list, FLAGS.threshold, warmup=10, repeats=10) - cm, gm, gu = get_current_memory_mb() - detector.cpu_mem += cm - detector.gpu_mem += gm - detector.gpu_util += gu - print('Test iter {}'.format(i)) - else: - results = detector.predict(batch_image_list, FLAGS.threshold) - visualize( - batch_image_list, - results, - detector.pred_config.labels, - output_dir=FLAGS.output_dir, - threshold=FLAGS.threshold) - - -def predict_video(detector, camera_id): - video_out_name = 'output.mp4' - if camera_id != -1: - capture = cv2.VideoCapture(camera_id) - else: - capture = cv2.VideoCapture(FLAGS.video_file) - video_out_name = os.path.split(FLAGS.video_file)[-1] - # Get Video info : resolution, fps, frame count - width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)) - height = 
int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)) - fps = int(capture.get(cv2.CAP_PROP_FPS)) - frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT)) - print("fps: %d, frame_count: %d" % (fps, frame_count)) - - if not os.path.exists(FLAGS.output_dir): - os.makedirs(FLAGS.output_dir) - out_path = os.path.join(FLAGS.output_dir, video_out_name) - fourcc = cv2.VideoWriter_fourcc(* 'mp4v') - writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height)) - index = 1 - while (1): - ret, frame = capture.read() - if not ret: - break - print('detect frame: %d' % (index)) - index += 1 - results = detector.predict([frame], FLAGS.threshold) - im = visualize_box_mask( - frame, - results, - detector.pred_config.labels, - threshold=FLAGS.threshold) - im = np.array(im) - writer.write(im) - if camera_id != -1: - cv2.imshow('Mask Detection', im) - if cv2.waitKey(1) & 0xFF == ord('q'): - break - writer.release() - - def main(): - pred_config = PredictConfig(FLAGS.model_dir) + deploy_file = os.path.join(FLAGS.model_dir, 'infer_cfg.yml') + with open(deploy_file) as f: + yml_conf = yaml.safe_load(f) + arch = yml_conf['arch'] detector_func = 'Detector' - if pred_config.arch == 'PicoDet': - detector_func = 'DetectorPicoDet' - - detector = eval(detector_func)(pred_config, - FLAGS.model_dir, + detector = eval(detector_func)(FLAGS.model_dir, device=FLAGS.device, run_mode=FLAGS.run_mode, batch_size=FLAGS.batch_size, @@ -610,41 +552,29 @@ def main(): trt_opt_shape=FLAGS.trt_opt_shape, trt_calib_mode=FLAGS.trt_calib_mode, cpu_threads=FLAGS.cpu_threads, - enable_mkldnn=FLAGS.enable_mkldnn) + enable_mkldnn=FLAGS.enable_mkldnn, + threshold=FLAGS.threshold, + output_dir=FLAGS.output_dir) # predict from video file or camera video stream if FLAGS.video_file is not None or FLAGS.camera_id != -1: - predict_video(detector, FLAGS.camera_id) + detector.predict_video(FLAGS.video_file, FLAGS.camera_id) else: # predict from image if FLAGS.image_dir is None and FLAGS.image_file is not None: assert FLAGS.batch_size == 1, "batch_size should be 1, when image_file is not None" img_list = get_test_images(FLAGS.image_dir, FLAGS.image_file) - predict_image(detector, img_list, FLAGS.batch_size) + detector.predict_image(img_list, FLAGS.run_benchmark, repeats=10) if not FLAGS.run_benchmark: detector.det_times.info(average=True) else: - mems = { - 'cpu_rss_mb': detector.cpu_mem / len(img_list), - 'gpu_rss_mb': detector.gpu_mem / len(img_list), - 'gpu_util': detector.gpu_util * 100 / len(img_list) - } - - perf_info = detector.det_times.report(average=True) - model_dir = FLAGS.model_dir mode = FLAGS.run_mode + model_dir = FLAGS.model_dir model_info = { 'model_name': model_dir.strip('/').split('/')[-1], 'precision': mode.split('_')[-1] } - data_info = { - 'batch_size': FLAGS.batch_size, - 'shape': "dynamic_shape", - 'data_num': perf_info['img_num'] - } - det_log = PaddleInferBenchmark(detector.config, model_info, - data_info, perf_info, mems) - det_log('Det') + bench_log(detector, img_list, model_info, name='DET') if __name__ == '__main__': diff --git a/deploy/pptracking/python/mot/tracker/deepsort_tracker.py b/deploy/pptracking/python/mot/tracker/deepsort_tracker.py index d4244758037e4e4a005a4776fbbac979083c93e5..335cdd38a3475823c9bef7243cbf06c688a35ece 100644 --- a/deploy/pptracking/python/mot/tracker/deepsort_tracker.py +++ b/deploy/pptracking/python/mot/tracker/deepsort_tracker.py @@ -96,7 +96,8 @@ class DeepSORTTracker(object): """ pred_cls_ids = pred_dets[:, 0:1] pred_scores = pred_dets[:, 1:2] - pred_tlwhs = pred_dets[:, 2:6] + pred_xyxys = 
pred_dets[:, 2:6]
+        pred_tlwhs = np.concatenate((pred_xyxys[:, 0:2], pred_xyxys[:, 2:4] - pred_xyxys[:, 0:2] + 1), axis=1)

         detections = [
             Detection(tlwh, score, feat, cls_id)
diff --git a/deploy/pptracking/python/mot/utils.py b/deploy/pptracking/python/mot/utils.py
index a452b37e6fa0a3fd7a64762402ddfa39824d1569..37d39b066671e20c4030eb06e7e5698ecfb4cf68 100644
--- a/deploy/pptracking/python/mot/utils.py
+++ b/deploy/pptracking/python/mot/utils.py
@@ -182,8 +182,7 @@ def clip_box(xyxy, ori_image_shape):
 def get_crops(xyxy, ori_img, w, h):
     crops = []
     xyxy = xyxy.astype(np.int64)
-    ori_img = ori_img.numpy()
-    ori_img = np.squeeze(ori_img, axis=0).transpose(1, 0, 2)  # [h,w,3]->[w,h,3]
+    ori_img = ori_img.transpose(1, 0, 2)  # [h,w,3]->[w,h,3]
     for i, bbox in enumerate(xyxy):
         crop = ori_img[bbox[0]:bbox[2], bbox[1]:bbox[3], :]
         crops.append(crop)
diff --git a/deploy/pptracking/python/mot_jde_infer.py b/deploy/pptracking/python/mot_jde_infer.py
index 3caa65dff5a0d413025e4d8a2e93352f3f81d898..6a4e18abd8762010e8d4b37e6372b00e4781a8fd 100644
--- a/deploy/pptracking/python/mot_jde_infer.py
+++ b/deploy/pptracking/python/mot_jde_infer.py
@@ -18,21 +18,24 @@ import yaml
 import cv2
 import numpy as np
 from collections import defaultdict
-
 import paddle
-from paddle.inference import Config
-from paddle.inference import create_predictor

-from utils import argsparser, Timer, get_current_memory_mb
-from det_infer import Detector, get_test_images, print_arguments, PredictConfig
 from benchmark_utils import PaddleInferBenchmark
-from visualize import plot_tracking_dict
+from preprocess import decode_image
+from utils import argsparser, Timer, get_current_memory_mb
+from det_infer import Detector, get_test_images, print_arguments, bench_log, PredictConfig
+
+# add python path
+import sys
+parent_path = os.path.abspath(os.path.join(__file__, *(['..'] * 2)))
+sys.path.insert(0, parent_path)

-from mot.tracker import JDETracker
-from mot.utils import MOTTimer, write_mot_results, flow_statistic
+from mot import JDETracker
+from utils import MOTTimer, write_mot_results
+from visualize import plot_tracking, plot_tracking_dict

 # Global dictionary
-MOT_SUPPORT_MODELS = {
+MOT_JDE_SUPPORT_MODELS = {
     'JDE',
     'FairMOT',
 }
@@ -41,23 +44,22 @@ MOT_SUPPORT_MODELS = {
 class JDE_Detector(Detector):
     """
     Args:
-        pred_config (object): config of model, defined by `Config(model_dir)`
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
         device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
         run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
-        batch_size (int): size of per batch in inference, default is 1 in tracking models
+        batch_size (int): size of per batch in inference
         trt_min_shape (int): min shape for dynamic shape in trt
         trt_max_shape (int): max shape for dynamic shape in trt
         trt_opt_shape (int): opt shape for dynamic shape in trt
         trt_calib_mode (bool): If the model is produced by TRT offline quantitative
             calibration, trt_calib_mode need to set True
         cpu_threads (int): cpu threads
-        enable_mkldnn (bool): whether to open MKLDNN
+        enable_mkldnn (bool): whether to open MKLDNN
     """

     def __init__(self,
-                 pred_config,
                  model_dir,
+                 tracker_config=None,
                 device='CPU',
                 run_mode='paddle',
                 batch_size=1,
@@ -66,9 +68,10 @@
                 trt_opt_shape=608,
                 trt_calib_mode=False,
                 cpu_threads=1,
-                 enable_mkldnn=False):
+                 enable_mkldnn=False,
+                 output_dir='output',
+                 threshold=0.5):
         super(JDE_Detector, self).__init__(
-            pred_config=pred_config,
             model_dir=model_dir,
             device=device,
             run_mode=run_mode,
@@ -78,17 +81,21 @@
             trt_opt_shape=trt_opt_shape,
             trt_calib_mode=trt_calib_mode,
             cpu_threads=cpu_threads,
-            enable_mkldnn=enable_mkldnn)
-        assert batch_size == 1, "The JDE Detector only supports batch size=1 now"
-        assert pred_config.tracker, "Tracking model should have tracker"
-        self.num_classes = len(pred_config.labels)
-
-        tp = pred_config.tracker
-        min_box_area = tp['min_box_area'] if 'min_box_area' in tp else 200
-        vertical_ratio = tp['vertical_ratio'] if 'vertical_ratio' in tp else 1.6
-        conf_thres = tp['conf_thres'] if 'conf_thres' in tp else 0.
-        tracked_thresh = tp['tracked_thresh'] if 'tracked_thresh' in tp else 0.7
-        metric_type = tp['metric_type'] if 'metric_type' in tp else 'euclidean'
+            enable_mkldnn=enable_mkldnn,
+            output_dir=output_dir,
+            threshold=threshold, )
+        assert batch_size == 1, "MOT model only supports batch_size=1."
+        self.det_times = Timer(with_tracker=True)
+        self.num_classes = len(self.pred_config.labels)
+
+        # tracker config
+        assert self.pred_config.tracker, "The exported JDE Detector model should have tracker."
+        cfg = self.pred_config.tracker
+        min_box_area = cfg.get('min_box_area', 200)
+        vertical_ratio = cfg.get('vertical_ratio', 1.6)
+        conf_thres = cfg.get('conf_thres', 0.0)
+        tracked_thresh = cfg.get('tracked_thresh', 0.7)
+        metric_type = cfg.get('metric_type', 'euclidean')

         self.tracker = JDETracker(
             num_classes=self.num_classes,
@@ -98,7 +105,18 @@
             tracked_thresh=tracked_thresh,
             metric_type=metric_type)

-    def postprocess(self, pred_dets, pred_embs, threshold):
+    def postprocess(self, inputs, result):
+        # postprocess output of predictor
+        np_boxes = result['pred_dets']
+        if np_boxes.shape[0] <= 0:
+            print('[WARNING] No object detected.')
+            result = {'pred_dets': np.zeros([0, 6]), 'pred_embs': None}
+        result = {k: v for k, v in result.items() if v is not None}
+        return result
+
+    def tracking(self, det_results):
+        pred_dets = det_results['pred_dets']
+        pred_embs = det_results['pred_embs']
         online_targets_dict = self.tracker.update(pred_dets, pred_embs)

         online_tlwhs = defaultdict(list)
@@ -110,9 +128,7 @@
             tlwh = t.tlwh
             tid = t.track_id
             tscore = t.score
-            if tscore < threshold: continue
-            if tlwh[2] * tlwh[3] <= self.tracker.min_box_area:
-                continue
+            if tlwh[2] * tlwh[3] <= self.tracker.min_box_area: continue
             if self.tracker.vertical_ratio > 0 and tlwh[2] / tlwh[
                     3] > self.tracker.vertical_ratio:
                 continue
@@ -121,270 +137,181 @@
             online_scores[cls_id].append(tscore)
         return online_tlwhs, online_scores, online_ids

-    def predict(self, image_list, threshold=0.5, repeats=1, add_timer=True):
+    def predict(self, repeats=1):
         '''
         Args:
-            image_list (list[str]): path of images, only support one image path
-                (batch_size=1) in tracking model
-            threshold (float): threshold of predicted box' score
-            repeats (int): repeat number for prediction
-            add_timer (bool): whether add timer during prediction
+            repeats (int): repeat number for prediction
         Returns:
-            online_tlwhs, online_scores, online_ids (dict[np.array])
+            result (dict): include 'pred_dets': np.ndarray: shape:[N,6], N: 
number of box, + matix element:[x_min, y_min, x_max, y_max, score, class] + FairMOT(JDE)'s result include 'pred_embs': np.ndarray: + shape: [N, 128] ''' - # preprocess - if add_timer: - self.det_times.preprocess_time_s.start() - inputs = self.preprocess(image_list) - - pred_dets, pred_embs = None, None - input_names = self.predictor.get_input_names() - for i in range(len(input_names)): - input_tensor = self.predictor.get_input_handle(input_names[i]) - input_tensor.copy_from_cpu(inputs[input_names[i]]) - if add_timer: - self.det_times.preprocess_time_s.end() - self.det_times.inference_time_s.start() - # model prediction + np_pred_dets, np_pred_embs = None, None for i in range(repeats): self.predictor.run() output_names = self.predictor.get_output_names() boxes_tensor = self.predictor.get_output_handle(output_names[0]) - pred_dets = boxes_tensor.copy_to_cpu() + np_pred_dets = boxes_tensor.copy_to_cpu() embs_tensor = self.predictor.get_output_handle(output_names[1]) - pred_embs = embs_tensor.copy_to_cpu() - if add_timer: - self.det_times.inference_time_s.end(repeats=repeats) - self.det_times.postprocess_time_s.start() - - # postprocess - online_tlwhs, online_scores, online_ids = self.postprocess( - pred_dets, pred_embs, threshold) - if add_timer: - self.det_times.postprocess_time_s.end() - self.det_times.img_num += 1 - return online_tlwhs, online_scores, online_ids - - -def predict_image(detector, - image_list, - threshold, - output_dir, - save_images=True, - run_benchmark=False): - results = [] - num_classes = detector.num_classes - data_type = 'mcmot' if num_classes > 1 else 'mot' - ids2names = detector.pred_config.labels - - image_list.sort() - for frame_id, img_file in enumerate(image_list): - frame = cv2.imread(img_file) - if run_benchmark: - # warmup - detector.predict([img_file], threshold, repeats=10, add_timer=False) - # run benchmark - detector.predict([img_file], threshold, repeats=10, add_timer=True) - cm, gm, gu = get_current_memory_mb() - detector.cpu_mem += cm - detector.gpu_mem += gm - detector.gpu_util += gu - print('Test iter {}, file name:{}'.format(frame_id, img_file)) + np_pred_embs = embs_tensor.copy_to_cpu() + + result = dict(pred_dets=np_pred_dets, pred_embs=np_pred_embs) + return result + + def predict_image(self, + image_list, + run_benchmark=False, + repeats=1, + visual=True): + mot_results = [] + num_classes = self.num_classes + image_list.sort() + ids2names = self.pred_config.labels + data_type = 'mcmot' if num_classes > 1 else 'mot' + for frame_id, img_file in enumerate(image_list): + batch_image_list = [img_file] # bs=1 in MOT model + if run_benchmark: + # preprocess + inputs = self.preprocess(batch_image_list) # warmup + self.det_times.preprocess_time_s.start() + inputs = self.preprocess(batch_image_list) + self.det_times.preprocess_time_s.end() + + # model prediction + result_warmup = self.predict(repeats=repeats) # warmup + self.det_times.inference_time_s.start() + result = self.predict(repeats=repeats) + self.det_times.inference_time_s.end(repeats=repeats) + + # postprocess + result_warmup = self.postprocess(inputs, result) # warmup + self.det_times.postprocess_time_s.start() + det_result = self.postprocess(inputs, result) + self.det_times.postprocess_time_s.end() + + # tracking + result_warmup = self.tracking(det_result) + self.det_times.tracking_time_s.start() + online_tlwhs, online_scores, online_ids = self.tracking( + det_result) + self.det_times.tracking_time_s.end() + self.det_times.img_num += 1 + + cm, gm, gu = get_current_memory_mb() + self.cpu_mem 
+= cm + self.gpu_mem += gm + self.gpu_util += gu + + else: + self.det_times.preprocess_time_s.start() + inputs = self.preprocess(batch_image_list) + self.det_times.preprocess_time_s.end() + + self.det_times.inference_time_s.start() + result = self.predict() + self.det_times.inference_time_s.end() + + self.det_times.postprocess_time_s.start() + det_result = self.postprocess(inputs, result) + self.det_times.postprocess_time_s.end() + + # tracking process + self.det_times.tracking_time_s.start() + online_tlwhs, online_scores, online_ids = self.tracking( + det_result) + self.det_times.tracking_time_s.end() + self.det_times.img_num += 1 + + if visual: + if frame_id % 10 == 0: + print('Tracking frame {}'.format(frame_id)) + frame, _ = decode_image(img_file, {}) + + im = plot_tracking_dict( + frame, + num_classes, + online_tlwhs, + online_ids, + online_scores, + frame_id=frame_id, + ids2names=ids2names) + seq_name = image_list[0].split('/')[-2] + save_dir = os.path.join(self.output_dir, seq_name) + if not os.path.exists(save_dir): + os.makedirs(save_dir) + cv2.imwrite( + os.path.join(save_dir, '{:05d}.jpg'.format(frame_id)), im) + + mot_results.append([online_tlwhs, online_scores, online_ids]) + return mot_results + + def predict_video(self, video_file, camera_id): + video_out_name = 'mot_output.mp4' + if camera_id != -1: + capture = cv2.VideoCapture(camera_id) else: - online_tlwhs, online_scores, online_ids = detector.predict( - [img_file], threshold) - online_im = plot_tracking_dict( + capture = cv2.VideoCapture(video_file) + video_out_name = os.path.split(video_file)[-1] + # Get Video info : resolution, fps, frame count + width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)) + fps = int(capture.get(cv2.CAP_PROP_FPS)) + frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT)) + print("fps: %d, frame_count: %d" % (fps, frame_count)) + + if not os.path.exists(self.output_dir): + os.makedirs(self.output_dir) + out_path = os.path.join(self.output_dir, video_out_name) + fourcc = cv2.VideoWriter_fourcc(*'mp4v') + writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height)) + + frame_id = 1 + timer = MOTTimer() + results = defaultdict(list) # support single class and multi classes + num_classes = self.num_classes + data_type = 'mcmot' if num_classes > 1 else 'mot' + ids2names = self.pred_config.labels + while (1): + ret, frame = capture.read() + if not ret: + break + if frame_id % 10 == 0: + print('Tracking frame: %d' % (frame_id)) + frame_id += 1 + + timer.tic() + mot_results = self.predict_image([frame], visual=False) + timer.toc() + + online_tlwhs, online_scores, online_ids = mot_results[0] + for cls_id in range(num_classes): + results[cls_id].append( + (frame_id + 1, online_tlwhs[cls_id], online_scores[cls_id], + online_ids[cls_id])) + + fps = 1. 
/ timer.duration + im = plot_tracking_dict( frame, num_classes, online_tlwhs, online_ids, online_scores, frame_id=frame_id, + fps=fps, ids2names=ids2names) - if save_images: - if not os.path.exists(output_dir): - os.makedirs(output_dir) - img_name = os.path.split(img_file)[-1] - out_path = os.path.join(output_dir, img_name) - cv2.imwrite(out_path, online_im) - print("save result to: " + out_path) - - -def predict_video(detector, - video_file, - threshold, - output_dir, - save_images=True, - save_mot_txts=True, - draw_center_traj=False, - secs_interval=10, - do_entrance_counting=False, - camera_id=-1): - video_name = 'mot_output.mp4' - if camera_id != -1: - capture = cv2.VideoCapture(camera_id) - else: - capture = cv2.VideoCapture(video_file) - video_name = os.path.split(video_file)[-1] - - # Get Video info : resolution, fps, frame count - width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)) - height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)) - fps = int(capture.get(cv2.CAP_PROP_FPS)) - frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT)) - print("fps: %d, frame_count: %d" % (fps, frame_count)) - - if not os.path.exists(output_dir): - os.makedirs(output_dir) - out_path = os.path.join(output_dir, video_name) - if not save_images: - video_format = 'mp4v' - fourcc = cv2.VideoWriter_fourcc(*video_format) - writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height)) - frame_id = 0 - timer = MOTTimer() - results = defaultdict(list) # support single class and multi classes - num_classes = detector.num_classes - data_type = 'mcmot' if num_classes > 1 else 'mot' - ids2names = detector.pred_config.labels - center_traj = None - entrance = None - records = None - if draw_center_traj: - center_traj = [{} for i in range(num_classes)] - - if num_classes == 1: - id_set = set() - interval_id_set = set() - in_id_list = list() - out_id_list = list() - prev_center = dict() - records = list() - entrance = [0, height / 2., width, height / 2.] - - video_fps = fps - - while (1): - ret, frame = capture.read() - if not ret: - break - timer.tic() - online_tlwhs, online_scores, online_ids = detector.predict([frame], - threshold) - timer.toc() - - for cls_id in range(num_classes): - results[cls_id].append((frame_id + 1, online_tlwhs[cls_id], - online_scores[cls_id], online_ids[cls_id])) - - fps = 1. 
/ timer.duration - # NOTE: just implement flow statistic for one class - if num_classes == 1: - result = (frame_id + 1, online_tlwhs[0], online_scores[0], - online_ids[0]) - statistic = flow_statistic( - result, secs_interval, do_entrance_counting, video_fps, - entrance, id_set, interval_id_set, in_id_list, out_id_list, - prev_center, records, data_type, num_classes) - id_set = statistic['id_set'] - interval_id_set = statistic['interval_id_set'] - in_id_list = statistic['in_id_list'] - out_id_list = statistic['out_id_list'] - prev_center = statistic['prev_center'] - records = statistic['records'] - - elif num_classes > 1 and do_entrance_counting: - raise NotImplementedError( - 'Multi-class flow counting is not implemented now!') - im = plot_tracking_dict( - frame, - num_classes, - online_tlwhs, - online_ids, - online_scores, - frame_id=frame_id, - fps=fps, - ids2names=ids2names, - do_entrance_counting=do_entrance_counting, - entrance=entrance, - records=records, - center_traj=center_traj) - - if save_images: - save_dir = os.path.join(output_dir, video_name.split('.')[-2]) - if not os.path.exists(save_dir): - os.makedirs(save_dir) - cv2.imwrite( - os.path.join(save_dir, '{:05d}.jpg'.format(frame_id)), im) - else: - writer.write(im) - frame_id += 1 - print('detect frame: %d, fps: %f' % (frame_id, fps)) - if camera_id != -1: - cv2.imshow('Tracking Detection', im) - if cv2.waitKey(1) & 0xFF == ord('q'): - break - if save_mot_txts: - result_filename = os.path.join(output_dir, - video_name.split('.')[-2] + '.txt') - - write_mot_results(result_filename, results, data_type, num_classes) - - if num_classes == 1: - result_filename = os.path.join( - output_dir, video_name.split('.')[-2] + '_flow_statistic.txt') - f = open(result_filename, 'w') - for line in records: - f.write(line) - print('Flow statistic save in {}'.format(result_filename)) - f.close() - - if save_images: - save_dir = os.path.join(output_dir, video_name.split('.')[-2]) - cmd_str = 'ffmpeg -f image2 -i {}/%05d.jpg {}'.format(save_dir, - out_path) - os.system(cmd_str) - print('Save video in {}.'.format(out_path)) - else: + writer.write(im) + if camera_id != -1: + cv2.imshow('Mask Detection', im) + if cv2.waitKey(1) & 0xFF == ord('q'): + break writer.release() -def predict_naive(model_dir, - video_file, - image_dir, - device='gpu', - threshold=0.5, - output_dir='output'): - pred_config = PredictConfig(model_dir) - detector = JDE_Detector(pred_config, model_dir, device=device.upper()) - - if video_file is not None: - predict_video( - detector, - video_file, - threshold=threshold, - output_dir=output_dir, - save_images=True, - save_mot_txts=True, - draw_center_traj=False, - secs_interval=10, - do_entrance_counting=False) - else: - img_list = get_test_images(image_dir, infer_img=None) - predict_image( - detector, - img_list, - threshold=threshold, - output_dir=output_dir, - save_images=True) - - def main(): - pred_config = PredictConfig(FLAGS.model_dir) detector = JDE_Detector( - pred_config, FLAGS.model_dir, device=FLAGS.device, run_mode=FLAGS.run_mode, @@ -397,50 +324,22 @@ def main(): # predict from video file or camera video stream if FLAGS.video_file is not None or FLAGS.camera_id != -1: - predict_video( - detector, - FLAGS.video_file, - threshold=FLAGS.threshold, - output_dir=FLAGS.output_dir, - save_images=FLAGS.save_images, - save_mot_txts=FLAGS.save_mot_txts, - draw_center_traj=FLAGS.draw_center_traj, - secs_interval=FLAGS.secs_interval, - do_entrance_counting=FLAGS.do_entrance_counting, - camera_id=FLAGS.camera_id) + 
detector.predict_video(FLAGS.video_file, FLAGS.camera_id) else: # predict from image img_list = get_test_images(FLAGS.image_dir, FLAGS.image_file) - predict_image( - detector, - img_list, - threshold=FLAGS.threshold, - output_dir=FLAGS.output_dir, - save_images=FLAGS.save_images, - run_benchmark=FLAGS.run_benchmark) + detector.predict_image(img_list, FLAGS.run_benchmark, repeats=10) + if not FLAGS.run_benchmark: detector.det_times.info(average=True) else: - mems = { - 'cpu_rss_mb': detector.cpu_mem / len(img_list), - 'gpu_rss_mb': detector.gpu_mem / len(img_list), - 'gpu_util': detector.gpu_util * 100 / len(img_list) - } - perf_info = detector.det_times.report(average=True) - model_dir = FLAGS.model_dir mode = FLAGS.run_mode + model_dir = FLAGS.model_dir model_info = { 'model_name': model_dir.strip('/').split('/')[-1], 'precision': mode.split('_')[-1] } - data_info = { - 'batch_size': 1, - 'shape': "dynamic_shape", - 'data_num': perf_info['img_num'] - } - det_log = PaddleInferBenchmark(detector.config, model_info, - data_info, perf_info, mems) - det_log('MOT') + bench_log(detector, img_list, model_info, name='MOT') if __name__ == '__main__': diff --git a/deploy/pptracking/python/mot_sde_infer.py b/deploy/pptracking/python/mot_sde_infer.py index b699db8acda790394cd01c2c3e768e1d5f10878a..d653c0d6887383e3ffa4af8e9bc2e535d88ee4ce 100644 --- a/deploy/pptracking/python/mot_sde_infer.py +++ b/deploy/pptracking/python/mot_sde_infer.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,64 +11,44 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
import os
import time
import yaml
import cv2
import re
+import glob
import numpy as np
from collections import defaultdict
-
import paddle
-from paddle.inference import Config
-from paddle.inference import create_predictor

-from picodet_postprocess import PicoDetPostProcess
-from utils import argsparser, Timer, get_current_memory_mb, _is_valid_video, video2frames
-from det_infer import Detector, DetectorPicoDet, get_test_images, print_arguments, PredictConfig
-from det_infer import load_predictor
 from benchmark_utils import PaddleInferBenchmark
-from visualize import plot_tracking
+from preprocess import decode_image
+from utils import argsparser, Timer, get_current_memory_mb, _is_valid_video, video2frames
+from det_infer import Detector, get_test_images, print_arguments, bench_log, PredictConfig, load_predictor
+
+# add python path
+import sys
+parent_path = os.path.abspath(os.path.join(__file__, *(['..'] * 2)))
+sys.path.insert(0, parent_path)

-from mot.tracker import DeepSORTTracker
-from mot.utils import MOTTimer, write_mot_results, flow_statistic, scale_coords, clip_box, preprocess_reid
+from mot.tracker import JDETracker, DeepSORTTracker
+from mot.utils import MOTTimer, write_mot_results, flow_statistic, get_crops, clip_box
+from visualize import plot_tracking, plot_tracking_dict

 from mot.mtmct.utils import parse_bias
 from mot.mtmct.postprocess import trajectory_fusion, sub_cluster, gen_res, print_mtmct_result
 from mot.mtmct.postprocess import get_mtmct_matching_results, save_mtmct_crops, save_mtmct_vis_results

-# Global dictionary
-MOT_SUPPORT_MODELS = {'DeepSORT'}
-
-
-def bench_log(detector, img_list, model_info, batch_size=1, name=None):
-    mems = {
-        'cpu_rss_mb': detector.cpu_mem / len(img_list),
-        'gpu_rss_mb': detector.gpu_mem / len(img_list),
-        'gpu_util': detector.gpu_util * 100 / len(img_list)
-    }
-    perf_info = detector.det_times.report(average=True)
-    data_info = {
-        'batch_size': batch_size,
-        'shape': "dynamic_shape",
-        'data_num': perf_info['img_num']
-    }
-    log = PaddleInferBenchmark(detector.config, model_info, data_info,
-                               perf_info, mems)
-    log(name)
-

 class SDE_Detector(Detector):
     """
-    Detector of SDE methods
-
     Args:
-        pred_config (object): config of model, defined by `Config(model_dir)`
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
+        tracker_config (str): tracker config path
         device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
         run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
-        batch_size (int): size of per batch in inference, default is 1 in tracking models
+        batch_size (int): size of per batch in inference
         trt_min_shape (int): min shape for dynamic shape in trt
         trt_max_shape (int): max shape for dynamic shape in trt
         trt_opt_shape (int): opt shape for dynamic shape in trt
@@ -76,22 +56,27 @@
         calibration, trt_calib_mode need to set True
         cpu_threads (int): cpu threads
         enable_mkldnn (bool): whether to open MKLDNN
+        reid_model_dir (str): reid model dir, default None for ByteTrack, but set for DeepSORT
+        mtmct_dir (str): MTMCT dir, default None, set for doing MTMCT
     """

     def __init__(self,
-                 pred_config,
                  model_dir,
+                 tracker_config=None,
                  device='CPU',
                  run_mode='paddle',
                  batch_size=1,
                  trt_min_shape=1,
-                 trt_max_shape=1088,
-                 trt_opt_shape=608,
+                 trt_max_shape=1280,
+                 trt_opt_shape=640,
                  trt_calib_mode=False,
                  cpu_threads=1,
-                 enable_mkldnn=False):
+                 enable_mkldnn=False,
+                 output_dir='output',
+                 threshold=0.5,
+                 reid_model_dir=None,
+
mtmct_dir=None): super(SDE_Detector, self).__init__( - pred_config=pred_config, model_dir=model_dir, device=device, run_mode=run_mode, @@ -101,833 +86,465 @@ class SDE_Detector(Detector): trt_opt_shape=trt_opt_shape, trt_calib_mode=trt_calib_mode, cpu_threads=cpu_threads, - enable_mkldnn=enable_mkldnn) - assert batch_size == 1, "The detector of tracking models only supports batch_size=1 now" - self.pred_config = pred_config - - def postprocess(self, - boxes, - ori_image_shape, - threshold, - inputs, - scaled=False): - over_thres_idx = np.nonzero(boxes[:, 1:2] >= threshold)[0] - if len(over_thres_idx) == 0: - pred_dets = np.zeros((1, 6), dtype=np.float32) - pred_xyxys = np.zeros((1, 4), dtype=np.float32) - return pred_dets, pred_xyxys - else: - boxes = boxes[over_thres_idx] - - if not scaled: - # scaled means whether the coords after detector outputs - # have been scaled back to the original image, set True - # in general detector, set False in JDE YOLOv3. - input_shape = inputs['image'].shape[2:] - im_shape = inputs['im_shape'][0] - scale_factor = inputs['scale_factor'][0] - pred_bboxes = scale_coords(boxes[:, 2:], input_shape, im_shape, - scale_factor) - else: - pred_bboxes = boxes[:, 2:] - - pred_xyxys, keep_idx = clip_box(pred_bboxes, ori_image_shape) - - if len(keep_idx[0]) == 0: - pred_dets = np.zeros((1, 6), dtype=np.float32) - pred_xyxys = np.zeros((1, 4), dtype=np.float32) - return pred_dets, pred_xyxys - - pred_scores = boxes[:, 1:2][keep_idx[0]] - pred_cls_ids = boxes[:, 0:1][keep_idx[0]] - pred_tlwhs = np.concatenate( - (pred_xyxys[:, 0:2], pred_xyxys[:, 2:4] - pred_xyxys[:, 0:2] + 1), - axis=1) - - pred_dets = np.concatenate( - (pred_tlwhs, pred_scores, pred_cls_ids), axis=1) - - return pred_dets, pred_xyxys - - def predict(self, - image_path, - ori_image_shape, - threshold=0.5, - scaled=False, - repeats=1, - add_timer=True): - ''' - Args: - image_path (list[str]): path of images, only support one image path - (batch_size=1) in tracking model - ori_image_shape (list[int]: original image shape - threshold (float): threshold of predicted box' score - scaled (bool): whether the coords after detector outputs are scaled, - default False in jde yolov3, set True in general detector. 
- repeats (int): repeat number for prediction - add_timer (bool): whether add timer during prediction - - Returns: - pred_dets (np.ndarray, [N, 6]): 'x,y,w,h,score,cls_id' - pred_xyxys (np.ndarray, [N, 4]): 'x1,y1,x2,y2' - ''' - # preprocess - if add_timer: - self.det_times.preprocess_time_s.start() - inputs = self.preprocess(image_path) - - input_names = self.predictor.get_input_names() - for i in range(len(input_names)): - input_tensor = self.predictor.get_input_handle(input_names[i]) - input_tensor.copy_from_cpu(inputs[input_names[i]]) - if add_timer: - self.det_times.preprocess_time_s.end() - self.det_times.inference_time_s.start() - - # model prediction - for i in range(repeats): - self.predictor.run() - output_names = self.predictor.get_output_names() - boxes_tensor = self.predictor.get_output_handle(output_names[0]) - boxes = boxes_tensor.copy_to_cpu() - if add_timer: - self.det_times.inference_time_s.end(repeats=repeats) - self.det_times.postprocess_time_s.start() - - # postprocess - if len(boxes) == 0: - pred_dets = np.zeros((1, 6), dtype=np.float32) - pred_xyxys = np.zeros((1, 4), dtype=np.float32) - else: - pred_dets, pred_xyxys = self.postprocess( - boxes, ori_image_shape, threshold, inputs, scaled=scaled) - if add_timer: - self.det_times.postprocess_time_s.end() - self.det_times.img_num += 1 - return pred_dets, pred_xyxys - - -class SDE_DetectorPicoDet(DetectorPicoDet): - """ - PicoDet of SDE methods, the postprocess of PicoDet has not been exported as - other detectors, so do postprocess here. - - Args: - pred_config (object): config of model, defined by `Config(model_dir)` - model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml - device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU - run_mode (str): mode of running(paddle/trt_fp32/trt_fp16) - batch_size (int): size of per batch in inference, default is 1 in tracking models - trt_min_shape (int): min shape for dynamic shape in trt - trt_max_shape (int): max shape for dynamic shape in trt - trt_opt_shape (int): opt shape for dynamic shape in trt - trt_calib_mode (bool): If the model is produced by TRT offline quantitative - calibration, trt_calib_mode need to set True - cpu_threads (int): cpu threads - enable_mkldnn (bool): whether to open MKLDNN - """ - - def __init__(self, - pred_config, - model_dir, - device='CPU', - run_mode='paddle', - batch_size=1, - trt_min_shape=1, - trt_max_shape=1088, - trt_opt_shape=608, - trt_calib_mode=False, - cpu_threads=1, - enable_mkldnn=False): - super(SDE_DetectorPicoDet, self).__init__( - pred_config=pred_config, - model_dir=model_dir, - device=device, - run_mode=run_mode, - batch_size=batch_size, - trt_min_shape=trt_min_shape, - trt_max_shape=trt_max_shape, - trt_opt_shape=trt_opt_shape, - trt_calib_mode=trt_calib_mode, - cpu_threads=cpu_threads, - enable_mkldnn=enable_mkldnn) - assert batch_size == 1, "The detector of tracking models only supports batch_size=1 now" - self.pred_config = pred_config - - def postprocess(self, boxes, ori_image_shape, threshold): - over_thres_idx = np.nonzero(boxes[:, 1:2] >= threshold)[0] - if len(over_thres_idx) == 0: - pred_dets = np.zeros((1, 6), dtype=np.float32) - pred_xyxys = np.zeros((1, 4), dtype=np.float32) - return pred_dets, pred_xyxys + enable_mkldnn=enable_mkldnn, + output_dir=output_dir, + threshold=threshold, ) + assert batch_size == 1, "MOT model only supports batch_size=1." 
+        self.det_times = Timer(with_tracker=True)
+        self.num_classes = len(self.pred_config.labels)
+
+        # reid and tracker config
+        self.use_reid = False if reid_model_dir is None else True
+        if self.use_reid:
+            # use DeepSORTTracker
+            self.reid_pred_config = self.set_config(reid_model_dir)
+            self.reid_predictor, self.config = load_predictor(
+                reid_model_dir,
+                run_mode=run_mode,
+                batch_size=50,  # reid_batch_size
+                min_subgraph_size=self.reid_pred_config.min_subgraph_size,
+                device=device,
+                use_dynamic_shape=self.reid_pred_config.use_dynamic_shape,
+                trt_min_shape=trt_min_shape,
+                trt_max_shape=trt_max_shape,
+                trt_opt_shape=trt_opt_shape,
+                trt_calib_mode=trt_calib_mode,
+                cpu_threads=cpu_threads,
+                enable_mkldnn=enable_mkldnn)
+
+            cfg = self.reid_pred_config.tracker
+            max_age = cfg.get('max_age', 30)
+            max_iou_distance = cfg.get('max_iou_distance', 0.7)
+
+            self.tracker = DeepSORTTracker(
+                max_age=max_age,
+                max_iou_distance=max_iou_distance,
+            )
         else:
-            boxes = boxes[over_thres_idx]
-
-        pred_bboxes = boxes[:, 2:]
+            # use ByteTracker
+            self.tracker_config = tracker_config
+            cfg = yaml.safe_load(open(self.tracker_config))['tracker']
+            min_box_area = cfg.get('min_box_area', 200)
+            vertical_ratio = cfg.get('vertical_ratio', 1.6)
+            use_byte = cfg.get('use_byte', True)
+            match_thres = cfg.get('match_thres', 0.9)
+            conf_thres = cfg.get('conf_thres', 0.6)
+            low_conf_thres = cfg.get('low_conf_thres', 0.1)
+
+            self.tracker = JDETracker(
+                use_byte=use_byte,
+                num_classes=self.num_classes,
+                min_box_area=min_box_area,
+                vertical_ratio=vertical_ratio,
+                match_thres=match_thres,
+                conf_thres=conf_thres,
+                low_conf_thres=low_conf_thres,
+            )
+
+        self.do_mtmct = False if mtmct_dir is None else True
+        self.mtmct_dir = mtmct_dir
+
+    def postprocess(self, inputs, result):
+        # postprocess output of predictor
+        np_boxes_num = result['boxes_num']
+        if np_boxes_num[0] <= 0:
+            print('[WARNING] No object detected.')
+            result = {'boxes': np.zeros([0, 6]), 'boxes_num': [0]}
+        result = {k: v for k, v in result.items() if v is not None}
+        return result
+
+    def reidprocess(self, det_results, repeats=1):
+        pred_dets = det_results['boxes']
+        pred_xyxys = pred_dets[:, 2:6]
+
+        ori_image = det_results['ori_image']
+        ori_image_shape = ori_image.shape[:2]
+        pred_xyxys, keep_idx = clip_box(pred_xyxys, ori_image_shape)
 
-        pred_xyxys, keep_idx = clip_box(pred_bboxes, ori_image_shape)
         if len(keep_idx[0]) == 0:
- repeats (int): repeat number for prediction - add_timer (bool): whether add timer during prediction - Returns: - pred_dets (np.ndarray, [N, 6]): 'x,y,w,h,score,cls_id' - pred_xyxys (np.ndarray, [N, 4]): 'x1,y1,x2,y2' - ''' - # preprocess - if add_timer: - self.det_times.preprocess_time_s.start() - inputs = self.preprocess(image_path) - - input_names = self.predictor.get_input_names() - for i in range(len(input_names)): - input_tensor = self.predictor.get_input_handle(input_names[i]) - input_tensor.copy_from_cpu(inputs[input_names[i]]) - if add_timer: - self.det_times.preprocess_time_s.end() - self.det_times.inference_time_s.start() - - np_score_list, np_boxes_list = [], [] + det_results['boxes'] = np.zeros((1, 6), dtype=np.float32) + det_results['embeddings'] = None + return det_results - # model prediction - for i in range(repeats): - self.predictor.run() - np_score_list.clear() - np_boxes_list.clear() - output_names = self.predictor.get_output_names() - num_outs = int(len(output_names) / 2) - for out_idx in range(num_outs): - np_score_list.append( - self.predictor.get_output_handle(output_names[out_idx]) - .copy_to_cpu()) - np_boxes_list.append( - self.predictor.get_output_handle(output_names[ - out_idx + num_outs]).copy_to_cpu()) - if add_timer: - self.det_times.inference_time_s.end(repeats=repeats) - self.det_times.postprocess_time_s.start() - - # postprocess - self.picodet_postprocess = PicoDetPostProcess( - inputs['image'].shape[2:], - inputs['im_shape'], - inputs['scale_factor'], - strides=self.pred_config.fpn_stride, - nms_threshold=self.pred_config.nms['nms_threshold']) - boxes, boxes_num = self.picodet_postprocess(np_score_list, - np_boxes_list) - - if len(boxes) == 0: - pred_dets = np.zeros((1, 6), dtype=np.float32) - pred_xyxys = np.zeros((1, 4), dtype=np.float32) - else: - pred_dets, pred_xyxys = self.postprocess(boxes, ori_image_shape, - threshold) - if add_timer: - self.det_times.postprocess_time_s.end() - self.det_times.img_num += 1 - - return pred_dets, pred_xyxys - - -class SDE_ReID(object): - """ - ReID of SDE methods - - Args: - pred_config (object): config of model, defined by `Config(model_dir)` - model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml - device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU - run_mode (str): mode of running(paddle/trt_fp32/trt_fp16) - batch_size (int): size of per batch in inference, default 50 means at most - 50 sub images can be made a batch and send into ReID model - trt_min_shape (int): min shape for dynamic shape in trt - trt_max_shape (int): max shape for dynamic shape in trt - trt_opt_shape (int): opt shape for dynamic shape in trt - trt_calib_mode (bool): If the model is produced by TRT offline quantitative - calibration, trt_calib_mode need to set True - cpu_threads (int): cpu threads - enable_mkldnn (bool): whether to open MKLDNN - """ + pred_dets = pred_dets[keep_idx[0]] + pred_xyxys = pred_dets[:, 2:6] - def __init__(self, - pred_config, - model_dir, - device='CPU', - run_mode='paddle', - batch_size=50, - trt_min_shape=1, - trt_max_shape=1088, - trt_opt_shape=608, - trt_calib_mode=False, - cpu_threads=1, - enable_mkldnn=False): - self.pred_config = pred_config - self.predictor, self.config = load_predictor( - model_dir, - run_mode=run_mode, - batch_size=batch_size, - min_subgraph_size=self.pred_config.min_subgraph_size, - device=device, - use_dynamic_shape=self.pred_config.use_dynamic_shape, - trt_min_shape=trt_min_shape, - trt_max_shape=trt_max_shape, - 
trt_opt_shape=trt_opt_shape, - trt_calib_mode=trt_calib_mode, - cpu_threads=cpu_threads, - enable_mkldnn=enable_mkldnn) - self.det_times = Timer() - self.cpu_mem, self.gpu_mem, self.gpu_util = 0, 0, 0 - self.batch_size = batch_size - assert pred_config.tracker, "Tracking model should have tracker" - pt = pred_config.tracker - max_age = pt['max_age'] if 'max_age' in pt else 30 - max_iou_distance = pt[ - 'max_iou_distance'] if 'max_iou_distance' in pt else 0.7 - self.tracker = DeepSORTTracker( - max_age=max_age, max_iou_distance=max_iou_distance) - - def get_crops(self, xyxy, ori_img): w, h = self.tracker.input_size - self.det_times.preprocess_time_s.start() - crops = [] - xyxy = xyxy.astype(np.int64) - ori_img = ori_img.transpose(1, 0, 2) # [h,w,3]->[w,h,3] - for i, bbox in enumerate(xyxy): - crop = ori_img[bbox[0]:bbox[2], bbox[1]:bbox[3], :] - crops.append(crop) - crops = preprocess_reid(crops, w, h) - self.det_times.preprocess_time_s.end() - - return crops - - def preprocess(self, crops): - # to keep fast speed, only use topk crops - crops = crops[:self.batch_size] - inputs = {} - inputs['crops'] = np.array(crops).astype('float32') - return inputs - - def postprocess(self, pred_dets, pred_embs): - tracker = self.tracker - tracker.predict() - online_targets = tracker.update(pred_dets, pred_embs) - - online_tlwhs, online_scores, online_ids = [], [], [] - for t in online_targets: - if not t.is_confirmed() or t.time_since_update > 1: - continue - tlwh = t.to_tlwh() - tscore = t.score - tid = t.track_id - if tlwh[2] * tlwh[3] <= tracker.min_box_area: - continue - if tracker.vertical_ratio > 0 and tlwh[2] / tlwh[ - 3] > tracker.vertical_ratio: - continue - online_tlwhs.append(tlwh) - online_scores.append(tscore) - online_ids.append(tid) - - tracking_outs = { - 'online_tlwhs': online_tlwhs, - 'online_scores': online_scores, - 'online_ids': online_ids, - } - return tracking_outs + crops = get_crops(pred_xyxys, ori_image, w, h) - def postprocess_mtmct(self, pred_dets, pred_embs, frame_id, seq_name): - tracker = self.tracker - tracker.predict() - online_targets = tracker.update(pred_dets, pred_embs) - - online_tlwhs, online_scores, online_ids = [], [], [] - online_tlbrs, online_feats = [], [] - for t in online_targets: - if not t.is_confirmed() or t.time_since_update > 1: - continue - tlwh = t.to_tlwh() - tscore = t.score - tid = t.track_id - if tlwh[2] * tlwh[3] <= tracker.min_box_area: - continue - if tracker.vertical_ratio > 0 and tlwh[2] / tlwh[ - 3] > tracker.vertical_ratio: - continue - online_tlwhs.append(tlwh) - online_scores.append(tscore) - online_ids.append(tid) - - online_tlbrs.append(t.to_tlbr()) - online_feats.append(t.feat) - - tracking_outs = { - 'online_tlwhs': online_tlwhs, - 'online_scores': online_scores, - 'online_ids': online_ids, - 'feat_data': {}, - } - for _tlbr, _id, _feat in zip(online_tlbrs, online_ids, online_feats): - feat_data = {} - feat_data['bbox'] = _tlbr - feat_data['frame'] = f"{frame_id:06d}" - feat_data['id'] = _id - _imgname = f'{seq_name}_{_id}_{frame_id}.jpg' - feat_data['imgname'] = _imgname - feat_data['feat'] = _feat - tracking_outs['feat_data'].update({_imgname: feat_data}) - return tracking_outs + # to keep fast speed, only use topk crops + crops = crops[:50] # reid_batch_size + det_results['crops'] = np.array(crops).astype('float32') + det_results['boxes'] = pred_dets[:50] - def predict(self, - crops, - pred_dets, - repeats=1, - add_timer=True, - MTMCT=False, - frame_id=0, - seq_name=''): - # preprocess - if add_timer: - 
self.det_times.preprocess_time_s.start() - inputs = self.preprocess(crops) - input_names = self.predictor.get_input_names() + input_names = self.reid_predictor.get_input_names() for i in range(len(input_names)): - input_tensor = self.predictor.get_input_handle(input_names[i]) - input_tensor.copy_from_cpu(inputs[input_names[i]]) - - if add_timer: - self.det_times.preprocess_time_s.end() - self.det_times.inference_time_s.start() + input_tensor = self.reid_predictor.get_input_handle(input_names[i]) + input_tensor.copy_from_cpu(det_results[input_names[i]]) # model prediction for i in range(repeats): - self.predictor.run() - output_names = self.predictor.get_output_names() - feature_tensor = self.predictor.get_output_handle(output_names[0]) + self.reid_predictor.run() + output_names = self.reid_predictor.get_output_names() + feature_tensor = self.reid_predictor.get_output_handle(output_names[0]) pred_embs = feature_tensor.copy_to_cpu() - if add_timer: - self.det_times.inference_time_s.end(repeats=repeats) - self.det_times.postprocess_time_s.start() - - # postprocess - if MTMCT == False: - tracking_outs = self.postprocess(pred_dets, pred_embs) - else: - tracking_outs = self.postprocess_mtmct(pred_dets, pred_embs, - frame_id, seq_name) - if add_timer: - self.det_times.postprocess_time_s.end() - self.det_times.img_num += 1 - - return tracking_outs + det_results['embeddings'] = pred_embs + return det_results + + def tracking(self, det_results): + pred_dets = det_results['boxes'] + pred_embs = det_results.get('embeddings', None) + + if self.use_reid: + # use DeepSORTTracker, only support singe class + self.tracker.predict() + online_targets = self.tracker.update(pred_dets, pred_embs) + online_tlwhs, online_scores, online_ids = [], [], [] + if self.do_mtmct: + online_tlbrs, online_feats = [], [] + for t in online_targets: + if not t.is_confirmed() or t.time_since_update > 1: + continue + tlwh = t.to_tlwh() + tscore = t.score + tid = t.track_id + if self.tracker.vertical_ratio > 0 and tlwh[2] / tlwh[ + 3] > self.tracker.vertical_ratio: + continue + online_tlwhs.append(tlwh) + online_scores.append(tscore) + online_ids.append(tid) + if self.do_mtmct: + online_tlbrs.append(t.to_tlbr()) + online_feats.append(t.feat) + + tracking_outs = { + 'online_tlwhs': online_tlwhs, + 'online_scores': online_scores, + 'online_ids': online_ids, + } + if self.do_mtmct: + seq_name = det_results['seq_name'] + frame_id = det_results['frame_id'] + + tracking_outs['feat_data'] = {} + for _tlbr, _id, _feat in zip(online_tlbrs, online_ids, online_feats): + feat_data = {} + feat_data['bbox'] = _tlbr + feat_data['frame'] = f"{frame_id:06d}" + feat_data['id'] = _id + _imgname = f'{seq_name}_{_id}_{frame_id}.jpg' + feat_data['imgname'] = _imgname + feat_data['feat'] = _feat + tracking_outs['feat_data'].update({_imgname: feat_data}) -def predict_image(detector, - reid_model, - image_list, - threshold, - output_dir, - scaled=True, - save_images=True, - run_benchmark=False): - image_list.sort() - for i, img_file in enumerate(image_list): - frame = cv2.imread(img_file) - ori_image_shape = list(frame.shape[:2]) - if run_benchmark: - # warmup - pred_dets, pred_xyxys = detector.predict( - [img_file], - ori_image_shape, - threshold, - scaled, - repeats=10, - add_timer=False) - # run benchmark - pred_dets, pred_xyxys = detector.predict( - [img_file], - ori_image_shape, - threshold, - scaled, - repeats=10, - add_timer=True) - - cm, gm, gu = get_current_memory_mb() - detector.cpu_mem += cm - detector.gpu_mem += gm - detector.gpu_util += gu - 
print('Test iter {}, file name:{}'.format(i, img_file)) else: - pred_dets, pred_xyxys = detector.predict( - [img_file], ori_image_shape, threshold, scaled) + # use ByteTracker, support multiple class + online_tlwhs = defaultdict(list) + online_scores = defaultdict(list) + online_ids = defaultdict(list) + online_targets_dict = self.tracker.update(pred_dets, pred_embs) + for cls_id in range(self.num_classes): + online_targets = online_targets_dict[cls_id] + for t in online_targets: + tlwh = t.tlwh + tid = t.track_id + tscore = t.score + if tlwh[2] * tlwh[3] <= self.tracker.min_box_area: + continue + if self.tracker.vertical_ratio > 0 and tlwh[2] / tlwh[ + 3] > self.tracker.vertical_ratio: + continue + online_tlwhs[cls_id].append(tlwh) + online_ids[cls_id].append(tid) + online_scores[cls_id].append(tscore) + tracking_outs = { + 'online_tlwhs': online_tlwhs, + 'online_scores': online_scores, + 'online_ids': online_ids, + } + return tracking_outs - if len(pred_dets) == 1 and np.sum(pred_dets) == 0: - print('Frame {} has no object, try to modify score threshold.'. - format(i)) - online_im = frame + def predict_image(self, + image_list, + run_benchmark=False, + repeats=1, + visual=True, + seq_name=None): + num_classes = self.num_classes + image_list.sort() + ids2names = self.pred_config.labels + if self.do_mtmct: + mot_features_dict = {} # cid_tid_fid feats else: - # reid process - crops = reid_model.get_crops(pred_xyxys, frame) - + mot_results = [] + for frame_id, img_file in enumerate(image_list): + if self.do_mtmct: + if frame_id % 10 == 0: + print('Tracking frame: %d' % (frame_id)) + batch_image_list = [img_file] # bs=1 in MOT model + frame, _ = decode_image(img_file, {}) if run_benchmark: - # warmup - tracking_outs = reid_model.predict( - crops, pred_dets, repeats=10, add_timer=False) - # run benchmark - tracking_outs = reid_model.predict( - crops, pred_dets, repeats=10, add_timer=True) + # preprocess + inputs = self.preprocess(batch_image_list) # warmup + self.det_times.preprocess_time_s.start() + inputs = self.preprocess(batch_image_list) + self.det_times.preprocess_time_s.end() + + # model prediction + result_warmup = self.predict(repeats=repeats) # warmup + self.det_times.inference_time_s.start() + result = self.predict(repeats=repeats) + self.det_times.inference_time_s.end(repeats=repeats) + + # postprocess + result_warmup = self.postprocess(inputs, result) # warmup + self.det_times.postprocess_time_s.start() + det_result = self.postprocess(inputs, result) + self.det_times.postprocess_time_s.end() + + # tracking + if self.use_reid: + det_result['frame_id'] = frame_id + det_result['seq_name'] = seq_name + det_result['ori_image'] = frame + det_result = self.reidprocess(det_result) + result_warmup = self.tracking(det_result) + self.det_times.tracking_time_s.start() + if self.use_reid: + det_result = self.reidprocess(det_result) + tracking_outs = self.tracking(det_result) + self.det_times.tracking_time_s.end() + self.det_times.img_num += 1 + + cm, gm, gu = get_current_memory_mb() + self.cpu_mem += cm + self.gpu_mem += gm + self.gpu_util += gu else: - tracking_outs = reid_model.predict(crops, pred_dets) - - online_tlwhs = tracking_outs['online_tlwhs'] - online_scores = tracking_outs['online_scores'] - online_ids = tracking_outs['online_ids'] - - online_im = plot_tracking( - frame, online_tlwhs, online_ids, online_scores, frame_id=i) - - if save_images: - if not os.path.exists(output_dir): - os.makedirs(output_dir) - img_name = os.path.split(img_file)[-1] - out_path = os.path.join(output_dir, 
img_name) - cv2.imwrite(out_path, online_im) - print("save result to: " + out_path) - - -def predict_video(detector, - reid_model, - video_file, - scaled, - threshold, - output_dir, - save_images=True, - save_mot_txts=True, - draw_center_traj=False, - secs_interval=10, - do_entrance_counting=False, - camera_id=-1): - video_name = 'mot_output.mp4' - if camera_id != -1: - capture = cv2.VideoCapture(camera_id) - else: - capture = cv2.VideoCapture(video_file) - video_name = os.path.split(video_file)[-1] - - # Get Video info : resolution, fps, frame count - width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)) - height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)) - fps = int(capture.get(cv2.CAP_PROP_FPS)) - frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT)) - print("fps: %d, frame_count: %d" % (fps, frame_count)) - - if not os.path.exists(output_dir): - os.makedirs(output_dir) - out_path = os.path.join(output_dir, video_name) - if not save_images: - video_format = 'mp4v' - fourcc = cv2.VideoWriter_fourcc(*video_format) - writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height)) - frame_id = 0 - timer = MOTTimer() - results = defaultdict(list) - id_set = set() - interval_id_set = set() - in_id_list = list() - out_id_list = list() - prev_center = dict() - records = list() - entrance = [0, height / 2., width, height / 2.] - video_fps = fps - - while (1): - ret, frame = capture.read() - if not ret: - break - timer.tic() - ori_image_shape = list(frame.shape[:2]) - pred_dets, pred_xyxys = detector.predict([frame], ori_image_shape, - threshold, scaled) - - if len(pred_dets) == 1 and np.sum(pred_dets) == 0: - print('Frame {} has no object, try to modify score threshold.'. - format(frame_id)) - timer.toc() - im = frame - else: - # reid process - crops = reid_model.get_crops(pred_xyxys, frame) - tracking_outs = reid_model.predict(crops, pred_dets) + self.det_times.preprocess_time_s.start() + inputs = self.preprocess(batch_image_list) + self.det_times.preprocess_time_s.end() + + self.det_times.inference_time_s.start() + result = self.predict() + self.det_times.inference_time_s.end() + + self.det_times.postprocess_time_s.start() + det_result = self.postprocess(inputs, result) + self.det_times.postprocess_time_s.end() + + # tracking process + self.det_times.tracking_time_s.start() + if self.use_reid: + det_result['frame_id'] = frame_id + det_result['seq_name'] = seq_name + det_result['ori_image'] = frame + det_result = self.reidprocess(det_result) + tracking_outs = self.tracking(det_result) + self.det_times.tracking_time_s.end() + self.det_times.img_num += 1 online_tlwhs = tracking_outs['online_tlwhs'] online_scores = tracking_outs['online_scores'] online_ids = tracking_outs['online_ids'] + + if self.do_mtmct: + feat_data_dict = tracking_outs['feat_data'] + mot_features_dict = dict(mot_features_dict, **feat_data_dict) + else: + mot_results.append([online_tlwhs, online_scores, online_ids]) + + if visual: + if frame_id % 10 == 0: + print('Tracking frame {}'.format(frame_id)) + frame, _ = decode_image(img_file, {}) + if num_classes == 1: + im = plot_tracking( + frame, + online_tlwhs, + online_ids, + online_scores, + frame_id=frame_id) + else: + im = plot_tracking_dict( + frame, + num_classes, + online_tlwhs, + online_ids, + online_scores, + frame_id=frame_id, + ids2names=[]) + save_dir = os.path.join(self.output_dir, seq_name) + if not os.path.exists(save_dir): + os.makedirs(save_dir) + cv2.imwrite( + os.path.join(save_dir, '{:05d}.jpg'.format(frame_id)), im) + + if self.do_mtmct: + return 
mot_features_dict
+        else:
+            return mot_results
 
-        results[0].append(
-            (frame_id + 1, online_tlwhs, online_scores, online_ids))
-        # NOTE: just implement flow statistic for one class
-        result = (frame_id + 1, online_tlwhs, online_scores, online_ids)
-        statistic = flow_statistic(
-            result, secs_interval, do_entrance_counting, video_fps,
-            entrance, id_set, interval_id_set, in_id_list, out_id_list,
-            prev_center, records)
-        id_set = statistic['id_set']
-        interval_id_set = statistic['interval_id_set']
-        in_id_list = statistic['in_id_list']
-        out_id_list = statistic['out_id_list']
-        prev_center = statistic['prev_center']
-        records = statistic['records']
+    def predict_video(self, video_file, camera_id):
+        video_out_name = 'output.mp4'
+        if camera_id != -1:
+            capture = cv2.VideoCapture(camera_id)
+        else:
+            capture = cv2.VideoCapture(video_file)
+            video_out_name = os.path.split(video_file)[-1]
+        # Get video info: resolution, fps, frame count
+        width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
+        height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        fps = int(capture.get(cv2.CAP_PROP_FPS))
+        frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
+        print("fps: %d, frame_count: %d" % (fps, frame_count))
+
+        if not os.path.exists(self.output_dir):
+            os.makedirs(self.output_dir)
+        out_path = os.path.join(self.output_dir, video_out_name)
+        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+        writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
+
+        frame_id = 1
+        timer = MOTTimer()
+        results = defaultdict(list)  # supports single and multiple classes
+        num_classes = self.num_classes
+        while (1):
+            ret, frame = capture.read()
+            if not ret:
+                break
+            if frame_id % 10 == 0:
+                print('Tracking frame: %d' % (frame_id))
+            frame_id += 1
+            timer.tic()
+            seq_name = video_out_name.split('.')[0]
+            mot_results = self.predict_image([frame], visual=False, seq_name=seq_name)
             timer.toc()
+            online_tlwhs, online_scores, online_ids = mot_results[0]  # bs=1 in MOT model
             fps = 1. / timer.duration
-            im = plot_tracking(
-                frame,
-                online_tlwhs,
-                online_ids,
-                online_scores,
-                frame_id=frame_id,
-                fps=fps,
-                do_entrance_counting=do_entrance_counting,
-                entrance=entrance)
-
-            if save_images:
-                save_dir = os.path.join(output_dir, video_name.split('.')[-2])
-                if not os.path.exists(save_dir):
-                    os.makedirs(save_dir)
-                cv2.imwrite(
-                    os.path.join(save_dir, '{:05d}.jpg'.format(frame_id)), im)
-            else:
-                writer.write(im)
-
-            frame_id += 1
-            print('detect frame:%d, fps: %f' % (frame_id, fps))
-
-            if camera_id != -1:
-                cv2.imshow('Tracking Detection', im)
-                if cv2.waitKey(1) & 0xFF == ord('q'):
-                    break
+            if num_classes == 1 and self.use_reid:
+                # use DeepSORTTracker, only supports single class
+                results[0].append((frame_id + 1, online_tlwhs, online_scores, online_ids))
+                im = plot_tracking(
+                    frame,
+                    online_tlwhs,
+                    online_ids,
+                    online_scores,
+                    frame_id=frame_id,
+                    fps=fps)
+            else:
+                # use ByteTracker, supports multiple classes
+                for cls_id in range(num_classes):
+                    results[cls_id].append(
+                        (frame_id + 1, online_tlwhs[cls_id], online_scores[cls_id],
+                         online_ids[cls_id]))
+                im = plot_tracking_dict(
+                    frame,
+                    num_classes,
+                    online_tlwhs,
+                    online_ids,
+                    online_scores,
+                    frame_id=frame_id,
+                    fps=fps,
+                    ids2names=[])
 
-        if save_mot_txts:
-            result_filename = os.path.join(output_dir,
-                                           video_name.split('.')[-2] + '.txt')
-            write_mot_results(result_filename, results)
-
-            result_filename = os.path.join(
-                output_dir, video_name.split('.')[-2] + '_flow_statistic.txt')
-            f = open(result_filename, 'w')
-            for line in records:
-                f.write(line)
-            print('Flow statistic save in {}'.format(result_filename))
-            f.close()
-
-        if save_images:
-            save_dir = os.path.join(output_dir, video_name.split('.')[-2])
-            cmd_str = 'ffmpeg -f image2 -i {}/%05d.jpg {}'.format(save_dir,
-                                                                  out_path)
-            os.system(cmd_str)
-            print('Save video in {}.'.format(out_path))
-        else:
+            writer.write(im)
+            if camera_id != -1:
+                cv2.imshow('Tracking Detection', im)
+                if cv2.waitKey(1) & 0xFF == ord('q'):
+                    break
         writer.release()
 
+    def predict_mtmct(self, mtmct_dir, mtmct_cfg):
+        cameras_bias = mtmct_cfg['cameras_bias']
+        cid_bias = parse_bias(cameras_bias)
+        scene_cluster = list(cid_bias.keys())
+        # 1. zone related parameters
+        use_zone = mtmct_cfg.get('use_zone', False)
+        zone_path = mtmct_cfg.get('zone_path', None)
 
-def predict_mtmct_seq(detector,
-                      reid_model,
-                      mtmct_dir,
-                      seq_name,
-                      scaled,
-                      threshold,
-                      output_dir,
-                      save_images=True,
-                      save_mot_txts=True):
-    fpath = os.path.join(mtmct_dir, seq_name)
-    if os.path.exists(os.path.join(fpath, 'img1')):
-        fpath = os.path.join(fpath, 'img1')
-
-    assert os.path.isdir(fpath), '{} should be a directory'.format(fpath)
-    image_list = os.listdir(fpath)
-    image_list.sort()
-    assert len(image_list) > 0, '{} has no images.'.format(fpath)
-
-    results = defaultdict(list)
-    mot_features_dict = {}  # cid_tid_fid feats
-    print('Totally {} frames found in seq {}.'.format(
-        len(image_list), seq_name))
-
-    for frame_id, img_file in enumerate(image_list):
-        if frame_id % 10 == 0:
-            print('Processing frame {} of seq {}.'.format(frame_id, seq_name))
-        frame = cv2.imread(os.path.join(fpath, img_file))
-        ori_image_shape = list(frame.shape[:2])
-        frame_path = os.path.join(fpath, img_file)
-        pred_dets, pred_xyxys = detector.predict([frame_path], ori_image_shape,
-                                                 threshold, scaled)
-
-        if len(pred_dets) == 1 and np.sum(pred_dets) == 0:
-            print('Frame {} has no object, try to modify score threshold.'.
-                format(frame_id))
-            online_im = frame
-        else:
-            # reid process
-            crops = reid_model.get_crops(pred_xyxys, frame)
+        # 2. tricks parameters, can be used for other mtmct datasets
+        use_ff = mtmct_cfg.get('use_ff', False)
+        use_rerank = mtmct_cfg.get('use_rerank', False)
 
-            tracking_outs = reid_model.predict(
-                crops,
-                pred_dets,
-                MTMCT=True,
-                frame_id=frame_id,
-                seq_name=seq_name)
+        # 3. camera related parameters
+        use_camera = mtmct_cfg.get('use_camera', False)
+        use_st_filter = mtmct_cfg.get('use_st_filter', False)
 
-            feat_data_dict = tracking_outs['feat_data']
-            mot_features_dict = dict(mot_features_dict, **feat_data_dict)
+        # 4. roi related parameters
+        use_roi = mtmct_cfg.get('use_roi', False)
+        roi_dir = mtmct_cfg.get('roi_dir', False)
 
-            online_tlwhs = tracking_outs['online_tlwhs']
-            online_scores = tracking_outs['online_scores']
-            online_ids = tracking_outs['online_ids']
+        mot_list_breaks = []
+        cid_tid_dict = dict()
 
-            online_im = plot_tracking(frame, online_tlwhs, online_ids,
-                                      online_scores, frame_id)
-        results[0].append(
-            (frame_id + 1, online_tlwhs, online_scores, online_ids))
-
-        if save_images:
-            save_dir = os.path.join(output_dir, seq_name)
-            if not os.path.exists(save_dir): os.makedirs(save_dir)
-            img_name = os.path.split(img_file)[-1]
-            out_path = os.path.join(save_dir, img_name)
-            cv2.imwrite(out_path, online_im)
-
-    if save_mot_txts:
-        result_filename = os.path.join(output_dir, seq_name + '.txt')
-        write_mot_results(result_filename, results)
-
-    return mot_features_dict
-
-
-def predict_mtmct(detector,
-                  reid_model,
-                  mtmct_dir,
-                  mtmct_cfg,
-                  scaled,
-                  threshold,
-                  output_dir,
-                  save_images=True,
-                  save_mot_txts=True):
-    MTMCT = mtmct_cfg['MTMCT']
-    assert MTMCT == True, 'predict_mtmct should be used for MTMCT.'
-
-    cameras_bias = mtmct_cfg['cameras_bias']
-    cid_bias = parse_bias(cameras_bias)
-    scene_cluster = list(cid_bias.keys())
-
-    # 1.zone releated parameters
-    use_zone = mtmct_cfg['use_zone']
-    zone_path = mtmct_cfg['zone_path']
-
-    # 2.tricks parameters, can be used for other mtmct dataset
-    use_ff = mtmct_cfg['use_ff']
-    use_rerank = mtmct_cfg['use_rerank']
-
-    # 3.camera releated parameters
-    use_camera = mtmct_cfg['use_camera']
-    use_st_filter = mtmct_cfg['use_st_filter']
-
-    # 4.zone releated parameters
-    use_roi = mtmct_cfg['use_roi']
-    roi_dir = mtmct_cfg['roi_dir']
-
-    mot_list_breaks = []
-    cid_tid_dict = dict()
-
-    if not os.path.exists(output_dir): os.makedirs(output_dir)
-
-    seqs = os.listdir(mtmct_dir)
-    seqs.sort()
-
-    for seq in seqs:
-        fpath = os.path.join(mtmct_dir, seq)
-        if os.path.isfile(fpath) and _is_valid_video(fpath):
-            ext = seq.split('.')[-1]
-            seq = seq.split('.')[-2]
-            print('ffmpeg processing of video {}'.format(fpath))
-            frames_path = video2frames(
-                video_path=fpath, outpath=mtmct_dir, frame_rate=25)
-            fpath = os.path.join(mtmct_dir, seq)
+        output_dir = self.output_dir
+        if not os.path.exists(output_dir):
+            os.makedirs(output_dir)
 
-        if os.path.isdir(fpath) == False:
-            print('{} is not a image folder.'.format(fpath))
-            continue
-
-        mot_features_dict = predict_mtmct_seq(
-            detector, reid_model, mtmct_dir, seq, scaled, threshold, output_dir,
-            save_images, save_mot_txts)
-
-        cid = int(re.sub('[a-z,A-Z]', "", seq))
-        tid_data, mot_list_break = trajectory_fusion(
-            mot_features_dict,
-            cid,
-            cid_bias,
-            use_zone=use_zone,
-            zone_path=zone_path)
-        mot_list_breaks.append(mot_list_break)
-        # single seq process
-        for line in tid_data:
-            tracklet = tid_data[line]
-            tid = tracklet['tid']
-            if (cid, tid) not in cid_tid_dict:
-                cid_tid_dict[(cid, tid)] = tracklet
-
-    map_tid = sub_cluster(
-        cid_tid_dict,
-        scene_cluster,
-        use_ff=use_ff,
-        use_rerank=use_rerank,
-        use_camera=use_camera,
-        use_st_filter=use_st_filter)
-
-    pred_mtmct_file = os.path.join(output_dir, 'mtmct_result.txt')
-    if use_camera:
-        gen_res(pred_mtmct_file, scene_cluster, map_tid, mot_list_breaks)
-    else:
-        gen_res(
-            pred_mtmct_file,
+        seqs = os.listdir(mtmct_dir)
+        for seq in sorted(seqs):
+            fpath = os.path.join(mtmct_dir, seq)
+            if os.path.isfile(fpath) and _is_valid_video(fpath):
+                seq = seq.split('.')[-2]
+                print('ffmpeg processing of video {}'.format(fpath))
+                frames_path = video2frames(
+                    video_path=fpath, outpath=mtmct_dir, frame_rate=25)
+                fpath = os.path.join(mtmct_dir, seq)
+
+            if not os.path.isdir(fpath):
+                print('{} is not an image folder.'.format(fpath))
+                continue
+            if os.path.exists(os.path.join(fpath, 'img1')):
+                fpath = os.path.join(fpath, 'img1')
+            assert os.path.isdir(fpath), '{} should be a directory'.format(fpath)
+            image_list = glob.glob(os.path.join(fpath, '*.jpg'))
+            image_list.sort()
+            assert len(image_list) > 0, '{} has no images.'.format(fpath)
+            print('start tracking seq: {}'.format(seq))
+
+            mot_features_dict = self.predict_image(image_list, visual=False, seq_name=seq)
+
+            cid = int(re.sub('[a-z,A-Z]', "", seq))
+            tid_data, mot_list_break = trajectory_fusion(
+                mot_features_dict,
+                cid,
+                cid_bias,
+                use_zone=use_zone,
+                zone_path=zone_path)
+            mot_list_breaks.append(mot_list_break)
+            # single seq process
+            for line in tid_data:
+                tracklet = tid_data[line]
+                tid = tracklet['tid']
+                if (cid, tid) not in cid_tid_dict:
+                    cid_tid_dict[(cid, tid)] = tracklet
+
+        map_tid = sub_cluster(
+            cid_tid_dict,
             scene_cluster,
-            map_tid,
-            mot_list_breaks,
-            use_roi=use_roi,
-            roi_dir=roi_dir)
+            use_ff=use_ff,
+            use_rerank=use_rerank,
+            use_camera=use_camera,
+            use_st_filter=use_st_filter)
+
+        pred_mtmct_file = os.path.join(output_dir, 'mtmct_result.txt')
+        if use_camera:
+            gen_res(pred_mtmct_file, scene_cluster, map_tid, mot_list_breaks)
+        else:
+            gen_res(
+                pred_mtmct_file,
+                scene_cluster,
+                map_tid,
+                mot_list_breaks,
+                use_roi=use_roi,
+                roi_dir=roi_dir)
 
-    if FLAGS.save_images:
         camera_results, cid_tid_fid_res = get_mtmct_matching_results(
             pred_mtmct_file)
@@ -942,160 +559,55 @@ def predict_mtmct(detector,
             save_dir=save_dir,
             save_videos=FLAGS.save_images)
 
-    # evalution metrics
-    data_root_gt = os.path.join(mtmct_dir, '..', 'gt', 'gt.txt')
-    if os.path.exists(data_root_gt):
-        print_mtmct_result(data_root_gt, pred_mtmct_file)
-
-
-def predict_naive(model_dir,
-                  reid_model_dir,
-                  video_file,
-                  image_dir,
-                  mtmct_dir=None,
-                  mtmct_cfg=None,
-                  scaled=True,
-                  device='gpu',
-                  threshold=0.5,
-                  output_dir='output'):
-    pred_config = PredictConfig(model_dir)
-    detector_func = 'SDE_Detector'
-    if pred_config.arch == 'PicoDet':
-        detector_func = 'SDE_DetectorPicoDet'
-    detector = eval(detector_func)(pred_config, model_dir, device=device)
-
-    pred_config = PredictConfig(reid_model_dir)
-    reid_model = SDE_ReID(pred_config, reid_model_dir, device=device)
-
-    if video_file is not None:
-        predict_video(
-            detector,
-            reid_model,
-            video_file,
-            scaled=scaled,
-            threshold=threshold,
-            output_dir=output_dir,
-            save_images=True,
-            save_mot_txts=True,
-            draw_center_traj=False,
-            secs_interval=10,
-            do_entrance_counting=False)
-    elif mtmct_dir is not None:
-        with open(mtmct_cfg) as f:
-            mtmct_cfg_file = yaml.safe_load(f)
-        predict_mtmct(
-            detector,
-            reid_model,
-            mtmct_dir,
-            mtmct_cfg_file,
-            scaled=scaled,
-            threshold=threshold,
-
output_dir=output_dir, - save_images=True, - save_mot_txts=True) - else: - img_list = get_test_images(image_dir, infer_img=None) - predict_image( - detector, - reid_model, - img_list, - threshold=threshold, - output_dir=output_dir, - save_images=True) - def main(): - pred_config = PredictConfig(FLAGS.model_dir) - detector_func = 'SDE_Detector' - if pred_config.arch == 'PicoDet': - detector_func = 'SDE_DetectorPicoDet' - - detector = eval(detector_func)(pred_config, - FLAGS.model_dir, - device=FLAGS.device, - run_mode=FLAGS.run_mode, - batch_size=FLAGS.batch_size, - trt_min_shape=FLAGS.trt_min_shape, - trt_max_shape=FLAGS.trt_max_shape, - trt_opt_shape=FLAGS.trt_opt_shape, - trt_calib_mode=FLAGS.trt_calib_mode, - cpu_threads=FLAGS.cpu_threads, - enable_mkldnn=FLAGS.enable_mkldnn) - - pred_config = PredictConfig(FLAGS.reid_model_dir) - reid_model = SDE_ReID( - pred_config, - FLAGS.reid_model_dir, + deploy_file = os.path.join(FLAGS.model_dir, 'infer_cfg.yml') + with open(deploy_file) as f: + yml_conf = yaml.safe_load(f) + arch = yml_conf['arch'] + detector = SDE_Detector( + FLAGS.model_dir, + FLAGS.tracker_config, device=FLAGS.device, run_mode=FLAGS.run_mode, - batch_size=FLAGS.reid_batch_size, + batch_size=FLAGS.batch_size, trt_min_shape=FLAGS.trt_min_shape, trt_max_shape=FLAGS.trt_max_shape, trt_opt_shape=FLAGS.trt_opt_shape, trt_calib_mode=FLAGS.trt_calib_mode, cpu_threads=FLAGS.cpu_threads, - enable_mkldnn=FLAGS.enable_mkldnn) + enable_mkldnn=FLAGS.enable_mkldnn, + threshold=FLAGS.threshold, + output_dir=FLAGS.output_dir, + reid_model_dir=FLAGS.reid_model_dir, + mtmct_dir=FLAGS.mtmct_dir, + ) # predict from video file or camera video stream if FLAGS.video_file is not None or FLAGS.camera_id != -1: - predict_video( - detector, - reid_model, - FLAGS.video_file, - scaled=FLAGS.scaled, - threshold=FLAGS.threshold, - output_dir=FLAGS.output_dir, - save_images=FLAGS.save_images, - save_mot_txts=FLAGS.save_mot_txts, - draw_center_traj=FLAGS.draw_center_traj, - secs_interval=FLAGS.secs_interval, - do_entrance_counting=FLAGS.do_entrance_counting, - camera_id=FLAGS.camera_id) - + detector.predict_video(FLAGS.video_file, FLAGS.camera_id) elif FLAGS.mtmct_dir is not None: - mtmct_cfg_file = FLAGS.mtmct_cfg - with open(mtmct_cfg_file) as f: + with open(FLAGS.mtmct_cfg) as f: mtmct_cfg = yaml.safe_load(f) - predict_mtmct( - detector, - reid_model, - FLAGS.mtmct_dir, - mtmct_cfg, - scaled=FLAGS.scaled, - threshold=FLAGS.threshold, - output_dir=FLAGS.output_dir, - save_images=FLAGS.save_images, - save_mot_txts=FLAGS.save_mot_txts) + detector.predict_mtmct(FLAGS.mtmct_dir, mtmct_cfg) else: # predict from image + if FLAGS.image_dir is None and FLAGS.image_file is not None: + assert FLAGS.batch_size == 1, "--batch_size should be 1 in MOT models." 
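+        # each image folder is treated as one tracking sequence; its basename
+        # is reused as seq_name when saving visualized results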
         img_list = get_test_images(FLAGS.image_dir, FLAGS.image_file)
-        predict_image(
-            detector,
-            reid_model,
-            img_list,
-            threshold=FLAGS.threshold,
-            output_dir=FLAGS.output_dir,
-            save_images=FLAGS.save_images,
-            run_benchmark=FLAGS.run_benchmark)
+        seq_name = FLAGS.image_dir.split('/')[-1]
+        detector.predict_image(img_list, FLAGS.run_benchmark, repeats=10, seq_name=seq_name)
 
         if not FLAGS.run_benchmark:
             detector.det_times.info(average=True)
-            reid_model.det_times.info(average=True)
         else:
             mode = FLAGS.run_mode
-            det_model_dir = FLAGS.model_dir
-            det_model_info = {
-                'model_name': det_model_dir.strip('/').split('/')[-1],
-                'precision': mode.split('_')[-1]
-            }
-            bench_log(detector, img_list, det_model_info, name='Det')
-
-            reid_model_dir = FLAGS.reid_model_dir
-            reid_model_info = {
-                'model_name': reid_model_dir.strip('/').split('/')[-1],
+            model_dir = FLAGS.model_dir
+            model_info = {
+                'model_name': model_dir.strip('/').split('/')[-1],
                 'precision': mode.split('_')[-1]
             }
-            bench_log(reid_model, img_list, reid_model_info, name='ReID')
+            bench_log(detector, img_list, model_info, name='MOT')
 
 
 if __name__ == '__main__':
diff --git a/deploy/pptracking/python/tracker_config.yml b/deploy/pptracking/python/tracker_config.yml
new file mode 100644
index 0000000000000000000000000000000000000000..d92510148ec175d9dd7c19fd191e43f13cebe2ce
--- /dev/null
+++ b/deploy/pptracking/python/tracker_config.yml
@@ -0,0 +1,10 @@
+# Config of the tracker for the MOT SDE Detector; ByteTracker is used by default.
+# The tracker of the MOT JDE Detector is exported together with the model.
+# 'min_box_area' and 'vertical_ratio' below are set for pedestrians; modify them when tracking other objects.
+tracker:
+  use_byte: true
+  conf_thres: 0.6
+  low_conf_thres: 0.1
+  match_thres: 0.9
+  min_box_area: 100
+  vertical_ratio: 1.6
diff --git a/deploy/pptracking/python/utils.py b/deploy/pptracking/python/utils.py
index 192b880ca151d93d19f178e2cf9f7b0da4b78c65..a4eddfe1b6e4893321448a52c07fe909cc4e2441 100644
--- a/deploy/pptracking/python/utils.py
+++ b/deploy/pptracking/python/utils.py
@@ -66,6 +66,11 @@ def argsparser():
         default='cpu',
         help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU."
     )
+    parser.add_argument(
+        "--use_gpu",
+        type=ast.literal_eval,
+        default=False,
+        help="Deprecated, please use `--device`.")
     parser.add_argument(
         "--run_benchmark",
         type=ast.literal_eval,
@@ -104,12 +109,18 @@ def argsparser():
         '--save_mot_txts',
         action='store_true',
         help='Save tracking results (txt).')
+    parser.add_argument(
+        '--save_mot_txt_per_img',
+        action='store_true',
+        help='Save tracking results (txt) for each image.')
     parser.add_argument(
         '--scaled',
         type=bool,
         default=False,
         help="Whether coords after detector outputs are scaled, False in JDE YOLOv3 "
         "True in general detector.")
+    parser.add_argument(
+        "--tracker_config", type=str, default=None, help=("tracker config"))
     parser.add_argument(
         "--reid_model_dir",
         type=str,
@@ -122,20 +133,10 @@ def argsparser():
         default=50,
         help="max batch_size for reid model inference.")
     parser.add_argument(
-        "--do_entrance_counting",
-        action='store_true',
-        help="Whether counting the numbers of identifiers entering "
-        "or getting out from the entrance. Note that only support one-class"
-        "counting, multi-class counting is coming soon.")
-    parser.add_argument(
-        "--secs_interval",
-        type=int,
-        default=2,
-        help="The seconds interval to count after tracking")
-    parser.add_argument(
-        "--draw_center_traj",
-        action='store_true',
-        help="Whether drawing the trajectory of center")
+    parser.add_argument(
+        '--use_dark',
+        type=ast.literal_eval,
+        default=True,
+        help='Whether to use DarkPose to get better keypoint positions.')
     parser.add_argument(
         "--mtmct_dir",
         type=str,
@@ -146,6 +147,7 @@ def argsparser():
 
     return parser
 
+
 class Times(object):
     def __init__(self):
         self.time = 0.
@@ -174,29 +176,36 @@ class Times(object):
 
 
 class Timer(Times):
-    def __init__(self):
+    def __init__(self, with_tracker=False):
         super(Timer, self).__init__()
+        self.with_tracker = with_tracker
         self.preprocess_time_s = Times()
         self.inference_time_s = Times()
         self.postprocess_time_s = Times()
+        self.tracking_time_s = Times()
         self.img_num = 0
 
     def info(self, average=False):
-        total_time = self.preprocess_time_s.value(
-        ) + self.inference_time_s.value() + self.postprocess_time_s.value()
+        pre_time = self.preprocess_time_s.value()
+        infer_time = self.inference_time_s.value()
+        post_time = self.postprocess_time_s.value()
+        track_time = self.tracking_time_s.value()
+
+        total_time = pre_time + infer_time + post_time
+        if self.with_tracker:
+            total_time = total_time + track_time
         total_time = round(total_time, 4)
         print("------------------ Inference Time Info ----------------------")
         print("total_time(ms): {}, img_num: {}".format(total_time * 1000,
                                                        self.img_num))
-        preprocess_time = round(
-            self.preprocess_time_s.value() / max(1, self.img_num),
-            4) if average else self.preprocess_time_s.value()
-        postprocess_time = round(
-            self.postprocess_time_s.value() / max(1, self.img_num),
-            4) if average else self.postprocess_time_s.value()
-        inference_time = round(self.inference_time_s.value() /
-                               max(1, self.img_num),
-                               4) if average else self.inference_time_s.value()
+        preprocess_time = round(pre_time / max(1, self.img_num),
+                                4) if average else pre_time
+        postprocess_time = round(post_time / max(1, self.img_num),
+                                 4) if average else post_time
+        inference_time = round(infer_time / max(1, self.img_num),
+                               4) if average else infer_time
+        tracking_time = round(track_time / max(1, self.img_num),
+                              4) if average else track_time
 
         average_latency = total_time / max(1, self.img_num)
         qps = 0
@@ -204,25 +213,36 @@ class Timer(Times):
             qps = 1 / average_latency
         print("average latency time(ms): {:.2f}, QPS: {:2f}".format(
             average_latency * 1000, qps))
-        print(
-            "preprocess_time(ms): {:.2f}, inference_time(ms): {:.2f}, postprocess_time(ms): {:.2f}".
-            format(preprocess_time * 1000, inference_time * 1000,
-                   postprocess_time * 1000))
+        if self.with_tracker:
+            print(
+                "preprocess_time(ms): {:.2f}, inference_time(ms): {:.2f}, postprocess_time(ms): {:.2f}, tracking_time(ms): {:.2f}".
+                format(preprocess_time * 1000, inference_time * 1000,
+                       postprocess_time * 1000, tracking_time * 1000))
+        else:
+            print(
+                "preprocess_time(ms): {:.2f}, inference_time(ms): {:.2f}, postprocess_time(ms): {:.2f}".
+ format(preprocess_time * 1000, inference_time * 1000, + postprocess_time * 1000)) def report(self, average=False): dic = {} - dic['preprocess_time_s'] = round( - self.preprocess_time_s.value() / max(1, self.img_num), - 4) if average else self.preprocess_time_s.value() - dic['postprocess_time_s'] = round( - self.postprocess_time_s.value() / max(1, self.img_num), - 4) if average else self.postprocess_time_s.value() - dic['inference_time_s'] = round( - self.inference_time_s.value() / max(1, self.img_num), - 4) if average else self.inference_time_s.value() + pre_time = self.preprocess_time_s.value() + infer_time = self.inference_time_s.value() + post_time = self.postprocess_time_s.value() + track_time = self.tracking_time_s.value() + + dic['preprocess_time_s'] = round(pre_time / max(1, self.img_num), + 4) if average else pre_time + dic['inference_time_s'] = round(infer_time / max(1, self.img_num), + 4) if average else infer_time + dic['postprocess_time_s'] = round(post_time / max(1, self.img_num), + 4) if average else post_time dic['img_num'] = self.img_num - total_time = self.preprocess_time_s.value( - ) + self.inference_time_s.value() + self.postprocess_time_s.value() + total_time = pre_time + infer_time + post_time + if self.with_tracker: + dic['tracking_time_s'] = round(track_time / max(1, self.img_num), + 4) if average else track_time + total_time = total_time + track_time dic['total_time_s'] = round(total_time, 4) return dic diff --git a/deploy/python/infer.py b/deploy/python/infer.py index 9b3ec009b9dbfef72371796fa65b5f84c528bfe0..660a2644cf1f9af80f26ac807343dc7ec03be726 100644 --- a/deploy/python/infer.py +++ b/deploy/python/infer.py @@ -31,7 +31,7 @@ sys.path.insert(0, parent_path) from benchmark_utils import PaddleInferBenchmark from picodet_postprocess import PicoDetPostProcess -from preprocess import preprocess, Resize, NormalizeImage, Permute, PadStride, LetterBoxResize, WarpAffine +from preprocess import preprocess, Resize, NormalizeImage, Permute, PadStride, LetterBoxResize, WarpAffine, decode_image from keypoint_preprocess import EvalAffine, TopDownEvalAffine, expand_crop from visualize import visualize_box_mask from utils import argsparser, Timer, get_current_memory_mb diff --git a/deploy/python/mot_keypoint_unite_infer.py b/deploy/python/mot_keypoint_unite_infer.py index dee9c172eec8161f6654529657b63fe2b3c892c3..3eea4bd6b22c148b09be9cf794116725bf6e89d6 100644 --- a/deploy/python/mot_keypoint_unite_infer.py +++ b/deploy/python/mot_keypoint_unite_infer.py @@ -25,7 +25,7 @@ from collections import defaultdict from mot_keypoint_unite_utils import argsparser from preprocess import decode_image from infer import print_arguments, get_test_images -from mot_sde_infer import SDE_Detector, MOT_SDE_SUPPORT_MODELS +from mot_sde_infer import SDE_Detector from mot_jde_infer import JDE_Detector, MOT_JDE_SUPPORT_MODELS from keypoint_infer import KeyPointDetector, KEYPOINT_SUPPORT_MODELS from det_keypoint_unite_infer import predict_with_given_det diff --git a/deploy/python/mot_sde_infer.py b/deploy/python/mot_sde_infer.py index 3b9464561c10b8c7207faea9e0a2f03730909330..3394db188064bd614116c1e49b9c4259d15b9a5c 100644 --- a/deploy/python/mot_sde_infer.py +++ b/deploy/python/mot_sde_infer.py @@ -34,13 +34,6 @@ from pptracking.python.mot import JDETracker from pptracking.python.mot.utils import MOTTimer, write_mot_results from pptracking.python.visualize import plot_tracking, plot_tracking_dict -# Global dictionary -MOT_SDE_SUPPORT_MODELS = { - 'DeepSORT', - 'ByteTrack', - 'YOLO', -} - class 
SDE_Detector(Detector): """ @@ -287,7 +280,6 @@ def main(): with open(deploy_file) as f: yml_conf = yaml.safe_load(f) arch = yml_conf['arch'] - assert arch in MOT_SDE_SUPPORT_MODELS, '{} is not supported.'.format(arch) detector = SDE_Detector( FLAGS.model_dir, FLAGS.tracker_config, diff --git a/deploy/python/utils.py b/deploy/python/utils.py index a0f8e352dd6af22696bf9f157b76aff66199b311..89a68ba45c17f2160d3e178d652734e62222b778 100644 --- a/deploy/python/utils.py +++ b/deploy/python/utils.py @@ -228,11 +228,11 @@ class Timer(Times): 4) if average else infer_time dic['postprocess_time_s'] = round(post_time / max(1, self.img_num), 4) if average else post_time - dic['tracking_time_s'] = round(post_time / max(1, self.img_num), - 4) if average else track_time dic['img_num'] = self.img_num total_time = pre_time + infer_time + post_time if self.with_tracker: + dic['tracking_time_s'] = round(track_time / max(1, self.img_num), + 4) if average else track_time total_time = total_time + track_time dic['total_time_s'] = round(total_time, 4) return dic diff --git a/deploy/python/visualize.py b/deploy/python/visualize.py index 3671ab847d45ffc8d7f5e82e8be7b84a1099ec8a..b82c335f8611560fc08d010cedd22d9660af3aa6 100644 --- a/deploy/python/visualize.py +++ b/deploy/python/visualize.py @@ -1,5 +1,4 @@ -# coding: utf-8 -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/ppdet/engine/export_utils.py b/ppdet/engine/export_utils.py index 496fd07731fbf5c9059a528a0e14c03fa97357cc..9742d9665d48c841c77350f0510031d2c6e937d4 100644 --- a/ppdet/engine/export_utils.py +++ b/ppdet/engine/export_utils.py @@ -41,6 +41,7 @@ TRT_MIN_SUBGRAPH = { 'HigherHRNet': 3, 'HRNet': 3, 'DeepSORT': 3, + 'ByteTrack':10, 'JDE': 10, 'FairMOT': 5, 'GFL': 16, @@ -50,7 +51,7 @@ TRT_MIN_SUBGRAPH = { } KEYPOINT_ARCH = ['HigherHRNet', 'TopDownHRNet'] -MOT_ARCH = ['DeepSORT', 'JDE', 'FairMOT'] +MOT_ARCH = ['DeepSORT', 'JDE', 'FairMOT', 'ByteTrack'] def _prune_input_spec(input_spec, program, targets): diff --git a/ppdet/engine/tracker.py b/ppdet/engine/tracker.py index 6d6407418c0487c54c229fe4d2a61569ff56dfe0..fda38b1841b46076d3d80917c0a030a336e1ba49 100644 --- a/ppdet/engine/tracker.py +++ b/ppdet/engine/tracker.py @@ -29,6 +29,7 @@ from ppdet.core.workspace import create from ppdet.utils.checkpoint import load_weight, load_pretrain_weight from ppdet.modeling.mot.utils import Detection, get_crops, scale_coords, clip_box from ppdet.modeling.mot.utils import MOTTimer, load_det_results, write_mot_results, save_vis_results +from ppdet.modeling.mot.tracker import JDETracker, DeepSORTTracker from ppdet.metrics import Metric, MOTMetric, KITTIMOTMetric from ppdet.metrics import MCMOTMetric @@ -39,6 +40,11 @@ from .callbacks import Callback, ComposeCallback from ppdet.utils.logger import setup_logger logger = setup_logger(__name__) +MOT_ARCH = ['DeepSORT', 'JDE', 'FairMOT', 'ByteTrack'] +MOT_ARCH_JDE = ['JDE', 'FairMOT'] +MOT_ARCH_SDE = ['DeepSORT', 'ByteTrack'] +MOT_DATA_TYPE = ['mot', 'mcmot', 'kitti'] + __all__ = ['Tracker'] @@ -109,11 +115,15 @@ class Tracker(object): load_weight(self.model, weights, self.optimizer) def load_weights_sde(self, det_weights, reid_weights): - if self.model.detector: - load_weight(self.model.detector, det_weights) - load_weight(self.model.reid, reid_weights) + with_detector = self.model.detector is not 
None + with_reid = self.model.reid is not None + + if with_detector: + load_weight(self.model.detector, det_weights, self.optimizer) + if with_reid: + load_weight(self.model.reid, reid_weights) else: - load_weight(self.model.reid, reid_weights, self.optimizer) + load_weight(self.model.reid, reid_weights) def _eval_seq_jde(self, dataloader, @@ -185,18 +195,21 @@ class Tracker(object): if save_dir: if not os.path.exists(save_dir): os.makedirs(save_dir) use_detector = False if not self.model.detector else True + use_reid = False if not self.model.reid else True timer = MOTTimer() results = defaultdict(list) frame_id = 0 self.status['mode'] = 'track' self.model.eval() - self.model.reid.eval() + if use_reid: + self.model.reid.eval() if not use_detector: dets_list = load_det_results(det_file, len(dataloader)) logger.info('Finish loading detection results file {}.'.format( det_file)) + tracker = self.model.tracker for step_id, data in enumerate(dataloader): self.status['step_id'] = step_id if frame_id % 40 == 0: @@ -257,6 +270,8 @@ class Tracker(object): scale_factor) else: pred_bboxes = outs['bbox'][:, 2:] + pred_dets_old = np.concatenate( + (pred_cls_ids, pred_scores, pred_bboxes), axis=1) else: logger.warning( 'Frame {} has not detected object, try to modify score threshold.'. @@ -284,50 +299,80 @@ class Tracker(object): pred_cls_ids = pred_cls_ids[keep_idx[0]] pred_scores = pred_scores[keep_idx[0]] - pred_tlwhs = np.concatenate( - (pred_xyxys[:, 0:2], - pred_xyxys[:, 2:4] - pred_xyxys[:, 0:2] + 1), - axis=1) pred_dets = np.concatenate( - (pred_cls_ids, pred_scores, pred_tlwhs), axis=1) - - tracker = self.model.tracker - crops = get_crops( - pred_xyxys, - ori_image, - w=tracker.input_size[0], - h=tracker.input_size[1]) - crops = paddle.to_tensor(crops) - - data.update({'crops': crops}) - pred_embs = self.model(data).numpy() - - tracker.predict() - online_targets = tracker.update(pred_dets, pred_embs) - - online_tlwhs, online_scores, online_ids = [], [], [] - for t in online_targets: - if not t.is_confirmed() or t.time_since_update > 1: - continue - tlwh = t.to_tlwh() - tscore = t.score - tid = t.track_id - if tscore < draw_threshold: continue - if tlwh[2] * tlwh[3] <= tracker.min_box_area: continue - if tracker.vertical_ratio > 0 and tlwh[2] / tlwh[ - 3] > tracker.vertical_ratio: - continue - online_tlwhs.append(tlwh) - online_scores.append(tscore) - online_ids.append(tid) - timer.toc() + (pred_cls_ids, pred_scores, pred_xyxys), axis=1) + + if use_reid: + crops = get_crops( + pred_xyxys, + ori_image, + w=tracker.input_size[0], + h=tracker.input_size[1]) + crops = paddle.to_tensor(crops) + + data.update({'crops': crops}) + pred_embs = self.model(data).numpy() + else: + pred_embs = None + + if isinstance(tracker, DeepSORTTracker): + online_tlwhs, online_scores, online_ids = [], [], [] + tracker.predict() + online_targets = tracker.update(pred_dets, pred_embs) + for t in online_targets: + if not t.is_confirmed() or t.time_since_update > 1: + continue + tlwh = t.to_tlwh() + tscore = t.score + tid = t.track_id + if tscore < draw_threshold: continue + if tlwh[2] * tlwh[3] <= tracker.min_box_area: continue + if tracker.vertical_ratio > 0 and tlwh[2] / tlwh[ + 3] > tracker.vertical_ratio: + continue + online_tlwhs.append(tlwh) + online_scores.append(tscore) + online_ids.append(tid) + timer.toc() + + # save results + results[0].append( + (frame_id + 1, online_tlwhs, online_scores, online_ids)) + save_vis_results(data, frame_id, online_ids, online_tlwhs, + online_scores, timer.average_time, show_image, 
+ save_dir, self.cfg.num_classes) + + elif isinstance(tracker, JDETracker): + # trick hyperparams only used for MOTChallenge (MOT17, MOT20) Test-set + tracker.track_buffer, tracker.conf_thres = get_trick_hyperparams( + seq_name, tracker.track_buffer, tracker.conf_thres) + + online_targets_dict = tracker.update(pred_dets_old, pred_embs) + online_tlwhs = defaultdict(list) + online_scores = defaultdict(list) + online_ids = defaultdict(list) + for cls_id in range(self.cfg.num_classes): + online_targets = online_targets_dict[cls_id] + for t in online_targets: + tlwh = t.tlwh + tid = t.track_id + tscore = t.score + if tlwh[2] * tlwh[3] <= tracker.min_box_area: continue + if tracker.vertical_ratio > 0 and tlwh[2] / tlwh[ + 3] > tracker.vertical_ratio: + continue + online_tlwhs[cls_id].append(tlwh) + online_ids[cls_id].append(tid) + online_scores[cls_id].append(tscore) + # save results + results[cls_id].append( + (frame_id + 1, online_tlwhs[cls_id], online_scores[cls_id], + online_ids[cls_id])) + timer.toc() + save_vis_results(data, frame_id, online_ids, online_tlwhs, + online_scores, timer.average_time, show_image, + save_dir, self.cfg.num_classes) - # save results - results[0].append( - (frame_id + 1, online_tlwhs, online_scores, online_ids)) - save_vis_results(data, frame_id, online_ids, online_tlwhs, - online_scores, timer.average_time, show_image, - save_dir, self.cfg.num_classes) frame_id += 1 return results, frame_id, timer.average_time, timer.calls @@ -346,10 +391,10 @@ class Tracker(object): if not os.path.exists(output_dir): os.makedirs(output_dir) result_root = os.path.join(output_dir, 'mot_results') if not os.path.exists(result_root): os.makedirs(result_root) - assert data_type in ['mot', 'mcmot', 'kitti'], \ + assert data_type in MOT_DATA_TYPE, \ "data_type should be 'mot', 'mcmot' or 'kitti'" - assert model_type in ['JDE', 'DeepSORT', 'FairMOT'], \ - "model_type should be 'JDE', 'DeepSORT' or 'FairMOT'" + assert model_type in MOT_ARCH, \ + "model_type should be 'JDE', 'DeepSORT', 'FairMOT' or 'ByteTrack'" # run tracking n_frame = 0 @@ -380,13 +425,13 @@ class Tracker(object): result_filename = os.path.join(result_root, '{}.txt'.format(seq)) with paddle.no_grad(): - if model_type in ['JDE', 'FairMOT']: + if model_type in MOT_ARCH_JDE: results, nf, ta, tc = self._eval_seq_jde( dataloader, save_dir=save_dir, show_image=show_image, frame_rate=frame_rate) - elif model_type in ['DeepSORT']: + elif model_type in MOT_ARCH_SDE: results, nf, ta, tc = self._eval_seq_sde( dataloader, save_dir=save_dir, @@ -472,10 +517,10 @@ class Tracker(object): if not os.path.exists(output_dir): os.makedirs(output_dir) result_root = os.path.join(output_dir, 'mot_results') if not os.path.exists(result_root): os.makedirs(result_root) - assert data_type in ['mot', 'mcmot', 'kitti'], \ + assert data_type in MOT_DATA_TYPE, \ "data_type should be 'mot', 'mcmot' or 'kitti'" - assert model_type in ['JDE', 'DeepSORT', 'FairMOT'], \ - "model_type should be 'JDE', 'DeepSORT' or 'FairMOT'" + assert model_type in MOT_ARCH, \ + "model_type should be 'JDE', 'DeepSORT', 'FairMOT' or 'ByteTrack'" # run tracking if video_file: @@ -505,14 +550,14 @@ class Tracker(object): frame_rate = self.dataset.frame_rate with paddle.no_grad(): - if model_type in ['JDE', 'FairMOT']: + if model_type in MOT_ARCH_JDE: results, nf, ta, tc = self._eval_seq_jde( dataloader, save_dir=save_dir, show_image=show_image, frame_rate=frame_rate, draw_threshold=draw_threshold) - elif model_type in ['DeepSORT']: + elif model_type in MOT_ARCH_SDE: results, 
                    dataloader,
                    save_dir=save_dir,
@@ -536,3 +581,32 @@ class Tracker(object):
         write_mot_results(result_filename, results, data_type,
                           self.cfg.num_classes)

+
+def get_trick_hyperparams(video_name, ori_buffer, ori_thresh):
+    if video_name[:3] != 'MOT':
+        # only used for MOTChallenge (MOT17, MOT20) Test-set
+        return ori_buffer, ori_thresh
+
+    video_name = video_name[:8]
+    if 'MOT17-05' in video_name:
+        track_buffer = 14
+    elif 'MOT17-13' in video_name:
+        track_buffer = 25
+    else:
+        track_buffer = ori_buffer
+
+    if 'MOT17-01' in video_name:
+        track_thresh = 0.65
+    elif 'MOT17-06' in video_name:
+        track_thresh = 0.65
+    elif 'MOT17-12' in video_name:
+        track_thresh = 0.7
+    elif 'MOT17-14' in video_name:
+        track_thresh = 0.67
+    else:
+        track_thresh = ori_thresh
+
+    if 'MOT20-06' in video_name or 'MOT20-08' in video_name:
+        track_thresh = 0.3
+
+    return track_buffer, track_thresh
diff --git a/ppdet/engine/trainer.py b/ppdet/engine/trainer.py
index 8a447132d89419e6c8bb8e4f7785a45cbd0148cf..2573ada018828907e4cc21666f512a5ac2c55872 100644
--- a/ppdet/engine/trainer.py
+++ b/ppdet/engine/trainer.py
@@ -51,7 +51,7 @@ logger = setup_logger('ppdet.engine')

 __all__ = ['Trainer']

-MOT_ARCH = ['DeepSORT', 'JDE', 'FairMOT']
+MOT_ARCH = ['DeepSORT', 'JDE', 'FairMOT', 'ByteTrack']


 class Trainer(object):
diff --git a/ppdet/metrics/mcmot_metrics.py b/ppdet/metrics/mcmot_metrics.py
index 9f329c8e0f4d18265bc588de989102c7dfda3997..5bcfb923470a1f94a2bd951fb721221a8f339354 100644
--- a/ppdet/metrics/mcmot_metrics.py
+++ b/ppdet/metrics/mcmot_metrics.py
@@ -308,10 +308,10 @@ class MCMOTEvaluator(object):

     def load_annotations(self):
         assert self.data_type == 'mcmot'
-        self.gt_filename = os.path.join(self.data_root, '../', '../',
+        self.gt_filename = os.path.join(self.data_root, '../',
                                         'sequences',
                                         '{}.txt'.format(self.seq_name))
-
+
     def reset_accumulator(self):
         import motmetrics as mm
         mm.lap.default_solver = 'lap'
diff --git a/ppdet/modeling/architectures/__init__.py b/ppdet/modeling/architectures/__init__.py
index 30aecac61434e89a17351bccd104a23e1e6d79b9..71c53067f72f96c1d24da19aa4313449e91f4b95 100644
--- a/ppdet/modeling/architectures/__init__.py
+++ b/ppdet/modeling/architectures/__init__.py
@@ -27,6 +27,7 @@ from . import detr
 from . import sparse_rcnn
 from . import tood
 from . import retinanet
+from . import bytetrack

 from .meta_arch import *
 from .faster_rcnn import *
@@ -51,3 +52,4 @@ from .detr import *
 from .sparse_rcnn import *
 from .tood import *
 from .retinanet import *
+from .bytetrack import *
diff --git a/ppdet/modeling/architectures/bytetrack.py b/ppdet/modeling/architectures/bytetrack.py
new file mode 100644
index 0000000000000000000000000000000000000000..09b92b0ae97ca7358bcc6dc9e4bfac5f3074fe52
--- /dev/null
+++ b/ppdet/modeling/architectures/bytetrack.py
@@ -0,0 +1,83 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from ppdet.core.workspace import register, create
+from .meta_arch import BaseArch
+
+__all__ = ['ByteTrack']
+
+
+@register
+class ByteTrack(BaseArch):
+    """
+    ByteTrack network, see https://arxiv.org/abs/2110.06864
+
+    Args:
+        detector (object): detector model instance
+        reid (object): reid model instance
+        tracker (object): tracker instance
+    """
+    __category__ = 'architecture'
+
+    def __init__(self,
+                 detector='YOLOX',
+                 reid=None,
+                 tracker='JDETracker'):
+        super(ByteTrack, self).__init__()
+        self.detector = detector
+        self.reid = reid
+        self.tracker = tracker
+
+    @classmethod
+    def from_config(cls, cfg, *args, **kwargs):
+        detector = create(cfg['detector'])
+
+        if cfg['reid'] != 'None':
+            reid = create(cfg['reid'])
+        else:
+            reid = None
+
+        tracker = create(cfg['tracker'])
+
+        return {
+            "detector": detector,
+            "reid": reid,
+            "tracker": tracker,
+        }
+
+    def _forward(self):
+        det_outs = self.detector(self.inputs)
+
+        if self.training:
+            return det_outs
+        else:
+            if self.reid is not None:
+                assert 'crops' in self.inputs
+                crops = self.inputs['crops']
+                pred_embs = self.reid(crops)
+            else:
+                pred_embs = None
+            det_outs['embeddings'] = pred_embs
+            return det_outs
+
+    def get_loss(self):
+        return self._forward()
+
+    def get_pred(self):
+        return self._forward()
+
diff --git a/ppdet/modeling/mot/tracker/deepsort_tracker.py b/ppdet/modeling/mot/tracker/deepsort_tracker.py
index fe5aa25b3a59bcbc89c98ea5a5512aaa34ebc630..28850d6fd9f3c2e11dd16602a8844a0e2498058d 100644
--- a/ppdet/modeling/mot/tracker/deepsort_tracker.py
+++ b/ppdet/modeling/mot/tracker/deepsort_tracker.py
@@ -102,7 +102,11 @@ class DeepSORTTracker(object):
         """
         pred_cls_ids = pred_dets[:, 0:1]
         pred_scores = pred_dets[:, 1:2]
-        pred_tlwhs = pred_dets[:, 2:6]
+        pred_xyxys = pred_dets[:, 2:6]
+        pred_tlwhs = np.concatenate(
+            (pred_xyxys[:, 0:2],
+             pred_xyxys[:, 2:4] - pred_xyxys[:, 0:2] + 1),
+            axis=1)

         detections = [
             Detection(tlwh, score, feat, cls_id)
diff --git a/tools/eval_mot.py b/tools/eval_mot.py
index 14e15ebb2b41d3a1fb54f4dd597a72e29b28bf70..15563866dab6fb9e0252c29710a60ef4a8750183 100644
--- a/tools/eval_mot.py
+++ b/tools/eval_mot.py
@@ -34,9 +34,6 @@ from ppdet.engine import Tracker
 from ppdet.utils.check import check_gpu, check_version, check_config
 from ppdet.utils.cli import ArgsParser

-from ppdet.utils.logger import setup_logger
-logger = setup_logger('eval')
-

 def parse_args():
     parser = ArgsParser()
@@ -83,11 +80,8 @@ def run(FLAGS, cfg):
     tracker = Tracker(cfg, mode='eval')

     # load weights
-    if cfg.architecture in ['DeepSORT']:
-        if cfg.det_weights != 'None':
-            tracker.load_weights_sde(cfg.det_weights, cfg.reid_weights)
-        else:
-            tracker.load_weights_sde(None, cfg.reid_weights)
+    if cfg.architecture in ['DeepSORT', 'ByteTrack']:
+        tracker.load_weights_sde(cfg.det_weights, cfg.reid_weights)
     else:
         tracker.load_weights_jde(cfg.weights)
diff --git a/tools/export_model.py b/tools/export_model.py
index deac2ea128b94668f0d2b743910b09cf09e1f1a0..3a417a37cc93aa41143d5906a5e674a19d69d2c4 100644
--- a/tools/export_model.py
+++ b/tools/export_model.py
@@ -28,7 +28,6 @@ import warnings
 warnings.filterwarnings('ignore')

 import paddle
-
 from ppdet.core.workspace import load_config, merge_config
 from ppdet.utils.check import check_gpu, check_version, check_config
 from ppdet.utils.cli import ArgsParser
@@ -65,11 +64,8 @@ def run(FLAGS, cfg):
     trainer = Trainer(cfg, mode='test')

     # load weights
-    if cfg.architecture in ['DeepSORT']:
-        if cfg.det_weights != 'None':
-            trainer.load_weights_sde(cfg.det_weights, cfg.reid_weights)
-        else:
-            trainer.load_weights_sde(None, cfg.reid_weights)
+    if cfg.architecture in ['DeepSORT', 'ByteTrack']:
+        trainer.load_weights_sde(cfg.det_weights, cfg.reid_weights)
     else:
         trainer.load_weights(cfg.weights)
diff --git a/tools/infer_mot.py b/tools/infer_mot.py
index 29122f01fd625ec01f42d0e83a159277b4a2c88b..aa2e3f88fa56c88d46fa4e83f4bd781d367a6e84 100644
--- a/tools/infer_mot.py
+++ b/tools/infer_mot.py
@@ -34,9 +34,6 @@ from ppdet.engine import Tracker
 from ppdet.utils.check import check_gpu, check_version, check_config
 from ppdet.utils.cli import ArgsParser

-from ppdet.utils.logger import setup_logger
-logger = setup_logger('train')
-

 def parse_args():
     parser = ArgsParser()
@@ -94,11 +91,8 @@ def run(FLAGS, cfg):
     tracker = Tracker(cfg, mode='test')

     # load weights
-    if cfg.architecture in ['DeepSORT']:
-        if cfg.det_weights != 'None':
-            tracker.load_weights_sde(cfg.det_weights, cfg.reid_weights)
-        else:
-            tracker.load_weights_sde(None, cfg.reid_weights)
+    if cfg.architecture in ['DeepSORT', 'ByteTrack']:
+        tracker.load_weights_sde(cfg.det_weights, cfg.reid_weights)
     else:
         tracker.load_weights_jde(cfg.weights)
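
A quick sanity check of the detection layout change in `deepsort_tracker.py` above: rows of `pred_dets` are now `[cls_id, score, x1, y1, x2, y2]`, and `DeepSORTTracker.update` converts the boxes to `tlwh` internally instead of expecting the caller to pre-convert. A minimal standalone sketch of that conversion (the sample values are illustrative only, not taken from the repo):

```python
import numpy as np

# One detection in the new layout: [cls_id, score, x1, y1, x2, y2]
pred_dets = np.array([[0., 0.9, 10., 20., 50., 80.]])

pred_xyxys = pred_dets[:, 2:6]
# Same conversion the tracker now performs internally:
# tlwh = (x1, y1, x2 - x1 + 1, y2 - y1 + 1)
pred_tlwhs = np.concatenate(
    (pred_xyxys[:, 0:2], pred_xyxys[:, 2:4] - pred_xyxys[:, 0:2] + 1),
    axis=1)

print(pred_tlwhs)  # [[10. 20. 41. 61.]]
```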