# mot_sde_infer.py

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import time
import yaml
import cv2
import re
import numpy as np
from collections import defaultdict

import paddle
from paddle.inference import Config
from paddle.inference import create_predictor

from picodet_postprocess import PicoDetPostProcess
from utils import argsparser, Timer, get_current_memory_mb, _is_valid_video, video2frames
from det_infer import Detector, DetectorPicoDet, get_test_images, print_arguments, PredictConfig
from det_infer import load_predictor
from benchmark_utils import PaddleInferBenchmark
from visualize import plot_tracking

from mot.tracker import DeepSORTTracker
from mot.utils import MOTTimer, write_mot_results, flow_statistic, scale_coords, clip_box, preprocess_reid

from mot.mtmct.utils import parse_bias
from mot.mtmct.postprocess import trajectory_fusion, sub_cluster, gen_res, print_mtmct_result
from mot.mtmct.postprocess import get_mtmct_matching_results, save_mtmct_crops, save_mtmct_vis_results

# Global set (not a dict) of supported MOT model names.
# NOTE(review): presumably compared against the exported model's arch
# elsewhere in this file -- confirm against the caller.
MOT_SUPPORT_MODELS = {'DeepSORT'}


def bench_log(detector, img_list, model_info, batch_size=1, name=None):
    """
    Build a `PaddleInferBenchmark` report for `detector` and emit it.

    Args:
        detector (object): model wrapper exposing `cpu_mem`, `gpu_mem`,
            `gpu_util`, `det_times` and `config`
        img_list (list): processed images; its length is used to average the
            accumulated memory / utilisation counters
        model_info (dict): passed through to `PaddleInferBenchmark`
        batch_size (int): batch size recorded in the report
        name (str|None): passed to the benchmark logger when it is called
    """
    num_images = len(img_list)
    # Counters on the detector are accumulated per image, so average them.
    memory_stats = dict(
        cpu_rss_mb=detector.cpu_mem / num_images,
        gpu_rss_mb=detector.gpu_mem / num_images,
        gpu_util=detector.gpu_util * 100 / num_images)

    timing = detector.det_times.report(average=True)
    dataset_stats = dict(
        batch_size=batch_size,
        shape="dynamic_shape",
        data_num=timing['img_num'])

    PaddleInferBenchmark(detector.config, model_info, dataset_stats, timing,
                         memory_stats)(name)


class SDE_Detector(Detector):
    """
    Detector of SDE methods. Reuses the predictor of `Detector` and converts
    its raw box output into `tlwh` detections plus clipped `xyxy` boxes.

    Args:
        pred_config (object): config of model, defined by `Config(model_dir)`
        model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
        run_mode (str): mode of running(fluid/trt_fp32/trt_fp16)
        batch_size (int): size of per batch in inference, must be 1 in tracking models
        trt_min_shape (int): min shape for dynamic shape in trt
        trt_max_shape (int): max shape for dynamic shape in trt
        trt_opt_shape (int): opt shape for dynamic shape in trt
        trt_calib_mode (bool): If the model is produced by TRT offline quantitative
            calibration, trt_calib_mode need to set True
        cpu_threads (int): cpu threads
        enable_mkldnn (bool): whether to open MKLDNN
    """

    def __init__(self,
                 pred_config,
                 model_dir,
                 device='CPU',
                 run_mode='fluid',
                 batch_size=1,
                 trt_min_shape=1,
                 trt_max_shape=1088,
                 trt_opt_shape=608,
                 trt_calib_mode=False,
                 cpu_threads=1,
                 enable_mkldnn=False):
        base_kwargs = dict(
            pred_config=pred_config,
            model_dir=model_dir,
            device=device,
            run_mode=run_mode,
            batch_size=batch_size,
            trt_min_shape=trt_min_shape,
            trt_max_shape=trt_max_shape,
            trt_opt_shape=trt_opt_shape,
            trt_calib_mode=trt_calib_mode,
            cpu_threads=cpu_threads,
            enable_mkldnn=enable_mkldnn)
        super().__init__(**base_kwargs)
        assert batch_size == 1, "The detector of tracking models only supports batch_size=1 now"
        self.pred_config = pred_config

    @staticmethod
    def _no_detection():
        """Return the all-zero `(pred_dets, pred_xyxys)` pair used as the
        'nothing detected' marker."""
        return (np.zeros((1, 6), dtype=np.float32),
                np.zeros((1, 4), dtype=np.float32))

    def postprocess(self,
                    boxes,
                    ori_image_shape,
                    threshold,
                    inputs,
                    scaled=False):
        """
        Filter raw boxes by score, map them to the original image and clip.

        Args:
            boxes (np.ndarray): one row per box; column 0 is read as the class
                id, column 1 as the score and columns 2: as the box coords
            ori_image_shape (list[int]): original image shape, passed to `clip_box`
            threshold (float): rows with score below this value are dropped
            inputs (dict): preprocessed inputs; 'image', 'im_shape' and
                'scale_factor' are read only when `scaled` is False
            scaled (bool): whether the box coords have already been scaled
                back to the original image

        Returns:
            pred_dets (np.ndarray, [N, 6]): 'x,y,w,h,score,cls_id'
            pred_xyxys (np.ndarray, [N, 4]): 'x1,y1,x2,y2'
            Both are single all-zero rows when no box survives.
        """
        kept_rows = np.nonzero(boxes[:, 1:2] >= threshold)[0]
        if len(kept_rows) == 0:
            return self._no_detection()
        boxes = boxes[kept_rows]

        if scaled:
            pred_bboxes = boxes[:, 2:]
        else:
            # scaled means whether the coords after detector outputs
            # have been scaled back to the original image, set True
            # in general detector, set False in JDE YOLOv3.
            net_input_shape = inputs['image'].shape[2:]
            pred_bboxes = scale_coords(boxes[:, 2:], net_input_shape,
                                       inputs['im_shape'][0],
                                       inputs['scale_factor'][0])

        pred_xyxys, keep_idx = clip_box(pred_bboxes, ori_image_shape)
        valid_rows = keep_idx[0]
        if len(valid_rows) == 0:
            return self._no_detection()

        scores = boxes[:, 1:2][valid_rows]
        cls_ids = boxes[:, 0:1][valid_rows]

        # xyxy -> top-left + width/height (inclusive pixel convention, hence +1)
        top_left = pred_xyxys[:, 0:2]
        sizes = pred_xyxys[:, 2:4] - pred_xyxys[:, 0:2] + 1
        tlwhs = np.concatenate((top_left, sizes), axis=1)

        pred_dets = np.concatenate((tlwhs, scores, cls_ids), axis=1)
        return pred_dets, pred_xyxys

    def predict(self,
                image_path,
                ori_image_shape,
                threshold=0.5,
                scaled=False,
                repeats=1,
                add_timer=True):
        '''
        Run preprocess, inference and postprocess for one image.

        Args:
            image_path (list[str]): path of images, only support one image path
                (batch_size=1) in tracking model
            ori_image_shape (list[int]): original image shape
            threshold (float): threshold of predicted box' score
            scaled (bool): whether the coords after detector outputs are scaled,
                default False in jde yolov3, set True in general detector.
            repeats (int): repeat number for prediction
            add_timer (bool): whether add timer during prediction

        Returns:
            pred_dets (np.ndarray, [N, 6]): 'x,y,w,h,score,cls_id'
            pred_xyxys (np.ndarray, [N, 4]): 'x1,y1,x2,y2'
        '''
        timers = self.det_times

        # preprocess: build the inputs and feed every named input tensor
        if add_timer:
            timers.preprocess_time_s.start()
        inputs = self.preprocess(image_path)

        predictor = self.predictor
        for name in predictor.get_input_names():
            predictor.get_input_handle(name).copy_from_cpu(inputs[name])
        if add_timer:
            timers.preprocess_time_s.end()
            timers.inference_time_s.start()

        # model prediction: only the first output (boxes) is fetched
        for _ in range(repeats):
            predictor.run()
            first_output = predictor.get_output_names()[0]
            boxes = predictor.get_output_handle(first_output).copy_to_cpu()
        if add_timer:
            timers.inference_time_s.end(repeats=repeats)
            timers.postprocess_time_s.start()

        # postprocess
        if len(boxes) == 0:
            pred_dets, pred_xyxys = self._no_detection()
        else:
            pred_dets, pred_xyxys = self.postprocess(
                boxes, ori_image_shape, threshold, inputs, scaled=scaled)
        if add_timer:
            timers.postprocess_time_s.end()
            timers.img_num += 1
        return pred_dets, pred_xyxys


class SDE_DetectorPicoDet(DetectorPicoDet):
    """
    PicoDet of SDE methods, the postprocess of PicoDet has not been exported as
        other detectors, so do postprocess here.

    Args:
        pred_config (object): config of model, defined by `Config(model_dir)`
        model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
        run_mode (str): mode of running(fluid/trt_fp32/trt_fp16)
        batch_size (int): size of per batch in inference, must be 1 in tracking models
        trt_min_shape (int): min shape for dynamic shape in trt
        trt_max_shape (int): max shape for dynamic shape in trt
        trt_opt_shape (int): opt shape for dynamic shape in trt
        trt_calib_mode (bool): If the model is produced by TRT offline quantitative
            calibration, trt_calib_mode need to set True
        cpu_threads (int): cpu threads
        enable_mkldnn (bool): whether to open MKLDNN
    """

    def __init__(self,
                 pred_config,
                 model_dir,
                 device='CPU',
                 run_mode='fluid',
                 batch_size=1,
                 trt_min_shape=1,
                 trt_max_shape=1088,
                 trt_opt_shape=608,
                 trt_calib_mode=False,
                 cpu_threads=1,
                 enable_mkldnn=False):
        base_kwargs = dict(
            pred_config=pred_config,
            model_dir=model_dir,
            device=device,
            run_mode=run_mode,
            batch_size=batch_size,
            trt_min_shape=trt_min_shape,
            trt_max_shape=trt_max_shape,
            trt_opt_shape=trt_opt_shape,
            trt_calib_mode=trt_calib_mode,
            cpu_threads=cpu_threads,
            enable_mkldnn=enable_mkldnn)
        super().__init__(**base_kwargs)
        assert batch_size == 1, "The detector of tracking models only supports batch_size=1 now"
        self.pred_config = pred_config

    @staticmethod
    def _no_detection():
        """Return the all-zero `(pred_dets, pred_xyxys)` pair used as the
        'nothing detected' marker."""
        return (np.zeros((1, 6), dtype=np.float32),
                np.zeros((1, 4), dtype=np.float32))

    def postprocess(self, boxes, ori_image_shape, threshold):
        """
        Filter boxes by score and clip them to the original image.

        Args:
            boxes (np.ndarray): one row per box; column 0 is read as the class
                id, column 1 as the score and columns 2: as the box coords
            ori_image_shape (list[int]): original image shape, passed to `clip_box`
            threshold (float): rows with score below this value are dropped

        Returns:
            pred_dets (np.ndarray, [N, 6]): 'x,y,w,h,score,cls_id'
            pred_xyxys (np.ndarray, [N, 4]): 'x1,y1,x2,y2'
            Both are single all-zero rows when no box survives.
        """
        kept_rows = np.nonzero(boxes[:, 1:2] >= threshold)[0]
        if len(kept_rows) == 0:
            return self._no_detection()
        boxes = boxes[kept_rows]

        pred_xyxys, keep_idx = clip_box(boxes[:, 2:], ori_image_shape)
        valid_rows = keep_idx[0]
        if len(valid_rows) == 0:
            return self._no_detection()

        scores = boxes[:, 1:2][valid_rows]
        cls_ids = boxes[:, 0:1][valid_rows]

        # xyxy -> top-left + width/height (inclusive pixel convention, hence +1)
        top_left = pred_xyxys[:, 0:2]
        sizes = pred_xyxys[:, 2:4] - pred_xyxys[:, 0:2] + 1
        tlwhs = np.concatenate((top_left, sizes), axis=1)

        pred_dets = np.concatenate((tlwhs, scores, cls_ids), axis=1)
        return pred_dets, pred_xyxys

    def predict(self,
                image_path,
                ori_image_shape,
                threshold=0.5,
                scaled=False,
                repeats=1,
                add_timer=True):
        '''
        Run preprocess, inference and the PicoDet postprocess for one image.

        Args:
            image_path (list[str]): path of images, only support one image path
                (batch_size=1) in tracking model
            ori_image_shape (list[int]): original image shape
            threshold (float): threshold of predicted box' score
            scaled (bool): kept for a signature parallel to `SDE_Detector.predict`;
                it is not read by this method.
            repeats (int): repeat number for prediction
            add_timer (bool): whether add timer during prediction
        Returns:
            pred_dets (np.ndarray, [N, 6]): 'x,y,w,h,score,cls_id'
            pred_xyxys (np.ndarray, [N, 4]): 'x1,y1,x2,y2'
        '''
        timers = self.det_times

        # preprocess: build the inputs and feed every named input tensor
        if add_timer:
            timers.preprocess_time_s.start()
        inputs = self.preprocess(image_path)

        predictor = self.predictor
        for name in predictor.get_input_names():
            predictor.get_input_handle(name).copy_from_cpu(inputs[name])
        if add_timer:
            timers.preprocess_time_s.end()
            timers.inference_time_s.start()

        np_score_list, np_boxes_list = [], []

        # model prediction: the first half of the outputs is read as scores,
        # the second half as boxes; only the last repeat's outputs are kept
        for _ in range(repeats):
            predictor.run()
            np_score_list, np_boxes_list = [], []
            output_names = predictor.get_output_names()
            num_outs = len(output_names) // 2
            for out_idx in range(num_outs):
                score_handle = predictor.get_output_handle(
                    output_names[out_idx])
                np_score_list.append(score_handle.copy_to_cpu())
                boxes_handle = predictor.get_output_handle(
                    output_names[out_idx + num_outs])
                np_boxes_list.append(boxes_handle.copy_to_cpu())
        if add_timer:
            timers.inference_time_s.end(repeats=repeats)
            timers.postprocess_time_s.start()

        # postprocess: the helper is rebuilt per call because it is
        # constructed from this image's input shapes and scale factor
        self.picodet_postprocess = PicoDetPostProcess(
            inputs['image'].shape[2:],
            inputs['im_shape'],
            inputs['scale_factor'],
            strides=self.pred_config.fpn_stride,
            nms_threshold=self.pred_config.nms['nms_threshold'])
        boxes, _ = self.picodet_postprocess(np_score_list, np_boxes_list)

        if len(boxes) == 0:
            pred_dets, pred_xyxys = self._no_detection()
        else:
            pred_dets, pred_xyxys = self.postprocess(boxes, ori_image_shape,
                                                     threshold)
        if add_timer:
            timers.postprocess_time_s.end()
            timers.img_num += 1

        return pred_dets, pred_xyxys


class SDE_ReID(object):
    """
    ReID of SDE methods: extracts an embedding per detection crop and feeds
    detections plus embeddings to a `DeepSORTTracker`.

    Args:
        pred_config (object): config of model, defined by `Config(model_dir)`
        model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
        run_mode (str): mode of running(fluid/trt_fp32/trt_fp16)
        batch_size (int): size of per batch in inference, default 50 means at most
            50 sub images can be made a batch and send into ReID model
        trt_min_shape (int): min shape for dynamic shape in trt
        trt_max_shape (int): max shape for dynamic shape in trt
        trt_opt_shape (int): opt shape for dynamic shape in trt
        trt_calib_mode (bool): If the model is produced by TRT offline quantitative
            calibration, trt_calib_mode need to set True
        cpu_threads (int): cpu threads
        enable_mkldnn (bool): whether to open MKLDNN
    """

    def __init__(self,
                 pred_config,
                 model_dir,
                 device='CPU',
                 run_mode='fluid',
                 batch_size=50,
                 trt_min_shape=1,
                 trt_max_shape=1088,
                 trt_opt_shape=608,
                 trt_calib_mode=False,
                 cpu_threads=1,
                 enable_mkldnn=False):
        self.pred_config = pred_config
        self.predictor, self.config = load_predictor(
            model_dir,
            run_mode=run_mode,
            batch_size=batch_size,
            min_subgraph_size=self.pred_config.min_subgraph_size,
            device=device,
            use_dynamic_shape=self.pred_config.use_dynamic_shape,
            trt_min_shape=trt_min_shape,
            trt_max_shape=trt_max_shape,
            trt_opt_shape=trt_opt_shape,
            trt_calib_mode=trt_calib_mode,
            cpu_threads=cpu_threads,
            enable_mkldnn=enable_mkldnn)
        self.det_times = Timer()
        self.cpu_mem = 0
        self.gpu_mem = 0
        self.gpu_util = 0
        self.batch_size = batch_size

        tracker_cfg = pred_config.tracker
        assert tracker_cfg, "Tracking model should have tracker"
        # Defaults apply when the tracker config omits a key.
        tracker_kwargs = {'max_age': 30, 'max_iou_distance': 0.7}
        for key in tracker_kwargs:
            if key in tracker_cfg:
                tracker_kwargs[key] = tracker_cfg[key]
        self.tracker = DeepSORTTracker(**tracker_kwargs)

    def get_crops(self, xyxy, ori_img):
        """
        Cut one crop per box out of `ori_img` and run `preprocess_reid` on them.

        Args:
            xyxy (np.ndarray): boxes, one 'x1,y1,x2,y2' row each; cast to int64
            ori_img (np.ndarray): image indexed as [h, w, 3]

        Returns:
            The output of `preprocess_reid(crops, w, h)` where `(w, h)` is
            the tracker's `input_size`.
        """
        w, h = self.tracker.input_size
        self.det_times.preprocess_time_s.start()
        int_boxes = xyxy.astype(np.int64)
        # [h,w,3]->[w,h,3] so the first axis can be sliced with x coords
        img_wh = ori_img.transpose(1, 0, 2)
        raw_crops = [
            img_wh[box[0]:box[2], box[1]:box[3], :] for box in int_boxes
        ]
        crops = preprocess_reid(raw_crops, w, h)
        self.det_times.preprocess_time_s.end()

        return crops

    def preprocess(self, crops):
        """Pack at most `batch_size` crops into the float32 'crops' input."""
        # to keep fast speed, only use topk crops
        limited = crops[:self.batch_size]
        return {'crops': np.array(limited).astype('float32')}

    def _online_tracks(self, pred_dets, pred_embs):
        """
        Advance the tracker one step and return `(track, tlwh)` pairs for the
        tracks that pass the confirmation, area and aspect-ratio filters.
        """
        tracker = self.tracker
        tracker.predict()
        kept = []
        for track in tracker.update(pred_dets, pred_embs):
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            tlwh = track.to_tlwh()
            if tlwh[2] * tlwh[3] <= tracker.min_box_area:
                continue
            # a non-positive vertical_ratio disables the w/h ratio filter
            if tracker.vertical_ratio > 0 and tlwh[2] / tlwh[
                    3] > tracker.vertical_ratio:
                continue
            kept.append((track, tlwh))
        return kept

    def postprocess(self, pred_dets, pred_embs):
        """
        Update the tracker and collect the currently tracked targets.

        Returns:
            dict with 'online_tlwhs', 'online_scores' and 'online_ids' lists,
            index-aligned with each other.
        """
        kept = self._online_tracks(pred_dets, pred_embs)
        return {
            'online_tlwhs': [tlwh for _, tlwh in kept],
            'online_scores': [track.score for track, _ in kept],
            'online_ids': [track.track_id for track, _ in kept],
        }

    def postprocess_mtmct(self, pred_dets, pred_embs, frame_id, seq_name):
        """
        Same as `postprocess`, plus a 'feat_data' dict holding one record per
        tracked target, keyed by '{seq_name}_{track_id}_{frame_id}.jpg'. Each
        record stores the target's 'bbox' (tlbr), zero-padded 'frame', 'id',
        'imgname' and 'feat'.
        """
        kept = self._online_tracks(pred_dets, pred_embs)
        frame_tag = f"{frame_id:06d}"

        feat_records = {}
        for track, _ in kept:
            track_id = track.track_id
            imgname = f'{seq_name}_{track_id}_{frame_id}.jpg'
            feat_records[imgname] = {
                'bbox': track.to_tlbr(),
                'frame': frame_tag,
                'id': track_id,
                'imgname': imgname,
                'feat': track.feat,
            }

        return {
            'online_tlwhs': [tlwh for _, tlwh in kept],
            'online_scores': [track.score for track, _ in kept],
            'online_ids': [track.track_id for track, _ in kept],
            'feat_data': feat_records,
        }

    def predict(self,
                crops,
                pred_dets,
                repeats=1,
                add_timer=True,
                MTMCT=False,
                frame_id=0,
                seq_name=''):
        """
        Embed `crops` with the ReID model and update the tracker.

        Args:
            crops: preprocessed crops, as returned by `get_crops`
            pred_dets (np.ndarray): detections handed to the tracker
            repeats (int): repeat number for prediction
            add_timer (bool): whether add timer during prediction
            MTMCT (bool): when it compares equal to False the plain
                `postprocess` is used, otherwise `postprocess_mtmct`
            frame_id (int): frame index, only used in the MTMCT branch
            seq_name (str): sequence name, only used in the MTMCT branch

        Returns:
            dict: tracking outputs of the selected postprocess method
        """
        timers = self.det_times

        # preprocess
        if add_timer:
            timers.preprocess_time_s.start()
        inputs = self.preprocess(crops)
        predictor = self.predictor
        for name in predictor.get_input_names():
            predictor.get_input_handle(name).copy_from_cpu(inputs[name])

        if add_timer:
            timers.preprocess_time_s.end()
            timers.inference_time_s.start()

        # model prediction: only the first output (embeddings) is fetched
        for _ in range(repeats):
            predictor.run()
            first_output = predictor.get_output_names()[0]
            pred_embs = predictor.get_output_handle(first_output).copy_to_cpu()
        if add_timer:
            timers.inference_time_s.end(repeats=repeats)
            timers.postprocess_time_s.start()

        # postprocess
        # NOTE: equality (not truthiness) is kept on purpose so that
        # non-bool flag values select the same branch as before.
        if MTMCT == False:  # noqa: E712
            tracking_outs = self.postprocess(pred_dets, pred_embs)
        else:
            tracking_outs = self.postprocess_mtmct(pred_dets, pred_embs,
                                                   frame_id, seq_name)
        if add_timer:
            timers.postprocess_time_s.end()
            timers.img_num += 1

        return tracking_outs


def predict_image(detector,
                  reid_model,
                  image_list,
                  threshold,
                  output_dir,
                  scaled=True,
                  save_images=True,
                  run_benchmark=False):
    """
    Run detection + ReID tracking over a list of image files.

    Args:
        detector (object): model exposing
            `predict(image_path, ori_image_shape, threshold, scaled, ...)`
            returning `(pred_dets, pred_xyxys)`
        reid_model (object): model exposing `get_crops(pred_xyxys, frame)` and
            `predict(crops, pred_dets, ...)`
        image_list (list[str]): image paths; sorted in place before processing
        threshold (float): score threshold forwarded to `detector.predict`
        output_dir (str): directory for visualised images, created if missing
        scaled (bool): forwarded to `detector.predict`
        save_images (bool): whether to write the visualised frame to disk
        run_benchmark (bool): if True, every model is run once as warmup
            (10 repeats, no timer) and once timed (10 repeats), and the
            current memory usage is accumulated on `detector`
    """
    image_list.sort()
    for i, img_file in enumerate(image_list):
        frame = cv2.imread(img_file)
        if frame is None:
            # cv2.imread signals an unreadable file by returning None.
            print('Failed to read image {}, skip it.'.format(img_file))
            continue
        ori_image_shape = list(frame.shape[:2])
        if run_benchmark:
            # warmup
            detector.predict(
                [img_file],
                ori_image_shape,
                threshold,
                scaled,
                repeats=10,
                add_timer=False)
            # run benchmark
            pred_dets, pred_xyxys = detector.predict(
                [img_file],
                ori_image_shape,
                threshold,
                scaled,
                repeats=10,
                add_timer=True)

            cm, gm, gu = get_current_memory_mb()
            detector.cpu_mem += cm
            detector.gpu_mem += gm
            detector.gpu_util += gu
            print('Test iter {}, file name:{}'.format(i, img_file))
        else:
            pred_dets, pred_xyxys = detector.predict(
                [img_file], ori_image_shape, threshold, scaled)

        # A single all-zero row is the detector's "nothing found" marker.
        if len(pred_dets) == 1 and np.sum(pred_dets) == 0:
            print('Frame {} has no object, try to modify score threshold.'.
                  format(i))
            online_im = frame
        else:
            # reid process
            crops = reid_model.get_crops(pred_xyxys, frame)

            if run_benchmark:
                # warmup
                reid_model.predict(
                    crops, pred_dets, repeats=10, add_timer=False)
                # run benchmark
                tracking_outs = reid_model.predict(
                    crops, pred_dets, repeats=10, add_timer=True)
            else:
                tracking_outs = reid_model.predict(crops, pred_dets)

            # Draw in both modes so that `online_im` always belongs to the
            # current frame; previously it was only assigned in the
            # non-benchmark branch, which broke saving under run_benchmark.
            online_im = plot_tracking(
                frame,
                tracking_outs['online_tlwhs'],
                tracking_outs['online_ids'],
                tracking_outs['online_scores'],
                frame_id=i)

        if save_images:
            os.makedirs(output_dir, exist_ok=True)
            img_name = os.path.split(img_file)[-1]
            out_path = os.path.join(output_dir, img_name)
            cv2.imwrite(out_path, online_im)
            print("save result to: " + out_path)


def predict_video(detector,
                  reid_model,
                  video_file,
                  scaled,
                  threshold,
                  output_dir,
                  save_images=True,
                  save_mot_txts=True,
                  draw_center_traj=False,
                  secs_interval=10,
                  do_entrance_counting=False,
                  camera_id=-1):
    """
    Track objects through a video file or a camera stream and save results.

    Args:
        detector (object): model exposing
            `predict([frame], ori_image_shape, threshold, scaled)` returning
            `(pred_dets, pred_xyxys)`
        reid_model (object): model exposing `get_crops(pred_xyxys, frame)` and
            `predict(crops, pred_dets)`
        video_file (str): input video path, used when `camera_id` is -1
        scaled (bool): forwarded to `detector.predict`
        threshold (float): score threshold forwarded to `detector.predict`
        output_dir (str): directory receiving all outputs, created if missing
        save_images (bool): if True, one jpg per frame is written to
            `<output_dir>/<video stem>/` and ffmpeg assembles them into a
            video at the end; if False, frames go through `cv2.VideoWriter`
        save_mot_txts (bool): whether to write the tracking results and the
            flow statistic text files
        draw_center_traj (bool): accepted for interface compatibility; it is
            not read by this function
        secs_interval (int): forwarded to `flow_statistic`
        do_entrance_counting (bool): forwarded to `flow_statistic` and
            `plot_tracking`
        camera_id (int): camera device id; -1 means read `video_file`
    """
    import subprocess  # local import: only needed for the final ffmpeg call

    video_name = 'mot_output.mp4'
    if camera_id != -1:
        capture = cv2.VideoCapture(camera_id)
    else:
        capture = cv2.VideoCapture(video_file)
        video_name = os.path.split(video_file)[-1]
    # splitext strips only the final extension, so names with several dots
    # keep their full stem and names without an extension no longer crash.
    video_stem = os.path.splitext(video_name)[0]

    # Get Video info : resolution, fps, frame count
    width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(capture.get(cv2.CAP_PROP_FPS))
    frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
    print("fps: %d, frame_count: %d" % (fps, frame_count))

    os.makedirs(output_dir, exist_ok=True)
    out_path = os.path.join(output_dir, video_name)
    save_dir = os.path.join(output_dir, video_stem)
    writer = None
    if save_images:
        os.makedirs(save_dir, exist_ok=True)
    else:
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))

    frame_id = 0
    timer = MOTTimer()
    results = defaultdict(list)
    id_set = set()
    interval_id_set = set()
    in_id_list = list()
    out_id_list = list()
    prev_center = dict()
    records = list()
    # Horizontal line through the middle of the frame.
    entrance = [0, height / 2., width, height / 2.]
    # `fps` is overwritten below with the measured processing rate, so keep
    # the source video's rate separately for the flow statistics.
    video_fps = fps

    try:
        while True:
            ret, frame = capture.read()
            if not ret:
                break
            timer.tic()
            ori_image_shape = list(frame.shape[:2])
            pred_dets, pred_xyxys = detector.predict(
                [frame], ori_image_shape, threshold, scaled)

            # A single all-zero row is the detector's "nothing found" marker.
            if len(pred_dets) == 1 and np.sum(pred_dets) == 0:
                print('Frame {} has no object, try to modify score threshold.'.
                      format(frame_id))
                timer.toc()
                im = frame
            else:
                # reid process
                crops = reid_model.get_crops(pred_xyxys, frame)
                tracking_outs = reid_model.predict(crops, pred_dets)

                online_tlwhs = tracking_outs['online_tlwhs']
                online_scores = tracking_outs['online_scores']
                online_ids = tracking_outs['online_ids']

                # NOTE: just implement flow statistic for one class
                result = (frame_id + 1, online_tlwhs, online_scores,
                          online_ids)
                results[0].append(result)
                statistic = flow_statistic(
                    result, secs_interval, do_entrance_counting, video_fps,
                    entrance, id_set, interval_id_set, in_id_list,
                    out_id_list, prev_center, records)
                id_set = statistic['id_set']
                interval_id_set = statistic['interval_id_set']
                in_id_list = statistic['in_id_list']
                out_id_list = statistic['out_id_list']
                prev_center = statistic['prev_center']
                records = statistic['records']

                timer.toc()

                fps = 1. / timer.duration
                im = plot_tracking(
                    frame,
                    online_tlwhs,
                    online_ids,
                    online_scores,
                    frame_id=frame_id,
                    fps=fps,
                    do_entrance_counting=do_entrance_counting,
                    entrance=entrance)

            if save_images:
                cv2.imwrite(
                    os.path.join(save_dir, '{:05d}.jpg'.format(frame_id)), im)
            else:
                writer.write(im)

            frame_id += 1
            print('detect frame:%d, fps: %f' % (frame_id, fps))

            if camera_id != -1:
                cv2.imshow('Tracking Detection', im)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
    finally:
        # Release the capture (and writer) even when a frame fails midway.
        capture.release()
        if writer is not None:
            writer.release()

    if save_mot_txts:
        result_filename = os.path.join(output_dir, video_stem + '.txt')
        write_mot_results(result_filename, results)

        result_filename = os.path.join(output_dir,
                                       video_stem + '_flow_statistic.txt')
        with open(result_filename, 'w') as f:
            f.writelines(records)
        print('Flow statistic save in {}'.format(result_filename))

    if save_images:
        # Argument list instead of a shell string, so paths containing spaces
        # or shell metacharacters are passed to ffmpeg unchanged.
        cmd = [
            'ffmpeg', '-f', 'image2', '-i', '{}/%05d.jpg'.format(save_dir),
            out_path
        ]
        try:
            completed = subprocess.run(cmd)
        except OSError as err:
            # e.g. ffmpeg is not installed; keep the step best-effort.
            print('Failed to run ffmpeg: {}'.format(err))
        else:
            if completed.returncode == 0:
                print('Save video in {}.'.format(out_path))
            else:
                print('ffmpeg exited with code {}, video {} was not saved.'.
                      format(completed.returncode, out_path))


def predict_mtmct_seq(detector,
                      reid_model,
                      mtmct_dir,
                      seq_name,
                      scaled,
                      threshold,
                      output_dir,
                      save_images=True,
                      save_mot_txts=True):
    """
    Track one image sequence of a MTMCT dataset and collect ReID features.

    Frames are read from `<mtmct_dir>/<seq_name>/img1` when that folder
    exists, otherwise from `<mtmct_dir>/<seq_name>`, in sorted name order.

    Args:
        detector (object): model exposing
            `predict([frame_path], ori_image_shape, threshold, scaled)`
            returning `(pred_dets, pred_xyxys)`
        reid_model (object): model exposing `get_crops(pred_xyxys, frame)` and
            `predict(crops, pred_dets, MTMCT=True, frame_id=..., seq_name=...)`
        mtmct_dir (str): root directory holding the sequences
        seq_name (str): name of the sequence sub-directory
        scaled (bool): forwarded to `detector.predict`
        threshold (float): score threshold forwarded to `detector.predict`
        output_dir (str): directory for results, created if missing
        save_images (bool): whether to save visualised frames under
            `<output_dir>/<seq_name>/`
        save_mot_txts (bool): whether to write `<output_dir>/<seq_name>.txt`

    Returns:
        dict: the 'feat_data' records of every frame merged into one dict,
        keyed by crop image name.

    Raises:
        AssertionError: if the frame folder is missing or empty.
    """
    fpath = os.path.join(mtmct_dir, seq_name)
    if os.path.exists(os.path.join(fpath, 'img1')):
        fpath = os.path.join(fpath, 'img1')

    assert os.path.isdir(fpath), '{} should be a directory'.format(fpath)
    image_list = sorted(os.listdir(fpath))
    assert len(image_list) > 0, '{} has no images.'.format(fpath)

    results = defaultdict(list)
    mot_features_dict = {}  # cid_tid_fid feats
    print('Totally {} frames found in seq {}.'.format(
        len(image_list), seq_name))

    # Create the output folders once instead of re-checking on every frame.
    os.makedirs(output_dir, exist_ok=True)
    save_dir = os.path.join(output_dir, seq_name)
    if save_images:
        os.makedirs(save_dir, exist_ok=True)

    for frame_id, img_file in enumerate(image_list):
        if frame_id % 10 == 0:
            print('Processing frame {} of seq {}.'.format(frame_id, seq_name))
        frame_path = os.path.join(fpath, img_file)
        frame = cv2.imread(frame_path)
        if frame is None:
            # cv2.imread signals an unreadable / non-image file with None.
            print('Failed to read image {}, skip it.'.format(frame_path))
            continue
        ori_image_shape = list(frame.shape[:2])
        pred_dets, pred_xyxys = detector.predict([frame_path], ori_image_shape,
                                                 threshold, scaled)

        # A single all-zero row is the detector's "nothing found" marker.
        if len(pred_dets) == 1 and np.sum(pred_dets) == 0:
            print('Frame {} has no object, try to modify score threshold.'.
                  format(frame_id))
            online_im = frame
        else:
            # reid process
            crops = reid_model.get_crops(pred_xyxys, frame)

            tracking_outs = reid_model.predict(
                crops,
                pred_dets,
                MTMCT=True,
                frame_id=frame_id,
                seq_name=seq_name)

            # Merge in place; rebuilding the dict every frame recopied all
            # previously collected features.
            mot_features_dict.update(tracking_outs['feat_data'])

            online_tlwhs = tracking_outs['online_tlwhs']
            online_scores = tracking_outs['online_scores']
            online_ids = tracking_outs['online_ids']

            online_im = plot_tracking(frame, online_tlwhs, online_ids,
                                      online_scores, frame_id)
            results[0].append(
                (frame_id + 1, online_tlwhs, online_scores, online_ids))

        if save_images:
            img_name = os.path.split(img_file)[-1]
            out_path = os.path.join(save_dir, img_name)
            cv2.imwrite(out_path, online_im)

    if save_mot_txts:
        result_filename = os.path.join(output_dir, seq_name + '.txt')
        write_mot_results(result_filename, results)

    return mot_features_dict


def predict_mtmct(detector,
                  reid_model,
                  mtmct_dir,
                  mtmct_cfg,
                  scaled,
                  threshold,
                  output_dir,
                  save_images=True,
                  save_mot_txts=True):
    """Run multi-target multi-camera tracking over the sequences of a folder.

    Every entry of ``mtmct_dir`` is treated as one camera sequence. Video
    files are first split into frames with ``video2frames``; entries that
    are not image folders afterwards are skipped. Each sequence is tracked
    with ``predict_mtmct_seq``, fused with ``trajectory_fusion`` and the
    tracklets of all cameras are finally clustered with ``sub_cluster``.
    The matching result is written to ``<output_dir>/mtmct_result.txt``.

    Args:
        detector: detector passed through to ``predict_mtmct_seq``.
        reid_model: ReID model passed through to ``predict_mtmct_seq``.
        mtmct_dir (str): folder holding one sub-folder (or video) per camera.
            The camera id is the integer left after removing the ASCII
            letters and commas from the sequence name.
        mtmct_cfg (dict): parsed MTMCT config. The keys read here are
            ``MTMCT``, ``cameras_bias``, ``use_zone``, ``zone_path``,
            ``use_ff``, ``use_rerank``, ``use_camera``, ``use_st_filter``,
            ``use_roi`` and ``roi_dir``.
        scaled: forwarded unchanged to ``predict_mtmct_seq``.
        threshold: forwarded unchanged to ``predict_mtmct_seq``.
        output_dir (str): folder receiving all outputs; created if missing.
        save_images (bool): forwarded to ``predict_mtmct_seq`` and also
            controls whether the cross-camera crops and visualisations are
            exported.
        save_mot_txts (bool): forwarded to ``predict_mtmct_seq``.

    Raises:
        AssertionError: if ``mtmct_cfg['MTMCT']`` is not ``True``.
        KeyError: if one of the config keys listed above is missing.
    """
    MTMCT = mtmct_cfg['MTMCT']
    assert MTMCT == True, 'predict_mtmct should be used for MTMCT.'

    cameras_bias = mtmct_cfg['cameras_bias']
    cid_bias = parse_bias(cameras_bias)
    scene_cluster = list(cid_bias.keys())

    # 1. zone related parameters
    use_zone = mtmct_cfg['use_zone']
    zone_path = mtmct_cfg['zone_path']

    # 2. tricks parameters, can be used for other mtmct datasets
    use_ff = mtmct_cfg['use_ff']
    use_rerank = mtmct_cfg['use_rerank']

    # 3. camera related parameters
    use_camera = mtmct_cfg['use_camera']
    use_st_filter = mtmct_cfg['use_st_filter']

    # 4. roi related parameters
    use_roi = mtmct_cfg['use_roi']
    roi_dir = mtmct_cfg['roi_dir']

    mot_list_breaks = []
    cid_tid_dict = dict()

    os.makedirs(output_dir, exist_ok=True)

    for seq in sorted(os.listdir(mtmct_dir)):
        fpath = os.path.join(mtmct_dir, seq)
        if os.path.isfile(fpath) and _is_valid_video(fpath):
            # Use the part just before the extension as the sequence name.
            seq = seq.split('.')[-2]
            print('ffmpeg processing of video {}'.format(fpath))
            # NOTE(review): the extracted frames are expected to land in
            # <mtmct_dir>/<seq>; confirm against video2frames.
            video2frames(video_path=fpath, outpath=mtmct_dir, frame_rate=25)
            fpath = os.path.join(mtmct_dir, seq)

        if not os.path.isdir(fpath):
            print('{} is not a image folder.'.format(fpath))
            continue

        mot_features_dict = predict_mtmct_seq(
            detector, reid_model, mtmct_dir, seq, scaled, threshold, output_dir,
            save_images, save_mot_txts)

        cid = int(re.sub('[a-z,A-Z]', "", seq))
        tid_data, mot_list_break = trajectory_fusion(
            mot_features_dict,
            cid,
            cid_bias,
            use_zone=use_zone,
            zone_path=zone_path)
        mot_list_breaks.append(mot_list_break)

        # Register the tracklets of this sequence; the first tracklet seen
        # for a given (camera id, track id) pair is the one that is kept.
        for line in tid_data:
            tracklet = tid_data[line]
            cid_tid_dict.setdefault((cid, tracklet['tid']), tracklet)

    map_tid = sub_cluster(
        cid_tid_dict,
        scene_cluster,
        use_ff=use_ff,
        use_rerank=use_rerank,
        use_camera=use_camera,
        use_st_filter=use_st_filter)

    pred_mtmct_file = os.path.join(output_dir, 'mtmct_result.txt')
    if use_camera:
        gen_res(pred_mtmct_file, scene_cluster, map_tid, mot_list_breaks)
    else:
        gen_res(
            pred_mtmct_file,
            scene_cluster,
            map_tid,
            mot_list_breaks,
            use_roi=use_roi,
            roi_dir=roi_dir)

    # Honour the ``save_images`` argument rather than the script-only global
    # FLAGS, so the function also works when it is called from an importing
    # module (e.g. through predict_naive).
    if save_images:
        camera_results, cid_tid_fid_res = get_mtmct_matching_results(
            pred_mtmct_file)

        crops_dir = os.path.join(output_dir, 'mtmct_crops')
        save_mtmct_crops(
            cid_tid_fid_res, images_dir=mtmct_dir, crops_dir=crops_dir)

        save_dir = os.path.join(output_dir, 'mtmct_vis')
        save_mtmct_vis_results(
            camera_results,
            images_dir=mtmct_dir,
            save_dir=save_dir,
            save_videos=save_images)

    # evaluation metrics, only when a ground-truth file sits next to the data
    data_root_gt = os.path.join(mtmct_dir, '..', 'gt', 'gt.txt')
    if os.path.exists(data_root_gt):
        print_mtmct_result(data_root_gt, pred_mtmct_file)


def predict_naive(model_dir,
                  reid_model_dir,
                  video_file,
                  image_dir,
                  mtmct_dir=None,
                  mtmct_cfg=None,
                  scaled=True,
                  device='gpu',
                  threshold=0.5,
                  output_dir='output'):
    """Build the detector and ReID model and run one prediction mode.

    The mode is chosen from the arguments, in this order: a video file
    (``video_file``), an MTMCT folder (``mtmct_dir``), otherwise the images
    found in ``image_dir``. Images and MOT text results are always saved.

    Args:
        model_dir (str): exported detector model folder.
        reid_model_dir (str): exported ReID model folder.
        video_file (str|None): video to track; takes priority when not None.
        image_dir (str): image folder used when neither a video nor an
            MTMCT folder is given.
        mtmct_dir (str|None): folder of camera sequences for MTMCT.
        mtmct_cfg (str|None): path of the YAML file holding the MTMCT
            config; it is opened only in MTMCT mode.
        scaled: forwarded to the video / MTMCT predictors.
        device (str): device name handed to both models.
        threshold (float): score threshold forwarded to the predictors.
        output_dir (str): folder receiving the results.
    """
    pred_config = PredictConfig(model_dir)
    # Reference the detector class directly; there is no need to eval() its
    # name from a string.
    if pred_config.arch == 'PicoDet':
        detector_cls = SDE_DetectorPicoDet
    else:
        detector_cls = SDE_Detector
    detector = detector_cls(pred_config, model_dir, device=device)

    pred_config = PredictConfig(reid_model_dir)
    reid_model = SDE_ReID(pred_config, reid_model_dir, device=device)

    if video_file is not None:
        predict_video(
            detector,
            reid_model,
            video_file,
            scaled=scaled,
            threshold=threshold,
            output_dir=output_dir,
            save_images=True,
            save_mot_txts=True,
            draw_center_traj=False,
            secs_interval=10,
            do_entrance_counting=False)
    elif mtmct_dir is not None:
        with open(mtmct_cfg) as f:
            mtmct_cfg_file = yaml.safe_load(f)
        predict_mtmct(
            detector,
            reid_model,
            mtmct_dir,
            mtmct_cfg_file,
            scaled=scaled,
            threshold=threshold,
            output_dir=output_dir,
            save_images=True,
            save_mot_txts=True)
    else:
        img_list = get_test_images(image_dir, infer_img=None)
        predict_image(
            detector,
            reid_model,
            img_list,
            threshold=threshold,
            output_dir=output_dir,
            save_images=True)


def main():
    """Command-line entry point: build both models from FLAGS and predict.

    Reads the module-level ``FLAGS`` namespace. The prediction mode is a
    video file or camera stream when ``FLAGS.video_file`` is set or
    ``FLAGS.camera_id`` is not -1, MTMCT when ``FLAGS.mtmct_dir`` is set,
    and plain image prediction otherwise. In image mode the timing
    information is printed, or benchmark logs are emitted when
    ``FLAGS.run_benchmark`` is set.
    """
    # Runtime options shared by the detector and the ReID model; only the
    # batch size differs between the two.
    runtime_kwargs = dict(
        device=FLAGS.device,
        run_mode=FLAGS.run_mode,
        trt_min_shape=FLAGS.trt_min_shape,
        trt_max_shape=FLAGS.trt_max_shape,
        trt_opt_shape=FLAGS.trt_opt_shape,
        trt_calib_mode=FLAGS.trt_calib_mode,
        cpu_threads=FLAGS.cpu_threads,
        enable_mkldnn=FLAGS.enable_mkldnn)

    pred_config = PredictConfig(FLAGS.model_dir)
    # Reference the detector class directly; there is no need to eval() its
    # name from a string.
    if pred_config.arch == 'PicoDet':
        detector_cls = SDE_DetectorPicoDet
    else:
        detector_cls = SDE_Detector
    detector = detector_cls(
        pred_config,
        FLAGS.model_dir,
        batch_size=FLAGS.batch_size,
        **runtime_kwargs)

    pred_config = PredictConfig(FLAGS.reid_model_dir)
    reid_model = SDE_ReID(
        pred_config,
        FLAGS.reid_model_dir,
        batch_size=FLAGS.reid_batch_size,
        **runtime_kwargs)

    # predict from video file or camera video stream
    if FLAGS.video_file is not None or FLAGS.camera_id != -1:
        predict_video(
            detector,
            reid_model,
            FLAGS.video_file,
            scaled=FLAGS.scaled,
            threshold=FLAGS.threshold,
            output_dir=FLAGS.output_dir,
            save_images=FLAGS.save_images,
            save_mot_txts=FLAGS.save_mot_txts,
            draw_center_traj=FLAGS.draw_center_traj,
            secs_interval=FLAGS.secs_interval,
            do_entrance_counting=FLAGS.do_entrance_counting,
            camera_id=FLAGS.camera_id)

    elif FLAGS.mtmct_dir is not None:
        # predict over a folder of camera sequences
        mtmct_cfg_file = FLAGS.mtmct_cfg
        with open(mtmct_cfg_file) as f:
            mtmct_cfg = yaml.safe_load(f)
        predict_mtmct(
            detector,
            reid_model,
            FLAGS.mtmct_dir,
            mtmct_cfg,
            scaled=FLAGS.scaled,
            threshold=FLAGS.threshold,
            output_dir=FLAGS.output_dir,
            save_images=FLAGS.save_images,
            save_mot_txts=FLAGS.save_mot_txts)
    else:
        # predict from image
        img_list = get_test_images(FLAGS.image_dir, FLAGS.image_file)
        predict_image(
            detector,
            reid_model,
            img_list,
            threshold=FLAGS.threshold,
            output_dir=FLAGS.output_dir,
            save_images=FLAGS.save_images,
            run_benchmark=FLAGS.run_benchmark)

        if not FLAGS.run_benchmark:
            detector.det_times.info(average=True)
            reid_model.det_times.info(average=True)
        else:
            mode = FLAGS.run_mode
            # The precision is the last '_'-separated token of the run mode.
            precision = mode.split('_')[-1]

            det_model_dir = FLAGS.model_dir
            det_model_info = {
                'model_name': det_model_dir.strip('/').split('/')[-1],
                'precision': precision
            }
            bench_log(detector, img_list, det_model_info, name='Det')

            reid_model_dir = FLAGS.reid_model_dir
            reid_model_info = {
                'model_name': reid_model_dir.strip('/').split('/')[-1],
                'precision': precision
            }
            bench_log(reid_model, img_list, reid_model_info, name='ReID')


if __name__ == '__main__':
    # Static mode has to be enabled before main() creates the predictors.
    paddle.enable_static()

    # FLAGS is kept as a module-level global: main() reads it directly.
    parser = argsparser()
    FLAGS = parser.parse_args()
    print_arguments(FLAGS)

    # Device names are validated in upper case, so normalise first.
    FLAGS.device = FLAGS.device.upper()
    assert FLAGS.device in ('CPU', 'GPU', 'XPU'), \
        "device should be CPU, GPU or XPU"

    main()