From 9f9df6f79048bce1591e1189d2051383aaf081e3 Mon Sep 17 00:00:00 2001 From: wangguanzhong Date: Wed, 9 Mar 2022 13:22:34 +0800 Subject: [PATCH] [WIP] add pedestrian pipeline (#5313) * add pedestrian pipeline * add pipe_timer --- deploy/pphuman/__init__.py | 13 + deploy/pphuman/config/infer_cfg.yml | 16 + deploy/pphuman/pipe_utils.py | 273 ++++++++++++++++ deploy/pphuman/pipeline.py | 478 ++++++++++++++++++++++++++++ deploy/pphuman/tracker_config.yml | 10 + deploy/python/mot_sde_infer.py | 2 +- deploy/python/visualize.py | 25 +- 7 files changed, 815 insertions(+), 2 deletions(-) create mode 100644 deploy/pphuman/__init__.py create mode 100644 deploy/pphuman/config/infer_cfg.yml create mode 100644 deploy/pphuman/pipe_utils.py create mode 100644 deploy/pphuman/pipeline.py create mode 100644 deploy/pphuman/tracker_config.yml diff --git a/deploy/pphuman/__init__.py b/deploy/pphuman/__init__.py new file mode 100644 index 000000000..97043fd7b --- /dev/null +++ b/deploy/pphuman/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/deploy/pphuman/config/infer_cfg.yml b/deploy/pphuman/config/infer_cfg.yml new file mode 100644 index 000000000..45db7a9bb --- /dev/null +++ b/deploy/pphuman/config/infer_cfg.yml @@ -0,0 +1,16 @@ +crop_thresh: 0.5 +attr_thresh: 0.5 +visual: True + +DET: + model_dir: output_inference/pedestrian_yolov3_darknet/ + batch_size: 1 + +ATTR: + model_dir: output_inference/strongbaseline_r50_30e_pa100k/ + batch_size: 8 + +MOT: + model_dir: output_inference/pedestrian_yolov3_darknet/ + tracker_config: deploy/pphuman/tracker_config.yml + batch_size: 1 diff --git a/deploy/pphuman/pipe_utils.py b/deploy/pphuman/pipe_utils.py new file mode 100644 index 000000000..5cdb3943d --- /dev/null +++ b/deploy/pphuman/pipe_utils.py @@ -0,0 +1,273 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
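+
+# A minimal usage sketch for the pipeline these utilities support (the input
+# video name is hypothetical; the models referenced in
+# deploy/pphuman/config/infer_cfg.yml must be exported first):
+#
+#   python deploy/pphuman/pipeline.py \
+#       --config deploy/pphuman/config/infer_cfg.yml \
+#       --video_file test_video.mp4 --device GPU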
+
+import time
+import os
+import ast
+import argparse
+import glob
+import yaml
+import copy
+import numpy as np
+
+from python.keypoint_preprocess import EvalAffine, TopDownEvalAffine, expand_crop
+
+
+def argsparser():
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "--config",
+        type=str,
+        default=None,
+        help="Path of the pipeline config file.",
+        required=True)
+    parser.add_argument(
+        "--image_file", type=str, default=None, help="Path of image file.")
+    parser.add_argument(
+        "--image_dir",
+        type=str,
+        default=None,
+        help="Directory of image files; `image_file` has higher priority.")
+    parser.add_argument(
+        "--video_file",
+        type=str,
+        default=None,
+        help="Path of video file; `video_file` or `camera_id` has the highest priority."
+    )
+    parser.add_argument(
+        "--camera_id",
+        type=int,
+        default=-1,
+        help="Device id of the camera to predict.")
+    parser.add_argument(
+        "--output_dir",
+        type=str,
+        default="output",
+        help="Directory of output visualization files.")
+    parser.add_argument(
+        "--run_mode",
+        type=str,
+        default='paddle',
+        help="Mode of running (paddle/trt_fp32/trt_fp16/trt_int8).")
+    parser.add_argument(
+        "--device",
+        type=str,
+        default='cpu',
+        help="Choose the device to run on; it can be CPU/GPU/XPU, default is CPU."
+    )
+    parser.add_argument(
+        "--enable_mkldnn",
+        type=ast.literal_eval,
+        default=False,
+        help="Whether to use MKLDNN on CPU.")
+    parser.add_argument(
+        "--cpu_threads", type=int, default=1, help="Number of CPU threads.")
+    parser.add_argument(
+        "--trt_min_shape", type=int, default=1, help="min_shape for TensorRT.")
+    parser.add_argument(
+        "--trt_max_shape",
+        type=int,
+        default=1280,
+        help="max_shape for TensorRT.")
+    parser.add_argument(
+        "--trt_opt_shape",
+        type=int,
+        default=640,
+        help="opt_shape for TensorRT.")
+    parser.add_argument(
+        "--trt_calib_mode",
+        type=bool,
+        default=False,
+        help="If the model is produced by TRT offline quantization "
+        "calibration, trt_calib_mode needs to be set to True.")
+    return parser
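+
+
+# A sketch of how the parser is typically consumed (it mirrors main() in
+# deploy/pphuman/pipeline.py; merge_cfg is defined later in this module):
+#
+#   parser = argsparser()
+#   FLAGS = parser.parse_args()
+#   cfg = merge_cfg(FLAGS)  # CLI arguments override matching YAML keys
+#   print_arguments(cfg)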
+
+
+class Times(object):
+    def __init__(self):
+        self.time = 0.
+        # start time
+        self.st = 0.
+        # end time
+        self.et = 0.
+
+    def start(self):
+        self.st = time.time()
+
+    def end(self, repeats=1, accumulative=True):
+        self.et = time.time()
+        if accumulative:
+            self.time += (self.et - self.st) / repeats
+        else:
+            self.time = (self.et - self.st) / repeats
+
+    def reset(self):
+        self.time = 0.
+        self.st = 0.
+        self.et = 0.
+
+    def value(self):
+        return round(self.time, 4)
+
+
+class PipeTimer(Times):
+    def __init__(self):
+        super(PipeTimer, self).__init__()
+        self.total_time = Times()
+        self.module_time = {
+            'det': Times(),
+            'mot': Times(),
+            'attr': Times(),
+            'kpt': Times(),
+            'action': Times(),
+        }
+        self.img_num = 0
+
+    def info(self, average=False):
+        total_time = self.total_time.value()
+        print("------------------ Inference Time Info ----------------------")
+        print("total_time(ms): {}, img_num: {}".format(total_time * 1000,
+                                                       self.img_num))
+
+        for k, v in self.module_time.items():
+            v_time = round(v.value(), 4)
+            if v_time > 0:
+                print("{} time(ms): {}".format(k, v_time * 1000))
+
+        average_latency = total_time / max(1, self.img_num)
+        qps = 0
+        if total_time > 0:
+            qps = 1 / average_latency
+
+        print("average latency time(ms): {:.2f}, QPS: {:.2f}".format(
+            average_latency * 1000, qps))
+
+    def report(self, average=False):
+        dic = {}
+        num = max(1, self.img_num)
+        dic['total'] = round(self.total_time.value() / num,
+                             4) if average else self.total_time.value()
+        for k, v in self.module_time.items():
+            dic[k] = round(v.value() / num, 4) if average else v.value()
+        dic['img_num'] = self.img_num
+        return dic
+
+
+def merge_cfg(args):
+    with open(args.config) as f:
+        pred_config = yaml.safe_load(f)
+
+    def merge(cfg, arg):
+        merge_cfg = copy.deepcopy(cfg)
+        for k, v in cfg.items():
+            if k in arg:
+                merge_cfg[k] = arg[k]
+            else:
+                if isinstance(v, dict):
+                    merge_cfg[k] = merge(v, arg)
+        return merge_cfg
+
+    pred_config = merge(pred_config, vars(args))
+    return pred_config
+
+
+def print_arguments(cfg):
+    print('-----------  Running Arguments -----------')
+    for arg, value in sorted(cfg.items()):
+        print('%s: %s' % (arg, value))
+    print('------------------------------------------')
+
+
+def get_test_images(infer_dir, infer_img):
+    """
+    Get the image path list in TEST mode
+    """
+    assert infer_img is not None or infer_dir is not None, \
+        "--image_file or --image_dir should be set"
+    assert infer_img is None or os.path.isfile(infer_img), \
+        "{} is not a file".format(infer_img)
+    assert infer_dir is None or os.path.isdir(infer_dir), \
+        "{} is not a directory".format(infer_dir)
+
+    # infer_img has a higher priority
+    if infer_img and os.path.isfile(infer_img):
+        return [infer_img]
+
+    images = set()
+    infer_dir = os.path.abspath(infer_dir)
+    assert os.path.isdir(infer_dir), \
+        "infer_dir {} is not a directory".format(infer_dir)
+    exts = ['jpg', 'jpeg', 'png', 'bmp']
+    exts += [ext.upper() for ext in exts]
+    for ext in exts:
+        images.update(glob.glob('{}/*.{}'.format(infer_dir, ext)))
+    images = list(images)
+
+    assert len(images) > 0, "no image found in {}".format(infer_dir)
+    print("Found {} inference images in total.".format(len(images)))
+
+    return images
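+
+
+# A sketch of the detector output consumed by crop_image_with_det below
+# (values are illustrative; rows follow [class, score, xmin, ymin, xmax, ymax]):
+#
+#   det_res = {
+#       'boxes': np.array([[0., 0.9, 10., 20., 50., 120.],
+#                          [0., 0.8, 60., 15., 100., 130.]]),
+#       'boxes_num': np.array([2]),  # boxes per image in the batch
+#   }
+#   crops = crop_image_with_det([img], det_res)  # -> [[crop_1, crop_2]]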
+
+
+def crop_image_with_det(batch_input, det_res):
+    boxes = det_res['boxes']
+    score = det_res['boxes'][:, 1]
+    boxes_num = det_res['boxes_num']
+    start_idx = 0
+    crop_res = []
+    for b_id, input in enumerate(batch_input):
+        boxes_num_i = boxes_num[b_id]
+        boxes_i = boxes[start_idx:start_idx + boxes_num_i, :]
+        score_i = score[start_idx:start_idx + boxes_num_i]
+        res = []
+        for box in boxes_i:
+            crop_image, new_box, ori_box = expand_crop(input, box)
+            if crop_image is not None:
+                res.append(crop_image)
+        crop_res.append(res)
+        # advance to the boxes of the next image in the batch
+        start_idx += boxes_num_i
+    return crop_res
+
+
+def crop_image_with_mot(input, mot_res):
+    res = mot_res['boxes']
+    crop_res = []
+    for box in res:
+        crop_image, new_box, ori_box = expand_crop(input, box[1:])
+        if crop_image is not None:
+            crop_res.append(crop_image)
+    return crop_res
+
+
+def parse_mot_res(input):
+    mot_res = []
+    boxes, scores, ids = input[0]
+    for box, score, i in zip(boxes[0], scores[0], ids[0]):
+        xmin, ymin, w, h = box
+        res = [i, 0, score, xmin, ymin, xmin + w, ymin + h]
+        mot_res.append(res)
+    return {'boxes': np.array(mot_res)}
diff --git a/deploy/pphuman/pipeline.py b/deploy/pphuman/pipeline.py
new file mode 100644
index 000000000..9b1b04503
--- /dev/null
+++ b/deploy/pphuman/pipeline.py
@@ -0,0 +1,478 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import yaml
+import glob
+
+import cv2
+import numpy as np
+import math
+import paddle
+import sys
+from collections.abc import Sequence
+
+# add deploy path of PaddleDetection to sys.path
+parent_path = os.path.abspath(os.path.join(__file__, *(['..'] * 2)))
+sys.path.insert(0, parent_path)
+
+from python.infer import Detector, DetectorPicoDet
+from python.mot_sde_infer import SDE_Detector
+from python.attr_infer import AttrDetector
+from pipe_utils import argsparser, print_arguments, merge_cfg, PipeTimer
+from pipe_utils import get_test_images, crop_image_with_det, crop_image_with_mot, parse_mot_res
+from python.preprocess import decode_image
+from python.visualize import visualize_box_mask, visualize_attr
+from pptracking.python.visualize import plot_tracking
+
+
+class Pipeline(object):
+    """
+    Pipeline
+
+    Args:
+        cfg (dict): config of models in pipeline
+        image_file (string|None): the path of an image file, default as None
+        image_dir (string|None): the path of an image directory; if not None,
+            all images in the directory will be predicted, default as None
+        video_file (string|None): the path of a video file, default as None
+        camera_id (int): the device id of the camera to predict, default as -1
+        device (string): the device to predict, options: CPU/GPU/XPU,
+            default as CPU
+        run_mode (string): the mode of prediction, options:
+            paddle/trt_fp32/trt_fp16/trt_int8, default as paddle
+        trt_min_shape (int): min shape for dynamic shape in trt, default as 1
+        trt_max_shape (int): max shape for dynamic shape in trt, default as 1280
+        trt_opt_shape (int): opt shape for dynamic shape in trt, default as 640
+        trt_calib_mode (bool): if the model is produced by TRT offline
+            quantization calibration, trt_calib_mode needs to be set to True,
+            default as False
+        cpu_threads (int): cpu threads, default as 1
+        enable_mkldnn (bool): whether to enable MKLDNN, default as False
+        output_dir (string): the path of output, default as 'output'
+    """
+
+    def __init__(self,
+                 cfg,
+                 image_file=None,
+                 image_dir=None,
+                 video_file=None,
+                 camera_id=-1,
+                 device='CPU',
+                 run_mode='paddle',
+                 trt_min_shape=1,
+                 trt_max_shape=1280,
+                 trt_opt_shape=640,
+                 trt_calib_mode=False,
+                 cpu_threads=1,
+                 enable_mkldnn=False,
+                 output_dir='output'):
+        self.multi_camera = False
+        self.is_video = False
+        self.input = self._parse_input(image_file, image_dir, video_file,
+                                       camera_id)
+        if self.multi_camera:
+            self.predictor = [
+                PipePredictor(
+                    cfg,
+                    is_video=True,
+                    multi_camera=True,
+                    device=device,
+                    run_mode=run_mode,
+                    trt_min_shape=trt_min_shape,
+                    trt_max_shape=trt_max_shape,
+                    trt_opt_shape=trt_opt_shape,
+                    trt_calib_mode=trt_calib_mode,
+                    cpu_threads=cpu_threads,
+                    enable_mkldnn=enable_mkldnn,
+                    output_dir=output_dir) for _ in self.input
+            ]
+        else:
+            self.predictor = PipePredictor(
+                cfg,
+                self.is_video,
+                device=device,
+                run_mode=run_mode,
+                trt_min_shape=trt_min_shape,
+                trt_max_shape=trt_max_shape,
+                trt_opt_shape=trt_opt_shape,
+                trt_calib_mode=trt_calib_mode,
+                cpu_threads=cpu_threads,
+                enable_mkldnn=enable_mkldnn,
+                output_dir=output_dir)
+
+    def _parse_input(self, image_file, image_dir, video_file, camera_id):
+        # parse input as is_video and multi_camera
+        if image_file is not None or image_dir is not None:
+            input = get_test_images(image_dir, image_file)
+            self.is_video = False
+            self.multi_camera = False
+
+        elif video_file is not None:
+            if isinstance(video_file, list):
+                self.multi_camera = True
+                input = [cv2.VideoCapture(v) for v in video_file]
+            else:
+                input = cv2.VideoCapture(video_file)
+            self.is_video = True
+
+        elif camera_id != -1:
+            if isinstance(camera_id, Sequence):
+                self.multi_camera = True
+                input = [cv2.VideoCapture(i) for i in camera_id]
+            else:
+                input = cv2.VideoCapture(camera_id)
+            self.is_video = True
+
+        else:
+            raise ValueError(
+                "Illegal input, please set one of ['video_file', 'camera_id', "
+                "'image_file', 'image_dir']")
+
+        return input
+
+    def run(self):
+        if self.multi_camera:
+            multi_res = []
+            for predictor, input in zip(self.predictor, self.input):
+                predictor.run(input)
+                res = predictor.get_result()
+                multi_res.append(res)
+
+            # fuse tracks across cameras; mtmct_process is not included in
+            # this WIP patch yet
+            mtmct_process(multi_res)
+
+        else:
+            self.predictor.run(self.input)
+
+
+class Result(object):
+    def __init__(self):
+        self.res_dict = {
+            'det': dict(),
+            'mot': dict(),
+            'attr': dict(),
+            'kpt': dict(),
+            'action': dict()
+        }
+
+    def update(self, res, name):
+        self.res_dict[name].update(res)
+
+    def get(self, name):
+        if name in self.res_dict:
+            return self.res_dict[name]
+        return None
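+
+
+# A sketch of how Result collects module outputs (mot_res as produced by
+# parse_mot_res in pipe_utils.py):
+#
+#   result = Result()
+#   result.update(mot_res, 'mot')
+#   boxes = result.get('mot')['boxes']  # get() returns None for unknown names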
+
+
+class PipePredictor(object):
+    """
+    Predictor in single camera
+
+    The pipeline for image input:
+
+        1. Detection
+        2. Detection -> Attribute
+
+    The pipeline for video input:
+
+        1. Tracking
+        2. Tracking -> Attribute
+        3. Tracking -> KeyPoint -> Action Recognition
+
+    Args:
+        cfg (dict): config of models in pipeline
+        is_video (bool): whether the input is video, default as False
+        multi_camera (bool): whether to use multiple cameras in the pipeline,
+            default as False
+        device (string): the device to predict, options: CPU/GPU/XPU,
+            default as CPU
+        run_mode (string): the mode of prediction, options:
+            paddle/trt_fp32/trt_fp16/trt_int8, default as paddle
+        trt_min_shape (int): min shape for dynamic shape in trt, default as 1
+        trt_max_shape (int): max shape for dynamic shape in trt, default as 1280
+        trt_opt_shape (int): opt shape for dynamic shape in trt, default as 640
+        trt_calib_mode (bool): if the model is produced by TRT offline
+            quantization calibration, trt_calib_mode needs to be set to True,
+            default as False
+        cpu_threads (int): cpu threads, default as 1
+        enable_mkldnn (bool): whether to enable MKLDNN, default as False
+        output_dir (string): the path of output, default as 'output'
+    """
+
+    def __init__(self,
+                 cfg,
+                 is_video=True,
+                 multi_camera=False,
+                 device='CPU',
+                 run_mode='paddle',
+                 trt_min_shape=1,
+                 trt_max_shape=1280,
+                 trt_opt_shape=640,
+                 trt_calib_mode=False,
+                 cpu_threads=1,
+                 enable_mkldnn=False,
+                 output_dir='output'):
+
+        self.with_attr = cfg.get('ATTR', False)
+        self.with_action = cfg.get('ACTION', False)
+        self.is_video = is_video
+        self.multi_camera = multi_camera
+        self.cfg = cfg
+        self.output_dir = output_dir
+
+        self.warmup_frame = 1
+        self.pipeline_res = Result()
+        self.pipe_timer = PipeTimer()
+
+        if not is_video:
+            det_cfg = self.cfg['DET']
+            model_dir = det_cfg['model_dir']
+            batch_size = det_cfg['batch_size']
+            self.det_predictor = Detector(
+                model_dir, device, run_mode, batch_size, trt_min_shape,
+                trt_max_shape, trt_opt_shape, trt_calib_mode, cpu_threads,
+                enable_mkldnn)
+            if self.with_attr:
+                attr_cfg = self.cfg['ATTR']
+                model_dir = attr_cfg['model_dir']
+                batch_size = attr_cfg['batch_size']
+                self.attr_predictor = AttrDetector(
+                    model_dir, device, run_mode, batch_size, trt_min_shape,
+                    trt_max_shape, trt_opt_shape, trt_calib_mode, cpu_threads,
+                    enable_mkldnn)
+
+        else:
+            mot_cfg = self.cfg['MOT']
+            model_dir = mot_cfg['model_dir']
+            tracker_config = mot_cfg['tracker_config']
+            batch_size = mot_cfg['batch_size']
+            self.mot_predictor = SDE_Detector(
+                model_dir, tracker_config, device, run_mode, batch_size,
+                trt_min_shape, trt_max_shape, trt_opt_shape, trt_calib_mode,
+                cpu_threads, enable_mkldnn)
+            if self.with_attr:
+                attr_cfg = self.cfg['ATTR']
+                model_dir = attr_cfg['model_dir']
+                batch_size = attr_cfg['batch_size']
+                self.attr_predictor = AttrDetector(
+                    model_dir, device, run_mode, batch_size, trt_min_shape,
+                    trt_max_shape, trt_opt_shape, trt_calib_mode, cpu_threads,
+                    enable_mkldnn)
+            if self.with_action:
+                # keypoint and action modules are placeholders in this WIP
+                # patch; these classes are not importable yet
+                self.kpt_predictor = KeyPointDetector()
+                self.kpt_collector = KeyPointCollector()
+                self.action_predictor = ActionDetector()
+
+    def get_result(self):
+        return self.pipeline_res
+
+    def run(self, input):
+        if self.is_video:
+            self.predict_video(input)
+        else:
+            self.predict_image(input)
+        self.pipe_timer.info(True)
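+
+    # A sketch of a single-image run (illustrative only; cfg comes from
+    # merge_cfg and must contain a DET entry, and 'demo.jpg' is hypothetical):
+    #
+    #   p = PipePredictor(cfg, is_video=False)
+    #   p.run(['demo.jpg'])
+    #   det_res = p.get_result().get('det')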
+
+    def predict_image(self, input):
+        # det
+        # det -> attr
+        batch_loop_cnt = math.ceil(
+            float(len(input)) / self.det_predictor.batch_size)
+        for i in range(batch_loop_cnt):
+            start_index = i * self.det_predictor.batch_size
+            end_index = min((i + 1) * self.det_predictor.batch_size, len(input))
+            batch_file = input[start_index:end_index]
+            batch_input = [decode_image(f, {})[0] for f in batch_file]
+
+            if i > self.warmup_frame:
+                self.pipe_timer.total_time.start()
+                self.pipe_timer.module_time['det'].start()
+            # det output format: class, score, xmin, ymin, xmax, ymax
+            det_res = self.det_predictor.predict_image(
+                batch_input, visual=False)
+            if i > self.warmup_frame:
+                self.pipe_timer.module_time['det'].end()
+            self.pipeline_res.update(det_res, 'det')
+
+            if self.with_attr:
+                crop_inputs = crop_image_with_det(batch_input, det_res)
+                attr_res_list = []
+
+                if i > self.warmup_frame:
+                    self.pipe_timer.module_time['attr'].start()
+
+                for crop_input in crop_inputs:
+                    attr_res = self.attr_predictor.predict_image(
+                        crop_input, visual=False)
+                    attr_res_list.extend(attr_res['output'])
+
+                if i > self.warmup_frame:
+                    self.pipe_timer.module_time['attr'].end()
+
+                attr_res = {'output': attr_res_list}
+                self.pipeline_res.update(attr_res, 'attr')
+
+            self.pipe_timer.img_num += len(batch_input)
+            if i > self.warmup_frame:
+                self.pipe_timer.total_time.end()
+
+            if self.cfg['visual']:
+                self.visualize_image(batch_file, batch_input, self.pipeline_res)
+
+    def predict_video(self, capture):
+        # mot
+        # mot -> attr
+        # mot -> pose -> action
+        video_out_name = 'output.mp4'
+
+        # Get video info: resolution, fps, frame count
+        width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
+        height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        fps = int(capture.get(cv2.CAP_PROP_FPS))
+        frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
+
+        if not os.path.exists(self.output_dir):
+            os.makedirs(self.output_dir)
+        out_path = os.path.join(self.output_dir, video_out_name)
+        fourcc = cv2.VideoWriter_fourcc(* 'mp4v')
+        writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
+        frame_id = 0
+        while True:
+            if frame_id % 10 == 0:
+                print('frame id: ', frame_id)
+            ret, frame = capture.read()
+            if not ret:
+                break
+
+            if frame_id > self.warmup_frame:
+                self.pipe_timer.total_time.start()
+                self.pipe_timer.module_time['mot'].start()
+            res = self.mot_predictor.predict_image([frame], visual=False)
+
+            if frame_id > self.warmup_frame:
+                self.pipe_timer.module_time['mot'].end()
+
+            # mot output format: id, class, score, xmin, ymin, xmax, ymax
+            mot_res = parse_mot_res(res)
+
+            self.pipeline_res.update(mot_res, 'mot')
+            if self.with_attr or self.with_action:
+                crop_input = crop_image_with_mot(frame, mot_res)
+
+            if self.with_attr:
+                if frame_id > self.warmup_frame:
+                    self.pipe_timer.module_time['attr'].start()
+                attr_res = self.attr_predictor.predict_image(
+                    crop_input, visual=False)
+                if frame_id > self.warmup_frame:
+                    self.pipe_timer.module_time['attr'].end()
+                self.pipeline_res.update(attr_res, 'attr')
+
+            if self.with_action:
+                kpt_result = self.kpt_predictor.predict_image(crop_input)
+                self.pipeline_res.update(kpt_result, 'kpt')
+
+                self.kpt_collector.update(kpt_result)  # collect kpt output
+                state = self.kpt_collector.state()  # whether frame num is enough
+
+                if state:
+                    # reorganize kpt output by tracking id
+                    action_input = self.kpt_collector.collate()
+                    action_res = self.action_predictor.predict_kpt(action_input)
+                    self.pipeline_res.update(action_res, 'action')
+
+            if frame_id > self.warmup_frame:
+                self.pipe_timer.img_num += 1
+                self.pipe_timer.total_time.end()
+            frame_id += 1
+
+            if self.multi_camera:
+                # parse output result for multi-camera (WIP helper)
+                self.get_valid_instance(frame, self.pipeline_res)
+
+            if self.cfg['visual']:
+                im = self.visualize_video(frame, self.pipeline_res,
+                                          frame_id)  # visualize
+                writer.write(im)
+
+        writer.release()
+        print('save result to {}'.format(out_path))
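+
+    # Note on box layouts in the visualizers below: mot_res['boxes'] rows are
+    # [id, class, score, xmin, ymin, xmax, ymax]; plot_tracking expects
+    # top-left/width/height boxes, hence the xyxy -> xywh conversion in
+    # visualize_video.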
+
+    def visualize_video(self, image, result, frame_id):
+        mot_res = result.get('mot')
+        ids = mot_res['boxes'][:, 0]
+        boxes = mot_res['boxes'][:, 3:]
+        boxes[:, 2] = boxes[:, 2] - boxes[:, 0]  # width
+        boxes[:, 3] = boxes[:, 3] - boxes[:, 1]  # height
+        image = plot_tracking(image, boxes, ids, frame_id=frame_id)
+
+        attr_res = result.get('attr')
+        if attr_res is not None:
+            boxes = mot_res['boxes'][:, 1:]
+            attr_res = attr_res['output']
+            image = visualize_attr(image, attr_res, boxes)
+            image = np.array(image)
+
+        return image
+
+    def visualize_image(self, im_files, images, result):
+        start_idx, boxes_num_i = 0, 0
+        det_res = result.get('det')
+        attr_res = result.get('attr')
+        for i, (im_file, im) in enumerate(zip(im_files, images)):
+            if det_res is not None:
+                det_res_i = {}
+                boxes_num_i = det_res['boxes_num'][i]
+                det_res_i['boxes'] = det_res['boxes'][start_idx:start_idx +
+                                                      boxes_num_i, :]
+                im = visualize_box_mask(
+                    im,
+                    det_res_i,
+                    labels=['person'],
+                    threshold=self.cfg['crop_thresh'])
+            if attr_res is not None:
+                attr_res_i = attr_res['output'][start_idx:start_idx +
+                                                boxes_num_i]
+                im = visualize_attr(im, attr_res_i, det_res_i['boxes'])
+            img_name = os.path.split(im_file)[-1]
+            if not os.path.exists(self.output_dir):
+                os.makedirs(self.output_dir)
+            out_path = os.path.join(self.output_dir, img_name)
+            im.save(out_path, quality=95)
+            print("save result to: " + out_path)
+            start_idx += boxes_num_i
+
+
+def main():
+    cfg = merge_cfg(FLAGS)
+    print_arguments(cfg)
+    pipeline = Pipeline(
+        cfg, FLAGS.image_file, FLAGS.image_dir, FLAGS.video_file,
+        FLAGS.camera_id, FLAGS.device, FLAGS.run_mode, FLAGS.trt_min_shape,
+        FLAGS.trt_max_shape, FLAGS.trt_opt_shape, FLAGS.trt_calib_mode,
+        FLAGS.cpu_threads, FLAGS.enable_mkldnn, FLAGS.output_dir)
+
+    pipeline.run()
+
+
+if __name__ == '__main__':
+    paddle.enable_static()
+    parser = argsparser()
+    FLAGS = parser.parse_args()
+    FLAGS.device = FLAGS.device.upper()
+    assert FLAGS.device in ['CPU', 'GPU', 'XPU'
+                            ], "device should be CPU, GPU or XPU"
+
+    main()
diff --git a/deploy/pphuman/tracker_config.yml b/deploy/pphuman/tracker_config.yml
new file mode 100644
index 000000000..d92510148
--- /dev/null
+++ b/deploy/pphuman/tracker_config.yml
@@ -0,0 +1,10 @@
+# Config of the tracker for the MOT SDE Detector; ByteTracker is used by default.
+# The tracker of a MOT JDE Detector is exported together with the model.
+# Here 'min_box_area' and 'vertical_ratio' are set for pedestrians; modify them
+# when tracking other kinds of objects.
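+#
+# Our reading of the ByteTrack-style thresholds below (a sketch, not an
+# authoritative reference):
+#   conf_thres: detections scoring above this join the first association pass
+#   low_conf_thres: scores in [low_conf_thres, conf_thres) join the second pass
+#   match_thres: matching threshold used when associating boxes with tracks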
+tracker:
+  use_byte: true
+  conf_thres: 0.6
+  low_conf_thres: 0.1
+  match_thres: 0.9
+  min_box_area: 100
+  vertical_ratio: 1.6
diff --git a/deploy/python/mot_sde_infer.py b/deploy/python/mot_sde_infer.py
index 37c4cdae0..3b9464561 100644
--- a/deploy/python/mot_sde_infer.py
+++ b/deploy/python/mot_sde_infer.py
@@ -238,7 +238,7 @@ class SDE_Detector(Detector):
         if not os.path.exists(self.output_dir):
             os.makedirs(self.output_dir)
         out_path = os.path.join(self.output_dir, video_out_name)
-        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+        fourcc = cv2.VideoWriter_fourcc(* 'mp4v')
         writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
         frame_id = 1
diff --git a/deploy/python/visualize.py b/deploy/python/visualize.py
index e049a4dc1..f080d06ff 100644
--- a/deploy/python/visualize.py
+++ b/deploy/python/visualize.py
@@ -38,7 +38,7 @@ def visualize_box_mask(im, results, labels, threshold=0.5):
     """
     if isinstance(im, str):
         im = Image.open(im).convert('RGB')
-    else:
+    elif isinstance(im, np.ndarray):
         im = Image.fromarray(im)
     if 'masks' in results and 'boxes' in results and len(results['boxes']) > 0:
         im = draw_mask(
@@ -328,3 +328,26 @@ def visualize_pose(imgfile,
     plt.imsave(save_name, canvas[:, :, ::-1])
     print("keypoint visualize image saved to: " + save_name)
     plt.close()
+
+
+def visualize_attr(im, results, boxes=None):
+    if isinstance(im, str):
+        im = Image.open(im).convert('RGB')
+    elif isinstance(im, np.ndarray):
+        im = Image.fromarray(im)
+
+    draw = ImageDraw.Draw(im)
+    for i, res in enumerate(results):
+        text = ""
+        for k, v in res.items():
+            if len(v) == 0:
+                continue
+            text_line = "{}: {}\n".format(k, *v)
+            text += text_line
+        if boxes is None:
+            text_loc = (1, 1)
+        else:
+            box = boxes[i]
+            text_loc = (box[2], box[3])
+        draw.text(text_loc, text, fill=(0, 0, 255))
+    return im
-- 
GitLab