diff --git a/configs/keypoint/README.md b/configs/keypoint/README.md index 6076193cdcc39127ac2b47bcd1cb65b829fd42c9..3a3cb3720f6a48ad18e44836f988a0b53c0e0843 100644 --- a/configs/keypoint/README.md +++ b/configs/keypoint/README.md @@ -41,7 +41,7 @@ MPII数据集 ​ 目前KeyPoint模型支持[COCO](https://cocodataset.org/#keypoints-2017)数据集和[MPII](http://human-pose.mpi-inf.mpg.de/#overview)数据集,数据集的准备方式请参考[关键点数据准备](../../docs/tutorials/PrepareKeypointDataSet_cn.md)。 - + - 请注意,Top-Down方案使用检测框测试时,需要通过检测模型生成bbox.json文件。COCO val2017的检测结果可以参考[Detector having human AP of 56.4 on COCO val2017 dataset](https://paddledet.bj.bcebos.com/data/bbox.json),下载后放在根目录(PaddleDetection)下,然后修改config配置文件中`use_gt_bbox: False`后生效。然后正常执行测试命令即可。 @@ -96,8 +96,8 @@ python tools/export_model.py -c configs/keypoint/higherhrnet/higherhrnet_hrnet_w python deploy/python/keypoint_infer.py --model_dir=output_inference/higherhrnet_hrnet_w32_512/ --image_file=./demo/000000014439_640x640.jpg --device=gpu --threshold=0.5 python deploy/python/keypoint_infer.py --model_dir=output_inference/hrnet_w32_384x288/ --image_file=./demo/hrnet_demo.jpg --device=gpu --threshold=0.5 -#keypoint top-down模型 + detector 检测联合部署推理(联合推理只支持top-down方式) -python deploy/python/keypoint_det_unite_infer.py --det_model_dir=output_inference/ppyolo_r50vd_dcn_2x_coco/ --keypoint_model_dir=output_inference/hrnet_w32_384x288/ --video_file=../video/xxx.mp4 --device=gpu +#detector 检测 + keypoint top-down模型联合部署(联合推理只支持top-down方式) +python deploy/python/det_keypoint_unite_infer.py --det_model_dir=output_inference/ppyolo_r50vd_dcn_2x_coco/ --keypoint_model_dir=output_inference/hrnet_w32_384x288/ --video_file=../video/xxx.mp4 --device=gpu ``` ​ **与多目标跟踪模型FairMOT联合部署预测:** diff --git a/configs/mot/README.md b/configs/mot/README.md index 49aa10bc4fedb95b8b1efc11e7611d5d196b3b8e..1a34f9d07082a3da5569262b5f82b449f6de7550 100644 --- a/configs/mot/README.md +++ b/configs/mot/README.md @@ -255,7 +255,7 @@ CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c configs/mot/fairmot/fairm ### 5. Using exported model for python inference ```bash -python deploy/python/mot_infer.py --model_dir=output_inference/fairmot_dla34_30e_1088x608 --video_file={your video name}.mp4 --device=GPU --save_mot_txts +python deploy/python/mot_jde_infer.py --model_dir=output_inference/fairmot_dla34_30e_1088x608 --video_file={your video name}.mp4 --device=GPU --save_mot_txts ``` **Notes:** The tracking model is used to predict the video, and does not support the prediction of a single image. The visualization video of the tracking results is saved by default. You can add `--save_mot_txts` to save the txt result file, or `--save_images` to save the visualization images. diff --git a/configs/mot/README_cn.md b/configs/mot/README_cn.md index aedf4f3291318385a4441cf15e3773236de51c87..8025f54acee9c3faa43974bca11bdc1d6eee5e17 100644 --- a/configs/mot/README_cn.md +++ b/configs/mot/README_cn.md @@ -253,7 +253,7 @@ CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c configs/mot/fairmot/fairm ### 5. 
用导出的模型基于Python去预测 ```bash -python deploy/python/mot_infer.py --model_dir=output_inference/fairmot_dla34_30e_1088x608 --video_file={your video name}.mp4 --device=GPU --save_mot_txts +python deploy/python/mot_jde_infer.py --model_dir=output_inference/fairmot_dla34_30e_1088x608 --video_file={your video name}.mp4 --device=GPU --save_mot_txts ``` **注意:** 跟踪模型是对视频进行预测,不支持单张图的预测,默认保存跟踪结果可视化后的视频,可添加`--save_mot_txts`表示保存跟踪结果的txt文件,或`--save_images`表示保存跟踪结果可视化图片。 diff --git a/configs/mot/fairmot/README.md b/configs/mot/fairmot/README.md index b9b979f56f6b6dbab65e37cec4db67f645b7af79..c1d5616e3f4d1e59a5565969ad0d54baa553e8c9 100644 --- a/configs/mot/fairmot/README.md +++ b/configs/mot/fairmot/README.md @@ -86,9 +86,9 @@ CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c configs/mot/fairmot/fairm ### 5. Using exported model for python inference ```bash -python deploy/python/mot_infer.py --model_dir=output_inference/fairmot_dla34_30e_1088x608 --video_file={your video name}.mp4 --device=GPU --save_mot_txts +python deploy/python/mot_jde_infer.py --model_dir=output_inference/fairmot_dla34_30e_1088x608 --video_file={your video name}.mp4 --device=GPU --save_mot_txts ``` -**Notes:** +**Notes:** The tracking model is used to predict the video, and does not support the prediction of a single image. The visualization video of the tracking results is saved by default. You can add `--save_mot_txts` to save the txt result file, or `--save_images` to save the visualization images. diff --git a/configs/mot/fairmot/README_cn.md b/configs/mot/fairmot/README_cn.md index 7a670a30f6998c990e472d4e5e76fb765b4f18ba..18b3428bfc9a19e446b607f48abab9173afc34c6 100644 --- a/configs/mot/fairmot/README_cn.md +++ b/configs/mot/fairmot/README_cn.md @@ -84,9 +84,9 @@ CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c configs/mot/fairmot/fairm ### 5. 用导出的模型基于Python去预测 ```bash -python deploy/python/mot_infer.py --model_dir=output_inference/fairmot_dla34_30e_1088x608 --video_file={your video name}.mp4 --device=GPU --save_mot_txts +python deploy/python/mot_jde_infer.py --model_dir=output_inference/fairmot_dla34_30e_1088x608 --video_file={your video name}.mp4 --device=GPU --save_mot_txts ``` -**注意:** +**注意:** 跟踪模型是对视频进行预测,不支持单张图的预测,默认保存跟踪结果可视化后的视频,可添加`--save_mot_txts`表示保存跟踪结果的txt文件,或`--save_images`表示保存跟踪结果可视化图片。 ## 引用 diff --git a/configs/mot/jde/README.md b/configs/mot/jde/README.md index 62b655628221e17d09e178246ee90dd0df5ac588..2b66e727360f93fcbdd60d54cfe08b4124f72294 100644 --- a/configs/mot/jde/README.md +++ b/configs/mot/jde/README.md @@ -92,9 +92,9 @@ CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c configs/mot/jde/jde_darkn ### 5. Using exported model for python inference ```bash -python deploy/python/mot_infer.py --model_dir=output_inference/jde_darknet53_30e_1088x608 --video_file={your video name}.mp4 --device=GPU --save_mot_txts +python deploy/python/mot_jde_infer.py --model_dir=output_inference/jde_darknet53_30e_1088x608 --video_file={your video name}.mp4 --device=GPU --save_mot_txts ``` -**Notes:** +**Notes:** The tracking model is used to predict the video, and does not support the prediction of a single image. The visualization video of the tracking results is saved by default. You can add `--save_mot_txts` to save the txt result file, or `--save_images` to save the visualization images. 
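For readers who want to call the renamed tracker from Python rather than through the command line, a minimal sketch is shown below. It only relies on the class and `predict()` signature introduced by this patch (`JDE_Detector` in `deploy/python/mot_jde_infer.py`); the export directory, demo image name and output path are assumptions, and the script is assumed to run from `deploy/python/`.

```python
# Hedged sketch: direct use of the renamed JDE tracker (mot_jde_infer.py).
# Paths are assumptions; the model is assumed to be exported as in the
# README commands above.
import cv2

from infer import PredictConfig
from mot_jde_infer import JDE_Detector
from ppdet.modeling.mot import visualization as mot_vis

model_dir = 'output_inference/jde_darknet53_30e_1088x608'  # assumed export path
pred_config = PredictConfig(model_dir)
detector = JDE_Detector(pred_config, model_dir, device='GPU')

# predict() takes a list of images and returns tlwh boxes, scores and track ids.
frame = cv2.imread('demo_frame.jpg')  # hypothetical single video frame
online_tlwhs, online_scores, online_ids = detector.predict([frame], threshold=0.5)

# Visualize the tracks the same way predict_image()/predict_video() do.
online_im = mot_vis.plot_tracking(
    frame, online_tlwhs, online_ids, online_scores, frame_id=0)
cv2.imwrite('output/tracking_vis.jpg', online_im)
```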
diff --git a/configs/mot/jde/README_cn.md b/configs/mot/jde/README_cn.md index c77ac7c7d20025bdba398643437e54c6af988da0..e441c4478a1cc7ae024109e3e67c957a80eda345 100644 --- a/configs/mot/jde/README_cn.md +++ b/configs/mot/jde/README_cn.md @@ -93,9 +93,9 @@ CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c configs/mot/jde/jde_darkn ### 5. 用导出的模型基于Python去预测 ```bash -python deploy/python/mot_infer.py --model_dir=output_inference/jde_darknet53_30e_1088x608 --video_file={your video name}.mp4 --device=GPU --save_mot_txts +python deploy/python/mot_jde_infer.py --model_dir=output_inference/jde_darknet53_30e_1088x608 --video_file={your video name}.mp4 --device=GPU --save_mot_txts ``` -**注意:** +**注意:** 跟踪模型是对视频进行预测,不支持单张图的预测,默认保存跟踪结果可视化后的视频,可添加`--save_mot_txts`表示保存跟踪结果的txt文件,或`--save_images`表示保存跟踪结果可视化图片。。 ## 引用 diff --git a/deploy/python/keypoint_det_unite_infer.py b/deploy/python/det_keypoint_unite_infer.py similarity index 71% rename from deploy/python/keypoint_det_unite_infer.py rename to deploy/python/det_keypoint_unite_infer.py index 056821e5c871d081c77146c7a95f7ea4dcca86d2..6470ecf087a0064bae6661a10b673705aaeff194 100644 --- a/deploy/python/keypoint_det_unite_infer.py +++ b/deploy/python/det_keypoint_unite_infer.py @@ -19,13 +19,19 @@ import math import numpy as np import paddle -from topdown_unite_utils import argsparser +from det_keypoint_unite_utils import argsparser from preprocess import decode_image from infer import Detector, PredictConfig, print_arguments, get_test_images from keypoint_infer import KeyPoint_Detector, PredictConfig_KeyPoint -from keypoint_visualize import draw_pose +from visualize import draw_pose from benchmark_utils import PaddleInferBenchmark from utils import get_current_memory_mb +from keypoint_postprocess import translate_to_ori_images + +KEYPOINT_SUPPORT_MODELS = { + 'HigherHRNet': 'keypoint_bottomup', + 'HRNet': 'keypoint_topdown' +} def bench_log(detector, img_list, model_info, batch_size=1, name=None): @@ -46,11 +52,38 @@ def bench_log(detector, img_list, model_info, batch_size=1, name=None): log(name) -def affine_backto_orgimages(keypoint_result, batch_records): - kpts, scores = keypoint_result['keypoint'] - kpts[..., 0] += batch_records[:, 0:1] - kpts[..., 1] += batch_records[:, 1:2] - return kpts, scores +def predict_with_given_det(image, det_res, keypoint_detector, + keypoint_batch_size, det_threshold, + keypoint_threshold, run_benchmark): + rec_images, records, det_rects = keypoint_detector.get_person_from_rect( + image, det_res, det_threshold) + keypoint_vector = [] + score_vector = [] + rect_vector = det_rects + batch_loop_cnt = math.ceil(float(len(rec_images)) / keypoint_batch_size) + + for i in range(batch_loop_cnt): + start_index = i * keypoint_batch_size + end_index = min((i + 1) * keypoint_batch_size, len(rec_images)) + batch_images = rec_images[start_index:end_index] + batch_records = np.array(records[start_index:end_index]) + if run_benchmark: + keypoint_result = keypoint_detector.predict( + batch_images, keypoint_threshold, warmup=10, repeats=10) + else: + keypoint_result = keypoint_detector.predict(batch_images, + keypoint_threshold) + orgkeypoints, scores = translate_to_ori_images(keypoint_result, + batch_records) + keypoint_vector.append(orgkeypoints) + score_vector.append(scores) + + keypoint_res = {} + keypoint_res['keypoint'] = [ + np.vstack(keypoint_vector), np.vstack(score_vector) + ] if len(keypoint_vector) > 0 else [[], []] + keypoint_res['bbox'] = rect_vector + return keypoint_res def topdown_unite_predict(detector, @@ -76,42 
+109,17 @@ def topdown_unite_predict(detector, if results['boxes_num'] == 0: continue - rec_images, records, det_rects = topdown_keypoint_detector.get_person_from_rect( - image, results, FLAGS.det_threshold) - keypoint_vector = [] - score_vector = [] - rect_vector = det_rects - batch_loop_cnt = math.ceil(float(len(rec_images)) / keypoint_batch_size) - for i in range(batch_loop_cnt): - start_index = i * keypoint_batch_size - end_index = min((i + 1) * keypoint_batch_size, len(rec_images)) - batch_images = rec_images[start_index:end_index] - batch_records = np.array(records[start_index:end_index]) - if FLAGS.run_benchmark: - keypoint_result = topdown_keypoint_detector.predict( - batch_images, - FLAGS.keypoint_threshold, - warmup=10, - repeats=10) - else: - keypoint_result = topdown_keypoint_detector.predict( - batch_images, FLAGS.keypoint_threshold) - orgkeypoints, scores = affine_backto_orgimages(keypoint_result, - batch_records) - keypoint_vector.append(orgkeypoints) - score_vector.append(scores) + keypoint_res = predict_with_given_det( + image, results, topdown_keypoint_detector, keypoint_batch_size, + FLAGS.det_threshold, FLAGS.keypoint_threshold, FLAGS.run_benchmark) + if FLAGS.run_benchmark: cm, gm, gu = get_current_memory_mb() topdown_keypoint_detector.cpu_mem += cm topdown_keypoint_detector.gpu_mem += gm topdown_keypoint_detector.gpu_util += gu else: - keypoint_res = {} - keypoint_res['keypoint'] = [ - np.vstack(keypoint_vector), np.vstack(score_vector) - ] - keypoint_res['bbox'] = rect_vector if not os.path.exists(FLAGS.output_dir): os.makedirs(FLAGS.output_dir) draw_pose( @@ -152,27 +160,11 @@ def topdown_unite_predict_video(detector, frame2 = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) results = detector.predict([frame2], FLAGS.det_threshold) - rec_images, records, rect_vector = topdown_keypoint_detector.get_person_from_rect( - frame2, results) - keypoint_vector = [] - score_vector = [] - batch_loop_cnt = math.ceil(float(len(rec_images)) / keypoint_batch_size) - for i in range(batch_loop_cnt): - start_index = i * keypoint_batch_size - end_index = min((i + 1) * keypoint_batch_size, len(rec_images)) - batch_images = rec_images[start_index:end_index] - batch_records = np.array(records[start_index:end_index]) - keypoint_result = topdown_keypoint_detector.predict( - batch_images, FLAGS.keypoint_threshold) - orgkeypoints, scores = affine_backto_orgimages(keypoint_result, - batch_records) - keypoint_vector.append(orgkeypoints) - score_vector.append(scores) - keypoint_res = {} - keypoint_res['keypoint'] = [ - np.vstack(keypoint_vector), np.vstack(score_vector) - ] if len(keypoint_vector) > 0 else [[], []] - keypoint_res['bbox'] = rect_vector + + keypoint_res = predict_with_given_det( + frame2, results, topdown_keypoint_detector, keypoint_batch_size, + FLAGS.det_threshold, FLAGS.keypoint_threshold, FLAGS.run_benchmark) + im = draw_pose( frame, keypoint_res, @@ -202,11 +194,15 @@ def main(): enable_mkldnn=FLAGS.enable_mkldnn) pred_config = PredictConfig_KeyPoint(FLAGS.keypoint_model_dir) + assert KEYPOINT_SUPPORT_MODELS[ + pred_config. + arch] == 'keypoint_topdown', 'Detection-Keypoint unite inference only supports topdown models.' 
topdown_keypoint_detector = KeyPoint_Detector( pred_config, FLAGS.keypoint_model_dir, device=FLAGS.device, run_mode=FLAGS.run_mode, + batch_size=FLAGS.keypoint_batch_size, trt_min_shape=FLAGS.trt_min_shape, trt_max_shape=FLAGS.trt_max_shape, trt_opt_shape=FLAGS.trt_opt_shape, diff --git a/deploy/python/topdown_unite_utils.py b/deploy/python/det_keypoint_unite_utils.py similarity index 100% rename from deploy/python/topdown_unite_utils.py rename to deploy/python/det_keypoint_unite_utils.py diff --git a/deploy/python/infer.py b/deploy/python/infer.py index e270bbddf6367c29945ef9cfad99154fc36f8705..6d56ee585c2e9b71d086905a9a0e9b0ad606a161 100644 --- a/deploy/python/infer.py +++ b/deploy/python/infer.py @@ -50,7 +50,7 @@ SUPPORT_MODELS = { class Detector(object): """ Args: - config (object): config of model, defined by `Config(model_dir)` + pred_config (object): config of model, defined by `Config(model_dir)` model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU run_mode (str): mode of running(fluid/trt_fp32/trt_fp16) @@ -58,8 +58,10 @@ class Detector(object): trt_min_shape (int): min shape for dynamic shape in trt trt_max_shape (int): max shape for dynamic shape in trt trt_opt_shape (int): opt shape for dynamic shape in trt - run_mode (str): mode of running(fluid/trt_fp32/trt_fp16) - threshold (float): threshold to reserve the result for output. + trt_calib_mode (bool): If the model is produced by TRT offline quantitative + calibration, trt_calib_mode need to set True + cpu_threads (int): cpu threads + enable_mkldnn (bool): whether to open MKLDNN """ def __init__(self, @@ -124,7 +126,7 @@ class Detector(object): def predict(self, image_list, threshold=0.5, warmup=0, repeats=1): ''' Args: - image_list (list): ,list of image + image_list (list): list of image threshold (float): threshold of predicted box' score Returns: results (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of box, @@ -189,7 +191,10 @@ class DetectorSOLOv2(Detector): trt_min_shape (int): min shape for dynamic shape in trt trt_max_shape (int): max shape for dynamic shape in trt trt_opt_shape (int): opt shape for dynamic shape in trt - threshold (float): threshold to reserve the result for output. 
+ trt_calib_mode (bool): If the model is produced by TRT offline quantitative + calibration, trt_calib_mode need to set True + cpu_threads (int): cpu threads + enable_mkldnn (bool): whether to open MKLDNN """ def __init__(self, @@ -284,6 +289,14 @@ def create_inputs(imgs, im_info): im_shape = [] scale_factor = [] + if len(imgs) == 1: + inputs['image'] = np.array((imgs[0], )).astype('float32') + inputs['im_shape'] = np.array( + (im_info[0]['im_shape'], )).astype('float32') + inputs['scale_factor'] = np.array( + (im_info[0]['scale_factor'], )).astype('float32') + return inputs + for e in im_info: im_shape.append(np.array((e['im_shape'], )).astype('float32')) scale_factor.append(np.array((e['scale_factor'], )).astype('float32')) diff --git a/deploy/python/keypoint_infer.py b/deploy/python/keypoint_infer.py index b3a9c9a377f0e88e9071495417db20b4bc066bcb..6d325cc6f0a9b000fef25163f7cb72d923ec6cc5 100644 --- a/deploy/python/keypoint_infer.py +++ b/deploy/python/keypoint_infer.py @@ -26,12 +26,12 @@ import paddle from preprocess import preprocess, NormalizeImage, Permute from keypoint_preprocess import EvalAffine, TopDownEvalAffine, expand_crop from keypoint_postprocess import HrHRNetPostProcess, HRNetPostProcess -from keypoint_visualize import draw_pose +from visualize import draw_pose from paddle.inference import Config from paddle.inference import create_predictor from utils import argsparser, Timer, get_current_memory_mb from benchmark_utils import PaddleInferBenchmark -from infer import get_test_images, print_arguments +from infer import Detector, get_test_images, print_arguments # Global dictionary KEYPOINT_SUPPORT_MODELS = { @@ -40,7 +40,7 @@ KEYPOINT_SUPPORT_MODELS = { } -class KeyPoint_Detector(object): +class KeyPoint_Detector(Detector): """ Args: config (object): config of model, defined by `Config(model_dir)` @@ -50,8 +50,11 @@ class KeyPoint_Detector(object): trt_min_shape (int): min shape for dynamic shape in trt trt_max_shape (int): max shape for dynamic shape in trt trt_opt_shape (int): opt shape for dynamic shape in trt - run_mode (str): mode of running(fluid/trt_fp32/trt_fp16) - threshold (float): threshold to reserve the result for output. 
+ trt_calib_mode (bool): If the model is produced by TRT offline quantitative + calibration, trt_calib_mode need to set True + cpu_threads (int): cpu threads + enable_mkldnn (bool): whether to open MKLDNN + use_dark(bool): whether to use postprocess in DarkPose """ def __init__(self, @@ -59,6 +62,7 @@ class KeyPoint_Detector(object): model_dir, device='CPU', run_mode='fluid', + batch_size=1, trt_min_shape=1, trt_max_shape=1280, trt_opt_shape=640, @@ -66,21 +70,18 @@ class KeyPoint_Detector(object): cpu_threads=1, enable_mkldnn=False, use_dark=True): - self.pred_config = pred_config - self.predictor, self.config = load_predictor( - model_dir, - run_mode=run_mode, - min_subgraph_size=self.pred_config.min_subgraph_size, + super(KeyPoint_Detector, self).__init__( + pred_config=pred_config, + model_dir=model_dir, device=device, - use_dynamic_shape=self.pred_config.use_dynamic_shape, + run_mode=run_mode, + batch_size=batch_size, trt_min_shape=trt_min_shape, trt_max_shape=trt_max_shape, trt_opt_shape=trt_opt_shape, trt_calib_mode=trt_calib_mode, cpu_threads=cpu_threads, enable_mkldnn=enable_mkldnn) - self.det_times = Timer() - self.cpu_mem, self.gpu_mem, self.gpu_util = 0, 0, 0 self.use_dark = use_dark def get_person_from_rect(self, image, results, det_threshold=0.5): @@ -91,13 +92,11 @@ class KeyPoint_Detector(object): valid_rects = det_results[mask] rect_images = [] new_rects = [] - #image_buff = [] org_rects = [] for rect in valid_rects: rect_image, new_rect, org_rect = expand_crop(image, rect) if rect_image is None or rect_image.size == 0: continue - #image_buff.append([rect_image, new_rect]) rect_images.append(rect_image) new_rects.append(new_rect) org_rects.append(org_rect) @@ -264,94 +263,6 @@ class PredictConfig_KeyPoint(): print('--------------------------------------------') -def load_predictor(model_dir, - run_mode='fluid', - batch_size=1, - device='CPU', - min_subgraph_size=3, - use_dynamic_shape=False, - trt_min_shape=1, - trt_max_shape=1280, - trt_opt_shape=640, - trt_calib_mode=False, - cpu_threads=1, - enable_mkldnn=False): - """set AnalysisConfig, generate AnalysisPredictor - Args: - model_dir (str): root path of __model__ and __params__ - device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU - run_mode (str): mode of running(fluid/trt_fp32/trt_fp16/trt_int8) - use_dynamic_shape (bool): use dynamic shape or not - trt_min_shape (int): min shape for dynamic shape in trt - trt_max_shape (int): max shape for dynamic shape in trt - trt_opt_shape (int): opt shape for dynamic shape in trt - trt_calib_mode (bool): If the model is produced by TRT offline quantitative - calibration, trt_calib_mode need to set True - Returns: - predictor (PaddlePredictor): AnalysisPredictor - Raises: - ValueError: predict by TensorRT need device == 'GPU'. 
- """ - if device != 'GPU' and run_mode != 'fluid': - raise ValueError( - "Predict by TensorRT mode: {}, expect device=='GPU', but device == {}" - .format(run_mode, device)) - config = Config( - os.path.join(model_dir, 'model.pdmodel'), - os.path.join(model_dir, 'model.pdiparams')) - if device == 'GPU': - # initial GPU memory(M), device ID - config.enable_use_gpu(200, 0) - # optimize graph and fuse op - config.switch_ir_optim(True) - elif device == 'XPU': - config.enable_xpu(10 * 1024 * 1024) - else: - config.disable_gpu() - config.set_cpu_math_library_num_threads(cpu_threads) - if enable_mkldnn: - try: - # cache 10 different shapes for mkldnn to avoid memory leak - config.set_mkldnn_cache_capacity(10) - config.enable_mkldnn() - except Exception as e: - print( - "The current environment does not support `mkldnn`, so disable mkldnn." - ) - pass - - precision_map = { - 'trt_int8': Config.Precision.Int8, - 'trt_fp32': Config.Precision.Float32, - 'trt_fp16': Config.Precision.Half - } - if run_mode in precision_map.keys(): - config.enable_tensorrt_engine( - workspace_size=1 << 10, - max_batch_size=batch_size, - min_subgraph_size=min_subgraph_size, - precision_mode=precision_map[run_mode], - use_static=False, - use_calib_mode=trt_calib_mode) - - if use_dynamic_shape: - min_input_shape = {'image': [1, 3, trt_min_shape, trt_min_shape]} - max_input_shape = {'image': [1, 3, trt_max_shape, trt_max_shape]} - opt_input_shape = {'image': [1, 3, trt_opt_shape, trt_opt_shape]} - config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape, - opt_input_shape) - print('trt set dynamic shape done!') - - # disable print log when predict - config.disable_glog_info() - # enable shared memory - config.enable_memory_optim() - # disable feed, fetch OP, needed by zero_copy_run - config.switch_use_feed_fetch_ops(False) - predictor = create_predictor(config) - return predictor, config - - def predict_image(detector, image_list): for i, img_file in enumerate(image_list): if FLAGS.run_benchmark: @@ -378,8 +289,7 @@ def predict_video(detector, camera_id): video_name = 'output.mp4' else: capture = cv2.VideoCapture(FLAGS.video_file) - video_name = os.path.splitext(os.path.basename(FLAGS.video_file))[ - 0] + '.mp4' + video_name = os.path.split(FLAGS.video_file)[-1] fps = 30 width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)) @@ -395,10 +305,9 @@ def predict_video(detector, camera_id): ret, frame = capture.read() if not ret: break - print('detect frame:%d' % (index)) index += 1 - results = detector.predict(frame, FLAGS.threshold) + results = detector.predict([frame], FLAGS.threshold) im = draw_pose( frame, results, visual_thread=FLAGS.threshold, returnimg=True) writer.write(im) diff --git a/deploy/python/keypoint_postprocess.py b/deploy/python/keypoint_postprocess.py index fe51d1ab261a1433eb14c3d0c75b4557c44343f4..cd9b92895f2b400c28c0b7a7ab25717b7469271f 100644 --- a/deploy/python/keypoint_postprocess.py +++ b/deploy/python/keypoint_postprocess.py @@ -354,3 +354,10 @@ def affine_transform(pt, t): new_pt = np.array([pt[0], pt[1], 1.]).T new_pt = np.dot(t, new_pt) return new_pt[:2] + + +def translate_to_ori_images(keypoint_result, batch_records): + kpts, scores = keypoint_result['keypoint'] + kpts[..., 0] += batch_records[:, 0:1] + kpts[..., 1] += batch_records[:, 1:2] + return kpts, scores diff --git a/deploy/python/keypoint_visualize.py b/deploy/python/keypoint_visualize.py deleted file mode 100644 index 
828207621d694037c83e01a520b7a640ab7da336..0000000000000000000000000000000000000000 --- a/deploy/python/keypoint_visualize.py +++ /dev/null @@ -1,113 +0,0 @@ -# coding: utf-8 -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import cv2 -import os -import numpy as np -import math - - -def draw_pose(imgfile, - results, - visual_thread=0.6, - save_name='pose.jpg', - save_dir='output', - returnimg=False): - try: - import matplotlib.pyplot as plt - import matplotlib - plt.switch_backend('agg') - except Exception as e: - logger.error('Matplotlib not found, please install matplotlib.' - 'for example: `pip install matplotlib`.') - raise e - - skeletons, scores = results['keypoint'] - kpt_nums = len(skeletons[0]) - if kpt_nums == 17: #plot coco keypoint - EDGES = [(0, 1), (0, 2), (1, 3), (2, 4), (3, 5), (4, 6), (5, 7), (6, 8), - (7, 9), (8, 10), (5, 11), (6, 12), (11, 13), (12, 14), - (13, 15), (14, 16), (11, 12)] - else: #plot mpii keypoint - EDGES = [(0, 1), (1, 2), (3, 4), (4, 5), (2, 6), (3, 6), (6, 7), (7, 8), - (8, 9), (10, 11), (11, 12), (13, 14), (14, 15), (8, 12), - (8, 13)] - NUM_EDGES = len(EDGES) - - colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], \ - [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], \ - [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]] - cmap = matplotlib.cm.get_cmap('hsv') - plt.figure() - - img = cv2.imread(imgfile) if type(imgfile) == str else imgfile - - color_set = results['colors'] if 'colors' in results else None - - if 'bbox' in results: - bboxs = results['bbox'] - for j, rect in enumerate(bboxs): - xmin, ymin, xmax, ymax = rect - color = colors[0] if color_set is None else colors[color_set[j] % - len(colors)] - cv2.rectangle(img, (xmin, ymin), (xmax, ymax), color, 1) - - canvas = img.copy() - for i in range(kpt_nums): - for j in range(len(skeletons)): - if skeletons[j][i, 2] < visual_thread: - continue - color = colors[i] if color_set is None else colors[color_set[j] % - len(colors)] - cv2.circle( - canvas, - tuple(skeletons[j][i, 0:2].astype('int32')), - 2, - color, - thickness=-1) - - to_plot = cv2.addWeighted(img, 0.3, canvas, 0.7, 0) - fig = matplotlib.pyplot.gcf() - - stickwidth = 2 - - for i in range(NUM_EDGES): - for j in range(len(skeletons)): - edge = EDGES[i] - if skeletons[j][edge[0], 2] < visual_thread or skeletons[j][edge[ - 1], 2] < visual_thread: - continue - - cur_canvas = canvas.copy() - X = [skeletons[j][edge[0], 1], skeletons[j][edge[1], 1]] - Y = [skeletons[j][edge[0], 0], skeletons[j][edge[1], 0]] - mX = np.mean(X) - mY = np.mean(Y) - length = ((X[0] - X[1])**2 + (Y[0] - Y[1])**2)**0.5 - angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1])) - polygon = cv2.ellipse2Poly((int(mY), int(mX)), - (int(length / 2), stickwidth), - int(angle), 0, 360, 1) - color = colors[i] if color_set is None else colors[color_set[j] % - len(colors)] - 
cv2.fillConvexPoly(cur_canvas, polygon, color) - canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0) - if returnimg: - return canvas - save_name = os.path.join( - save_dir, os.path.splitext(os.path.basename(imgfile))[0] + '_vis.jpg') - plt.imsave(save_name, canvas[:, :, ::-1]) - print("keypoint visualize image saved to: " + save_name) - plt.close() diff --git a/deploy/python/mot_infer.py b/deploy/python/mot_jde_infer.py similarity index 65% rename from deploy/python/mot_infer.py rename to deploy/python/mot_jde_infer.py index 288cb9d5c07a5ae9c3cb5ceab0cbfaecc97ec589..97cceb31f883c4c221569744eb44eb3ea24d788a 100644 --- a/deploy/python/mot_infer.py +++ b/deploy/python/mot_jde_infer.py @@ -19,7 +19,7 @@ import cv2 import numpy as np import paddle from benchmark_utils import PaddleInferBenchmark -from preprocess import preprocess, NormalizeImage, Permute, LetterBoxResize +from preprocess import preprocess from tracker import JDETracker from ppdet.modeling.mot import visualization as mot_vis @@ -28,7 +28,7 @@ from ppdet.modeling.mot.utils import Timer as MOTTimer from paddle.inference import Config from paddle.inference import create_predictor from utils import argsparser, Timer, get_current_memory_mb -from infer import get_test_images, print_arguments, PredictConfig +from infer import Detector, get_test_images, print_arguments, PredictConfig # Global dictionary MOT_SUPPORT_MODELS = { @@ -37,13 +37,14 @@ MOT_SUPPORT_MODELS = { } -class MOT_Detector(object): +class JDE_Detector(Detector): """ Args: pred_config (object): config of model, defined by `Config(model_dir)` model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU run_mode (str): mode of running(fluid/trt_fp32/trt_fp16) + batch_size (int): size of pre batch in inference trt_min_shape (int): min shape for dynamic shape in trt trt_max_shape (int): max shape for dynamic shape in trt trt_opt_shape (int): opt shape for dynamic shape in trt @@ -58,40 +59,28 @@ class MOT_Detector(object): model_dir, device='CPU', run_mode='fluid', + batch_size=1, trt_min_shape=1, trt_max_shape=1088, trt_opt_shape=608, trt_calib_mode=False, cpu_threads=1, enable_mkldnn=False): - self.pred_config = pred_config - self.predictor, self.config = load_predictor( - model_dir, - run_mode=run_mode, + super(JDE_Detector, self).__init__( + pred_config=pred_config, + model_dir=model_dir, device=device, - min_subgraph_size=self.pred_config.min_subgraph_size, - use_dynamic_shape=self.pred_config.use_dynamic_shape, + run_mode=run_mode, + batch_size=batch_size, trt_min_shape=trt_min_shape, trt_max_shape=trt_max_shape, trt_opt_shape=trt_opt_shape, trt_calib_mode=trt_calib_mode, cpu_threads=cpu_threads, enable_mkldnn=enable_mkldnn) - self.det_times = Timer() - self.cpu_mem, self.gpu_mem, self.gpu_util = 0, 0, 0 - + assert batch_size == 1, "The JDE Detector only supports batch size=1 now" self.tracker = JDETracker() - def preprocess(self, im): - preprocess_ops = [] - for op_info in self.pred_config.preprocess_infos: - new_op_info = op_info.copy() - op_type = new_op_info.pop('type') - preprocess_ops.append(eval(op_type)(**new_op_info)) - im, im_info = preprocess(im, preprocess_ops) - inputs = create_inputs(im, im_info) - return inputs - def postprocess(self, pred_dets, pred_embs, threshold): online_targets = self.tracker.update(pred_dets, pred_embs) online_tlwhs, online_ids = [], [] @@ -108,16 +97,16 @@ class MOT_Detector(object): online_scores.append(tscore) return 
online_tlwhs, online_scores, online_ids - def predict(self, image, threshold=0.5, warmup=0, repeats=1): + def predict(self, image_list, threshold=0.5, warmup=0, repeats=1): ''' Args: - image (np.ndarray): numpy image data + image_list (list): list of image threshold (float): threshold of predicted box' score Returns: - online_tlwhs, online_ids (np.ndarray) + online_tlwhs, online_scores, online_ids (np.ndarray) ''' self.det_times.preprocess_time_s.start() - inputs = self.preprocess(image) + inputs = self.preprocess(image_list) self.det_times.preprocess_time_s.end() pred_dets, pred_embs = None, None @@ -150,114 +139,6 @@ class MOT_Detector(object): return online_tlwhs, online_scores, online_ids -def create_inputs(im, im_info): - """generate input for different model type - Args: - im (np.ndarray): image (np.ndarray) - im_info (dict): info of image - Returns: - inputs (dict): input of model - """ - inputs = {} - inputs['image'] = np.array((im, )).astype('float32') - inputs['im_shape'] = np.array((im_info['im_shape'], )).astype('float32') - inputs['scale_factor'] = np.array( - (im_info['scale_factor'], )).astype('float32') - return inputs - - -def load_predictor(model_dir, - run_mode='fluid', - batch_size=1, - device='CPU', - min_subgraph_size=3, - use_dynamic_shape=False, - trt_min_shape=1, - trt_max_shape=1088, - trt_opt_shape=608, - trt_calib_mode=False, - cpu_threads=1, - enable_mkldnn=False): - """set AnalysisConfig, generate AnalysisPredictor - Note: only support batch_size=1 now - Args: - model_dir (str): root path of __model__ and __params__ - run_mode (str): mode of running(fluid/trt_fp32/trt_fp16/trt_int8) - batch_size (int): size of pre batch in inference - device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU - use_dynamic_shape (bool): use dynamic shape or not - trt_min_shape (int): min shape for dynamic shape in trt - trt_max_shape (int): max shape for dynamic shape in trt - trt_opt_shape (int): opt shape for dynamic shape in trt - trt_calib_mode (bool): If the model is produced by TRT offline quantitative - calibration, trt_calib_mode need to set True - cpu_threads (int): cpu threads - enable_mkldnn (bool): whether to open MKLDNN - Returns: - predictor (PaddlePredictor): AnalysisPredictor - Raises: - ValueError: predict by TensorRT need use_gpu == True. - """ - if device != 'GPU' and run_mode != 'fluid': - raise ValueError( - "Predict by TensorRT mode: {}, expect device=='GPU', but device == {}" - .format(run_mode, device)) - config = Config( - os.path.join(model_dir, 'model.pdmodel'), - os.path.join(model_dir, 'model.pdiparams')) - if device == 'GPU': - # initial GPU memory(M), device ID - config.enable_use_gpu(200, 0) - # optimize graph and fuse op - config.switch_ir_optim(True) - elif device == 'XPU': - config.enable_xpu(10 * 1024 * 1024) - else: - config.disable_gpu() - config.set_cpu_math_library_num_threads(cpu_threads) - if enable_mkldnn: - try: - # cache 10 different shapes for mkldnn to avoid memory leak - config.set_mkldnn_cache_capacity(10) - config.enable_mkldnn() - except Exception as e: - print( - "The current environment does not support `mkldnn`, so disable mkldnn." 
- ) - pass - - precision_map = { - 'trt_int8': Config.Precision.Int8, - 'trt_fp32': Config.Precision.Float32, - 'trt_fp16': Config.Precision.Half - } - if run_mode in precision_map.keys(): - config.enable_tensorrt_engine( - workspace_size=1 << 10, - max_batch_size=batch_size, - min_subgraph_size=min_subgraph_size, - precision_mode=precision_map[run_mode], - use_static=False, - use_calib_mode=trt_calib_mode) - - if use_dynamic_shape: - min_input_shape = {'image': [1, 3, trt_min_shape, trt_min_shape]} - max_input_shape = {'image': [1, 3, trt_max_shape, trt_max_shape]} - opt_input_shape = {'image': [1, 3, trt_opt_shape, trt_opt_shape]} - config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape, - opt_input_shape) - print('trt set dynamic shape done!') - - # disable print log when predict - config.disable_glog_info() - # enable shared memory - config.enable_memory_optim() - # disable feed, fetch OP, needed by zero_copy_run - config.switch_use_feed_fetch_ops(False) - predictor = create_predictor(config) - return predictor, config - - def write_mot_results(filename, results, data_type='mot'): if data_type in ['mot', 'mcmot', 'lab']: save_format = '{frame},{id},{x1},{y1},{w},{h},{score},-1,-1,-1\n' @@ -293,7 +174,7 @@ def predict_image(detector, image_list): for i, img_file in enumerate(image_list): frame = cv2.imread(img_file) if FLAGS.run_benchmark: - detector.predict(frame, FLAGS.threshold, warmup=10, repeats=10) + detector.predict([frame], FLAGS.threshold, warmup=10, repeats=10) cm, gm, gu = get_current_memory_mb() detector.cpu_mem += cm detector.gpu_mem += gm @@ -301,15 +182,16 @@ def predict_image(detector, image_list): print('Test iter {}, file name:{}'.format(i, img_file)) else: online_tlwhs, online_scores, online_ids = detector.predict( - frame, FLAGS.threshold) - + [frame], FLAGS.threshold) online_im = mot_vis.plot_tracking( frame, online_tlwhs, online_ids, online_scores, frame_id=i) - if FLAGS.save_images: if not os.path.exists(FLAGS.output_dir): os.makedirs(FLAGS.output_dir) - cv2.imwrite(os.path.join(FLAGS.output_dir, img_file), online_im) + img_name = os.path.split(img_file)[-1] + out_path = os.path.join(FLAGS.output_dir, img_name) + cv2.imwrite(out_path, online_im) + print("save result to: " + out_path) def predict_video(detector, camera_id): @@ -340,7 +222,7 @@ def predict_video(detector, camera_id): break timer.tic() online_tlwhs, online_scores, online_ids = detector.predict( - frame, FLAGS.threshold) + [frame], FLAGS.threshold) timer.toc() results.append((frame_id + 1, online_tlwhs, online_scores, online_ids)) @@ -376,7 +258,7 @@ def predict_video(detector, camera_id): def main(): pred_config = PredictConfig(FLAGS.model_dir) - detector = MOT_Detector( + detector = JDE_Detector( pred_config, FLAGS.model_dir, device=FLAGS.device, diff --git a/deploy/python/mot_keypoint_unite_infer.py b/deploy/python/mot_keypoint_unite_infer.py index 58411df4a5e7ce131a6d9c481ea2b4b317251a86..1ef333aca9313d3c47d97e85798d74e845d173fa 100644 --- a/deploy/python/mot_keypoint_unite_infer.py +++ b/deploy/python/mot_keypoint_unite_infer.py @@ -17,61 +17,108 @@ import cv2 import math import numpy as np import paddle +import copy from mot_keypoint_unite_utils import argsparser from keypoint_infer import KeyPoint_Detector, PredictConfig_KeyPoint -from keypoint_det_unite_infer import bench_log -from keypoint_visualize import draw_pose +from visualize import draw_pose from benchmark_utils import PaddleInferBenchmark from utils import Timer from tracker import JDETracker -from preprocess import 
LetterBoxResize -from mot_infer import MOT_Detector, write_mot_results +from mot_jde_infer import JDE_Detector, write_mot_results from infer import Detector, PredictConfig, print_arguments, get_test_images from ppdet.modeling.mot import visualization as mot_vis from ppdet.modeling.mot.utils import Timer as FPSTimer from utils import get_current_memory_mb +from det_keypoint_unite_infer import predict_with_given_det, bench_log +# Global dictionary +KEYPOINT_SUPPORT_MODELS = { + 'HigherHRNet': 'keypoint_bottomup', + 'HRNet': 'keypoint_topdown' +} -def mot_keypoint_unite_predict_image(mot_model, keypoint_model, image_list): + +def convert_mot_to_det(tlwhs, scores): + results = {} + num_mot = len(tlwhs) + xyxys = copy.deepcopy(tlwhs) + for xyxy in xyxys.copy(): + xyxy[2:] = xyxy[2:] + xyxy[:2] + # support single class now + results['boxes'] = np.vstack( + [np.hstack([0, scores[i], xyxys[i]]) for i in range(num_mot)]) + return results + + +def mot_keypoint_unite_predict_image(mot_model, + keypoint_model, + image_list, + keypoint_batch_size=1): for i, img_file in enumerate(image_list): frame = cv2.imread(img_file) if FLAGS.run_benchmark: - mot_model.predict(frame, FLAGS.mot_threshold, warmup=10, repeats=10) + online_tlwhs, online_scores, online_ids = mot_model.predict( + [frame], FLAGS.mot_threshold, warmup=10, repeats=10) cm, gm, gu = get_current_memory_mb() mot_model.cpu_mem += cm mot_model.gpu_mem += gm mot_model.gpu_util += gu - keypoint_model.predict( - [frame], FLAGS.keypoint_threshold, warmup=10, repeats=10) + else: + online_tlwhs, online_scores, online_ids = mot_model.predict( + [frame], FLAGS.mot_threshold) + + keypoint_arch = keypoint_model.pred_config.arch + if KEYPOINT_SUPPORT_MODELS[keypoint_arch] == 'keypoint_topdown': + results = convert_mot_to_det(online_tlwhs, online_scores) + keypoint_results = predict_with_given_det( + frame, results, keypoint_model, keypoint_batch_size, + FLAGS.mot_threshold, FLAGS.keypoint_threshold, + FLAGS.run_benchmark) + + else: + warmup = 10 if FLAGS.run_benchmark else 0 + repeats = 10 if FLAGS.run_benchmark else 1 + keypoint_results = keypoint_model.predict( + [frame], + FLAGS.keypoint_threshold, + warmup=warmup, + repeats=repeats) + + if FLAGS.run_benchmark: cm, gm, gu = get_current_memory_mb() keypoint_model.cpu_mem += cm keypoint_model.gpu_mem += gm keypoint_model.gpu_util += gu else: - online_tlwhs, online_scores, online_ids = mot_model.predict( - frame, FLAGS.mot_threshold) - keypoint_results = keypoint_model.predict([frame], - FLAGS.keypoint_threshold) - im = draw_pose( frame, keypoint_results, visual_thread=FLAGS.keypoint_threshold, - returnimg=True) + returnimg=True, + ids=online_ids + if KEYPOINT_SUPPORT_MODELS[keypoint_arch] == 'keypoint_topdown' + else None) online_im = mot_vis.plot_tracking( im, online_tlwhs, online_ids, online_scores, frame_id=i) + if FLAGS.save_images: if not os.path.exists(FLAGS.output_dir): os.makedirs(FLAGS.output_dir) - cv2.imwrite(os.path.join(FLAGS.output_dir, img_file), online_im) + img_name = os.path.split(img_file)[-1] + out_path = os.path.join(FLAGS.output_dir, img_name) + cv2.imwrite(out_path, online_im) + print("save result to: " + out_path) -def mot_keypoint_unite_predict_video(mot_model, keypoint_model, camera_id): +def mot_keypoint_unite_predict_video(mot_model, + keypoint_model, + camera_id, + keypoint_batch_size=1): if camera_id != -1: capture = cv2.VideoCapture(camera_id) video_name = 'output.mp4' @@ -102,16 +149,25 @@ def mot_keypoint_unite_predict_video(mot_model, keypoint_model, camera_id): 
timer_mot_kp.tic() timer_mot.tic() online_tlwhs, online_scores, online_ids = mot_model.predict( - frame, FLAGS.mot_threshold) + [frame], FLAGS.mot_threshold) timer_mot.toc() - mot_results.append( (frame_id + 1, online_tlwhs, online_scores, online_ids)) mot_fps = 1. / timer_mot.average_time timer_kp.tic() - keypoint_results = keypoint_model.predict([frame], - FLAGS.keypoint_threshold) + + keypoint_arch = keypoint_model.pred_config.arch + if KEYPOINT_SUPPORT_MODELS[keypoint_arch] == 'keypoint_topdown': + results = convert_mot_to_det(online_tlwhs, online_scores) + keypoint_results = predict_with_given_det( + frame, results, keypoint_model, keypoint_batch_size, + FLAGS.mot_threshold, FLAGS.keypoint_threshold, + FLAGS.run_benchmark) + + else: + keypoint_results = keypoint_model.predict([frame], + FLAGS.keypoint_threshold) timer_kp.toc() timer_mot_kp.toc() kp_fps = 1. / timer_kp.average_time @@ -121,7 +177,8 @@ def mot_keypoint_unite_predict_video(mot_model, keypoint_model, camera_id): frame, keypoint_results, visual_thread=FLAGS.keypoint_threshold, - returnimg=True) + returnimg=True, + ids=online_ids) online_im = mot_vis.plot_tracking( im, @@ -157,7 +214,7 @@ def mot_keypoint_unite_predict_video(mot_model, keypoint_model, camera_id): def main(): pred_config = PredictConfig(FLAGS.mot_model_dir) - mot_model = MOT_Detector( + mot_model = JDE_Detector( pred_config, FLAGS.mot_model_dir, device=FLAGS.device, @@ -175,6 +232,7 @@ def main(): FLAGS.keypoint_model_dir, device=FLAGS.device, run_mode=FLAGS.run_mode, + batch_size=FLAGS.keypoint_batch_size, trt_min_shape=FLAGS.trt_min_shape, trt_max_shape=FLAGS.trt_max_shape, trt_opt_shape=FLAGS.trt_opt_shape, @@ -186,11 +244,13 @@ def main(): # predict from video file or camera video stream if FLAGS.video_file is not None or FLAGS.camera_id != -1: mot_keypoint_unite_predict_video(mot_model, keypoint_model, - FLAGS.camera_id) + FLAGS.camera_id, + FLAGS.keypoint_batch_size) else: # predict from image img_list = get_test_images(FLAGS.image_dir, FLAGS.image_file) - mot_keypoint_unite_predict_image(mot_model, keypoint_model, img_list) + mot_keypoint_unite_predict_image(mot_model, keypoint_model, img_list, + FLAGS.keypoint_batch_size) if not FLAGS.run_benchmark: mot_model.det_times.info(average=True) diff --git a/deploy/python/mot_reid_infer.py b/deploy/python/mot_sde_infer.py similarity index 89% rename from deploy/python/mot_reid_infer.py rename to deploy/python/mot_sde_infer.py index 176acb0c5e241cf01fc5e2130547a5be6515160a..6e3a58dc41326951dac339eaa22be9393325c603 100644 --- a/deploy/python/mot_reid_infer.py +++ b/deploy/python/mot_sde_infer.py @@ -19,7 +19,7 @@ import cv2 import numpy as np import paddle from benchmark_utils import PaddleInferBenchmark -from preprocess import preprocess, NormalizeImage, Permute, LetterBoxResize +from preprocess import preprocess from tracker import DeepSORTTracker from ppdet.modeling.mot import visualization as mot_vis from ppdet.modeling.mot.utils import Timer as MOTTimer @@ -29,7 +29,8 @@ from paddle.inference import Config from paddle.inference import create_predictor from utils import argsparser, Timer, get_current_memory_mb from infer import get_test_images, print_arguments, PredictConfig, Detector -from mot_infer import create_inputs, load_predictor, write_mot_results +from mot_jde_infer import write_mot_results +from infer import load_predictor # Global dictionary MOT_SUPPORT_MODELS = {'DeepSORT'} @@ -73,23 +74,6 @@ def clip_box(xyxy, input_shape, im_shape, scale_factor): return xyxy -def get_crops(xyxy, ori_img, 
pred_scores, w, h): - crops = [] - keep_scores = [] - xyxy = xyxy.astype(np.int64) - ori_img = ori_img.transpose(1, 0, 2) # [h,w,3]->[w,h,3] - for i, bbox in enumerate(xyxy): - if bbox[2] <= bbox[0] or bbox[3] <= bbox[1]: - continue - crop = ori_img[bbox[0]:bbox[2], bbox[1]:bbox[3], :] - crops.append(crop) - keep_scores.append(pred_scores[i]) - if len(crops) == 0: - return [], [] - crops = preprocess_reid(crops, w, h) - return crops, keep_scores - - def preprocess_reid(imgs, w=64, h=192, @@ -109,7 +93,7 @@ def preprocess_reid(imgs, return im_batch -class MOT_Detector(object): +class SDE_Detector(Detector): """ Args: pred_config (object): config of model, defined by `Config(model_dir)` @@ -130,37 +114,26 @@ class MOT_Detector(object): model_dir, device='CPU', run_mode='fluid', + batch_size=1, trt_min_shape=1, trt_max_shape=1088, trt_opt_shape=608, trt_calib_mode=False, cpu_threads=1, enable_mkldnn=False): - self.pred_config = pred_config - self.predictor, self.config = load_predictor( - model_dir, - run_mode=run_mode, + super(SDE_Detector, self).__init__( + pred_config=pred_config, + model_dir=model_dir, device=device, - min_subgraph_size=self.pred_config.min_subgraph_size, - use_dynamic_shape=self.pred_config.use_dynamic_shape, + run_mode=run_mode, + batch_size=batch_size, trt_min_shape=trt_min_shape, trt_max_shape=trt_max_shape, trt_opt_shape=trt_opt_shape, trt_calib_mode=trt_calib_mode, cpu_threads=cpu_threads, enable_mkldnn=enable_mkldnn) - self.det_times = Timer() - self.cpu_mem, self.gpu_mem, self.gpu_util = 0, 0, 0 - - def preprocess(self, im): - preprocess_ops = [] - for op_info in self.pred_config.preprocess_infos: - new_op_info = op_info.copy() - op_type = new_op_info.pop('type') - preprocess_ops.append(eval(op_type)(**new_op_info)) - im, im_info = preprocess(im, preprocess_ops) - inputs = create_inputs(im, im_info) - return inputs + assert batch_size == 1, "The JDE Detector only supports batch size=1 now" def postprocess(self, boxes, input_shape, im_shape, scale_factor, threshold): @@ -214,7 +187,7 @@ class MOT_Detector(object): return pred_bboxes, pred_scores -class MOT_ReID(object): +class SDE_ReID(object): def __init__(self, pred_config, model_dir, @@ -300,6 +273,24 @@ class MOT_ReID(object): self.det_times.img_num += 1 return online_tlwhs, online_scores, online_ids + def get_crops(self, xyxy, ori_img, pred_scores, w, h): + self.det_times.preprocess_time_s.start() + crops = [] + keep_scores = [] + xyxy = xyxy.astype(np.int64) + ori_img = ori_img.transpose(1, 0, 2) # [h,w,3]->[w,h,3] + for i, bbox in enumerate(xyxy): + if bbox[2] <= bbox[0] or bbox[3] <= bbox[1]: + continue + crop = ori_img[bbox[0]:bbox[2], bbox[1]:bbox[3], :] + crops.append(crop) + keep_scores.append(pred_scores[i]) + if len(crops) == 0: + return [], [] + crops = preprocess_reid(crops, w, h) + self.det_times.preprocess_time_s.end() + return crops, keep_scores + def predict_image(detector, reid_model, image_list): results = [] @@ -307,21 +298,22 @@ def predict_image(detector, reid_model, image_list): frame = cv2.imread(img_file) if FLAGS.run_benchmark: pred_bboxes, pred_scores = detector.predict( - frame, FLAGS.threshold, warmup=10, repeats=10) + [frame], FLAGS.threshold, warmup=10, repeats=10) cm, gm, gu = get_current_memory_mb() detector.cpu_mem += cm detector.gpu_mem += gm detector.gpu_util += gu print('Test iter {}, file name:{}'.format(i, img_file)) else: - pred_bboxes, pred_scores = detector.predict(frame, FLAGS.threshold) + pred_bboxes, pred_scores = detector.predict([frame], + FLAGS.threshold) # process 
bbox_tlwh = np.concatenate( (pred_bboxes[:, 0:2], pred_bboxes[:, 2:4] - pred_bboxes[:, 0:2] + 1), axis=1) - crops, pred_scores = get_crops( + crops, pred_scores = reid_model.get_crops( pred_bboxes, frame, pred_scores, w=64, h=192) if FLAGS.run_benchmark: @@ -330,14 +322,16 @@ def predict_image(detector, reid_model, image_list): else: online_tlwhs, online_scores, online_ids = reid_model.predict( crops, bbox_tlwh, pred_scores) - online_im = mot_vis.plot_tracking( frame, online_tlwhs, online_ids, online_scores, frame_id=i) if FLAGS.save_images: if not os.path.exists(FLAGS.output_dir): os.makedirs(FLAGS.output_dir) - cv2.imwrite(os.path.join(FLAGS.output_dir, img_file), online_im) + img_name = os.path.split(img_file)[-1] + out_path = os.path.join(FLAGS.output_dir, img_name) + cv2.imwrite(out_path, online_im) + print("save result to: " + out_path) def predict_video(detector, reid_model, camera_id): @@ -367,14 +361,13 @@ def predict_video(detector, reid_model, camera_id): if not ret: break timer.tic() - pred_bboxes, pred_scores = detector.predict(frame, FLAGS.threshold) + pred_bboxes, pred_scores = detector.predict([frame], FLAGS.threshold) timer.toc() - bbox_tlwh = np.concatenate( (pred_bboxes[:, 0:2], pred_bboxes[:, 2:4] - pred_bboxes[:, 0:2] + 1), axis=1) - crops, pred_scores = get_crops( + crops, pred_scores = reid_model.get_crops( pred_bboxes, frame, pred_scores, w=64, h=192) online_tlwhs, online_scores, online_ids = reid_model.predict( @@ -413,7 +406,7 @@ def predict_video(detector, reid_model, camera_id): def main(): pred_config = PredictConfig(FLAGS.model_dir) - detector = MOT_Detector( + detector = SDE_Detector( pred_config, FLAGS.model_dir, device=FLAGS.device, @@ -426,7 +419,7 @@ def main(): enable_mkldnn=FLAGS.enable_mkldnn) pred_config = PredictConfig(FLAGS.reid_model_dir) - reid_model = MOT_ReID( + reid_model = SDE_ReID( pred_config, FLAGS.reid_model_dir, device=FLAGS.device, diff --git a/deploy/python/preprocess.py b/deploy/python/preprocess.py index d4fdd5fe7557217264498ca09d559c5b8cdd4404..e6c0b8c1fdccde33183096c051a38ab68268343a 100644 --- a/deploy/python/preprocess.py +++ b/deploy/python/preprocess.py @@ -135,7 +135,6 @@ class NormalizeImage(object): if self.is_scale: im = im / 255.0 - im -= mean im /= std return im, im_info diff --git a/deploy/python/visualize.py b/deploy/python/visualize.py index 016b660a6fc3ffa97179c5f24b5d05c88ab54128..ca9a12d9ee0c64e1fd551cc6348cfbd1dc38125d 100644 --- a/deploy/python/visualize.py +++ b/deploy/python/visualize.py @@ -15,10 +15,12 @@ from __future__ import division +import os import cv2 import numpy as np from PIL import Image, ImageDraw from scipy import ndimage +import math def visualize_box_mask(im, results, labels, threshold=0.5): @@ -214,3 +216,113 @@ def draw_segm(im, 1, lineType=cv2.LINE_AA) return Image.fromarray(im.astype('uint8')) + + +def get_color(idx): + idx = idx * 3 + color = ((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255) + return color + + +def draw_pose(imgfile, + results, + visual_thread=0.6, + save_name='pose.jpg', + save_dir='output', + returnimg=False, + ids=None): + try: + import matplotlib.pyplot as plt + import matplotlib + plt.switch_backend('agg') + except Exception as e: + logger.error('Matplotlib not found, please install matplotlib.' 
+ 'for example: `pip install matplotlib`.') + raise e + + skeletons, scores = results['keypoint'] + kpt_nums = skeletons.shape[1] + if kpt_nums == 17: #plot coco keypoint + EDGES = [(0, 1), (0, 2), (1, 3), (2, 4), (3, 5), (4, 6), (5, 7), (6, 8), + (7, 9), (8, 10), (5, 11), (6, 12), (11, 13), (12, 14), + (13, 15), (14, 16), (11, 12)] + else: #plot mpii keypoint + EDGES = [(0, 1), (1, 2), (3, 4), (4, 5), (2, 6), (3, 6), (6, 7), (7, 8), + (8, 9), (10, 11), (11, 12), (13, 14), (14, 15), (8, 12), + (8, 13)] + NUM_EDGES = len(EDGES) + + colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], \ + [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], \ + [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]] + cmap = matplotlib.cm.get_cmap('hsv') + plt.figure() + + img = cv2.imread(imgfile) if type(imgfile) == str else imgfile + + color_set = results['colors'] if 'colors' in results else None + + if 'bbox' in results and ids is None: + bboxs = results['bbox'] + for j, rect in enumerate(bboxs): + xmin, ymin, xmax, ymax = rect + color = colors[0] if color_set is None else colors[color_set[j] % + len(colors)] + cv2.rectangle(img, (xmin, ymin), (xmax, ymax), color, 1) + + canvas = img.copy() + for i in range(kpt_nums): + for j in range(len(skeletons)): + if skeletons[j][i, 2] < visual_thread: + continue + if ids is None: + color = colors[i] if color_set is None else colors[color_set[j] + % + len(colors)] + else: + color = get_color(ids[j]) + + cv2.circle( + canvas, + tuple(skeletons[j][i, 0:2].astype('int32')), + 2, + color, + thickness=-1) + + to_plot = cv2.addWeighted(img, 0.3, canvas, 0.7, 0) + fig = matplotlib.pyplot.gcf() + + stickwidth = 2 + + for i in range(NUM_EDGES): + for j in range(len(skeletons)): + edge = EDGES[i] + if skeletons[j][edge[0], 2] < visual_thread or skeletons[j][edge[ + 1], 2] < visual_thread: + continue + + cur_canvas = canvas.copy() + X = [skeletons[j][edge[0], 1], skeletons[j][edge[1], 1]] + Y = [skeletons[j][edge[0], 0], skeletons[j][edge[1], 0]] + mX = np.mean(X) + mY = np.mean(Y) + length = ((X[0] - X[1])**2 + (Y[0] - Y[1])**2)**0.5 + angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1])) + polygon = cv2.ellipse2Poly((int(mY), int(mX)), + (int(length / 2), stickwidth), + int(angle), 0, 360, 1) + if ids is None: + color = colors[i] if color_set is None else colors[color_set[j] + % + len(colors)] + else: + color = get_color(ids[j]) + cv2.fillConvexPoly(cur_canvas, polygon, color) + canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0) + if returnimg: + return canvas + save_name = os.path.join( + save_dir, os.path.splitext(os.path.basename(imgfile))[0] + '_vis.jpg') + plt.imsave(save_name, canvas[:, :, ::-1]) + print("keypoint visualize image saved to: " + save_name) + plt.close() diff --git a/ppdet/utils/visualizer.py b/ppdet/utils/visualizer.py index e264c478c4454a48ad77b1d4b40727d843f33900..fc1c53164256abbbabc4a636a9b9f672f0cbbaaf 100644 --- a/ppdet/utils/visualizer.py +++ b/ppdet/utils/visualizer.py @@ -243,7 +243,6 @@ def draw_pose(image, results, visual_thread=0.6, save_name='pose.jpg'): [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]] cmap = matplotlib.cm.get_cmap('hsv') plt.figure() - skeletons = np.array([item['keypoints'] for item in results]).reshape(-1, 17, 3) img = np.array(image).astype('float32')
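To summarize how the refactored pieces above fit together, here is a minimal end-to-end sketch of the detector + top-down keypoint flow that this patch consolidates into `predict_with_given_det()`. The model directories and demo image mirror the README commands earlier in the diff but are assumptions, and the script is assumed to run from `deploy/python/`.

```python
# Hedged sketch of the unified detector + top-down keypoint pipeline
# (det_keypoint_unite_infer.py). Model directories and image path are assumptions.
import cv2

from infer import Detector, PredictConfig
from keypoint_infer import KeyPoint_Detector, PredictConfig_KeyPoint
from det_keypoint_unite_infer import predict_with_given_det
from visualize import draw_pose

det_dir = 'output_inference/ppyolo_r50vd_dcn_2x_coco'  # assumed detector export
kpt_dir = 'output_inference/hrnet_w32_384x288'         # assumed HRNet export

detector = Detector(PredictConfig(det_dir), det_dir, device='GPU')
keypoint_detector = KeyPoint_Detector(
    PredictConfig_KeyPoint(kpt_dir), kpt_dir, device='GPU', batch_size=1)

frame = cv2.imread('demo/hrnet_demo.jpg')           # BGR, used for visualization
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # RGB, fed to the models

det_res = detector.predict([frame_rgb], threshold=0.5)
# Crop each detected person, run the keypoint model in batches, then map the
# keypoints back to the original image via translate_to_ori_images().
keypoint_res = predict_with_given_det(
    frame_rgb, det_res, keypoint_detector, keypoint_batch_size=1,
    det_threshold=0.5, keypoint_threshold=0.5, run_benchmark=False)

vis = draw_pose(frame, keypoint_res, visual_thread=0.5, returnimg=True)
cv2.imwrite('output/pose_vis.jpg', vis)
```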