未验证 提交 186ecc7c 编写于 作者: L LokeZhou 提交者: GitHub

Add PP-TinyPose, PP-Vehicle, and PP-HumanV2 (#5573)

* add PP-TInyPose

* add PP-TInyPose info.yaml app.yaml

* * add PP-HumanV2

* add PP-Vehicle

* Add English versions for PP-TinyPose, PP-Vehicle, and PP-HumanV2

* fixed info.yaml

* Update info.yaml

* fix PP-TInyPose/info.yaml
Co-authored-by: NliuTINA0907 <65896652+liuTINA0907@users.noreply.github.com>
上级 3aaa5593
import gradio as gr
import base64
from io import BytesIO
from PIL import Image
import numpy as np
from pipeline.pipeline import pp_humanv2
# UGC: Define the inference fn() for your models
def model_inference(input_date, avtivity_list):
    """Run the PP-HumanV2 pipeline for one Gradio submission.

    Args:
        input_date: Value of the input component (the image or video widget
            in this app); forwarded unchanged to ``pp_humanv2``.
        avtivity_list: Options ticked in the checkbox group; forwarded
            unchanged to ``pp_humanv2``.

    Returns:
        Whatever ``pp_humanv2`` returns; the app routes it to the output
        component.
    """
    return pp_humanv2(input_date, avtivity_list)
def clear_all():
    """Return one ``None`` for each of the three components a Clear button resets."""
    cleared = (None, ) * 3
    return cleared
# Build the demo page: an "image" tab and a "video" tab, each with its own
# input/output widgets, option checkboxes and Submit/Clear buttons.
with gr.Blocks() as demo:
    gr.Markdown("PP-Human Pipeline")

    with gr.Tabs():
        with gr.TabItem("image"):
            img_in = gr.Image(label="Input")
            img_out = gr.Image(label="Output")
            img_avtivity_list = gr.CheckboxGroup(["ATTR"])
            img_button1 = gr.Button("Submit")
            img_button2 = gr.Button("Clear")

        with gr.TabItem("video"):
            video_in = gr.Video(label="Input")
            video_out = gr.Video(label="Output")
            video_avtivity_list = gr.CheckboxGroup([
                "MOT",
                "ATTR",
                "VIDEO_ACTION",
                "SKELETON_ACTION",
                "ID_BASED_DETACTION",
                "ID_BASED_CLSACTION",
                "REID",
                "do_entrance_counting",
                "draw_center_traj",
            ])
            video_button1 = gr.Button("Submit")
            video_button2 = gr.Button("Clear")

    # Submit runs inference; Clear resets input, output and checkboxes.
    img_button1.click(
        fn=model_inference,
        inputs=[img_in, img_avtivity_list],
        outputs=img_out)
    img_button2.click(
        fn=clear_all,
        inputs=None,
        outputs=[img_in, img_out, img_avtivity_list])

    video_button1.click(
        fn=model_inference,
        inputs=[video_in, video_avtivity_list],
        outputs=video_out)
    video_button2.click(
        fn=clear_all,
        inputs=None,
        outputs=[video_in, video_out, video_avtivity_list])

demo.launch()
【PP-HumanV2-App-YAML】
APP_Info:
title: PP-HumanV2-App
colorFrom: blue
colorTo: yellow
sdk: gradio
sdk_version: 3.9
app_file: app.py
license: apache-2.0
device: cpu
\ No newline at end of file
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import ast
import yaml
import copy
import argparse
from argparse import ArgumentParser, RawDescriptionHelpFormatter
class ArgsParser(ArgumentParser):
    """Argument parser with ``-o/--opt key=value`` configuration overrides.

    Dotted keys build nested dictionaries, e.g. ``-o MOT.batch_size=3``
    yields ``{'MOT': {'batch_size': 3}}`` in ``args.opt``.
    """

    def __init__(self):
        super(ArgsParser, self).__init__(
            formatter_class=RawDescriptionHelpFormatter)
        self.add_argument(
            "-o", "--opt", nargs='*', help="set configuration options")

    def parse_args(self, argv=None):
        """Parse ``argv`` and convert ``args.opt`` into a nested dict.

        Raises:
            AssertionError: if ``args.config`` is ``None``. The ``--config``
                option itself must be registered by the caller.
        """
        args = super(ArgsParser, self).parse_args(argv)
        assert args.config is not None, \
            "Please specify --config=configure_file_path."
        args.opt = self._parse_opt(args.opt)
        return args

    def _parse_opt(self, opts):
        """Turn ``['a.b=1', 'c=2']`` into ``{'a': {'b': 1}, 'c': 2}``.

        Args:
            opts: list of ``key=value`` strings, or ``None``/empty.

        Returns:
            dict: nested overrides; empty when ``opts`` is falsy.

        Raises:
            ValueError: if an item contains no ``=``.
        """
        config = {}
        if not opts:
            return config
        for item in opts:
            key_path, sep, raw_value = item.strip().partition('=')
            if not sep:
                raise ValueError(
                    "Invalid option '{}', expected key=value".format(item))
            # NOTE(review): yaml.Loader can construct arbitrary Python
            # objects; acceptable for trusted command lines only. Consider
            # yaml.SafeLoader if options may come from untrusted input.
            value = yaml.load(raw_value, Loader=yaml.Loader)
            keys = key_path.split('.')
            node = config
            for name in keys[:-1]:
                child = node.get(name)
                # Reuse an existing sub-dict so that several options under
                # the same prefix (A.b.c=1 A.b.d=2) accumulate instead of
                # the later one wiping the earlier one.
                if not isinstance(child, dict):
                    child = {}
                    node[name] = child
                node = child
            node[keys[-1]] = value
        return config
def _str2bool(value):
    """Convert a command-line string into a real boolean.

    ``type=bool`` treats every non-empty string (including "False") as
    True, so boolean options use this converter instead.

    Raises:
        argparse.ArgumentTypeError: if the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('true', 't', 'yes', 'y', '1', 'on'):
        return True
    if text in ('false', 'f', 'no', 'n', '0', 'off', ''):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value, got {!r}".format(value))


def argsparser():
    """Build the command-line parser for the pipeline.

    Returns:
        ArgsParser: parser with every pipeline option registered; call
        ``parse_args()`` on it to obtain the namespace.
    """
    parser = ArgsParser()
    parser.add_argument(
        "--config",
        type=str,
        default='pipeline/config/infer_cfg_pphuman.yml',
        help=("Path of configure"))
    parser.add_argument(
        "--image_file", type=str, default=None, help="Path of image file.")
    parser.add_argument(
        "--image_dir",
        type=str,
        default=None,
        help="Dir of image file, `image_file` has a higher priority.")
    parser.add_argument(
        "--video_file",
        type=str,
        default=None,
        help="Path of video file, `video_file` or `camera_id` has a highest priority."
    )
    parser.add_argument(
        "--video_dir",
        type=str,
        default=None,
        help="Dir of video file, `video_file` has a higher priority.")
    parser.add_argument(
        "--rtsp",
        type=str,
        nargs='+',
        default=None,
        help="list of rtsp inputs, for one or multiple rtsp input.")
    parser.add_argument(
        "--camera_id",
        type=int,
        default=-1,
        help="device id of camera to predict.")
    parser.add_argument(
        "--output_dir",
        type=str,
        default="output",
        help="Directory of output visualization files.")
    parser.add_argument(
        "--pushurl",
        type=str,
        default="",
        help="url of output visualization stream.")
    parser.add_argument(
        "--run_mode",
        type=str,
        default='paddle',
        help="mode of running(paddle/trt_fp32/trt_fp16/trt_int8)")
    parser.add_argument(
        "--device",
        type=str,
        default='cpu',
        help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU."
    )
    parser.add_argument(
        "--enable_mkldnn",
        type=ast.literal_eval,
        default=False,
        help="Whether use mkldnn with CPU.")
    parser.add_argument(
        "--cpu_threads", type=int, default=1, help="Num of threads with CPU.")
    parser.add_argument(
        "--trt_min_shape", type=int, default=1, help="min_shape for TensorRT.")
    parser.add_argument(
        "--trt_max_shape",
        type=int,
        default=1280,
        help="max_shape for TensorRT.")
    parser.add_argument(
        "--trt_opt_shape",
        type=int,
        default=640,
        help="opt_shape for TensorRT.")
    # The four boolean switches below previously used type=bool, which made
    # "--flag False" evaluate to True.
    parser.add_argument(
        "--trt_calib_mode",
        type=_str2bool,
        default=False,
        help="If the model is produced by TRT offline quantitative "
        "calibration, trt_calib_mode need to set True.")
    parser.add_argument(
        "--do_entrance_counting",
        type=_str2bool,
        default=False,
        help="Whether counting the numbers of identifiers entering "
        "or getting out from the entrance. Note that only support single-class MOT."
    )
    parser.add_argument(
        "--do_break_in_counting",
        type=_str2bool,
        default=False,
        help="Whether counting the numbers of identifiers break in "
        "the area. Note that only support single-class MOT and "
        "the video should be taken by a static camera.")
    parser.add_argument(
        "--illegal_parking_time",
        type=int,
        default=-1,
        help="illegal parking time which units are seconds, default is -1 which means not recognition illegal parking"
    )
    parser.add_argument(
        "--region_type",
        type=str,
        default='horizontal',
        help="Area type for entrance counting or break in counting, 'horizontal' and "
        "'vertical' used when do entrance counting. 'custom' used when do break in counting. "
        "Note that only support single-class MOT, and the video should be taken by a static camera."
    )
    parser.add_argument(
        '--region_polygon',
        nargs='+',
        type=int,
        default=[],
        help="Clockwise point coords (x0,y0,x1,y1...) of polygon of area when "
        "do_break_in_counting. Note that only support single-class MOT and "
        "the video should be taken by a static camera.")
    parser.add_argument(
        "--secs_interval",
        type=int,
        default=2,
        help="The seconds interval to count after tracking")
    parser.add_argument(
        "--draw_center_traj",
        type=_str2bool,
        default=False,
        help="Whether drawing the trajectory of center")
    parser.add_argument('--avtivity_list', nargs='+', type=str)
    return parser
def merge_cfg(args):
    """Load the YAML config named by ``args.config`` and apply overrides.

    Two layers of overrides are applied, in this order:

    1. Any parsed argument whose name matches a config key (at any nesting
       level) replaces that config value.
    2. ``args.opt`` (the ``-o`` options) is merged section by section.

    Args:
        args: namespace with at least ``config`` and, optionally, ``opt``.

    Returns:
        dict: the merged configuration.
    """
    # load config
    with open(args.config) as f:
        pred_config = yaml.safe_load(f)

    def merge(cfg, arg):
        # update cfg from arg directly
        merged = copy.deepcopy(cfg)
        for k, v in cfg.items():
            if k in arg:
                merged[k] = arg[k]
            elif isinstance(v, dict):
                merged[k] = merge(v, arg)
        return merged

    def merge_opt(cfg, arg):
        merged = copy.deepcopy(cfg)
        overrides = arg.get('opt')
        if not overrides:
            return merged
        # example: {'MOT': {'batch_size': 3}}
        for name, value in overrides.items():
            if name not in merged:
                print("No", name, "in config file!")
                continue
            section = merged[name]
            value_is_dict = isinstance(value, dict)
            section_is_dict = isinstance(section, dict)
            if not value_is_dict and not section_is_dict:
                # Scalar override of a scalar entry (e.g. ``-o visual=False``);
                # this used to crash on ``value.items()``.
                merged[name] = value
                continue
            if not (value_is_dict and section_is_dict):
                print("Cannot override", name,
                      ": option and config entry have different structure!")
                continue
            for sub_k, sub_v in value.items():
                if sub_k not in section:
                    print("No", sub_k, "in config file of", name, "!")
                    continue
                section[sub_k] = sub_v
        return merged

    args_dict = vars(args)
    pred_config = merge(pred_config, args_dict)
    pred_config = merge_opt(pred_config, args_dict)
    return pred_config
def print_arguments(cfg):
    """Print ``cfg`` as YAML, framed by a header and a footer line."""
    header = '----------- Running Arguments -----------'
    footer = '------------------------------------------'
    print(header)
    print(yaml.dump(cfg))
    print(footer)
crop_thresh: 0.5
attr_thresh: 0.5
kpt_thresh: 0.2
visual: True
warmup_frame: 50
DET:
model_dir: https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_l_36e_pipeline.zip
batch_size: 1
MOT:
model_dir: https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_l_36e_pipeline.zip
tracker_config: pipeline/config/tracker_config.yml
batch_size: 1
skip_frame_num: -1 # preferably no more than 3
enable: False
KPT:
model_dir: https://bj.bcebos.com/v1/paddledet/models/pipeline/dark_hrnet_w32_256x192.zip
batch_size: 8
ATTR:
model_dir: https://bj.bcebos.com/v1/paddledet/models/pipeline/PPLCNet_x1_0_person_attribute_945_infer.zip
batch_size: 8
enable: False
VIDEO_ACTION:
model_dir: https://videotag.bj.bcebos.com/PaddleVideo-release2.3/ppTSM_fight.zip
batch_size: 1
frame_len: 8
sample_freq: 7
short_size: 340
target_size: 320
enable: False
SKELETON_ACTION:
model_dir: https://bj.bcebos.com/v1/paddledet/models/pipeline/STGCN.zip
batch_size: 1
max_frames: 50
display_frames: 80
coord_size: [384, 512]
enable: False
ID_BASED_DETACTION:
model_dir: https://bj.bcebos.com/v1/paddledet/models/pipeline/ppyoloe_crn_s_80e_smoking_visdrone.zip
batch_size: 8
threshold: 0.6
display_frames: 80
skip_frame_num: 2
enable: False
ID_BASED_CLSACTION:
model_dir: https://bj.bcebos.com/v1/paddledet/models/pipeline/PPHGNet_tiny_calling_halfbody.zip
batch_size: 8
threshold: 0.8
display_frames: 80
skip_frame_num: 2
enable: False
REID:
model_dir: https://bj.bcebos.com/v1/paddledet/models/pipeline/reid_model.zip
batch_size: 16
enable: False
# config of tracker for MOT SDE Detector, use 'JDETracker' as default.
# The tracker of MOT JDE Detector (such as FairMOT) is exported together with the model.
# Here 'min_box_area' and 'vertical_ratio' are set for pedestrian, you can modify for other objects tracking.
type: OCSORTTracker # choose one tracker in ['JDETracker', 'OCSORTTracker']
# BYTETracker
JDETracker:
use_byte: True
det_thresh: 0.3
conf_thres: 0.6
low_conf_thres: 0.1
match_thres: 0.9
min_box_area: 0
vertical_ratio: 0 # 1.6 for pedestrian
OCSORTTracker:
det_thresh: 0.4
max_age: 30
min_hits: 3
iou_threshold: 0.3
delta_t: 3
inertia: 0.2
vertical_ratio: 0
min_box_area: 0
use_byte: False
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import copy
from collections import Counter
class Result(object):
    """Per-module result store: one dictionary per pipeline module name."""

    def __init__(self):
        module_names = ('det', 'mot', 'attr', 'kpt', 'video_action',
                        'skeleton_action', 'reid', 'det_action', 'cls_action',
                        'vehicleplate', 'vehicle_attr')
        self.res_dict = {module: dict() for module in module_names}

    def update(self, res, name):
        """Merge the mapping ``res`` into the results stored under ``name``."""
        self.res_dict[name].update(res)

    def get(self, name):
        """Return the results of ``name``, or ``None`` if unknown or empty."""
        stored = self.res_dict.get(name)
        if stored is None or len(stored) == 0:
            return None
        return stored

    def clear(self, name):
        """Drop everything stored under ``name``."""
        self.res_dict[name].clear()
class DataCollector(object):
    """
    DataCollector of Pipeline: gathers the results of every frame and files
    them under the track id they belong to. Mainly used in mtmct.

    data struct:
    collector:
        - [id1]: (all results of N frames)
            - frames(list of int): Nx[int]
            - rects(list of rect): Nx[rect(conf, xmin, ymin, xmax, ymax)]
            - features(list of array(256,)): Nx[array(256,)]
            - qualities(list of float): Nx[float]
            - attrs(list of attr): refer to attrs for details
            - kpts(list of kpts): refer to kpts for details
            - skeleton_action(list of skeleton_action): refer to skeleton_action for details
        ...
        - [idN]
    """

    def __init__(self):
        # Template of the per-track record; deep-copied for each new id.
        field_names = ("frames", "rects", "attrs", "kpts", "features",
                       "qualities", "skeleton_action", "vehicleplate")
        self.mots = {field: [] for field in field_names}
        self.collector = {}

    def append(self, frameid, Result):
        """Record the results of one frame.

        Args:
            frameid: identifier stored in each track's ``frames`` list.
            Result: object exposing ``get(name)`` that returns the module
                result or ``None``.
        """
        mot_res = Result.get('mot')
        attr_res = Result.get('attr')
        kpt_res = Result.get('kpt')
        skeleton_action_res = Result.get('skeleton_action')
        reid_res = Result.get('reid')
        vehicleplate_res = Result.get('vehicleplate')

        # ReID rects take precedence over the raw MOT boxes.
        if reid_res is not None:
            rects = reid_res['rects']
        elif mot_res is not None:
            rects = mot_res['boxes']
        else:
            rects = []

        for idx, mot_item in enumerate(rects):
            track_id = int(mot_item[0])
            if track_id not in self.collector:
                self.collector[track_id] = copy.deepcopy(self.mots)
            record = self.collector[track_id]

            record["frames"].append(frameid)
            record["rects"].append([mot_item[2:]])
            if attr_res:
                record["attrs"].append(attr_res['output'][idx])
            if kpt_res:
                keypoints = kpt_res['keypoint']
                record["kpts"].append([keypoints[0][idx], keypoints[1][idx]])

            # The action model only produces a result every X frames, so a
            # None placeholder is stored for frames without one.
            action_key = idx + 1
            has_action = bool(skeleton_action_res) and \
                action_key in skeleton_action_res
            record["skeleton_action"].append(skeleton_action_res[action_key]
                                             if has_action else None)

            if reid_res:
                record["features"].append(reid_res['features'][idx])
                record["qualities"].append(reid_res['qualities'][idx])
            if vehicleplate_res:
                plate = vehicleplate_res['plate'][idx]
                if plate != "":
                    record["vehicleplate"].append(plate)

    def get_res(self):
        """Return the mapping of track id to collected record."""
        return self.collector

    def get_carlp(self, trackid):
        """Return the most frequent plate seen for ``trackid``, or ``None``."""
        ranked = Counter(self.collector[trackid]["vehicleplate"]).most_common()
        return ranked[0][0] if len(ranked) > 0 else None
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os, sys
import os.path as osp
import hashlib
import requests
import shutil
import tqdm
import time
import tarfile
import zipfile
from paddle.utils.download import _get_unique_endpoints
# Base URL substituted for the ``ppdet://`` shorthand by parse_url().
PPDET_WEIGHTS_DOWNLOAD_URL_PREFIX = 'https://paddledet.bj.bcebos.com/'
# Number of download attempts _download() makes before raising.
DOWNLOAD_RETRY_LIMIT = 3
# Local cache directory used as the default download root.
WEIGHTS_HOME = osp.expanduser("~/.cache/paddle/infer_weights")
# Expected MD5 hex digest for each known model archive URL; consulted by
# get_weights_path() to verify downloads. URLs not listed are not verified.
MODEL_URL_MD5_DICT = {
'https://bj.bcebos.com/v1/paddledet/models/pipeline/ch_PP-OCRv3_det_infer.tar.gz':
'1b8eae0f098635699bd4e8bccf3067a7',
'https://bj.bcebos.com/v1/paddledet/models/pipeline/ch_PP-OCRv3_rec_infer.tar.gz':
'64fa0e0701efd93c7db52a9b685b3de6',
"https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_l_36e_ppvehicle.zip":
"3859d1a26e0c498285c2374b1a347013",
"https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_s_36e_ppvehicle.zip":
"4ed58b546be2a76d8ccbb138f64874ac",
"https://bj.bcebos.com/v1/paddledet/models/pipeline/dark_hrnet_w32_256x192.zip":
"a20d5f6ca087bff0e9f2b18df45a36f2",
"https://bj.bcebos.com/v1/paddledet/models/pipeline/PPLCNet_x1_0_person_attribute_945_infer.zip":
"1dfb161bf12bbc1365b2ed6866674483",
"https://videotag.bj.bcebos.com/PaddleVideo-release2.3/ppTSM_fight.zip":
"5d4609142501258608bf0a1445eedaba",
"https://bj.bcebos.com/v1/paddledet/models/pipeline/STGCN.zip":
"cf1c3c4bae90b975accb954d13129ea4",
"https://bj.bcebos.com/v1/paddledet/models/pipeline/ppyoloe_crn_s_80e_smoking_visdrone.zip":
"4cd12ae55be8f0eb2b90c08ac3b48218",
"https://bj.bcebos.com/v1/paddledet/models/pipeline/PPHGNet_tiny_calling_halfbody.zip":
"cf86b87ace97540dace6ef08e62b584a",
"https://bj.bcebos.com/v1/paddledet/models/pipeline/reid_model.zip":
"fdc4dac38393b8e2b5921c1e1fdd5315"
}
def is_url(path):
    """
    Tell whether ``path`` starts with a supported URL scheme.

    Args:
        path (string): candidate string; ``http://``, ``https://`` and
            ``ppdet://`` prefixes count as URLs.
    """
    url_schemes = ('http://', 'https://', 'ppdet://')
    return path.startswith(url_schemes)
def parse_url(url):
    """Expand the ``ppdet://`` shorthand into the full download prefix."""
    return url.replace("ppdet://", PPDET_WEIGHTS_DOWNLOAD_URL_PREFIX)
def map_path(url, root_dir, path_depth=1):
    """Map a download URL to the local path it decompresses to.

    Args:
        url (str): download url.
        root_dir (str): directory the result is placed under.
        path_depth (int): how many trailing URL components to keep.

    Returns:
        str: ``root_dir`` joined with the kept components, with every
        occurrence of ``.zip``, ``.tar`` and ``.gz`` removed.
    """
    assert path_depth > 0, "path_depth should be a positive integer"
    base = url
    for _ in range(path_depth):
        base = osp.dirname(base)
    relative = osp.relpath(url, base)
    for archive_ext in ('.zip', '.tar', '.gz'):
        relative = relative.replace(archive_ext, '')
    return osp.join(root_dir, relative)
def _md5check(fullname, md5sum=None):
    """Check the MD5 digest of a file.

    Args:
        fullname (str): path of the file to hash.
        md5sum (str): expected hex digest; ``None`` skips the check.

    Returns:
        bool: True when no digest is given or the digests match.
    """
    if md5sum is None:
        return True

    digest = hashlib.md5()
    with open(fullname, 'rb') as stream:
        # Hash in 4 KiB pieces so large files are never fully in memory.
        while True:
            piece = stream.read(4096)
            if piece == b"":
                break
            digest.update(piece)
    return digest.hexdigest() == md5sum
def _check_exist_file_md5(filename, md5sum, url):
    """Validate an existing file against ``md5sum``.

    ``url`` is accepted for signature compatibility; this implementation
    does not use it.
    """
    is_valid = _md5check(filename, md5sum)
    return is_valid
def _download(url, path, md5sum=None):
    """
    Download from url, save to path.

    The file is written to ``<name>_tmp`` first and moved into place only
    once the transfer has finished, so an interrupted download never leaves
    a truncated file under the final name.

    url (str): download url
    path (str): download to given path
    md5sum (str): expected MD5 hex digest, or None to skip verification

    Returns:
        str: full path of the downloaded file.

    Raises:
        RuntimeError: if the server answers with a non-200 status, or the
            retry limit is reached without obtaining a valid file.
    """
    # exist_ok avoids failing when another process creates the directory
    # between an existence check and the creation.
    os.makedirs(path, exist_ok=True)

    fname = osp.split(url)[-1]
    fullname = osp.join(path, fname)
    retry_cnt = 0

    while not (osp.exists(fullname) and
               _check_exist_file_md5(fullname, md5sum, url)):
        if retry_cnt < DOWNLOAD_RETRY_LIMIT:
            retry_cnt += 1
        else:
            raise RuntimeError("Download from {} failed. "
                               "Retry limit reached".format(url))

        # NOTE: windows path join may incur \, which is invalid in url
        if sys.platform == "win32":
            url = url.replace('\\', '/')

        # For protecting download interrupted, download to
        # tmp_fullname firstly, move tmp_fullname to fullname
        # after download finished
        tmp_fullname = fullname + "_tmp"

        # The context manager releases the streamed connection even when
        # writing fails or the status check raises.
        with requests.get(url, stream=True) as req:
            if req.status_code != 200:
                raise RuntimeError("Downloading from {} failed with code "
                                   "{}!".format(url, req.status_code))

            total_size = req.headers.get('content-length')
            with open(tmp_fullname, 'wb') as f:
                if total_size:
                    # Size known: show a progress bar counted in KB chunks.
                    for chunk in tqdm.tqdm(
                            req.iter_content(chunk_size=1024),
                            total=(int(total_size) + 1023) // 1024,
                            unit='KB'):
                        f.write(chunk)
                else:
                    for chunk in req.iter_content(chunk_size=1024):
                        if chunk:
                            f.write(chunk)
        shutil.move(tmp_fullname, fullname)
    return fullname
def _download_dist(url, path, md5sum=None):
    """Download ``url`` once per machine when running with several trainers.

    Outside a multi-trainer environment (``PADDLE_TRAINERS_NUM`` and
    ``PADDLE_TRAINER_ID`` unset, or at most one trainer) this is a plain
    ``_download``. Otherwise only trainers whose endpoint is among the
    unique endpoints download; the others wait until the lock file goes
    away.

    Returns:
        str: full path of the downloaded file.
    """
    env = os.environ
    if 'PADDLE_TRAINERS_NUM' not in env or 'PADDLE_TRAINER_ID' not in env:
        return _download(url, path, md5sum)

    num_trainers = int(env['PADDLE_TRAINERS_NUM'])
    if num_trainers <= 1:
        return _download(url, path, md5sum)

    fname = osp.split(url)[-1]
    fullname = osp.join(path, fname)
    lock_path = fullname + '.download.lock'

    # Several trainers may reach this point at once; exist_ok avoids a
    # failure when another one creates the directory first.
    os.makedirs(path, exist_ok=True)

    if not osp.exists(fullname):
        from paddle.distributed import ParallelEnv
        unique_endpoints = _get_unique_endpoints(ParallelEnv()
                                                 .trainer_endpoints[:])
        with open(lock_path, 'w'):  # touch
            os.utime(lock_path, None)
        if ParallelEnv().current_endpoint in unique_endpoints:
            try:
                _download(url, path, md5sum)
            finally:
                # Release the lock even if the download fails; otherwise
                # the waiting trainers would spin forever.
                if osp.exists(lock_path):
                    os.remove(lock_path)
        else:
            while os.path.exists(lock_path):
                time.sleep(0.5)
    return fullname
def _move_and_merge_tree(src, dst):
    """
    Move ``src`` to ``dst``. When ``dst`` already exists and ``src`` is a
    directory, merge the two trees: sub-directories are merged recursively
    and files already present under ``dst`` are kept untouched.
    """
    if not osp.exists(dst) or osp.isfile(src):
        shutil.move(src, dst)
        return

    for entry in os.listdir(src):
        src_entry = osp.join(src, entry)
        dst_entry = osp.join(dst, entry)
        if osp.isdir(src_entry):
            if osp.isdir(dst_entry):
                _move_and_merge_tree(src_entry, dst_entry)
            else:
                shutil.move(src_entry, dst_entry)
            continue
        # Plain file: only move it when dst has no file of that name.
        if osp.isfile(src_entry) and not osp.isfile(dst_entry):
            shutil.move(src_entry, dst_entry)
def _decompress(fname):
    """
    Decompress a zip or tar file next to itself and delete the archive.

    The archive type is decided from the file name only (not from the
    directories leading to it), so a parent folder whose name happens to
    contain "tar" or "zip" no longer misroutes the file. ``.txt`` files
    are left untouched.

    Raises:
        TypeError: if the file is neither a tar, a zip nor a ``.txt`` file.
    """
    fpath, base_name = osp.split(fname)
    if 'tar' in base_name:
        opener = tarfile.open
    elif 'zip' in base_name:
        opener = zipfile.ZipFile
    elif '.txt' in base_name:
        # Nothing to extract; return before creating any temporary folder.
        return
    else:
        raise TypeError("Unsupport compress file type {}".format(fname))

    # For protecting decompressing interrupted,
    # decompress to fpath_tmp directory firstly, if decompress
    # succeeded, move decompress files to fpath and delete
    # fpath_tmp and remove download compress file.
    fpath_tmp = osp.join(fpath, 'tmp')
    if osp.isdir(fpath_tmp):
        shutil.rmtree(fpath_tmp)
    os.makedirs(fpath_tmp)

    # NOTE(review): extractall trusts member paths inside the archive;
    # only use with archives from trusted sources.
    with opener(fname) as archive:
        archive.extractall(path=fpath_tmp)

    for f in os.listdir(fpath_tmp):
        src_dir = osp.join(fpath_tmp, f)
        dst_dir = osp.join(fpath, f)
        _move_and_merge_tree(src_dir, dst_dir)

    shutil.rmtree(fpath_tmp)
    os.remove(fname)
def _decompress_dist(fname):
    """Decompress ``fname`` once per machine when running several trainers.

    Outside a multi-trainer environment this is a plain ``_decompress``.
    Otherwise trainers whose endpoint is among the unique endpoints
    decompress under a lock file while the others wait for it to vanish.
    """
    env = os.environ
    if 'PADDLE_TRAINERS_NUM' not in env or 'PADDLE_TRAINER_ID' not in env:
        _decompress(fname)
        return

    num_trainers = int(env['PADDLE_TRAINERS_NUM'])
    if num_trainers <= 1:
        _decompress(fname)
        return

    lock_path = fname + '.decompress.lock'
    from paddle.distributed import ParallelEnv
    unique_endpoints = _get_unique_endpoints(ParallelEnv()
                                             .trainer_endpoints[:])
    # NOTE(dkp): _decompress_dist is always performed after
    # _download_dist. In _download_dist the sub-trainers wait, sleeping,
    # for the download lock file to be released. If decompression is
    # very fast and finishes within that sleeping gap (e.g. for tiny
    # datasets such as coco_ce, spine_coco), the main trainer may
    # finish decompressing and release the lock file before the others
    # look. So only the main trainer creates the lock file and all
    # sub-trainers wait 1s for it to be created; 1s is twice the
    # sleeping gap, which keeps all trainers in order.
    # **change this if you have a more elegant method**
    if ParallelEnv().current_endpoint in unique_endpoints:
        with open(lock_path, 'w'):  # touch
            os.utime(lock_path, None)
        try:
            _decompress(fname)
        finally:
            # Release the lock even on failure so that waiting trainers
            # do not block forever.
            if os.path.exists(lock_path):
                os.remove(lock_path)
    else:
        time.sleep(1)
        while os.path.exists(lock_path):
            time.sleep(0.5)
def get_path(url, root_dir=WEIGHTS_HOME, md5sum=None, check_exist=True):
    """ Resolve ``url`` to a local path under ``root_dir``, downloading
    and decompressing it first when it is not available yet.

    url (str): download url
    root_dir (str): root dir for downloading
    md5sum (str): md5 sum of download package
    check_exist (bool): when True, an existing local copy is reused

    Returns:
        tuple: ``(path, True)`` if an existing copy was reused,
        ``(path, False)`` if a download was performed.
    """
    # parse path after download to decompress under root_dir
    fullpath = map_path(url, root_dir)

    # Some archives decompress into a directory whose name differs from
    # the archive name; rename according to this map.
    decompress_name_map = {"ppTSM_fight": "ppTSM", }
    for archive_key, real_name in decompress_name_map.items():
        if archive_key in fullpath:
            fullpath = osp.join(osp.split(fullpath)[0], real_name)

    if check_exist and osp.exists(fullpath):
        # Directories are trusted as-is; plain files must pass the md5 check.
        if osp.isfile(fullpath) and \
                not _check_exist_file_md5(fullpath, md5sum, url):
            os.remove(fullpath)
        else:
            return fullpath, True

    fullname = _download_dist(url, root_dir, md5sum)

    # new weights format which postfix is 'pdparams' not
    # need to decompress
    extension = osp.splitext(fullname)[-1]
    if extension not in ['.pdparams', '.yml']:
        _decompress_dist(fullname)

    return fullpath, False
def get_weights_path(url):
    """Return the local weights path under WEIGHTS_HOME for ``url``,
    downloading the weights first when they are not cached.
    """
    resolved_url = parse_url(url)
    # URLs without a registered digest are fetched without verification.
    expected_md5 = MODEL_URL_MD5_DICT.get(resolved_url)
    local_path, _ = get_path(resolved_url, WEIGHTS_HOME, expected_md5)
    return local_path
def auto_download_model(model_path):
    """Download the model when ``model_path`` is a URL.

    Returns:
        The local weights path for a URL, or ``None`` when ``model_path``
        is not a URL.
    """
    # auto download
    if not is_url(model_path):
        return None
    return get_weights_path(model_path)
if __name__ == "__main__":
    # Manual check: fetch the MOT pipeline model into the weights cache.
    model_path = "https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_l_36e_pipeline.zip"
    auto_download_model(model_path)
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import time
import os
import ast
import glob
import yaml
import copy
import numpy as np
import subprocess as sp
from python.keypoint_preprocess import EvalAffine, TopDownEvalAffine, expand_crop
class Times(object):
    """Simple stopwatch that can accumulate elapsed wall-clock time."""

    def __init__(self):
        # time: measured duration; st / et: start and end timestamps.
        self.time = self.st = self.et = 0.

    def start(self):
        """Remember the current time as the start timestamp."""
        self.st = time.time()

    def end(self, repeats=1, accumulative=True):
        """Stop the watch and store the elapsed time divided by ``repeats``.

        With ``accumulative`` the value is added to the stored time,
        otherwise it replaces it.
        """
        self.et = time.time()
        elapsed = (self.et - self.st) / repeats
        if not accumulative:
            self.time = elapsed
            return
        self.time += elapsed

    def reset(self):
        """Zero the measured time and both timestamps."""
        self.time = self.st = self.et = 0.

    def value(self):
        """Return the measured time rounded to 4 decimals."""
        return round(self.time, 4)
class PipeTimer(Times):
    """Timing bookkeeping for the pipeline: one total timer plus one timer
    per module, together with frame and track counters."""

    def __init__(self):
        super(PipeTimer, self).__init__()
        self.total_time = Times()
        self.module_time = {
            'det': Times(),
            'mot': Times(),
            'attr': Times(),
            'kpt': Times(),
            'video_action': Times(),
            'skeleton_action': Times(),
            'reid': Times(),
            'det_action': Times(),
            'cls_action': Times(),
            'vehicle_attr': Times(),
            'vehicleplate': Times()
        }
        self.img_num = 0
        self.track_num = 0

    def get_total_time(self):
        """Return ``(total_time, average_latency, qps)``.

        The average is taken over ``img_num`` (at least 1); ``qps`` is 0
        when no time has been recorded.
        """
        total_time = self.total_time.value()
        total_time = round(total_time, 4)
        average_latency = total_time / max(1, self.img_num)
        qps = 0
        if total_time > 0:
            qps = 1 / average_latency
        return total_time, average_latency, qps

    def info(self):
        """Print a timing summary and return the QPS."""
        total_time, average_latency, qps = self.get_total_time()
        print("------------------ Inference Time Info ----------------------")
        print("total_time(ms): {}, img_num: {}".format(total_time * 1000,
                                                       self.img_num))
        # Same zero guard as get_total_time(): the counters may still be 0
        # (e.g. no track was counted) while a module has recorded time.
        frame_count = max(1, self.img_num)
        track_count = max(1, self.track_num)
        for k, v in self.module_time.items():
            v_time = round(v.value(), 4)
            if v_time <= 0:
                continue
            if k in ['det', 'mot', 'video_action']:
                print("{} time(ms): {}; per frame average time(ms): {}".format(
                    k, v_time * 1000, v_time * 1000 / frame_count))
            else:
                print("{} time(ms): {}; per trackid average time(ms): {}".
                      format(k, v_time * 1000, v_time * 1000 / track_count))
        # "{:2f}" (minimum width 2) was a typo for "{:.2f}" (two decimals).
        print("average latency time(ms): {:.2f}, QPS: {:.2f}".format(
            average_latency * 1000, qps))
        return qps

    def report(self, average=False):
        """Return the timings as a dict.

        With ``average`` the values are divided by ``img_num`` (at least 1)
        and rounded to 4 decimals; ``video_action`` is always reported raw.
        """
        frame_count = max(1, self.img_num)

        def _metric(timer):
            value = timer.value()
            return round(value / frame_count, 4) if average else value

        dic = {'total': _metric(self.total_time)}
        for name in ('det', 'mot', 'attr', 'kpt'):
            dic[name] = _metric(self.module_time[name])
        dic['video_action'] = self.module_time['video_action'].value()
        dic['skeleton_action'] = _metric(self.module_time['skeleton_action'])
        dic['img_num'] = self.img_num
        return dic
class PushStream(object):
    """Pipes raw BGR frames into an ffmpeg process that publishes them
    as an RTSP stream."""

    def __init__(self, pushurl="rtsp://127.0.0.1:8554/"):
        self.command = ""
        # Set this to your own stream URL.
        self.pushurl = pushurl

    def initcmd(self, fps, width, height):
        """Build the ffmpeg command line and start the process.

        The process is stored in ``self.pipe`` with its stdin opened as a
        pipe.
        """
        frame_size = "{}x{}".format(width, height)
        raw_input_opts = [
            '-f', 'rawvideo', '-vcodec', 'rawvideo', '-pix_fmt', 'bgr24',
            '-s', frame_size, '-r', str(fps), '-i', '-'
        ]
        output_opts = ['-pix_fmt', 'yuv420p', '-f', 'rtsp', self.pushurl]
        self.command = ['ffmpeg', '-y'] + raw_input_opts + output_opts
        self.pipe = sp.Popen(self.command, stdin=sp.PIPE)
def get_test_images(infer_dir, infer_img):
    """
    Collect the image paths to run inference on.

    ``infer_img`` wins when it names an existing file; otherwise every
    jpg/jpeg/png/bmp file (lower- or upper-case extension) directly inside
    ``infer_dir`` is returned, in no particular order.
    """
    assert infer_img is not None or infer_dir is not None, \
        "--infer_img or --infer_dir should be set"
    assert infer_img is None or os.path.isfile(infer_img), \
        "{} is not a file".format(infer_img)
    assert infer_dir is None or os.path.isdir(infer_dir), \
        "{} is not a directory".format(infer_dir)

    # infer_img has a higher priority
    if infer_img and os.path.isfile(infer_img):
        return [infer_img]

    infer_dir = os.path.abspath(infer_dir)
    assert os.path.isdir(infer_dir), \
        "infer_dir {} is not a directory".format(infer_dir)

    lower_exts = ['jpg', 'jpeg', 'png', 'bmp']
    all_exts = lower_exts + [ext.upper() for ext in lower_exts]
    # A set removes duplicates on case-insensitive file systems.
    found = {
        path
        for ext in all_exts
        for path in glob.glob('{}/*.{}'.format(infer_dir, ext))
    }
    images = list(found)

    assert len(images) > 0, "no image found in {}".format(infer_dir)
    print("Found {} inference images in total.".format(len(images)))
    return images
def crop_image_with_det(batch_input, det_res, thresh=0.3):
    """Crop the detected regions out of every image of a batch.

    Args:
        batch_input: sequence of images.
        det_res (dict): ``'boxes'`` is a 2-D array whose column 1 is the
            score; ``'boxes_num'`` gives the number of boxes per image.
            The rows of ``'boxes'`` are taken image after image
            (presumably the detector concatenates them in batch order).
        thresh (float): boxes with a score not above this are skipped.

    Returns:
        list: one list of crops per image that has at least one box;
        images without boxes contribute no entry.
    """
    boxes = det_res['boxes']
    score = det_res['boxes'][:, 1]
    boxes_num = det_res['boxes_num']
    start_idx = 0
    crop_res = []
    for b_id, image in enumerate(batch_input):
        boxes_num_i = boxes_num[b_id]
        if boxes_num_i == 0:
            continue
        end_idx = start_idx + boxes_num_i
        boxes_i = boxes[start_idx:end_idx, :]
        score_i = score[start_idx:end_idx]
        # Advance to the next image's rows; without this every image
        # was cropped with the boxes of the first one.
        start_idx = end_idx
        res = []
        for box, s in zip(boxes_i, score_i):
            if s > thresh:
                crop_image, new_box, ori_box = expand_crop(image, box)
                if crop_image is not None:
                    res.append(crop_image)
        crop_res.append(res)
    return crop_res
def normal_crop(image, rect):
    """Crop ``rect`` out of ``image`` after clipping it to the image.

    Args:
        image: array of shape (height, width, channels).
        rect: array-like with ``tolist()`` holding
            (label, conf, xmin, ymin, xmax, ymax); values are cast to int.

    Returns:
        tuple: ``(crop, clipped_rect, original_rect)``, or three ``None``
        values when the label is not 0.
    """
    height, width, _ = image.shape
    label, _, left, top, right, bottom = [int(v) for v in rect.tolist()]
    if label != 0:
        return None, None, None

    original = [left, top, right, bottom]
    clipped = [
        max(0, left), max(0, top), min(width, right), min(height, bottom)
    ]
    x0, y0, x1, y1 = clipped
    return image[y0:y1, x0:x1, :], clipped, original
def crop_image_with_mot(input, mot_res, expand=True):
    """Crop every tracked box out of one image.

    Args:
        input: the image to crop from.
        mot_res (dict): ``'boxes'`` holds one row per track; the first
            element of each row is dropped before cropping.
        expand (bool): use ``expand_crop`` when True, ``normal_crop``
            otherwise.

    Returns:
        tuple: ``(crops, new_bboxes, ori_bboxes)`` as parallel lists;
        boxes for which no crop is produced are left out.
    """
    crop_fn = expand_crop if expand else normal_crop
    crops, new_bboxes, ori_bboxes = [], [], []
    for track_box in mot_res['boxes']:
        patch, new_bbox, ori_bbox = crop_fn(input, track_box[1:])
        if patch is None:
            continue
        crops.append(patch)
        new_bboxes.append(new_bbox)
        ori_bboxes.append(ori_bbox)
    return crops, new_bboxes, ori_bboxes
def parse_mot_res(input):
    """Convert raw tracker output into the pipeline's MOT result dict.

    Args:
        input: sequence whose first item is ``(boxes, scores, ids)``; only
            the first entry of each of the three is read. Boxes are
            (xmin, ymin, w, h).

    Returns:
        dict: ``{'boxes': array}`` with one row
        ``[id, 0, score, xmin, ymin, xmax, ymax]`` per track.
    """
    boxes, scores, ids = input[0]
    rows = [[track_id, 0, score, left, top, left + w, top + h]
            for (left, top, w, h), score, track_id in zip(boxes[0], scores[0],
                                                          ids[0])]
    return {'boxes': np.array(rows)}
def refine_keypoint_coordinary(kpts, bbox, coord_size):
    """
    Rescale keypoint coordinates from their bounding boxes to a fixed size.

    Each coordinate is shifted by the box's top-left corner, divided by the
    box width/height and multiplied by the target width/height given in
    ``coord_size``.
    """
    top_left = bbox[:, 0:2]
    extent = bbox[:, 2:] - top_left
    # Swap the two axes and append two singleton axes so the per-box
    # values broadcast against kpts.
    top_left = top_left.T[:, :, np.newaxis, np.newaxis]
    extent = extent.T[:, :, np.newaxis, np.newaxis]
    target_w, target_h = coord_size
    scale = np.array([[target_w], [target_h]])[:, :, np.newaxis, np.newaxis]
    return (kpts - top_left) / extent * scale
def parse_mot_keypoint(input, coord_size):
    """Prepare collected keypoint sequences for skeleton-based recognition.

    Args:
        input: iterable of ``(tracker_id, kpt_seq)`` pairs; each
            ``kpt_seq`` exposes ``kpts`` and ``bboxes``.
        coord_size: target (width, height) the coordinates are rescaled to.

    Returns:
        dict: ``'mot_id'`` lists the tracker ids and ``'skeleton'`` the
        rescaled keypoint arrays, in the same order.
    """
    track_ids = []
    skeletons = []
    for tracker_id, kpt_seq in input:
        track_ids.append(tracker_id)
        # Keep only the first two values of every keypoint.
        coords = np.array(kpt_seq.kpts, dtype=np.float32)[:, :, :2]
        # T, K, C -> C, T, K, 1
        coords = np.transpose(coords, [2, 0, 1])[..., np.newaxis]
        boxes = np.array(kpt_seq.bboxes, dtype=np.float32)
        skeletons.append(refine_keypoint_coordinary(coords, boxes, coord_size))
    return {"mot_id": track_ids, "skeleton": skeletons}
此差异已折叠。
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
class KeyPointSequence(object):
    """Growing list of keypoints and boxes collected for one track."""

    def __init__(self, max_size=100):
        self.frames = 0
        self.kpts = []
        self.bboxes = []
        self.max_size = max_size

    def save(self, kpt, bbox):
        """Store one frame.

        Returns:
            bool: True exactly when the number of stored frames reaches
            ``max_size``.
        """
        self.kpts.append(kpt)
        self.bboxes.append(bbox)
        self.frames += 1
        return self.frames == self.max_size
class KeyPointBuff(object):
    """Buffers keypoint sequences per tracker id until they are ready to
    be handed over (sequence full, or tracker no longer updated)."""

    def __init__(self, max_size=100):
        self.flag_track_interrupt = False
        self.keypoint_saver = dict()
        self.max_size = max_size
        self.id_to_pop = set()
        self.flag_to_pop = False

    def get_state(self):
        """Return True when at least one sequence is waiting to be popped."""
        return self.flag_to_pop

    def update(self, kpt_res, mot_res):
        """Append the keypoints of the current frame to their trackers.

        The i-th keypoint entry is attributed to the tracker id found in
        column 0 of the i-th row of ``mot_res['boxes']``.
        """
        kpts = kpt_res.get('keypoint')[0]
        bboxes = kpt_res.get('bbox')
        mot_bboxes = mot_res.get('boxes')
        seen_ids = set()

        for idx, kpt in enumerate(kpts):
            tracker_id = mot_bboxes[idx, 0]
            seen_ids.add(tracker_id)

            sequence = self.keypoint_saver.get(tracker_id)
            if sequence is None:
                sequence = KeyPointSequence(self.max_size)
            reached_max = sequence.save(kpt, bboxes[idx])
            self.keypoint_saver[tracker_id] = sequence

            # Scene1: result should be popped when frames meet max size
            if reached_max:
                self.id_to_pop.add(tracker_id)
                self.flag_to_pop = True

        # Scene2: result of a lost tracker should be popped
        lost_ids = set(self.keypoint_saver.keys()) - seen_ids
        if lost_ids:
            self.flag_to_pop = True
            self.id_to_pop.update(lost_ids)

    def get_collected_keypoint(self):
        """
        Output (List): List of keypoint results for the skeleton-based
            recognition task, where each element has the format
            [tracker_id, KeyPointSequence of tracker_id]. The returned
            trackers are removed from the buffer.
        """
        output = []
        for tracker_id in self.id_to_pop:
            output.append([tracker_id, self.keypoint_saver.pop(tracker_id)])
        self.flag_to_pop = False
        self.id_to_pop.clear()
        return output
class ActionVisualHelper(object):
    """Keeps recent action results per tracker id for a limited number of frames."""

    def __init__(self, frame_life=20):
        self.frame_life = frame_life
        self.action_history = {}  # mot_id -> {"class": ..., "life_remain": int}

    def get_visualize_ids(self):
        """Return the ids reported by check_detected()."""
        return self.check_detected()

    def check_detected(self):
        """
        Age every stored entry by one frame and collect ids whose class is 0.

        Entries whose remaining life reaches 0 are removed after being
        reported one last time.
        """
        id_detected = set()
        expired_ids = []
        for mot_id, info in self.action_history.items():
            info["life_remain"] -= 1
            if int(info["class"]) == 0:
                id_detected.add(mot_id)
            if info["life_remain"] == 0:
                expired_ids.append(mot_id)
        for mot_id in expired_ids:
            self.action_history.pop(mot_id)
        return id_detected

    def update(self, action_res_list):
        """
        Record new action results given as (mot_id, action_res) pairs.

        A stored class-0 entry is not overwritten by a non-zero class result.
        """
        for mot_id, action_res in action_res_list:
            known = mot_id in self.action_history
            if (known and int(action_res["class"]) != 0 and
                    int(self.action_history[mot_id]["class"]) == 0):
                continue

            action_info = self.action_history[mot_id] if known else {}
            action_info["class"] = action_res["class"]
            action_info["life_remain"] = self.frame_life
            self.action_history[mot_id] = action_info
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import yaml
import glob
from functools import reduce
import cv2
import numpy as np
import math
import paddle
from paddle.inference import Config
from paddle.inference import create_predictor
import sys
# add deploy path of PaddleDetection to sys.path
parent_path = os.path.abspath(os.path.join(__file__, *(['..'])))
sys.path.insert(0, parent_path)
from python.benchmark_utils import PaddleInferBenchmark
from python.preprocess import preprocess, Resize, NormalizeImage, Permute, PadStride, LetterBoxResize, WarpAffine
from python.visualize import visualize_attr
from python.utils import argsparser, Timer, get_current_memory_mb
from python.infer import Detector, get_test_images, print_arguments, load_predictor
from PIL import Image, ImageDraw, ImageFont
class AttrDetector(Detector):
"""
Args:
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
batch_size (int): size of pre batch in inference
trt_min_shape (int): min shape for dynamic shape in trt
trt_max_shape (int): max shape for dynamic shape in trt
trt_opt_shape (int): opt shape for dynamic shape in trt
trt_calib_mode (bool): If the model is produced by TRT offline quantitative
calibration, trt_calib_mode need to set True
cpu_threads (int): cpu threads
enable_mkldnn (bool): whether to open MKLDNN
output_dir (str): The path of output
threshold (float): The threshold of score for visualization
"""
def __init__(
        self,
        model_dir,
        device='CPU',
        run_mode='paddle',
        batch_size=1,
        trt_min_shape=1,
        trt_max_shape=1280,
        trt_opt_shape=640,
        trt_calib_mode=False,
        cpu_threads=1,
        enable_mkldnn=False,
        output_dir='output',
        threshold=0.5, ):
    """Pass every option through unchanged to the parent class initializer."""
    parent_options = dict(
        model_dir=model_dir,
        device=device,
        run_mode=run_mode,
        batch_size=batch_size,
        trt_min_shape=trt_min_shape,
        trt_max_shape=trt_max_shape,
        trt_opt_shape=trt_opt_shape,
        trt_calib_mode=trt_calib_mode,
        cpu_threads=cpu_threads,
        enable_mkldnn=enable_mkldnn,
        output_dir=output_dir,
        threshold=threshold)
    super(AttrDetector, self).__init__(**parent_options)
@classmethod
def init_with_cfg(cls, args, cfg):
    """
    Build an instance from parsed arguments and a model config mapping.

    `model_dir` and `batch_size` are read from `cfg`; the device, run mode,
    TensorRT and CPU options are read from attributes of `args`.
    """
    options = {
        'model_dir': cfg['model_dir'],
        'batch_size': cfg['batch_size'],
        'device': args.device,
        'run_mode': args.run_mode,
        'trt_min_shape': args.trt_min_shape,
        'trt_max_shape': args.trt_max_shape,
        'trt_opt_shape': args.trt_opt_shape,
        'trt_calib_mode': args.trt_calib_mode,
        'cpu_threads': args.cpu_threads,
        'enable_mkldnn': args.enable_mkldnn,
    }
    return cls(**options)
def get_label(self):
    """Return the `labels` attribute of the stored prediction config."""
    pred_config = self.pred_config
    return pred_config.labels
def postprocess(self, inputs, result):
    """
    Decode raw attribute scores into readable label strings.

    Args:
        inputs: not used by this method.
        result (dict): `result['output']` is iterated row by row; every row
            must provide `tolist()` and is indexed up to position 25.

    Returns:
        dict: {'output': batch_res} where each element of batch_res is a list
            of ten strings in the order gender, age, direction, glasses, hat,
            held object, bag, upper body, lower body, shoes.
    """
    age_list = ['AgeLess18', 'Age18-60', 'AgeOver60']
    direct_list = ['Front', 'Side', 'Back']
    bag_list = ['HandBag', 'ShoulderBag', 'Backpack']
    upper_list = ['UpperStride', 'UpperLogo', 'UpperPlaid', 'UpperSplice']
    lower_list = [
        'LowerStripe', 'LowerPattern', 'LongCoat', 'Trousers', 'Shorts',
        'Skirt&Dress'
    ]
    # glasses and held objects use their own thresholds instead of
    # self.threshold
    glasses_threshold = 0.3
    hold_threshold = 0.6

    def flag(prefix, score, threshold):
        # "<prefix>True" when the score exceeds the threshold
        return prefix + ('True' if score > threshold else 'False')

    batch_res = []
    for scores in result['output']:
        scores = scores.tolist()
        label_res = []

        # gender, age, direction
        label_res.append('Female' if scores[22] > self.threshold else 'Male')
        label_res.append(age_list[np.argmax(scores[19:22])])
        label_res.append(direct_list[np.argmax(scores[23:])])

        # glasses, hat, hold obj
        label_res.append(flag('Glasses: ', scores[1], glasses_threshold))
        label_res.append(flag('Hat: ', scores[0], self.threshold))
        label_res.append(
            flag('HoldObjectsInFront: ', scores[18], hold_threshold))

        # bag: best of the three bag classes, if confident enough
        bag_idx = np.argmax(scores[15:18])
        if scores[15 + bag_idx] > self.threshold:
            label_res.append(bag_list[bag_idx])
        else:
            label_res.append('No bag')

        # upper: sleeve length is always reported, the pattern only when its
        # best score passes the threshold
        sleeve = 'LongSleeve' if scores[3] > scores[2] else 'ShortSleeve'
        upper_label = 'Upper:' + ' {}'.format(sleeve)
        upper_res = scores[4:8]
        if np.max(upper_res) > self.threshold:
            upper_label += ' {}'.format(upper_list[np.argmax(upper_res)])
        label_res.append(upper_label)

        # lower: every class above the threshold, or the best one if none is
        lower_res = scores[8:14]
        lower_names = [
            lower_list[i] for i, score in enumerate(lower_res)
            if score > self.threshold
        ]
        if not lower_names:
            lower_names = [lower_list[np.argmax(lower_res)]]
        label_res.append('Lower: ' + ''.join(
            ' {}'.format(name) for name in lower_names))

        # shoe
        label_res.append('Boots' if scores[14] > self.threshold else
                         'No boots')

        batch_res.append(label_res)
    return {'output': batch_res}
def predict(self, repeats=1):
    '''
    Run the predictor and fetch its first output.

    Args:
        repeats (int): number of times the predictor is run; the output is
            fetched after every run
    Returns:
        result (dict): {'output': value copied to CPU from the first output
            handle after the last run}
    '''
    for _ in range(repeats):
        self.predictor.run()
        first_output_name = self.predictor.get_output_names()[0]
        output_tensor = self.predictor.get_output_handle(first_output_name)
        np_output = output_tensor.copy_to_cpu()
    return dict(output=np_output)
def predict_image(self,
                  image_list,
                  run_benchmark=False,
                  repeats=1,
                  visual=True):
    """
    Run preprocess / predict / postprocess over `image_list` in batches.

    Args:
        image_list (list): inputs, sliced into chunks of `self.batch_size`.
        run_benchmark (bool): when True each stage is executed once untimed
            as warmup and once timed, and memory statistics are accumulated.
        repeats (int): forwarded to `predict` in benchmark mode only.
        visual (bool): print the iteration index and, outside benchmark mode,
            save visualized results to `self.output_dir`.

    Returns:
        The per-batch results combined by `merge_batch_result`.
    """
    total = len(image_list)
    batch_loop_cnt = math.ceil(float(total) / self.batch_size)
    timers = self.det_times
    results = []
    for i in range(batch_loop_cnt):
        begin = i * self.batch_size
        end = min(begin + self.batch_size, total)
        batch_image_list = image_list[begin:end]

        if run_benchmark:
            # preprocess (first call is warmup, second one is timed)
            inputs = self.preprocess(batch_image_list)
            timers.preprocess_time_s.start()
            inputs = self.preprocess(batch_image_list)
            timers.preprocess_time_s.end()

            # model prediction (warmup, then timed)
            result = self.predict(repeats=repeats)
            timers.inference_time_s.start()
            result = self.predict(repeats=repeats)
            timers.inference_time_s.end(repeats=repeats)

            # postprocess (warmup result is discarded)
            self.postprocess(inputs, result)
            timers.postprocess_time_s.start()
            result = self.postprocess(inputs, result)
            timers.postprocess_time_s.end()
            timers.img_num += len(batch_image_list)

            cm, gm, gu = get_current_memory_mb()
            self.cpu_mem += cm
            self.gpu_mem += gm
            self.gpu_util += gu
        else:
            # preprocess
            timers.preprocess_time_s.start()
            inputs = self.preprocess(batch_image_list)
            timers.preprocess_time_s.end()

            # model prediction
            timers.inference_time_s.start()
            result = self.predict()
            timers.inference_time_s.end()

            # postprocess
            timers.postprocess_time_s.start()
            result = self.postprocess(inputs, result)
            timers.postprocess_time_s.end()
            timers.img_num += len(batch_image_list)

            if visual:
                visualize(
                    batch_image_list, result, output_dir=self.output_dir)

        results.append(result)
        if visual:
            print('Test iter {}'.format(i))

    return self.merge_batch_result(results)
def merge_batch_result(self, batch_result):
    """
    Combine per-batch result dicts into one dict.

    A single-element list returns that element itself; otherwise the values
    of every key are concatenated into a new list, using the keys of the
    first batch.
    """
    first = batch_result[0]
    if len(batch_result) == 1:
        return first

    merged = {key: [] for key in first.keys()}
    for single_batch in batch_result:
        for key, values in single_batch.items():
            merged[key].extend(values)
    return merged
def visualize(image_list, batch_res, output_dir='output'):
    """
    Draw the predicted attributes on every image and save the result.

    Each output file is written into `output_dir` under the same base name as
    its input; the directory is created if it does not exist.
    """
    for image_file, res in zip(image_list, batch_res['output']):
        im = visualize_attr(image_file, [res])
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        out_path = os.path.join(output_dir, os.path.split(image_file)[-1])
        cv2.imwrite(out_path, im)
        print("save result to: " + out_path)
def main():
    """
    Build an AttrDetector from the global FLAGS and run it on the test images.

    Without --run_benchmark the timing summary is printed; otherwise memory
    and timing figures are collected and logged through PaddleInferBenchmark.
    """
    detector_options = dict(
        device=FLAGS.device,
        run_mode=FLAGS.run_mode,
        batch_size=FLAGS.batch_size,
        trt_min_shape=FLAGS.trt_min_shape,
        trt_max_shape=FLAGS.trt_max_shape,
        trt_opt_shape=FLAGS.trt_opt_shape,
        trt_calib_mode=FLAGS.trt_calib_mode,
        cpu_threads=FLAGS.cpu_threads,
        enable_mkldnn=FLAGS.enable_mkldnn,
        threshold=FLAGS.threshold,
        output_dir=FLAGS.output_dir)
    detector = AttrDetector(FLAGS.model_dir, **detector_options)

    # predict from image
    if FLAGS.image_dir is None and FLAGS.image_file is not None:
        assert FLAGS.batch_size == 1, "batch_size should be 1, when image_file is not None"
    img_list = get_test_images(FLAGS.image_dir, FLAGS.image_file)
    detector.predict_image(img_list, FLAGS.run_benchmark, repeats=10)

    if FLAGS.run_benchmark:
        img_count = len(img_list)
        # accumulated counters are averaged over the number of images
        mems = {
            'cpu_rss_mb': detector.cpu_mem / img_count,
            'gpu_rss_mb': detector.gpu_mem / img_count,
            'gpu_util': detector.gpu_util * 100 / img_count
        }
        perf_info = detector.det_times.report(average=True)
        model_info = {
            # last path component of the model directory
            'model_name': FLAGS.model_dir.strip('/').split('/')[-1],
            # text after the last '_' in the run mode
            'precision': FLAGS.run_mode.split('_')[-1]
        }
        data_info = {
            'batch_size': FLAGS.batch_size,
            'shape': "dynamic_shape",
            'data_num': perf_info['img_num']
        }
        det_log = PaddleInferBenchmark(detector.config, model_info, data_info,
                                       perf_info, mems)
        det_log('Attr')
    else:
        detector.det_times.info(average=True)
if __name__ == '__main__':
    # Script entry point: parse the command line, validate it, then run.
    paddle.enable_static()
    parser = argsparser()
    FLAGS = parser.parse_args()
    print_arguments(FLAGS)

    # the device name is compared in upper case
    FLAGS.device = FLAGS.device.upper()
    assert FLAGS.device in ['CPU', 'GPU', 'XPU'], \
        "device should be CPU, GPU or XPU"
    assert not FLAGS.use_gpu, \
        "use_gpu has been deprecated, please use --device"

    main()
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from pptracking.python.mot.visualize import plot_tracking
from python.visualize import visualize_attr
import os
import re
import cv2
import gc
import numpy as np
try:
from sklearn import preprocessing
from sklearn.cluster import AgglomerativeClustering
except:
print(
'Warning: Unable to use MTMCT in PP-Human, please install sklearn, for example: `pip install sklearn`'
)
pass
import pandas as pd
from tqdm import tqdm
from functools import reduce
import warnings
warnings.filterwarnings("ignore")
def gen_restxt(output_dir_filename, map_tid, cid_tid_dict):
    """
    Write the MTMCT result text file.

    One line "cid tid fid x y w h" is written per stored box of every key
    that has an entry in `map_tid`.

    Args:
        output_dir_filename (str): path of the text file to (over)write.
        map_tid (dict): maps a "c<cid>_t<tid>" key to its new track id.
        cid_tid_dict (dict): maps the same keys to dicts with "rects" (each
            entry's first element is [conf, xmin, ymin, xmax, ymax]) and
            "frames" (frame index per rect).

    The rect arrays of the caller are left untouched.
    """
    # \d+ so that camera / track ids with more than one digit are parsed
    pattern = re.compile(r'c(\d+)_t(\d+)')
    with open(output_dir_filename, 'w') as f_w:
        for key, res in cid_tid_dict.items():
            cid, _ = pattern.search(key).groups()
            cid = int(cid) + 1  # camera ids are written 1-based
            if key not in map_tid:
                continue
            new_tid = map_tid[key]
            frames = res["frames"]
            for idx, bbox in enumerate(res["rects"]):
                # work on a copy: convert xmax, ymax into width, height
                box = np.array(bbox[0], dtype=float)
                box[3:] -= box[1:3]
                fid = frames[idx] + 1  # frame ids are written 1-based
                rect = [max(int(x), 0) for x in box[1:]]
                f_w.write(
                    str(cid) + ' ' + str(new_tid) + ' ' + str(fid) + ' ' +
                    ' '.join(map(str, rect)) + '\n')
    print('gen_res: write file in {}'.format(output_dir_filename))
def get_mtmct_matching_results(pred_mtmct_file,
                               secs_interval=0.5,
                               video_fps=20):
    """
    Load an MTMCT result file and group its rows by camera, track and frame.

    Args:
        pred_mtmct_file (str): text file whose lines start with
            'cid, tid, fid, x1, y1, w, h'.
        secs_interval (float): window length in seconds.
        video_fps (int): frames per second used to convert the window length
            into a number of frames.

    Returns:
        tuple: (camera_results, cid_tid_fid_results)
            camera_results[c_id] holds all rows of that camera;
            cid_tid_fid_results[c_id][t_id][f_id] holds the rows of a track
            seen by every camera whose frame id lies in
            [f_id, f_id + interval), for each stored f_id divisible by the
            interval.
    """
    # ndmin=2 keeps a file with a single line two-dimensional
    res = np.loadtxt(pred_mtmct_file, ndmin=2)
    camera_ids = list(map(int, np.unique(res[:, 0])))

    # each line in res: 'cid, tid, fid, x1, y1, w, h'
    res = res[:, :7]

    camera_results = dict()
    camera_tids = []
    for c_id in camera_ids:
        camera_results[c_id] = res[res[:, 0] == c_id]
        tids = np.unique(camera_results[c_id][:, 1])
        camera_tids.append(list(map(int, tids)))

    # select common tids throughout each video
    common_tids = reduce(np.intersect1d, camera_tids) if camera_tids else []

    interval = int(secs_interval * video_fps)  # preferably less than 10
    cid_tid_fid_results = dict()
    for c_id in camera_ids:
        cam_rows = camera_results[c_id]
        per_track = cid_tid_fid_results[c_id] = dict()
        for t_id in common_tids:
            track_rows = cam_rows[cam_rows[:, 1] == t_id]
            fids = np.unique(track_rows[:, 2])
            fids = fids[fids % interval == 0]
            per_frame = per_track[t_id] = dict()
            for f_id in map(int, fids):
                in_window = np.logical_and(track_rows[:, 2] >= f_id,
                                           track_rows[:, 2] < f_id + interval)
                per_frame[f_id] = track_rows[in_window]
    return camera_results, cid_tid_fid_results
def save_mtmct_vis_results(camera_results,
                           captures,
                           output_dir,
                           multi_res=None):
    """
    Render the MTMCT tracking results on every input video.

    Args:
        camera_results (dict): rows 'cid, tid, fid, x1, y1, w, h' per camera
            id; the i-th key is used for the i-th entry of `captures`.
        captures (list): video file paths.
        output_dir (str): the videos are written to <output_dir>/mtmct_vis,
            which is removed first if it already exists.
        multi_res (dict): optional mapping of "c<cid>_t<tid>" keys to dicts
            with an "attrs" list indexed by frame; when any of them is
            non-empty the attributes are drawn as well.
    """
    import shutil

    camera_ids = list(camera_results.keys())
    save_dir = os.path.join(output_dir, 'mtmct_vis')
    if os.path.exists(save_dir):
        shutil.rmtree(save_dir)
    os.makedirs(save_dir)

    # multi_res is not modified while frames are drawn, so the decision
    # whether attributes are visualized is taken once
    tid_list = list(multi_res.keys()) if multi_res else []  # c0_t1, c0_t2...
    draw_attrs = any([multi_res[k]["attrs"] for k in tid_list])

    for idx, video_file in enumerate(captures):
        capture = cv2.VideoCapture(video_file)
        writer = None
        try:
            cid = camera_ids[idx]
            out_path = os.path.join(save_dir,
                                    "vis_" + os.path.basename(video_file))
            print("Start visualizing output video: {}".format(out_path))

            # Get Video info : resolution, fps
            width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = int(capture.get(cv2.CAP_PROP_FPS))
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))

            cam_rows = camera_results[cid]
            frame_id = 0
            while True:
                if frame_id % 50 == 0:
                    print('frame id: ', frame_id)
                ret, frame = capture.read()
                frame_id += 1  # frame ids in cam_rows are 1-based
                if not ret:
                    if frame_id == 1:
                        print("video read failed!")
                    break

                frame_results = cam_rows[cam_rows[:, 2] == frame_id]
                boxes = frame_results[:, -4:]
                ids = frame_results[:, 1]
                image = plot_tracking(
                    frame, boxes, ids, frame_id=frame_id, fps=fps)

                # add attr vis
                if draw_attrs:
                    attr_res = []
                    for k in tid_list:
                        attrs = multi_res[k]['attrs']
                        if (frame_id - 1) >= len(attrs):
                            attr_res.append(None)
                        else:
                            attr_res.append(attrs[frame_id - 1])
                    image = visualize_attr(
                        image, attr_res, boxes, is_mtmct=True)
                writer.write(image)
        finally:
            # release the video handles even when drawing fails
            capture.release()
            if writer is not None:
                writer.release()
def get_euclidean(x, y, **kwargs):
    """
    Return the matrix of squared Euclidean distances between rows of x and y.

    Computed as |x|^2 + |y|^2 - 2 * x.y for every pair of rows; extra keyword
    arguments are accepted and ignored.
    """
    m, n = x.shape[0], y.shape[0]
    x_sq = np.power(x, 2).sum(axis=1, keepdims=True)  # (m, 1)
    y_sq = np.power(y, 2).sum(axis=1, keepdims=True)  # (n, 1)
    distmat = x_sq.repeat(n, axis=1) + y_sq.repeat(m, axis=1).T
    distmat -= np.dot(2 * x, y.T)
    return distmat
def cosine_similarity(x, y, eps=1e-12):
    """
    Computes the cosine similarity between every row of x and every row of y.
    Value == 1 means the same direction
    Value == 0 means perpendicular vectors
    Row norms smaller than eps are replaced by eps before dividing.
    """
    x_len = np.linalg.norm(x, axis=1, keepdims=True)
    y_len = np.linalg.norm(y, axis=1, keepdims=True)
    x_unit = x / np.maximum(x_len, np.full_like(x_len, eps))
    y_unit = y / np.maximum(y_len, np.full_like(y_len, eps))
    return np.dot(x_unit, y_unit.T)
def get_cosine(x, y, eps=1e-12):
    """
    Return the cosine similarity matrix of x and y.

    NOTE: despite being selectable as a "distance" function, the value
    returned here is the similarity itself (it is the direct result of
    cosine_similarity), not 1 - similarity.
    """
    return cosine_similarity(x, y, eps)
def get_dist_mat(x, y, func_name="euclidean"):
    """
    Compute the pairwise matrix of x and y with the selected function.

    Args:
        x, y: arrays forwarded to the selected function.
        func_name (str): "cosine" (get_cosine) or "euclidean" (get_euclidean).

    Returns:
        The matrix returned by the selected function.

    Raises:
        ValueError: if `func_name` is not one of the supported names.
    """
    if func_name == "cosine":
        dist_mat = get_cosine(x, y)
    elif func_name == "euclidean":
        dist_mat = get_euclidean(x, y)
    else:
        raise ValueError(
            "Unsupported distance function: {!r}, expected 'cosine' or "
            "'euclidean'".format(func_name))
    print("Using {} as distance function during evaluation".format(func_name))
    return dist_mat
def intracam_ignore(st_mask, cid_tids):
    """
    Zero the mask entries of all pairs that share the same camera marker.

    The marker of an entry is its element at index 1; `st_mask` is modified
    in place and also returned.
    NOTE(review): for "c<cid>_t<tid>" string keys index 1 is a single
    character, i.e. only the first digit of the camera id - confirm that
    camera ids never exceed one digit.
    """
    markers = [cid_tid[1] for cid_tid in cid_tids]
    for i, marker_i in enumerate(markers):
        for j, marker_j in enumerate(markers):
            if marker_i == marker_j:
                st_mask[i, j] = 0.
    return st_mask
def get_sim_matrix_new(cid_tid_dict, cid_tids):
    """
    Build the pairwise cost matrix of all tracks from their mean features.

    Note: camera independent get_sim_matrix function,
    which is different from the one in camera_utils.py.

    The cosine similarity of every pair is masked to 0 for pairs flagged by
    intracam_ignore and on the diagonal; the returned matrix is
    1 - masked similarity.
    """
    count = len(cid_tids)
    mean_feats = [cid_tid_dict[key]['mean_feat'] for key in cid_tids]
    q_arr = np.array(mean_feats)
    g_arr = np.array(mean_feats)

    # compute the similarity of every pair
    similarity = get_dist_mat(q_arr, g_arr, func_name="cosine")

    # mask the elements which belong to the same video
    st_mask = intracam_ignore(
        np.ones((count, count), dtype=np.float32), cid_tids)

    sim_matrix = similarity * st_mask
    np.fill_diagonal(sim_matrix, 0.)
    return 1. - sim_matrix
def get_match(cluster_labels):
    """
    Group sample indices by their cluster label.

    Args:
        cluster_labels: iterable with one label per sample.

    Returns:
        list: one list of sample indices per label, in order of first
            appearance of the label.
    """
    cluster_dict = dict()
    for i, label in enumerate(cluster_labels):
        # dict lookup instead of scanning a list of keys for every sample
        cluster_dict.setdefault(label, []).append(i)
    return list(cluster_dict.values())
def get_cid_tid(cluster_labels, cid_tids):
    """
    Translate clusters of indices into clusters of cid_tid entries.

    Args:
        cluster_labels: iterable of index groups.
        cid_tids: sequence indexed by those indices.
    """
    return [[cid_tids[label] for label in labels]
            for labels in cluster_labels]
def get_labels(cid_tid_dict, cid_tids):
    """
    Cluster all tracks by their precomputed pairwise cost.

    Args:
        cid_tid_dict (dict): track data, forwarded to get_sim_matrix_new.
        cid_tids (list): keys of `cid_tid_dict` in the order used for the
            matrix rows.

    Returns:
        list: groups of row indices, one group per cluster.
    """
    # compute cost matrix between features
    cost_matrix = get_sim_matrix_new(cid_tid_dict, cid_tids)

    # cluster all the features
    cluster_options = dict(
        n_clusters=None, distance_threshold=0.5, linkage='complete')
    try:
        # newer scikit-learn releases name this parameter `metric`
        clusterer = AgglomerativeClustering(
            metric='precomputed', **cluster_options)
    except TypeError:
        # older releases only know `affinity`
        clusterer = AgglomerativeClustering(
            affinity='precomputed', **cluster_options)
    cluster_labels = clusterer.fit_predict(cost_matrix)
    return get_match(cluster_labels)
def sub_cluster(cid_tid_dict):
    '''
    Assign a cluster number to every key of cid_tid_dict.

    cid_tid_dict: all camera_id and track_id
    Returns a dict mapping each clustered key to its 1-based cluster number.
    '''
    # get all keys in a stable order
    cid_tids = sorted(cid_tid_dict.keys())

    # cluster all trackid, the result holds indices into cid_tids
    index_groups = get_labels(cid_tid_dict, cid_tids)

    # relabel every cluster group
    cid_tid_label = dict()
    for label, group in enumerate(index_groups, start=1):
        for index in group:
            cid_tid_label[cid_tids[index]] = label
    return cid_tid_label
def distill_idfeat(mot_res):
    """
    Reduce all features of one track to a single mean feature.

    Args:
        mot_res (dict): reads "qualities", "features" and "rects"; the first
            element of every rect is [conf, xmin, ymin, xmax, ymax].

    Returns:
        np.ndarray: mean of at most five features, preferring large boxes and
            high image quality.
    """
    qualities_list = mot_res["qualities"]
    feature_list = mot_res["features"]
    rects = mot_res["rects"]

    qualities_new = []
    feature_new = []
    # keep only non-degenerate boxes with an area above 100*20
    for idx, rect in enumerate(rects):
        conf, xmin, ymin, xmax, ymax = rect[0]
        if xmax > xmin and (xmax - xmin) * (ymax - ymin) > 2000:
            qualities_new.append(qualities_list[idx])
            feature_new.append(feature_list[idx])
    # take all features if fewer than 2 boxes passed the filter
    if len(qualities_new) < 2:
        qualities_new = qualities_list
        feature_new = feature_list

    # with more than 20 candidates keep every second one only
    skipf = 1
    if len(qualities_new) > 20:
        skipf = 2
    quality_skip = np.array(qualities_new[::skipf])
    feature_skip = np.array(feature_new[::skipf])

    # sort features by image quality, best first
    topk_argq = np.argsort(quality_skip)[::-1]
    if (quality_skip > 0.6).sum() > 1:
        # the mask has to follow the sorted order of topk_argq
        topk_feat = feature_skip[topk_argq[quality_skip[topk_argq] > 0.6]]
    else:
        topk_feat = feature_skip[topk_argq]

    # final feature is the mean of at most five features
    mean_feat = np.mean(topk_feat[:5], axis=0)
    return mean_feat
def res2dict(multi_res):
    """
    Flatten per-camera track results into one dict keyed "c<cid>_t<tid>".

    Tracks without features are skipped; every stored track dict receives a
    'mean_feat' entry computed by distill_idfeat.
    """
    cid_tid_dict = {}
    for cid, c_res in enumerate(multi_res):
        for tid, res in c_res.items():
            key = "c{}_t{}".format(cid, tid)
            if key in cid_tid_dict or len(res["features"]) == 0:
                continue
            cid_tid_dict[key] = res
            res['mean_feat'] = distill_idfeat(res)
    return cid_tid_dict
def mtmct_process(multi_res, captures, mtmct_vis=True, output_dir="output"):
    """
    Run multi-camera track matching and write its results.

    Args:
        multi_res (list): per-camera tracking results, see res2dict.
        captures (list): video file paths, used for the visualization only.
        mtmct_vis (bool): also render the matched tracks on the videos.
        output_dir (str): directory of 'mtmct_result.txt' and the rendered
            videos; created with all missing parents.
    """
    cid_tid_dict = res2dict(multi_res)
    if len(cid_tid_dict) == 0:
        print("no tracking result found, mtmct will be skiped.")
        return
    map_tid = sub_cluster(cid_tid_dict)

    # makedirs also handles nested paths and an already existing directory
    os.makedirs(output_dir, exist_ok=True)
    pred_mtmct_file = os.path.join(output_dir, 'mtmct_result.txt')
    gen_restxt(pred_mtmct_file, map_tid, cid_tid_dict)

    if mtmct_vis:
        camera_results, cid_tid_fid_res = get_mtmct_matching_results(
            pred_mtmct_file)
        save_mtmct_vis_results(
            camera_results,
            captures,
            output_dir=output_dir,
            multi_res=cid_tid_dict)
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import cv2
import numpy as np
# add deploy path of PaddleDetection to sys.path
parent_path = os.path.abspath(os.path.join(__file__, *(['..'] * 2)))
sys.path.insert(0, parent_path)
from python.infer import PredictConfig
from pptracking.python.det_infer import load_predictor
from python.utils import Timer
class ReID(object):
"""
ReID of SDE methods
Args:
pred_config (object): config of model, defined by `Config(model_dir)`
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
batch_size (int): size of per batch in inference, default 50 means at most
50 sub images can be made a batch and send into ReID model
trt_min_shape (int): min shape for dynamic shape in trt
trt_max_shape (int): max shape for dynamic shape in trt
trt_opt_shape (int): opt shape for dynamic shape in trt
trt_calib_mode (bool): If the model is produced by TRT offline quantitative
calibration, trt_calib_mode need to set True
cpu_threads (int): cpu threads
enable_mkldnn (bool): whether to open MKLDNN
"""
def __init__(self,
             model_dir,
             device='CPU',
             run_mode='paddle',
             batch_size=50,
             trt_min_shape=1,
             trt_max_shape=1088,
             trt_opt_shape=608,
             trt_calib_mode=False,
             cpu_threads=4,
             enable_mkldnn=False):
    """Load the model config and predictor and set up timing state."""
    pred_config = self.set_config(model_dir)
    self.pred_config = pred_config

    predictor_options = dict(
        run_mode=run_mode,
        batch_size=batch_size,
        min_subgraph_size=pred_config.min_subgraph_size,
        device=device,
        use_dynamic_shape=pred_config.use_dynamic_shape,
        trt_min_shape=trt_min_shape,
        trt_max_shape=trt_max_shape,
        trt_opt_shape=trt_opt_shape,
        trt_calib_mode=trt_calib_mode,
        cpu_threads=cpu_threads,
        enable_mkldnn=enable_mkldnn)
    self.predictor, self.config = load_predictor(model_dir,
                                                 **predictor_options)

    self.det_times = Timer()
    self.cpu_mem, self.gpu_mem, self.gpu_util = 0, 0, 0
    self.batch_size = batch_size
    # size handed to cv2.resize in preprocess
    self.input_wh = (128, 256)
@classmethod
def init_with_cfg(cls, args, cfg):
    """
    Build an instance from parsed arguments and a model config mapping.

    `model_dir` and `batch_size` come from `cfg`, all remaining options are
    read from attributes of `args`.
    """
    options = {
        'model_dir': cfg['model_dir'],
        'batch_size': cfg['batch_size'],
        'device': args.device,
        'run_mode': args.run_mode,
        'trt_min_shape': args.trt_min_shape,
        'trt_max_shape': args.trt_max_shape,
        'trt_opt_shape': args.trt_opt_shape,
        'trt_calib_mode': args.trt_calib_mode,
        'cpu_threads': args.cpu_threads,
        'enable_mkldnn': args.enable_mkldnn,
    }
    return cls(**options)
def set_config(self, model_dir):
    """Create the PredictConfig for the model stored in `model_dir`."""
    pred_config = PredictConfig(model_dir)
    return pred_config
def check_img_quality(self, crop, bbox, xyxy):
    """
    Score a crop from its overlap with other boxes, its shape and brightness.

    Args:
        crop: BGR image of the box, or None.
        bbox: the box itself as [xmin, ymin, xmax, ymax].
        xyxy: all boxes of the frame, one [xmin, ymin, xmax, ymax] row each.

    Returns:
        None when `crop` is None, otherwise
        0.7 * overlap/shape quality + 0.3 * brightness quality.
    """
    if crop is None:
        return None

    # overlap ("eclipse") quality: intersect every box with bbox
    overlap = np.zeros(xyxy.shape)
    overlap[:, :2] = np.maximum(xyxy[:, :2], bbox[None, :2])
    overlap[:, 2:] = np.minimum(xyxy[:, 2:], bbox[None, 2:])
    overlap_wh = overlap[:, 2:] - overlap[:, :2]
    has_overlap = np.logical_and(overlap_wh[:, 0] > 0, overlap_wh[:, 1] > 0)
    overlap_wh = overlap_wh[has_overlap]

    eclipse_quality = 1.0
    if overlap_wh.shape[0] > 1:
        ratio = overlap_wh / (bbox[2:] - bbox[:2])
        area_ratio = sorted(
            (ratio[:, 0] * ratio[:, 1]).tolist(), reverse=True)
        # the second largest overlap ratio is the one that counts
        eclipse_quality = 1.0 - area_ratio[1]

    # a box lower than twice its width reduces the quality
    bbox_wh = (bbox[2:] - bbox[:2])
    height_quality = bbox_wh[1] / (bbox_wh[0] * 2)
    eclipse_quality = min(eclipse_quality, height_quality)

    # definition; the Laplacian variance is computed but does not enter the
    # returned score
    cropgray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)
    definition = int(cv2.Laplacian(cropgray, cv2.CV_64F, ksize=3).var())
    brightness = int(cropgray.mean())
    bd_quality = min(1., brightness / 50.)

    eclipse_weight = 0.7
    return eclipse_quality * eclipse_weight + bd_quality * (1 -
                                                            eclipse_weight)
def normal_crop(self, image, rect):
    """
    Cut the region described by `rect` out of `image`.

    Args:
        image (np.ndarray): image of shape (height, width, channels).
        rect (np.ndarray): [label, conf, xmin, ymin, xmax, ymax]; all values
            are truncated to int and the box is clipped to the image.

    Returns:
        np.ndarray or None: the cropped region, or None when the label is not
            0 or the clipped box is empty.
    """
    imgh, imgw, c = image.shape
    label, conf, xmin, ymin, xmax, ymax = [int(x) for x in rect.tolist()]
    xmin = max(0, xmin)
    ymin = max(0, ymin)
    xmax = min(imgw, xmax)
    ymax = min(imgh, ymax)
    if label != 0 or xmax <= xmin or ymax <= ymin:
        print("Warning! label missed!!")
        # a single None, so that `is None` checks on the result work
        return None
    return image[ymin:ymax, xmin:xmax, :]
def crop_image_with_mot(self, image, mot_res):
    """
    Crop every tracked box out of `image` and rate the crops.

    Args:
        image: frame the boxes refer to.
        mot_res (dict): `mot_res['boxes']` is a 2-D array; columns 1: of a
            row are passed to normal_crop and columns 3: are used as the box
            coordinates for the quality check.

    Returns:
        tuple: (crops, qualities, rows) for the boxes whose crop is not None.
    """
    boxes = mot_res['boxes']
    all_coords = boxes[:, 3:]

    crop_res, img_quality, rects = [], [], []
    for box in boxes:
        crop_image = self.normal_crop(image, box[1:])
        quality_item = self.check_img_quality(crop_image, box[3:],
                                              all_coords)
        if crop_image is None:
            continue
        crop_res.append(crop_image)
        img_quality.append(quality_item)
        rects.append(box)
    return crop_res, img_quality, rects
def preprocess(self,
               imgs,
               mean=[0.485, 0.456, 0.406],
               std=[0.229, 0.224, 0.225]):
    """
    Turn a list of crops into the model input batch.

    Every image is resized to `self.input_wh`, scaled to [0, 1], normalized
    with `mean` / `std` along its last axis and transposed with (2, 0, 1).

    Returns:
        dict: {'x': float32 array stacking all processed images}
    """
    mean_arr = np.array(mean)
    std_arr = np.array(std)

    im_batch = []
    for img in imgs:
        scaled = cv2.resize(img, self.input_wh).astype('float32') / 255.
        scaled -= mean_arr
        scaled /= std_arr
        im_batch.append(scaled.transpose((2, 0, 1)))

    return {'x': np.array(im_batch).astype('float32')}
def predict(self, crops, repeats=1, add_timer=True, seq_name=''):
    """
    Extract embeddings for a batch of crops.

    Args:
        crops (list): images handed to `preprocess`.
        repeats (int): number of predictor runs; the output is fetched after
            every run.
        add_timer (bool): record the stage timings in `self.det_times`.
        seq_name (str): not used by this method.

    Returns:
        The first predictor output copied to CPU after the last run.
    """
    timers = self.det_times if add_timer else None

    # preprocess
    if add_timer:
        timers.preprocess_time_s.start()
    inputs = self.preprocess(crops)
    for name in self.predictor.get_input_names():
        self.predictor.get_input_handle(name).copy_from_cpu(inputs[name])
    if add_timer:
        timers.preprocess_time_s.end()
        timers.inference_time_s.start()

    # model prediction
    for _ in range(repeats):
        self.predictor.run()
        first_output_name = self.predictor.get_output_names()[0]
        feature_tensor = self.predictor.get_output_handle(first_output_name)
        pred_embs = feature_tensor.copy_to_cpu()

    if add_timer:
        timers.inference_time_s.end(repeats=repeats)
        # there is no postprocess work; its timer is started and stopped
        # back to back
        timers.postprocess_time_s.start()
        timers.postprocess_time_s.end()
        timers.img_num += 1
    return pred_embs
def predict_batch(self, imgs, batch_size=4):
    """
    Run `predict` over `imgs` in chunks of `batch_size`.

    Returns:
        list: the features of all images as plain lists, in input order.
    """
    batch_feat = []
    for start in range(0, len(imgs), batch_size):
        chunk = imgs[start:start + batch_size]
        batch_feat.extend(self.predict(chunk).tolist())
    return batch_feat
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import yaml
import glob
import cv2
import numpy as np
import math
import paddle
import sys
import paddle.nn.functional as F
# add deploy path of PaddleDetection to sys.path
parent_path = os.path.abspath(os.path.join(__file__, *(['..'] * 2)))
sys.path.insert(0, parent_path)
from paddle.inference import Config, create_predictor
from python.utils import argsparser, Timer, get_current_memory_mb
from python.benchmark_utils import PaddleInferBenchmark
from python.infer import Detector, print_arguments
from video_action_preprocess import VideoDecoder, Sampler, Scale, CenterCrop, Normalization, Image2Array
def softmax(x):
    """
    Return the softmax of `x`, normalized over all of its elements.

    The maximum is subtracted before exponentiation, which leaves the result
    unchanged but keeps np.exp from overflowing for large inputs.
    """
    x = np.asarray(x)
    if x.size == 0:
        # nothing to normalize
        return np.exp(x)
    exp_x = np.exp(x - np.max(x))
    return exp_x / np.sum(exp_x)
class VideoActionRecognizer(object):
"""
Args:
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
batch_size (int): size of pre batch in inference
trt_min_shape (int): min shape for dynamic shape in trt
trt_max_shape (int): max shape for dynamic shape in trt
trt_opt_shape (int): opt shape for dynamic shape in trt
trt_calib_mode (bool): If the model is produced by TRT offline quantitative
calibration, trt_calib_mode need to set True
cpu_threads (int): cpu threads
enable_mkldnn (bool): whether to open MKLDNN
"""
def __init__(self,
             model_dir,
             device='CPU',
             run_mode='paddle',
             num_seg=8,
             seg_len=1,
             short_size=256,
             target_size=224,
             top_k=1,
             batch_size=1,
             trt_min_shape=1,
             trt_max_shape=1280,
             trt_opt_shape=640,
             trt_calib_mode=False,
             cpu_threads=1,
             enable_mkldnn=False,
             ir_optim=True):
    """
    Store the options, configure paddle inference for the model files found
    in `model_dir` and create the predictor.
    """
    # sampling / preprocessing options
    self.num_seg = num_seg
    self.seg_len = seg_len
    self.short_size = short_size
    self.target_size = target_size
    self.top_k = top_k

    assert batch_size == 1, "VideoActionRecognizer only support batch_size=1 now."

    # runtime options
    self.model_dir = model_dir
    self.device = device
    self.run_mode = run_mode
    self.batch_size = batch_size
    self.trt_min_shape = trt_min_shape
    self.trt_max_shape = trt_max_shape
    self.trt_opt_shape = trt_opt_shape
    self.trt_calib_mode = trt_calib_mode
    self.cpu_threads = cpu_threads
    self.enable_mkldnn = enable_mkldnn
    self.ir_optim = ir_optim

    self.recognize_times = Timer()

    # the first *.pdmodel / *.pdiparams file of model_dir is used
    model_file_path = glob.glob(os.path.join(model_dir, "*.pdmodel"))[0]
    params_file_path = glob.glob(os.path.join(model_dir, "*.pdiparams"))[0]
    config = Config(model_file_path, params_file_path)
    self.config = config

    if device in ("GPU", "gpu"):
        config.enable_use_gpu(8000, 0)
    else:
        config.disable_gpu()
        if self.enable_mkldnn:
            # cache 10 different shapes for mkldnn to avoid memory leak
            config.set_mkldnn_cache_capacity(10)
            config.enable_mkldnn()

    config.switch_ir_optim(self.ir_optim)  # default true

    precision_map = {
        'trt_int8': Config.Precision.Int8,
        'trt_fp32': Config.Precision.Float32,
        'trt_fp16': Config.Precision.Half
    }
    if run_mode in precision_map:
        config.enable_tensorrt_engine(
            max_batch_size=8, precision_mode=precision_map[run_mode])

    config.enable_memory_optim()
    # use zero copy
    config.switch_use_feed_fetch_ops(False)

    self.predictor = create_predictor(config)
@classmethod
def init_with_cfg(cls, args, cfg):
    """
    Alternate constructor combining a config mapping with runtime args.

    Args:
        args: object exposing ``device``, ``run_mode``, ``trt_min_shape``,
            ``trt_max_shape``, ``trt_opt_shape``, ``trt_calib_mode``,
            ``cpu_threads`` and ``enable_mkldnn`` attributes
        cfg: mapping providing ``model_dir``, ``short_size``,
            ``target_size`` and ``batch_size``
    Returns:
        whatever ``cls(...)`` returns for those keyword arguments
    """
    # Model-related settings come from the config mapping ...
    from_cfg = ('model_dir', 'short_size', 'target_size', 'batch_size')
    # ... while deployment settings come from the argument object.
    from_args = ('device', 'run_mode', 'trt_min_shape', 'trt_max_shape',
                 'trt_opt_shape', 'trt_calib_mode', 'cpu_threads',
                 'enable_mkldnn')

    kwargs = {key: cfg[key] for key in from_cfg}
    for attr in from_args:
        kwargs[attr] = getattr(args, attr)
    return cls(**kwargs)
def preprocess_batch(self, file_list):
    """
    Pre-process every file and merge the results field by field.

    Args:
        file_list (list): inputs handed one by one to ``self.preprocess``
    Returns:
        list: one array per field returned by ``self.preprocess``, each the
            concatenation of that field over all files
    """
    per_file = [self.preprocess(path) for path in file_list]
    # The first sample decides how many fields get merged.
    field_count = len(per_file[0])
    merged = []
    for field in range(field_count):
        merged.append(np.concatenate([sample[field] for sample in per_file]))
    self.input_file = file_list
    return merged
def get_timer(self):
    """Return the timer object stored in ``self.recognize_times``."""
    timer = self.recognize_times
    return timer
def predict(self, input):
    '''
    Run pre-processing, inference and post-processing for one input.

    Args:
        input (str) or (list): video file path or image data list
    Returns:
        tuple: ``(classes, scores)`` exactly as produced by
            ``self.postprocess``
    '''
    engine = self.predictor
    timer = self.recognize_times

    # Only the first input / output handle of the predictor is used.
    in_names = engine.get_input_names()
    in_handle = engine.get_input_handle(in_names[0])
    out_names = engine.get_output_names()
    out_handle = engine.get_output_handle(out_names[0])

    # preprocess: a plain str is a video path, anything else a frame list
    timer.preprocess_time_s.start()
    if type(input) is str:
        inputs = self.preprocess_video(input)
    else:
        inputs = self.preprocess_frames(input)
    timer.preprocess_time_s.end()

    # Add a leading axis and replicate it batch_size times before feeding.
    batched = np.expand_dims(inputs, axis=0)
    batched = batched.repeat(self.batch_size, axis=0)
    inputs = batched.copy()
    in_handle.copy_from_cpu(inputs)

    # model prediction
    timer.inference_time_s.start()
    engine.run()
    timer.inference_time_s.end()

    raw_output = out_handle.copy_to_cpu()

    # postprocess
    timer.postprocess_time_s.start()
    classes, scores = self.postprocess(raw_output)
    timer.postprocess_time_s.end()

    return classes, scores
def preprocess_frames(self, frame_list):
    """
    Pre-process frames that are already held in memory.

    frame_list: list, frame list
    return: list with a single array (``results['imgs']`` after the
        operators ran, with a new leading axis)
    """
    img_mean = [0.485, 0.456, 0.406]
    img_std = [0.229, 0.224, 0.225]

    # Operators are applied in this order; each takes and returns the dict.
    pipeline = [
        CenterCrop(self.target_size),
        Image2Array(),
        Normalization(img_mean, img_std),
    ]

    results = {'frames_len': len(frame_list), "imgs": frame_list}
    for transform in pipeline:
        results = transform(results)

    batched = np.expand_dims(results['imgs'], axis=0)
    return [batched.copy()]
def preprocess_video(self, input_file):
    """
    Decode a video file and turn it into the network input.

    The file is passed through VideoDecoder, Sampler (``num_seg`` /
    ``seg_len``, ``valid_mode=True``), Scale (``short_size``), CenterCrop
    (``target_size``), Image2Array and Normalization, in that order.

    input_file: str, file path
    return: list with a single array (``results['imgs']`` after the
        operators ran, with a new leading axis)
    raises: AssertionError if ``input_file`` is not an existing file
    """
    # os.path.isfile returns a bool, so the previous ``is not None`` test
    # could never fail; check the boolean itself.
    assert os.path.isfile(input_file), "{0} not exists".format(input_file)

    results = {'filename': input_file}
    img_mean = [0.485, 0.456, 0.406]
    img_std = [0.229, 0.224, 0.225]
    ops = [
        VideoDecoder(), Sampler(
            self.num_seg, self.seg_len, valid_mode=True),
        Scale(self.short_size), CenterCrop(self.target_size),
        Image2Array(), Normalization(img_mean, img_std)
    ]
    for op in ops:
        results = op(results)

    res = np.expand_dims(results['imgs'], axis=0).copy()
    return [res]
def postprocess(self, output):
    """
    Pick the ``self.top_k`` best classes from the raw network output.

    Args:
        output (numpy.ndarray): raw output; it is flattened before use
    Returns:
        tuple: ``(classes, scores)`` - class indices ordered by descending
            score, and the matching values of ``softmax(output)``
    """
    probs = softmax(output.flatten())  # numpy.ndarray
    k = self.top_k
    # argpartition yields the k largest entries in arbitrary order ...
    candidates = np.argpartition(probs, -k)[-k:]
    # ... so order them explicitly, best first.
    ranking = np.argsort(-probs[candidates])
    classes = candidates[ranking]
    return classes, probs[classes]
def main():
    """
    Command-line entry point driven by the module-level ``FLAGS``.

    Without ``run_benchmark`` it classifies ``FLAGS.video_file`` and prints
    the top-1 class and score. With ``run_benchmark`` it collects memory
    figures and the recognizer's timer report and writes them through
    ``PaddleInferBenchmark``.
    """
    benchmark_mode = FLAGS.run_benchmark

    # Flag sanity checks differ between the two modes.
    if benchmark_mode:
        assert FLAGS.use_gpu is True
    else:
        assert FLAGS.batch_size == 1
        assert FLAGS.use_fp16 is False

    recognizer = VideoActionRecognizer(
        FLAGS.model_dir,
        short_size=FLAGS.short_size,
        target_size=FLAGS.target_size,
        device=FLAGS.device,
        run_mode=FLAGS.run_mode,
        batch_size=FLAGS.batch_size,
        trt_min_shape=FLAGS.trt_min_shape,
        trt_max_shape=FLAGS.trt_max_shape,
        trt_opt_shape=FLAGS.trt_opt_shape,
        trt_calib_mode=FLAGS.trt_calib_mode,
        cpu_threads=FLAGS.cpu_threads,
        enable_mkldnn=FLAGS.enable_mkldnn, )

    if not benchmark_mode:
        classes, scores = recognizer.predict(FLAGS.video_file)
        print("Current video file: {}".format(FLAGS.video_file))
        print("\ttop-1 class: {0}".format(classes[0]))
        print("\ttop-1 score: {0}".format(scores[0]))
        return

    # NOTE(review): no predict() call happens on this path, so the timer
    # report below reflects whatever the freshly built recognizer recorded.
    cm, gm, gu = get_current_memory_mb()
    mems = {'cpu_rss_mb': cm, 'gpu_rss_mb': gm, 'gpu_util': gu * 100}

    perf_info = recognizer.recognize_times.report()

    # Model name is the last path component; precision is the run-mode suffix.
    model_info = {
        'model_name': FLAGS.model_dir.strip('/').split('/')[-1],
        'precision': FLAGS.run_mode.split('_')[-1]
    }
    data_info = {
        'batch_size': FLAGS.batch_size,
        'shape': "dynamic_shape",
        'data_num': perf_info['img_num']
    }
    recognize_log = PaddleInferBenchmark(recognizer.config, model_info,
                                         data_info, perf_info, mems)
    recognize_log('Fight')
# Script entry point: parse the command line into the module-level FLAGS
# (read by main()) and run the recognizer.
if __name__ == '__main__':
    # Switch Paddle to static-graph mode before any predictor is created.
    paddle.enable_static()
    parser = argsparser()
    FLAGS = parser.parse_args()
    print_arguments(FLAGS)
    # Upper-case the device name so the membership check below is
    # case-insensitive with respect to what the user typed.
    FLAGS.device = FLAGS.device.upper()
    assert FLAGS.device in ['CPU', 'GPU', 'XPU'
                            ], "device should be CPU, GPU or XPU"
    main()
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import matching
from . import tracker
from . import motion
from . import utils
from . import mtmct
from .matching import *
from .tracker import *
from .motion import *
from .utils import *
from .mtmct import *
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import jde_matching
from . import deepsort_matching
from . import ocsort_matching
from .jde_matching import *
from .deepsort_matching import *
from .ocsort_matching import *
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/Zhongdao/Towards-Realtime-MOT/blob/master/tracker/matching.py
"""
try:
import lap
except:
print(
'Warning: Unable to use JDE/FairMOT/ByteTrack, please install lap, for example: `pip install lap`, see https://github.com/gatagat/lap'
)
pass
import scipy
import numpy as np
from scipy.spatial.distance import cdist
from ..motion import kalman_filter
import warnings
warnings.filterwarnings("ignore")
__all__ = [
'merge_matches',
'linear_assignment',
'bbox_ious',
'iou_distance',
'embedding_distance',
'fuse_motion',
]
def merge_matches(m1, m2, shape):
    """
    Chain two match lists through their shared middle index.

    ``m1`` holds (o, p) index pairs and ``m2`` holds (p, q) index pairs;
    the result pairs every o with every q that is reachable through a
    common p.

    Args:
        m1: array-like of (o, p) index pairs
        m2: array-like of (p, q) index pairs
        shape: ``(O, P, Q)`` sizes of the three index ranges
    Returns:
        tuple: ``(match, unmatched_O, unmatched_Q)`` - list of (o, q)
            pairs, and tuples of the o / q indices absent from ``match``
    """
    size_o, size_p, size_q = shape
    first = np.asarray(m1)
    second = np.asarray(m2)

    def _indicator(pairs, dims):
        # Sparse 0/1 matrix with a one at every listed (row, col) pair.
        ones = np.ones(len(pairs))
        return scipy.sparse.coo_matrix(
            (ones, (pairs[:, 0], pairs[:, 1])), shape=dims)

    # Matrix product: entry (o, q) is non-zero iff some p links o to q.
    linked = _indicator(first, (size_o, size_p)) * _indicator(
        second, (size_p, size_q))
    rows, cols = linked.nonzero()
    match = list(zip(rows, cols))

    seen_o = {o for o, _ in match}
    seen_q = {q for _, q in match}
    unmatched_O = tuple(set(range(size_o)) - seen_o)
    unmatched_Q = tuple(set(range(size_q)) - seen_q)
    return match, unmatched_O, unmatched_Q
def linear_assignment(cost_matrix, thresh):
    """
    Solve the assignment problem with ``lap.lapjv`` under a cost limit.

    Args:
        cost_matrix (numpy.ndarray): 2-D cost matrix
        thresh: forwarded to ``lap.lapjv`` as ``cost_limit``
    Returns:
        tuple: ``(matches, unmatched_a, unmatched_b)`` - array of
            [row, col] pairs plus the row / column indices left unassigned
    Raises:
        RuntimeError: if the ``lap`` package cannot be imported
    """
    try:
        import lap
    except Exception:
        raise RuntimeError(
            'Unable to use JDE/FairMOT/ByteTrack, please install lap, for example: `pip install lap`, see https://github.com/gatagat/lap'
        )

    # Nothing to solve: every row and every column stays unmatched.
    if cost_matrix.size == 0:
        no_matches = np.empty((0, 2), dtype=int)
        all_rows = tuple(range(cost_matrix.shape[0]))
        all_cols = tuple(range(cost_matrix.shape[1]))
        return no_matches, all_rows, all_cols

    _, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)
    # A negative entry in x / y marks a row / column without a partner.
    matches = np.asarray([[ix, mx] for ix, mx in enumerate(x) if mx >= 0])
    unmatched_a = np.where(x < 0)[0]
    unmatched_b = np.where(y < 0)[0]
    return matches, unmatched_a, unmatched_b
def bbox_ious(atlbrs, btlbrs):
    """
    Compute the pairwise IoU between two sets of boxes.

    Boxes are read as ``[x1, y1, x2, y2]`` (columns 0-3) and widths and
    heights are computed with the inclusive ``+ 1`` convention.

    Args:
        atlbrs: array-like of N boxes
        btlbrs: array-like of K boxes
    Returns:
        numpy.ndarray: float64 array of shape (N, K); pairs that do not
            overlap stay 0
    """
    # ``np.float`` was only an alias of the builtin float and no longer
    # exists in NumPy >= 1.24, so the concrete float64 dtype is used.
    boxes = np.ascontiguousarray(atlbrs, dtype=np.float64)
    query_boxes = np.ascontiguousarray(btlbrs, dtype=np.float64)
    N = boxes.shape[0]
    K = query_boxes.shape[0]
    ious = np.zeros((N, K), dtype=boxes.dtype)
    if N * K == 0:
        return ious

    for k in range(K):
        box_area = ((query_boxes[k, 2] - query_boxes[k, 0] + 1) *
                    (query_boxes[k, 3] - query_boxes[k, 1] + 1))
        for n in range(N):
            # Overlap extent along x; non-positive means no intersection.
            iw = (min(boxes[n, 2], query_boxes[k, 2]) - max(
                boxes[n, 0], query_boxes[k, 0]) + 1)
            if iw > 0:
                # Overlap extent along y.
                ih = (min(boxes[n, 3], query_boxes[k, 3]) - max(
                    boxes[n, 1], query_boxes[k, 1]) + 1)
                if ih > 0:
                    # Union area = area(a) + area(b) - intersection.
                    ua = float((boxes[n, 2] - boxes[n, 0] + 1) * (boxes[
                        n, 3] - boxes[n, 1] + 1) + box_area - iw * ih)
                    ious[n, k] = iw * ih / ua
    return ious
def iou_distance(atracks, btracks):
    """
    Compute cost based on IoU between two list[STrack].

    When the first element of either argument is a ``numpy.ndarray`` both
    arguments are used as boxes directly; otherwise the ``tlbr`` attribute
    of every element is used. The cost is ``1 - IoU``.
    """

    def _holds_raw_boxes(items):
        # True when the sequence is non-empty and starts with an ndarray.
        return len(items) > 0 and isinstance(items[0], np.ndarray)

    if _holds_raw_boxes(atracks) or _holds_raw_boxes(btracks):
        atlbrs, btlbrs = atracks, btracks
    else:
        atlbrs = [trk.tlbr for trk in atracks]
        btlbrs = [trk.tlbr for trk in btracks]

    overlaps = bbox_ious(atlbrs, btlbrs)
    return 1 - overlaps
def embedding_distance(tracks, detections, metric='euclidean'):
    """
    Compute cost based on features between two list[STrack].

    Args:
        tracks: objects exposing a ``smooth_feat`` feature vector
        detections: objects exposing a ``curr_feat`` feature vector
        metric (str): distance metric name forwarded to
            ``scipy.spatial.distance.cdist``
    Returns:
        numpy.ndarray: float64 cost matrix of shape
            (len(tracks), len(detections)), clipped below at 0
    """
    # ``np.float`` was only an alias of the builtin float and no longer
    # exists in NumPy >= 1.24, so the concrete float64 dtype is used.
    cost_matrix = np.zeros(
        (len(tracks), len(detections)), dtype=np.float64)
    if cost_matrix.size == 0:
        return cost_matrix
    det_features = np.asarray(
        [track.curr_feat for track in detections], dtype=np.float64)
    track_features = np.asarray(
        [track.smooth_feat for track in tracks], dtype=np.float64)
    cost_matrix = np.maximum(0.0, cdist(track_features, det_features,
                                        metric))  # Normalized features
    return cost_matrix
def fuse_motion(kf,
                cost_matrix,
                tracks,
                detections,
                only_position=False,
                lambda_=0.98):
    """
    Blend a gating distance from ``kf`` into an existing cost matrix.

    For every track row, entries whose gating distance exceeds the
    ``kalman_filter.chi2inv95`` threshold are set to ``inf``; the row is
    then replaced by ``lambda_ * cost + (1 - lambda_) * gating_distance``.
    ``cost_matrix`` is modified in place and also returned.

    Args:
        kf: object providing ``gating_distance(mean, covariance,
            measurements, only_position, metric=...)``
        cost_matrix (numpy.ndarray): rows follow ``tracks``, columns
            follow ``detections``
        tracks: objects with ``mean`` and ``covariance`` attributes
        detections: objects with a ``to_xyah()`` method
        only_position (bool): selects threshold index 2 instead of 4 and
            is forwarded to ``kf.gating_distance``
        lambda_ (float): weight kept by the original cost
    """
    if cost_matrix.size == 0:
        return cost_matrix

    gating_threshold = kalman_filter.chi2inv95[2 if only_position else 4]
    measurements = np.asarray([det.to_xyah() for det in detections])

    for row, track in enumerate(tracks):
        motion_dist = kf.gating_distance(
            track.mean,
            track.covariance,
            measurements,
            only_position,
            metric='maha')
        # Rule out detections that lie beyond the gate for this track.
        too_far = motion_dist > gating_threshold
        cost_matrix[row, too_far] = np.inf
        blended = lambda_ * cost_matrix[row] + (1 - lambda_) * motion_dist
        cost_matrix[row] = blended
    return cost_matrix
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/noahcao/OC_SORT/blob/master/trackers/ocsort_tracker/association.py
"""
import os
import numpy as np
def iou_batch(bboxes1, bboxes2):
    """
    From SORT: Computes IOU between two bboxes in the form [x1,y1,x2,y2]

    The first set gets a new axis 1 and the second a new axis 0, so the
    result holds one value per (bboxes1[i], bboxes2[j]) pair.
    """
    second = np.expand_dims(bboxes2, 0)
    first = np.expand_dims(bboxes1, 1)

    # Corners of the intersection rectangle.
    left = np.maximum(first[..., 0], second[..., 0])
    top = np.maximum(first[..., 1], second[..., 1])
    right = np.minimum(first[..., 2], second[..., 2])
    bottom = np.minimum(first[..., 3], second[..., 3])

    # Negative extents mean no overlap and are clamped to zero.
    inter = np.maximum(0., right - left) * np.maximum(0., bottom - top)

    area_first = (first[..., 2] - first[..., 0]) * (
        first[..., 3] - first[..., 1])
    area_second = (second[..., 2] - second[..., 0]) * (
        second[..., 3] - second[..., 1])
    return inter / (area_first + area_second - inter)
def speed_direction_batch(dets, tracks):
    """
    Unit direction from every track box centre to every detection centre.

    Centres are the midpoints of columns (0, 2) and (1, 3). Returns
    ``(dy, dx)``, each with one row per track and one column per
    detection.
    """
    # Trailing axis lets the track centres broadcast against detections.
    tracks = tracks[..., np.newaxis]

    det_cx = (dets[:, 0] + dets[:, 2]) / 2.0
    det_cy = (dets[:, 1] + dets[:, 3]) / 2.0
    trk_cx = (tracks[:, 0] + tracks[:, 2]) / 2.0
    trk_cy = (tracks[:, 1] + tracks[:, 3]) / 2.0

    delta_x = det_cx - trk_cx
    delta_y = det_cy - trk_cy
    # Small epsilon keeps the division defined for coincident centres.
    length = np.sqrt(delta_x**2 + delta_y**2) + 1e-6
    return delta_y / length, delta_x / length  # size: num_track x num_det
def linear_assignment(cost_matrix):
    """
    Solve the assignment problem and return index pairs as an array.

    ``lap.lapjv`` is used when the ``lap`` package can be imported;
    otherwise ``scipy.optimize.linear_sum_assignment`` is the fallback.
    """
    try:
        import lap
        _, col_of_row, row_of_col = lap.lapjv(cost_matrix, extend_cost=True)
        # Skip negative entries, which mark rows without a column.
        pairs = [[row_of_col[col], col] for col in col_of_row if col >= 0]
        return np.array(pairs)  #
    except ImportError:
        from scipy.optimize import linear_sum_assignment
        row_ind, col_ind = linear_sum_assignment(cost_matrix)
        pairs = list(zip(row_ind, col_ind))
        return np.array(pairs)
def associate(detections, trackers, iou_threshold, velocities, previous_obs,
              vdc_weight):
    """
    Match detections to trackers using IoU plus a direction-consistency term.

    Args:
        detections (numpy.ndarray): one row per detection; the last column
            is used as the score
        trackers (numpy.ndarray): one row per tracker box
        iou_threshold (float): minimum IoU for a pair to count as a match
        velocities (numpy.ndarray): per-tracker direction, columns (y, x)
        previous_obs (numpy.ndarray): per-tracker previous observation;
            rows whose column 4 is negative are excluded from the
            direction term
        vdc_weight (float): weight of the direction-consistency term
    Returns:
        tuple: ``(matches, unmatched_detections, unmatched_trackers)``
    """
    if len(trackers) == 0:
        no_matches = np.empty((0, 2), dtype=int)
        every_detection = np.arange(len(detections))
        no_trackers = np.empty((0, 5), dtype=int)
        return no_matches, every_detection, no_trackers

    # Direction from each previous observation to each detection.
    Y, X = speed_direction_batch(detections, previous_obs)
    inertia_Y = np.repeat(velocities[:, 0][:, np.newaxis], Y.shape[1], axis=1)
    inertia_X = np.repeat(velocities[:, 1][:, np.newaxis], X.shape[1], axis=1)

    # Angle between the stored direction and the candidate direction,
    # rescaled so that aligned directions give the largest value.
    cos_angle = np.clip(inertia_X * X + inertia_Y * Y, a_min=-1, a_max=1)
    angle = np.arccos(cos_angle)
    diff_angle = (np.pi / 2.0 - np.abs(angle)) / np.pi

    # Zero out trackers that have no usable previous observation.
    valid_mask = np.ones(previous_obs.shape[0])
    valid_mask[previous_obs[:, 4] < 0] = 0

    iou_matrix = iou_batch(detections, trackers)
    scores = np.repeat(
        detections[:, -1][:, np.newaxis], trackers.shape[0], axis=1)
    # iou_matrix = iou_matrix * scores # a trick that sometimes works, we don't encourage this
    valid_mask = np.repeat(valid_mask[:, np.newaxis], X.shape[1], axis=1)

    angle_diff_cost = ((valid_mask * diff_angle) * vdc_weight).T * scores

    if min(iou_matrix.shape) > 0:
        above = (iou_matrix > iou_threshold).astype(np.int32)
        if above.sum(1).max() == 1 and above.sum(0).max() == 1:
            # Every row/column has at most one candidate: no solver needed.
            matched_indices = np.stack(np.where(above), axis=1)
        else:
            matched_indices = linear_assignment(
                -(iou_matrix + angle_diff_cost))
    else:
        matched_indices = np.empty(shape=(0, 2))

    unmatched_detections = [
        d for d, _ in enumerate(detections) if d not in matched_indices[:, 0]
    ]
    unmatched_trackers = [
        t for t, _ in enumerate(trackers) if t not in matched_indices[:, 1]
    ]

    # filter out matched with low IOU
    kept = []
    for pair in matched_indices:
        if iou_matrix[pair[0], pair[1]] < iou_threshold:
            unmatched_detections.append(pair[0])
            unmatched_trackers.append(pair[1])
        else:
            kept.append(pair.reshape(1, 2))

    if len(kept) == 0:
        matches = np.empty((0, 2), dtype=int)
    else:
        matches = np.concatenate(kept, axis=0)

    return matches, np.array(unmatched_detections), np.array(
        unmatched_trackers)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import kalman_filter
from .kalman_filter import *
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
# PP-Human Pipeline Inference Benchmark
PP-Human Pipeline推理的benchmark与各功能对应的模型相关,可参考PaddleDetection的[推理benchmark](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.5/deploy/BENCHMARK_INFER.md)
# PP-Human Pipeline Inference Benchmark
The PP-Human Pipeline inference benchmark depends on the model used by each function. Please refer to the PaddleDetection [inference benchmark](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.5/deploy/BENCHMARK_INFER_en.md).
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册