Commit 2957e638 authored by chenjian

delete useless files

Parent 35da85b7
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import glob
import os
import sys
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import os
from pathlib import Path
import paddle
import paddle.inference as paddle_infer
CUR_DIR = os.path.dirname(os.path.abspath(__file__))
LOG_PATH_ROOT = f"{CUR_DIR}/../../output"
class PaddleInferBenchmark(object):
def __init__(self,
config,
model_info: dict = {},
data_info: dict = {},
perf_info: dict = {},
resource_info: dict = {},
**kwargs):
"""
Construct PaddleInferBenchmark Class to format logs.
args:
config(paddle.inference.Config): paddle inference config
model_info(dict): basic model info
{'model_name': 'resnet50',
'precision': 'fp32'}
data_info(dict): input data info
{'batch_size': 1,
'shape': '3,224,224',
'data_num': 1000}
perf_info(dict): performance result
{'preprocess_time_s': 1.0,
'inference_time_s': 2.0,
'postprocess_time_s': 1.0,
'total_time_s': 4.0}
resource_info(dict): cpu and gpu resources
{'cpu_rss_mb': 100,
'gpu_rss_mb': 100,
'gpu_util': 60}
"""
# PaddleInferBenchmark Log Version
self.log_version = "1.0.3"
# Paddle Version
self.paddle_version = paddle.__version__
self.paddle_commit = paddle.__git_commit__
paddle_infer_info = paddle_infer.get_version()
self.paddle_branch = paddle_infer_info.strip().split(': ')[-1]
# model info
self.model_info = model_info
# data info
self.data_info = data_info
# perf info
self.perf_info = perf_info
try:
# required value
self.model_name = model_info['model_name']
self.precision = model_info['precision']
self.batch_size = data_info['batch_size']
self.shape = data_info['shape']
self.data_num = data_info['data_num']
self.inference_time_s = round(perf_info['inference_time_s'], 4)
except (KeyError, TypeError):
self.print_help()
raise ValueError("Wrong or missing argument, please check the input arguments and their types")
self.preprocess_time_s = perf_info.get('preprocess_time_s', 0)
self.postprocess_time_s = perf_info.get('postprocess_time_s', 0)
self.with_tracker = 'tracking_time_s' in perf_info
self.tracking_time_s = perf_info.get('tracking_time_s', 0)
self.total_time_s = perf_info.get('total_time_s', 0)
self.inference_time_s_90 = perf_info.get("inference_time_s_90", "")
self.inference_time_s_99 = perf_info.get("inference_time_s_99", "")
self.succ_rate = perf_info.get("succ_rate", "")
self.qps = perf_info.get("qps", "")
# conf info
self.config_status = self.parse_config(config)
# mem info
if isinstance(resource_info, dict):
self.cpu_rss_mb = int(resource_info.get('cpu_rss_mb', 0))
self.cpu_vms_mb = int(resource_info.get('cpu_vms_mb', 0))
self.cpu_shared_mb = int(resource_info.get('cpu_shared_mb', 0))
self.cpu_dirty_mb = int(resource_info.get('cpu_dirty_mb', 0))
self.cpu_util = round(resource_info.get('cpu_util', 0), 2)
self.gpu_rss_mb = int(resource_info.get('gpu_rss_mb', 0))
self.gpu_util = round(resource_info.get('gpu_util', 0), 2)
self.gpu_mem_util = round(resource_info.get('gpu_mem_util', 0), 2)
else:
self.cpu_rss_mb = 0
self.cpu_vms_mb = 0
self.cpu_shared_mb = 0
self.cpu_dirty_mb = 0
self.cpu_util = 0
self.gpu_rss_mb = 0
self.gpu_util = 0
self.gpu_mem_util = 0
# init benchmark logger
self.benchmark_logger()
def benchmark_logger(self):
"""
benchmark logger
"""
# remove other logging handler
for handler in logging.root.handlers[:]:
logging.root.removeHandler(handler)
# Init logger
FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
log_output = f"{LOG_PATH_ROOT}/{self.model_name}.log"
Path(f"{LOG_PATH_ROOT}").mkdir(parents=True, exist_ok=True)
logging.basicConfig(level=logging.INFO,
format=FORMAT,
handlers=[
logging.FileHandler(filename=log_output, mode='w'),
logging.StreamHandler(),
])
self.logger = logging.getLogger(__name__)
self.logger.info(f"Paddle Inference benchmark log will be saved to {log_output}")
def parse_config(self, config) -> dict:
"""
parse paddle predictor config
args:
config(paddle.inference.Config): paddle inference config
return:
config_status(dict): dict style config info
"""
config_status = {}
if isinstance(config, paddle_infer.Config):
config_status['runtime_device'] = "gpu" if config.use_gpu() else "cpu"
config_status['ir_optim'] = config.ir_optim()
config_status['enable_tensorrt'] = config.tensorrt_engine_enabled()
config_status['precision'] = self.precision
config_status['enable_mkldnn'] = config.mkldnn_enabled()
config_status['cpu_math_library_num_threads'] = config.cpu_math_library_num_threads()
elif isinstance(config, dict):
config_status['runtime_device'] = config.get('runtime_device', "")
config_status['ir_optim'] = config.get('ir_optim', "")
config_status['enable_tensorrt'] = config.get('enable_tensorrt', "")
config_status['precision'] = config.get('precision', "")
config_status['enable_mkldnn'] = config.get('enable_mkldnn', "")
config_status['cpu_math_library_num_threads'] = config.get('cpu_math_library_num_threads', "")
else:
self.print_help()
raise ValueError("Set argument config wrong, please check input argument and its type")
return config_status
def report(self, identifier=None):
"""
print log report
args:
identifier(string): identify log
"""
if identifier:
identifier = f"[{identifier}]"
else:
identifier = ""
self.logger.info("\n")
self.logger.info("---------------------- Paddle info ----------------------")
self.logger.info(f"{identifier} paddle_version: {self.paddle_version}")
self.logger.info(f"{identifier} paddle_commit: {self.paddle_commit}")
self.logger.info(f"{identifier} paddle_branch: {self.paddle_branch}")
self.logger.info(f"{identifier} log_api_version: {self.log_version}")
self.logger.info("----------------------- Conf info -----------------------")
self.logger.info(f"{identifier} runtime_device: {self.config_status['runtime_device']}")
self.logger.info(f"{identifier} ir_optim: {self.config_status['ir_optim']}")
self.logger.info(f"{identifier} enable_memory_optim: {True}")
self.logger.info(f"{identifier} enable_tensorrt: {self.config_status['enable_tensorrt']}")
self.logger.info(f"{identifier} enable_mkldnn: {self.config_status['enable_mkldnn']}")
self.logger.info(
f"{identifier} cpu_math_library_num_threads: {self.config_status['cpu_math_library_num_threads']}")
self.logger.info("----------------------- Model info ----------------------")
self.logger.info(f"{identifier} model_name: {self.model_name}")
self.logger.info(f"{identifier} precision: {self.precision}")
self.logger.info("----------------------- Data info -----------------------")
self.logger.info(f"{identifier} batch_size: {self.batch_size}")
self.logger.info(f"{identifier} input_shape: {self.shape}")
self.logger.info(f"{identifier} data_num: {self.data_num}")
self.logger.info("----------------------- Perf info -----------------------")
self.logger.info(
f"{identifier} cpu_rss(MB): {self.cpu_rss_mb}, cpu_vms: {self.cpu_vms_mb}, cpu_shared_mb: {self.cpu_shared_mb}, cpu_dirty_mb: {self.cpu_dirty_mb}, cpu_util: {self.cpu_util}%"
)
self.logger.info(
f"{identifier} gpu_rss(MB): {self.gpu_rss_mb}, gpu_util: {self.gpu_util}%, gpu_mem_util: {self.gpu_mem_util}%"
)
self.logger.info(f"{identifier} total time spent(s): {self.total_time_s}")
if self.with_tracker:
self.logger.info(f"{identifier} preprocess_time(ms): {round(self.preprocess_time_s*1000, 1)}, "
f"inference_time(ms): {round(self.inference_time_s*1000, 1)}, "
f"postprocess_time(ms): {round(self.postprocess_time_s*1000, 1)}, "
f"tracking_time(ms): {round(self.tracking_time_s*1000, 1)}")
else:
self.logger.info(f"{identifier} preprocess_time(ms): {round(self.preprocess_time_s*1000, 1)}, "
f"inference_time(ms): {round(self.inference_time_s*1000, 1)}, "
f"postprocess_time(ms): {round(self.postprocess_time_s*1000, 1)}")
if self.inference_time_s_90:
self.logger.info(
f"{identifier} 90%_cost: {self.inference_time_s_90}, 99%_cost: {self.inference_time_s_99}, succ_rate: {self.succ_rate}"
)
if self.qps:
self.logger.info(f"{identifier} QPS: {self.qps}")
def print_help(self):
"""
print function help
"""
print("""Usage:
==== Print inference benchmark logs. ====
config = paddle.inference.Config()
model_info = {'model_name': 'resnet50',
'precision': 'fp32'}
data_info = {'batch_size': 1,
'shape': '3,224,224',
'data_num': 1000}
perf_info = {'preprocess_time_s': 1.0,
'inference_time_s': 2.0,
'postprocess_time_s': 1.0,
'total_time_s': 4.0}
resource_info = {'cpu_rss_mb': 100,
'gpu_rss_mb': 100,
'gpu_util': 60}
log = PaddleInferBenchmark(config, model_info, data_info, perf_info, resource_info)
log('Test')
""")
def __call__(self, identifier=None):
"""
__call__
args:
identifier(string): identify log
"""
self.report(identifier)
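# --- Usage sketch (illustration only, not part of the original file) ---
# parse_config() also accepts a plain dict instead of a paddle.inference.Config,
# which is convenient when the timing numbers were collected outside of Paddle
# Inference. All values below are placeholders.
if __name__ == '__main__':
    dummy_config = {
        'runtime_device': 'cpu',
        'ir_optim': True,
        'enable_tensorrt': False,
        'precision': 'fp32',
        'enable_mkldnn': False,
        'cpu_math_library_num_threads': 1,
    }
    model_info = {'model_name': 'resnet50', 'precision': 'fp32'}
    data_info = {'batch_size': 1, 'shape': '3,224,224', 'data_num': 1000}
    perf_info = {'preprocess_time_s': 1.0, 'inference_time_s': 2.0, 'postprocess_time_s': 1.0, 'total_time_s': 4.0}
    resource_info = {'cpu_rss_mb': 100, 'gpu_rss_mb': 100, 'gpu_util': 60}
    log = PaddleInferBenchmark(dummy_config, model_info, data_info, perf_info, resource_info)
    log('Example')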
@@ -19,18 +19,12 @@ import cv2
import numpy as np
import paddle
import yaml
from benchmark_utils import PaddleInferBenchmark
from det_keypoint_unite_utils import argsparser
from infer import bench_log
from infer import Detector
from infer import get_test_images
from infer import PredictConfig
from infer import print_arguments
from keypoint_infer import KeyPointDetector
from keypoint_infer import PredictConfig_KeyPoint
from keypoint_postprocess import translate_to_ori_images
from preprocess import decode_image
from utils import get_current_memory_mb
from visualize import visualize_pose
KEYPOINT_SUPPORT_MODELS = {'HigherHRNet': 'keypoint_bottomup', 'HRNet': 'keypoint_topdown'}
@@ -49,182 +43,3 @@ def predict_with_given_det(image, det_res, keypoint_detector, keypoint_batch_size
[]]
keypoint_res['bbox'] = rect_vector
return keypoint_res
def topdown_unite_predict(detector, topdown_keypoint_detector, image_list, keypoint_batch_size=1, save_res=False):
det_timer = detector.get_timer()
store_res = []
for i, img_file in enumerate(image_list):
# Decode image in advance in det + pose prediction
det_timer.preprocess_time_s.start()
image, _ = decode_image(img_file, {})
det_timer.preprocess_time_s.end()
if FLAGS.run_benchmark:
results = detector.predict_image([image], run_benchmark=True, repeats=10)
cm, gm, gu = get_current_memory_mb()
detector.cpu_mem += cm
detector.gpu_mem += gm
detector.gpu_util += gu
else:
results = detector.predict_image([image], visual=False)
results = detector.filter_box(results, FLAGS.det_threshold)
if results['boxes_num'] > 0:
keypoint_res = predict_with_given_det(image, results, topdown_keypoint_detector, keypoint_batch_size,
FLAGS.run_benchmark)
if save_res:
save_name = img_file if isinstance(img_file, str) else i
store_res.append(
[save_name, keypoint_res['bbox'], [keypoint_res['keypoint'][0], keypoint_res['keypoint'][1]]])
else:
results["keypoint"] = [[], []]
keypoint_res = results
if FLAGS.run_benchmark:
cm, gm, gu = get_current_memory_mb()
topdown_keypoint_detector.cpu_mem += cm
topdown_keypoint_detector.gpu_mem += gm
topdown_keypoint_detector.gpu_util += gu
else:
if not os.path.exists(FLAGS.output_dir):
os.makedirs(FLAGS.output_dir)
visualize_pose(img_file, keypoint_res, visual_thresh=FLAGS.keypoint_threshold, save_dir=FLAGS.output_dir)
if save_res:
"""
1) store_res: a list of image_data
2) image_data: [imageid, rects, [keypoints, scores]]
3) rects: list of rect [xmin, ymin, xmax, ymax]
4) keypoints: 17 (joint number) * [x, y, conf], 51 values in total
5) scores: mean of all joint conf
"""
with open("det_keypoint_unite_image_results.json", 'w') as wf:
json.dump(store_res, wf, indent=4)
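# --- Illustrative helper (a sketch, not in the original file) ---
# Read back the JSON written above; the entry layout mirrors the docstring:
# [imageid, rects, [keypoints, scores]].
def load_unite_image_results(path="det_keypoint_unite_image_results.json"):
    with open(path) as rf:
        store_res = json.load(rf)
    # map image id/name -> its detected rects and per-person [keypoints, scores]
    return {entry[0]: {'bbox': entry[1], 'keypoint': entry[2]} for entry in store_res}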
def topdown_unite_predict_video(detector, topdown_keypoint_detector, camera_id, keypoint_batch_size=1, save_res=False):
video_name = 'output.mp4'
if camera_id != -1:
capture = cv2.VideoCapture(camera_id)
else:
capture = cv2.VideoCapture(FLAGS.video_file)
video_name = os.path.split(FLAGS.video_file)[-1]
# Get Video info : resolution, fps, frame count
width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(capture.get(cv2.CAP_PROP_FPS))
frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
print("fps: %d, frame_count: %d" % (fps, frame_count))
if not os.path.exists(FLAGS.output_dir):
os.makedirs(FLAGS.output_dir)
out_path = os.path.join(FLAGS.output_dir, video_name)
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
index = 0
store_res = []
while True:
ret, frame = capture.read()
if not ret:
break
index += 1
print('detect frame: %d' % (index))
frame2 = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
results = detector.predict_image([frame2], visual=False)
results = detector.filter_box(results, FLAGS.det_threshold)
if results['boxes_num'] == 0:
writer.write(frame)
continue
keypoint_res = predict_with_given_det(frame2, results, topdown_keypoint_detector, keypoint_batch_size,
FLAGS.run_benchmark)
im = visualize_pose(frame, keypoint_res, visual_thresh=FLAGS.keypoint_threshold, returnimg=True)
if save_res:
store_res.append([index, keypoint_res['bbox'], [keypoint_res['keypoint'][0], keypoint_res['keypoint'][1]]])
writer.write(im)
if camera_id != -1:
cv2.imshow('Mask Detection', im)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
writer.release()
print('output_video saved to: {}'.format(out_path))
if save_res:
"""
1) store_res: a list of frame_data
2) frame_data: [frameid, rects, [keypoints, scores]]
3) rects: list of rect [xmin, ymin, xmax, ymax]
4) keypoints: 17 (joint number) * [x, y, conf], 51 values in total
5) scores: mean of all joint conf
"""
with open("det_keypoint_unite_video_results.json", 'w') as wf:
json.dump(store_res, wf, indent=4)
def main():
deploy_file = os.path.join(FLAGS.det_model_dir, 'infer_cfg.yml')
with open(deploy_file) as f:
yml_conf = yaml.safe_load(f)
arch = yml_conf['arch']
detector = Detector(FLAGS.det_model_dir,
device=FLAGS.device,
run_mode=FLAGS.run_mode,
trt_min_shape=FLAGS.trt_min_shape,
trt_max_shape=FLAGS.trt_max_shape,
trt_opt_shape=FLAGS.trt_opt_shape,
trt_calib_mode=FLAGS.trt_calib_mode,
cpu_threads=FLAGS.cpu_threads,
enable_mkldnn=FLAGS.enable_mkldnn,
threshold=FLAGS.det_threshold)
topdown_keypoint_detector = KeyPointDetector(FLAGS.keypoint_model_dir,
device=FLAGS.device,
run_mode=FLAGS.run_mode,
batch_size=FLAGS.keypoint_batch_size,
trt_min_shape=FLAGS.trt_min_shape,
trt_max_shape=FLAGS.trt_max_shape,
trt_opt_shape=FLAGS.trt_opt_shape,
trt_calib_mode=FLAGS.trt_calib_mode,
cpu_threads=FLAGS.cpu_threads,
enable_mkldnn=FLAGS.enable_mkldnn,
use_dark=FLAGS.use_dark)
keypoint_arch = topdown_keypoint_detector.pred_config.arch
assert KEYPOINT_SUPPORT_MODELS[
keypoint_arch] == 'keypoint_topdown', 'Detection-Keypoint unite inference only supports topdown models.'
# predict from video file or camera video stream
if FLAGS.video_file is not None or FLAGS.camera_id != -1:
topdown_unite_predict_video(detector, topdown_keypoint_detector, FLAGS.camera_id, FLAGS.keypoint_batch_size,
FLAGS.save_res)
else:
# predict from image
img_list = get_test_images(FLAGS.image_dir, FLAGS.image_file)
topdown_unite_predict(detector, topdown_keypoint_detector, img_list, FLAGS.keypoint_batch_size, FLAGS.save_res)
if not FLAGS.run_benchmark:
detector.det_times.info(average=True)
topdown_keypoint_detector.det_times.info(average=True)
else:
mode = FLAGS.run_mode
det_model_dir = FLAGS.det_model_dir
det_model_info = {'model_name': det_model_dir.strip('/').split('/')[-1], 'precision': mode.split('_')[-1]}
bench_log(detector, img_list, det_model_info, name='Det')
keypoint_model_dir = FLAGS.keypoint_model_dir
keypoint_model_info = {
'model_name': keypoint_model_dir.strip('/').split('/')[-1],
'precision': mode.split('_')[-1]
}
bench_log(topdown_keypoint_detector, img_list, keypoint_model_info, FLAGS.keypoint_batch_size, 'KeyPoint')
if __name__ == '__main__':
paddle.enable_static()
parser = argsparser()
FLAGS = parser.parse_args()
print_arguments(FLAGS)
FLAGS.device = FLAGS.device.upper()
assert FLAGS.device in ['CPU', 'GPU', 'XPU'], "device should be CPU, GPU or XPU"
main()
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import ast
def argsparser():
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--det_model_dir",
type=str,
default=None,
help=("Directory include:'model.pdiparams', 'model.pdmodel', "
"'infer_cfg.yml', created by tools/export_model.py."),
required=True)
parser.add_argument("--keypoint_model_dir",
type=str,
default=None,
help=("Directory include:'model.pdiparams', 'model.pdmodel', "
"'infer_cfg.yml', created by tools/export_model.py."),
required=True)
parser.add_argument("--image_file", type=str, default=None, help="Path of image file.")
parser.add_argument("--image_dir",
type=str,
default=None,
help="Dir of image file, `image_file` has a higher priority.")
parser.add_argument("--keypoint_batch_size",
type=int,
default=8,
help=("batch_size for keypoint inference. In detection-keypoint unit"
"inference, the batch size in detection is 1. Then collate det "
"result in batch for keypoint inference."))
parser.add_argument("--video_file",
type=str,
default=None,
help="Path of video file, `video_file` or `camera_id` has a highest priority.")
parser.add_argument("--camera_id", type=int, default=-1, help="device id of camera to predict.")
parser.add_argument("--det_threshold", type=float, default=0.5, help="Threshold of score.")
parser.add_argument("--keypoint_threshold", type=float, default=0.5, help="Threshold of score.")
parser.add_argument("--output_dir", type=str, default="output", help="Directory of output visualization files.")
parser.add_argument("--run_mode",
type=str,
default='paddle',
help="mode of running(paddle/trt_fp32/trt_fp16/trt_int8)")
parser.add_argument("--device",
type=str,
default='cpu',
help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU.")
parser.add_argument("--run_benchmark",
type=ast.literal_eval,
default=False,
help="Whether to predict a image_file repeatedly for benchmark")
parser.add_argument("--enable_mkldnn", type=ast.literal_eval, default=False, help="Whether use mkldnn with CPU.")
parser.add_argument("--cpu_threads", type=int, default=1, help="Num of threads with CPU.")
parser.add_argument("--trt_min_shape", type=int, default=1, help="min_shape for TensorRT.")
parser.add_argument("--trt_max_shape", type=int, default=1280, help="max_shape for TensorRT.")
parser.add_argument("--trt_opt_shape", type=int, default=640, help="opt_shape for TensorRT.")
parser.add_argument("--trt_calib_mode",
type=bool,
default=False,
help="If the model is produced by TRT offline quantitative "
"calibration, trt_calib_mode need to set True.")
parser.add_argument('--use_dark',
type=ast.literal_eval,
default=True,
help='whether to use DarkPose to get more accurate keypoint positions')
parser.add_argument('--save_res',
type=bool,
default=False,
help=("whether to save predict results to json file"
"1) store_res: a list of image_data"
"2) image_data: [imageid, rects, [keypoints, scores]]"
"3) rects: list of rect [xmin, ymin, xmax, ymax]"
"4) keypoints: 17(joint numbers)*[x, y, conf], total 51 data in list"
"5) scores: mean of all joint conf"))
return parser
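# --- Example (a sketch, not part of the original file) ---
# The deploy scripts consume this parser roughly as follows; the model and image
# paths below are hypothetical placeholders.
if __name__ == '__main__':
    parser = argsparser()
    FLAGS = parser.parse_args(['--det_model_dir', './det_model',
                               '--keypoint_model_dir', './kpt_model',
                               '--image_file', './demo.jpg'])
    print(FLAGS.device, FLAGS.keypoint_batch_size, FLAGS.run_mode)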
@@ -23,7 +23,6 @@ import cv2
import numpy as np
import paddle
import yaml
from benchmark_utils import PaddleInferBenchmark
from keypoint_preprocess import EvalAffine
from keypoint_preprocess import expand_crop
from keypoint_preprocess import TopDownEvalAffine
@@ -38,9 +37,6 @@ from preprocess import Permute
from preprocess import preprocess
from preprocess import Resize
from preprocess import WarpAffine
from utils import argsparser
from utils import get_current_memory_mb
from utils import Timer
from visualize import visualize_box
# Global dictionary
@@ -67,18 +63,6 @@ SUPPORT_MODELS = {
}
def bench_log(detector, img_list, model_info, batch_size=1, name=None):
mems = {
'cpu_rss_mb': detector.cpu_mem / len(img_list),
'gpu_rss_mb': detector.gpu_mem / len(img_list),
'gpu_util': detector.gpu_util * 100 / len(img_list)
}
perf_info = detector.det_times.report(average=True)
data_info = {'batch_size': batch_size, 'shape': "dynamic_shape", 'data_num': perf_info['img_num']}
log = PaddleInferBenchmark(detector.config, model_info, data_info, perf_info, mems)
log(name)
class Detector(object):
"""
Args:
@@ -132,7 +116,6 @@ class Detector(object):
enable_mkldnn=enable_mkldnn,
enable_mkldnn_bfloat16=enable_mkldnn_bfloat16,
delete_shuffle_pass=delete_shuffle_pass)
self.det_times = Timer()
self.cpu_mem, self.gpu_mem, self.gpu_util = 0, 0, 0
self.batch_size = batch_size
self.output_dir = output_dir
@@ -228,9 +211,6 @@ class Detector(object):
results[k] = np.concatenate(v)
return results
def get_timer(self):
return self.det_times
def predict_image(self, image_list, run_benchmark=False, repeats=1, visual=True, save_file=None):
batch_loop_cnt = math.ceil(float(len(image_list)) / self.batch_size)
results = []
@@ -238,31 +218,6 @@
start_index = i * self.batch_size
end_index = min((i + 1) * self.batch_size, len(image_list))
batch_image_list = image_list[start_index:end_index]
if run_benchmark:
# preprocess
inputs = self.preprocess(batch_image_list) # warmup
self.det_times.preprocess_time_s.start()
inputs = self.preprocess(batch_image_list)
self.det_times.preprocess_time_s.end()
# model prediction
result = self.predict(repeats=50) # warmup
self.det_times.inference_time_s.start()
result = self.predict(repeats=repeats)
self.det_times.inference_time_s.end(repeats=repeats)
# postprocess
result_warmup = self.postprocess(inputs, result) # warmup
self.det_times.postprocess_time_s.start()
result = self.postprocess(inputs, result)
self.det_times.postprocess_time_s.end()
self.det_times.img_num += len(batch_image_list)
cm, gm, gu = get_current_memory_mb()
self.cpu_mem += cm
self.gpu_mem += gm
self.gpu_util += gu
else:
# preprocess
self.det_times.preprocess_time_s.start()
inputs = self.preprocess(batch_image_list)
@@ -626,69 +581,3 @@ def visualize(image_list, result, labels, output_dir='output/', threshold=0.5):
out_path = os.path.join(output_dir, img_name)
im.save(out_path, quality=95)
print("save result to: " + out_path)
def print_arguments(args):
print('----------- Running Arguments -----------')
for arg, value in sorted(vars(args).items()):
print('%s: %s' % (arg, value))
print('------------------------------------------')
def main():
deploy_file = os.path.join(FLAGS.model_dir, 'infer_cfg.yml')
with open(deploy_file) as f:
yml_conf = yaml.safe_load(f)
arch = yml_conf['arch']
detector_func = 'Detector'
if arch == 'SOLOv2':
detector_func = 'DetectorSOLOv2'
elif arch == 'PicoDet':
detector_func = 'DetectorPicoDet'
detector = eval(detector_func)(FLAGS.model_dir,
device=FLAGS.device,
run_mode=FLAGS.run_mode,
batch_size=FLAGS.batch_size,
trt_min_shape=FLAGS.trt_min_shape,
trt_max_shape=FLAGS.trt_max_shape,
trt_opt_shape=FLAGS.trt_opt_shape,
trt_calib_mode=FLAGS.trt_calib_mode,
cpu_threads=FLAGS.cpu_threads,
enable_mkldnn=FLAGS.enable_mkldnn,
enable_mkldnn_bfloat16=FLAGS.enable_mkldnn_bfloat16,
threshold=FLAGS.threshold,
output_dir=FLAGS.output_dir)
# predict from video file or camera video stream
if FLAGS.video_file is not None or FLAGS.camera_id != -1:
detector.predict_video(FLAGS.video_file, FLAGS.camera_id)
else:
# predict from image
if FLAGS.image_dir is None and FLAGS.image_file is not None:
assert FLAGS.batch_size == 1, "batch_size should be 1, when image_file is not None"
img_list = get_test_images(FLAGS.image_dir, FLAGS.image_file)
save_file = os.path.join(FLAGS.output_dir, 'results.json') if FLAGS.save_results else None
detector.predict_image(img_list, FLAGS.run_benchmark, repeats=100, save_file=save_file)
if not FLAGS.run_benchmark:
detector.det_times.info(average=True)
else:
mode = FLAGS.run_mode
model_dir = FLAGS.model_dir
model_info = {'model_name': model_dir.strip('/').split('/')[-1], 'precision': mode.split('_')[-1]}
bench_log(detector, img_list, model_info, name='DET')
if __name__ == '__main__':
paddle.enable_static()
parser = argsparser()
FLAGS = parser.parse_args()
print_arguments(FLAGS)
FLAGS.device = FLAGS.device.upper()
assert FLAGS.device in ['CPU', 'GPU', 'XPU'], "device should be CPU, GPU or XPU"
assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"
assert not (FLAGS.enable_mkldnn == False and FLAGS.enable_mkldnn_bfloat16
== True), 'To enable mkldnn bfloat, please turn on both enable_mkldnn and enable_mkldnn_bfloat16'
main()
@@ -33,9 +33,7 @@ from keypoint_postprocess import HRNetPostProcess
from visualize import visualize_pose
from paddle.inference import Config
from paddle.inference import create_predictor
from utils import argsparser, Timer, get_current_memory_mb
from infer import Detector
from benchmark_utils import PaddleInferBenchmark
from infer import Detector, get_test_images, print_arguments
# Global dictionary
KEYPOINT_SUPPORT_MODELS = {'HigherHRNet': 'keypoint_bottomup', 'HRNet': 'keypoint_topdown'}
@@ -169,32 +167,6 @@ class KeyPointDetector(Detector):
start_index = i * self.batch_size
end_index = min((i + 1) * self.batch_size, len(image_list))
batch_image_list = image_list[start_index:end_index]
if run_benchmark:
# preprocess
inputs = self.preprocess(batch_image_list) # warmup
self.det_times.preprocess_time_s.start()
inputs = self.preprocess(batch_image_list)
self.det_times.preprocess_time_s.end()
# model prediction
result_warmup = self.predict(repeats=repeats) # warmup
self.det_times.inference_time_s.start()
result = self.predict(repeats=repeats)
self.det_times.inference_time_s.end(repeats=repeats)
# postprocess
result_warmup = self.postprocess(inputs, result) # warmup
self.det_times.postprocess_time_s.start()
result = self.postprocess(inputs, result)
self.det_times.postprocess_time_s.end()
self.det_times.img_num += len(batch_image_list)
cm, gm, gu = get_current_memory_mb()
self.cpu_mem += cm
self.gpu_mem += gm
self.gpu_util += gu
else:
# preprocess
self.det_times.preprocess_time_s.start()
inputs = self.preprocess(batch_image_list)
@@ -328,54 +300,3 @@ def visualize(image_list, results, visual_thresh=0.6, save_dir='output'):
score = scores[i:i + 1]
im_results['keypoint'] = [skeleton, score]
visualize_pose(image_file, im_results, visual_thresh=visual_thresh, save_dir=save_dir)
def main():
detector = KeyPointDetector(FLAGS.model_dir,
device=FLAGS.device,
run_mode=FLAGS.run_mode,
batch_size=FLAGS.batch_size,
trt_min_shape=FLAGS.trt_min_shape,
trt_max_shape=FLAGS.trt_max_shape,
trt_opt_shape=FLAGS.trt_opt_shape,
trt_calib_mode=FLAGS.trt_calib_mode,
cpu_threads=FLAGS.cpu_threads,
enable_mkldnn=FLAGS.enable_mkldnn,
threshold=FLAGS.threshold,
output_dir=FLAGS.output_dir,
use_dark=FLAGS.use_dark)
# predict from video file or camera video stream
if FLAGS.video_file is not None or FLAGS.camera_id != -1:
detector.predict_video(FLAGS.video_file, FLAGS.camera_id)
else:
# predict from image
img_list = get_test_images(FLAGS.image_dir, FLAGS.image_file)
detector.predict_image(img_list, FLAGS.run_benchmark, repeats=10)
if not FLAGS.run_benchmark:
detector.det_times.info(average=True)
else:
mems = {
'cpu_rss_mb': detector.cpu_mem / len(img_list),
'gpu_rss_mb': detector.gpu_mem / len(img_list),
'gpu_util': detector.gpu_util * 100 / len(img_list)
}
perf_info = detector.det_times.report(average=True)
model_dir = FLAGS.model_dir
mode = FLAGS.run_mode
model_info = {'model_name': model_dir.strip('/').split('/')[-1], 'precision': mode.split('_')[-1]}
data_info = {'batch_size': 1, 'shape': "dynamic_shape", 'data_num': perf_info['img_num']}
det_log = PaddleInferBenchmark(detector.config, model_info, data_info, perf_info, mems)
det_log('KeyPoint')
if __name__ == '__main__':
paddle.enable_static()
parser = argsparser()
FLAGS = parser.parse_args()
print_arguments(FLAGS)
FLAGS.device = FLAGS.device.upper()
assert FLAGS.device in ['CPU', 'GPU', 'XPU'], "device should be CPU, GPU or XPU"
assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"
main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import functools
import logging
import os
import sys
import paddle.distributed as dist
__all__ = ['setup_logger']
logger_initialized = []
def setup_logger(name="ppdet", output=None):
"""
Initialize logger and set its verbosity level to INFO.
Args:
output (str): a file name or a directory to save log. If None, will not save log file.
If ends with ".txt" or ".log", assumed to be a file name.
Otherwise, logs will be saved to `output/log.txt`.
name (str): the root module name of this logger
Returns:
logging.Logger: a logger
"""
logger = logging.getLogger(name)
if name in logger_initialized:
return logger
logger.setLevel(logging.INFO)
logger.propagate = False
formatter = logging.Formatter("[%(asctime)s] %(name)s %(levelname)s: %(message)s", datefmt="%m/%d %H:%M:%S")
# stdout logging: master only
local_rank = dist.get_rank()
if local_rank == 0:
ch = logging.StreamHandler(stream=sys.stdout)
ch.setLevel(logging.DEBUG)
ch.setFormatter(formatter)
logger.addHandler(ch)
# file logging: all workers
if output is not None:
if output.endswith(".txt") or output.endswith(".log"):
filename = output
else:
filename = os.path.join(output, "log.txt")
if local_rank > 0:
filename = filename + ".rank{}".format(local_rank)
os.makedirs(os.path.dirname(filename), exist_ok=True)
fh = logging.FileHandler(filename, mode='a')
fh.setLevel(logging.DEBUG)
fh.setFormatter(logging.Formatter())
logger.addHandler(fh)
logger_initialized.append(name)
return logger
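# --- Usage sketch (illustration only, not part of the original file) ---
# On rank 0 the logger prints to stdout; with an output directory every worker
# also writes to <output>/log.txt (a rank suffix is added for workers > 0).
if __name__ == '__main__':
    demo_logger = setup_logger(name="ppdet", output="./output")
    demo_logger.info("logger ready")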
@@ -23,19 +23,12 @@ import numpy as np
import paddle
import yaml
from det_keypoint_unite_infer import predict_with_given_det
from infer import bench_log
from infer import Detector
from infer import get_test_images
from infer import PredictConfig
from infer import print_arguments
from keypoint_infer import KeyPointDetector
from keypoint_infer import PredictConfig_KeyPoint
from keypoint_postprocess import translate_to_ori_images
from preprocess import base64_to_cv2
from preprocess import decode_image
from visualize import visualize_pose
import paddlehub.vision.transforms as T
from paddlehub.module.module import moduleinfo
from paddlehub.module.module import runnable
from paddlehub.module.module import serving
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import ast
import os
import time
def argsparser():
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--model_dir",
type=str,
default=None,
help=("Directory include:'model.pdiparams', 'model.pdmodel', "
"'infer_cfg.yml', created by tools/export_model.py."),
required=True)
parser.add_argument("--image_file", type=str, default=None, help="Path of image file.")
parser.add_argument("--image_dir",
type=str,
default=None,
help="Dir of image file, `image_file` has a higher priority.")
parser.add_argument("--batch_size", type=int, default=1, help="batch_size for inference.")
parser.add_argument("--video_file",
type=str,
default=None,
help="Path of video file, `video_file` or `camera_id` has a highest priority.")
parser.add_argument("--camera_id", type=int, default=-1, help="device id of camera to predict.")
parser.add_argument("--threshold", type=float, default=0.5, help="Threshold of score.")
parser.add_argument("--output_dir", type=str, default="output", help="Directory of output visualization files.")
parser.add_argument("--run_mode",
type=str,
default='paddle',
help="mode of running(paddle/trt_fp32/trt_fp16/trt_int8)")
parser.add_argument("--device",
type=str,
default='cpu',
help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Deprecated, please use `--device`.")
parser.add_argument("--run_benchmark",
type=ast.literal_eval,
default=False,
help="Whether to predict a image_file repeatedly for benchmark")
parser.add_argument("--enable_mkldnn", type=ast.literal_eval, default=False, help="Whether use mkldnn with CPU.")
parser.add_argument("--enable_mkldnn_bfloat16",
type=ast.literal_eval,
default=False,
help="Whether use mkldnn bfloat16 inference with CPU.")
parser.add_argument("--cpu_threads", type=int, default=1, help="Num of threads with CPU.")
parser.add_argument("--trt_min_shape", type=int, default=1, help="min_shape for TensorRT.")
parser.add_argument("--trt_max_shape", type=int, default=1280, help="max_shape for TensorRT.")
parser.add_argument("--trt_opt_shape", type=int, default=640, help="opt_shape for TensorRT.")
parser.add_argument("--trt_calib_mode",
type=bool,
default=False,
help="If the model is produced by TRT offline quantitative "
"calibration, trt_calib_mode need to set True.")
parser.add_argument('--save_images', action='store_true', help='Save visualization image results.')
parser.add_argument('--save_mot_txts', action='store_true', help='Save tracking results (txt).')
parser.add_argument('--save_mot_txt_per_img',
action='store_true',
help='Save tracking results (txt) for each image.')
parser.add_argument('--scaled',
type=bool,
default=False,
help="Whether coords after detector outputs are scaled, False in JDE YOLOv3 "
"True in general detector.")
parser.add_argument("--tracker_config", type=str, default=None, help=("tracker donfig"))
parser.add_argument("--reid_model_dir",
type=str,
default=None,
help=("Directory include:'model.pdiparams', 'model.pdmodel', "
"'infer_cfg.yml', created by tools/export_model.py."))
parser.add_argument("--reid_batch_size", type=int, default=50, help="max batch_size for reid model inference.")
parser.add_argument('--use_dark',
type=ast.literal_eval,
default=True,
help='whether to use DarkPose to get more accurate keypoint positions')
parser.add_argument("--action_file", type=str, default=None, help="Path of input file for action recognition.")
parser.add_argument("--window_size",
type=int,
default=50,
help="Temporal size of skeleton feature for action recognition.")
parser.add_argument("--random_pad",
type=ast.literal_eval,
default=False,
help="Whether do random padding for action recognition.")
parser.add_argument("--save_results",
type=bool,
default=False,
help="Whether save detection result to file using coco format")
return parser
class Times(object):
def __init__(self):
self.time = 0.
# start time
self.st = 0.
# end time
self.et = 0.
def start(self):
self.st = time.time()
def end(self, repeats=1, accumulative=True):
self.et = time.time()
if accumulative:
self.time += (self.et - self.st) / repeats
else:
self.time = (self.et - self.st) / repeats
def reset(self):
self.time = 0.
self.st = 0.
self.et = 0.
def value(self):
return round(self.time, 4)
class Timer(Times):
def __init__(self, with_tracker=False):
super(Timer, self).__init__()
self.with_tracker = with_tracker
self.preprocess_time_s = Times()
self.inference_time_s = Times()
self.postprocess_time_s = Times()
self.tracking_time_s = Times()
self.img_num = 0
def info(self, average=False):
pre_time = self.preprocess_time_s.value()
infer_time = self.inference_time_s.value()
post_time = self.postprocess_time_s.value()
track_time = self.tracking_time_s.value()
total_time = pre_time + infer_time + post_time
if self.with_tracker:
total_time = total_time + track_time
total_time = round(total_time, 4)
print("------------------ Inference Time Info ----------------------")
print("total_time(ms): {}, img_num: {}".format(total_time * 1000, self.img_num))
preprocess_time = round(pre_time / max(1, self.img_num), 4) if average else pre_time
postprocess_time = round(post_time / max(1, self.img_num), 4) if average else post_time
inference_time = round(infer_time / max(1, self.img_num), 4) if average else infer_time
tracking_time = round(track_time / max(1, self.img_num), 4) if average else track_time
average_latency = total_time / max(1, self.img_num)
qps = 0
if total_time > 0:
qps = 1 / average_latency
print("average latency time(ms): {:.2f}, QPS: {:2f}".format(average_latency * 1000, qps))
if self.with_tracker:
print(
"preprocess_time(ms): {:.2f}, inference_time(ms): {:.2f}, postprocess_time(ms): {:.2f}, tracking_time(ms): {:.2f}"
.format(preprocess_time * 1000, inference_time * 1000, postprocess_time * 1000, tracking_time * 1000))
else:
print("preprocess_time(ms): {:.2f}, inference_time(ms): {:.2f}, postprocess_time(ms): {:.2f}".format(
preprocess_time * 1000, inference_time * 1000, postprocess_time * 1000))
def report(self, average=False):
dic = {}
pre_time = self.preprocess_time_s.value()
infer_time = self.inference_time_s.value()
post_time = self.postprocess_time_s.value()
track_time = self.tracking_time_s.value()
dic['preprocess_time_s'] = round(pre_time / max(1, self.img_num), 4) if average else pre_time
dic['inference_time_s'] = round(infer_time / max(1, self.img_num), 4) if average else infer_time
dic['postprocess_time_s'] = round(post_time / max(1, self.img_num), 4) if average else post_time
dic['img_num'] = self.img_num
total_time = pre_time + infer_time + post_time
if self.with_tracker:
dic['tracking_time_s'] = round(track_time / max(1, self.img_num), 4) if average else track_time
total_time = total_time + track_time
dic['total_time_s'] = round(total_time, 4)
return dic
def get_current_memory_mb():
"""
Obtain the memory usage of the CPU and GPU while the program is running.
Note that calling this function is itself time-consuming.
"""
import pynvml
import psutil
import GPUtil
gpu_id = int(os.environ.get('CUDA_VISIBLE_DEVICES', 0))
pid = os.getpid()
p = psutil.Process(pid)
info = p.memory_full_info()
cpu_mem = info.uss / 1024. / 1024.
gpu_mem = 0
gpu_percent = 0
gpus = GPUtil.getGPUs()
if gpu_id is not None and len(gpus) > 0:
gpu_percent = gpus[gpu_id].load
pynvml.nvmlInit()
handle = pynvml.nvmlDeviceGetHandleByIndex(0)
meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
gpu_mem = meminfo.used / 1024. / 1024.
return round(cpu_mem, 4), round(gpu_mem, 4), round(gpu_percent, 4)
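# --- Usage sketch (illustration only, not part of the original file) ---
# Combine Timer and get_current_memory_mb the same way the deploy scripts do:
# wrap each stage in start()/end(), bump img_num, then report the averages.
if __name__ == '__main__':
    timer = Timer()
    timer.preprocess_time_s.start()
    time.sleep(0.01)  # stand-in for real preprocessing
    timer.preprocess_time_s.end()
    timer.inference_time_s.start()
    time.sleep(0.02)  # stand-in for real inference
    timer.inference_time_s.end(repeats=1)
    timer.img_num += 1
    timer.info(average=True)
    print(timer.report(average=True))
    # memory probing needs psutil, GPUtil and pynvml installed
    cpu_mb, gpu_mb, gpu_load = get_current_memory_mb()
    print("cpu_rss_mb: {}, gpu_rss_mb: {}, gpu_util: {}".format(cpu_mb, gpu_mb, gpu_load))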