diff --git a/deploy/python/benchmark_utils.py b/deploy/python/benchmark_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..2bed99aa22384b74694db6cb23558b6e1f309026
--- /dev/null
+++ b/deploy/python/benchmark_utils.py
@@ -0,0 +1,279 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import os
+import time
+import logging
+
+import paddle
+import paddle.inference as paddle_infer
+
+from pathlib import Path
+
+CUR_DIR = os.path.dirname(os.path.abspath(__file__))
+LOG_PATH_ROOT = f"{CUR_DIR}/../../output"
+
+
+class PaddleInferBenchmark(object):
+    def __init__(self,
+                 config,
+                 model_info: dict={},
+                 data_info: dict={},
+                 perf_info: dict={},
+                 resource_info: dict={},
+                 **kwargs):
+        """
+        Construct PaddleInferBenchmark class to format logs.
+        args:
+            config(paddle.inference.Config): paddle inference config
+            model_info(dict): basic model info
+                {'model_name': 'resnet50',
+                 'precision': 'fp32'}
+            data_info(dict): input data info
+                {'batch_size': 1,
+                 'shape': '3,224,224',
+                 'data_num': 1000}
+            perf_info(dict): performance result
+                {'preprocess_time_s': 1.0,
+                 'inference_time_s': 2.0,
+                 'postprocess_time_s': 1.0,
+                 'total_time_s': 4.0}
+            resource_info(dict): cpu and gpu resources
+                {'cpu_rss_mb': 100,
+                 'gpu_rss_mb': 100,
+                 'gpu_util': 60}
+        """
+        # PaddleInferBenchmark log version
+        self.log_version = "1.0.3"
+
+        # Paddle version
+        self.paddle_version = paddle.__version__
+        self.paddle_commit = paddle.__git_commit__
+        paddle_infer_info = paddle_infer.get_version()
+        self.paddle_branch = paddle_infer_info.strip().split(': ')[-1]
+
+        # model info
+        self.model_info = model_info
+
+        # data info
+        self.data_info = data_info
+
+        # perf info
+        self.perf_info = perf_info
+
+        try:
+            # required values
+            self.model_name = model_info['model_name']
+            self.precision = model_info['precision']
+
+            self.batch_size = data_info['batch_size']
+            self.shape = data_info['shape']
+            self.data_num = data_info['data_num']
+
+            self.inference_time_s = round(perf_info['inference_time_s'], 4)
+        except (KeyError, TypeError):
+            self.print_help()
+            raise ValueError(
+                "Missing or invalid argument, please check the inputs and their types")
+
+        self.preprocess_time_s = perf_info.get('preprocess_time_s', 0)
+        self.postprocess_time_s = perf_info.get('postprocess_time_s', 0)
+        self.total_time_s = perf_info.get('total_time_s', 0)
+
+        self.inference_time_s_90 = perf_info.get("inference_time_s_90", "")
+        self.inference_time_s_99 = perf_info.get("inference_time_s_99", "")
+        self.succ_rate = perf_info.get("succ_rate", "")
+        self.qps = perf_info.get("qps", "")
+
+        # conf info
+        self.config_status = self.parse_config(config)
+
+        # mem info
+        if isinstance(resource_info, dict):
+            self.cpu_rss_mb = int(resource_info.get('cpu_rss_mb', 0))
+            self.cpu_vms_mb = int(resource_info.get('cpu_vms_mb', 0))
+            self.cpu_shared_mb = int(resource_info.get('cpu_shared_mb', 0))
+            self.cpu_dirty_mb = int(resource_info.get('cpu_dirty_mb', 0))
+            self.cpu_util = round(resource_info.get('cpu_util', 0), 2)
+
+            self.gpu_rss_mb = int(resource_info.get('gpu_rss_mb', 0))
+            self.gpu_util = round(resource_info.get('gpu_util', 0), 2)
+            self.gpu_mem_util = round(resource_info.get('gpu_mem_util', 0), 2)
+        else:
+            self.cpu_rss_mb = 0
+            self.cpu_vms_mb = 0
+            self.cpu_shared_mb = 0
+            self.cpu_dirty_mb = 0
+            self.cpu_util = 0
+
+            self.gpu_rss_mb = 0
+            self.gpu_util = 0
+            self.gpu_mem_util = 0
+
+        # init benchmark logger
+        self.benchmark_logger()
+
+    def benchmark_logger(self):
+        """
+        benchmark logger
+        """
+        # remove other logging handlers
+        for handler in logging.root.handlers[:]:
+            logging.root.removeHandler(handler)
+
+        # init logger
+        FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+        log_output = f"{LOG_PATH_ROOT}/{self.model_name}.log"
+        Path(f"{LOG_PATH_ROOT}").mkdir(parents=True, exist_ok=True)
+        logging.basicConfig(
+            level=logging.INFO,
+            format=FORMAT,
+            handlers=[
+                logging.FileHandler(
+                    filename=log_output, mode='w'),
+                logging.StreamHandler(),
+            ])
+        self.logger = logging.getLogger(__name__)
+        self.logger.info(
+            f"Paddle Inference benchmark log will be saved to {log_output}")
+
+    def parse_config(self, config) -> dict:
+        """
+        parse paddle predictor config
+        args:
+            config(paddle.inference.Config): paddle inference config
+        return:
+            config_status(dict): dict style config info
+        """
+        config_status = {}
+        if isinstance(config, paddle_infer.Config):
+            config_status['runtime_device'] = "gpu" if config.use_gpu(
+            ) else "cpu"
+            config_status['ir_optim'] = config.ir_optim()
+            config_status['enable_tensorrt'] = config.tensorrt_engine_enabled()
+            config_status['precision'] = self.precision
+            config_status['enable_mkldnn'] = config.mkldnn_enabled()
+            config_status[
+                'cpu_math_library_num_threads'] = config.cpu_math_library_num_threads(
+                )
+        elif isinstance(config, dict):
+            config_status['runtime_device'] = config.get('runtime_device', "")
+            config_status['ir_optim'] = config.get('ir_optim', "")
+            config_status['enable_tensorrt'] = config.get('enable_tensorrt', "")
+            config_status['precision'] = config.get('precision', "")
+            config_status['enable_mkldnn'] = config.get('enable_mkldnn', "")
+            config_status['cpu_math_library_num_threads'] = config.get(
+                'cpu_math_library_num_threads', "")
+        else:
+            self.print_help()
+            raise ValueError(
+                "Invalid config argument, please pass a paddle.inference.Config or a dict"
+            )
+        return config_status
+
+    def report(self, identifier=None):
+        """
+        print log report
+        args:
+            identifier(string): identify log
+        """
+        if identifier:
+            identifier = f"[{identifier}]"
+        else:
+            identifier = ""
+
+        self.logger.info("\n")
+        self.logger.info(
+            "---------------------- Paddle info ----------------------")
+        self.logger.info(f"{identifier} paddle_version: {self.paddle_version}")
+        self.logger.info(f"{identifier} paddle_commit: {self.paddle_commit}")
+        self.logger.info(f"{identifier} paddle_branch: {self.paddle_branch}")
+        self.logger.info(f"{identifier} log_api_version: {self.log_version}")
+        self.logger.info(
+            "----------------------- Conf info -----------------------")
+        self.logger.info(
+            f"{identifier} runtime_device: {self.config_status['runtime_device']}"
+        )
+        self.logger.info(
+            f"{identifier} ir_optim: {self.config_status['ir_optim']}")
+        self.logger.info(f"{identifier} enable_memory_optim: {True}")
+        self.logger.info(
+            f"{identifier} enable_tensorrt: {self.config_status['enable_tensorrt']}"
+        )
+        self.logger.info(
+            f"{identifier} enable_mkldnn: {self.config_status['enable_mkldnn']}")
+        self.logger.info(
+            f"{identifier} cpu_math_library_num_threads: {self.config_status['cpu_math_library_num_threads']}"
+        )
+        self.logger.info(
+            "----------------------- Model info ----------------------")
+        self.logger.info(f"{identifier} model_name: {self.model_name}")
+        self.logger.info(f"{identifier} precision: {self.precision}")
+        self.logger.info(
+            "----------------------- Data info -----------------------")
+        self.logger.info(f"{identifier} batch_size: {self.batch_size}")
+        self.logger.info(f"{identifier} input_shape: {self.shape}")
+        self.logger.info(f"{identifier} data_num: {self.data_num}")
+        self.logger.info(
+            "----------------------- Perf info -----------------------")
+        self.logger.info(
+            f"{identifier} cpu_rss(MB): {self.cpu_rss_mb}, cpu_vms(MB): {self.cpu_vms_mb}, cpu_shared(MB): {self.cpu_shared_mb}, cpu_dirty(MB): {self.cpu_dirty_mb}, cpu_util: {self.cpu_util}%"
+        )
+        self.logger.info(
+            f"{identifier} gpu_rss(MB): {self.gpu_rss_mb}, gpu_util: {self.gpu_util}%, gpu_mem_util: {self.gpu_mem_util}%"
+        )
+        self.logger.info(
+            f"{identifier} total time spent(s): {self.total_time_s}")
+        self.logger.info(
+            f"{identifier} preprocess_time(ms): {round(self.preprocess_time_s*1000, 1)}, inference_time(ms): {round(self.inference_time_s*1000, 1)}, postprocess_time(ms): {round(self.postprocess_time_s*1000, 1)}"
+        )
+        if self.inference_time_s_90:
+            self.logger.info(
+                f"{identifier} 90%_cost: {self.inference_time_s_90}, 99%_cost: {self.inference_time_s_99}, succ_rate: {self.succ_rate}"
+            )
+        if self.qps:
+            self.logger.info(f"{identifier} QPS: {self.qps}")
+
+    def print_help(self):
+        """
+        print function help
+        """
+        print("""Usage:
+            ==== Print inference benchmark logs. ====
+            config = paddle.inference.Config()
+            model_info = {'model_name': 'resnet50',
+                          'precision': 'fp32'}
+            data_info = {'batch_size': 1,
+                         'shape': '3,224,224',
+                         'data_num': 1000}
+            perf_info = {'preprocess_time_s': 1.0,
+                         'inference_time_s': 2.0,
+                         'postprocess_time_s': 1.0,
+                         'total_time_s': 4.0}
+            resource_info = {'cpu_rss_mb': 100,
+                             'gpu_rss_mb': 100,
+                             'gpu_util': 60}
+            log = PaddleInferBenchmark(config, model_info, data_info, perf_info, resource_info)
+            log('Test')
+            """)
+
+    def __call__(self, identifier=None):
+        """
+        __call__
+        args:
+            identifier(string): identify log
+        """
+        self.report(identifier)
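Note: a minimal sketch of driving the new logger through its dict-config path follows; every name and number in it is an illustrative placeholder, not a value taken from this patch. The module imports paddle unconditionally, so Paddle must be installed even when a plain dict is passed instead of a paddle.inference.Config.

    from benchmark_utils import PaddleInferBenchmark

    # Assumed stand-in values for a CPU fp32 run; replace with real data.
    config = {
        'runtime_device': 'cpu',
        'ir_optim': True,
        'enable_tensorrt': False,
        'precision': 'fp32',
        'enable_mkldnn': False,
        'cpu_math_library_num_threads': 1
    }
    model_info = {'model_name': 'yolov3_darknet', 'precision': 'fp32'}
    data_info = {'batch_size': 1, 'shape': '3,608,608', 'data_num': 100}
    perf_info = {
        'preprocess_time_s': 0.010,
        'inference_time_s': 0.050,
        'postprocess_time_s': 0.005,
        'total_time_s': 0.065
    }
    resource_info = {'cpu_rss_mb': 512, 'gpu_rss_mb': 0, 'gpu_util': 0}

    log = PaddleInferBenchmark(config, model_info, data_info, perf_info,
                               resource_info)
    log('Det')  # writes output/yolov3_darknet.log and echoes to stdout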
diff --git a/deploy/python/infer.py b/deploy/python/infer.py
index a86beb809a7e955d22799cf67fe48c811c691e6a..ee44f61b14e6365315a48273a0f0e5c22ce95dba 100644
--- a/deploy/python/infer.py
+++ b/deploy/python/infer.py
@@ -25,9 +25,10 @@ import paddle
 from paddle.inference import Config
 from paddle.inference import create_predictor
 
+from benchmark_utils import PaddleInferBenchmark
 from preprocess import preprocess, Resize, NormalizeImage, Permute, PadStride
 from visualize import visualize_box_mask
-from utils import argsparser, Timer, get_current_memory_mb, LoggerHelper
+from utils import argsparser, Timer, get_current_memory_mb
 
 # Global dictionary
 SUPPORT_MODELS = {
@@ -69,7 +70,7 @@ class Detector(object):
                  cpu_threads=1,
                  enable_mkldnn=False):
         self.pred_config = pred_config
-        self.predictor = load_predictor(
+        self.predictor, self.config = load_predictor(
             model_dir,
             run_mode=run_mode,
             min_subgraph_size=self.pred_config.min_subgraph_size,
@@ -122,14 +123,14 @@ class Detector(object):
             MaskRCNN's results include 'masks': np.ndarray:
                 shape: [N, im_h, im_w]
         '''
-        self.det_times.preprocess_time.start()
+        self.det_times.preprocess_time_s.start()
         inputs = self.preprocess(image)
         np_boxes, np_masks = None, None
         input_names = self.predictor.get_input_names()
         for i in range(len(input_names)):
             input_tensor = self.predictor.get_input_handle(input_names[i])
             input_tensor.copy_from_cpu(inputs[input_names[i]])
-        self.det_times.preprocess_time.end()
+        self.det_times.preprocess_time_s.end()
         for i in range(warmup):
             self.predictor.run()
             output_names = self.predictor.get_output_names()
@@ -139,7 +140,7 @@ class Detector(object):
             masks_tensor = self.predictor.get_output_handle(output_names[2])
             np_masks = masks_tensor.copy_to_cpu()
 
-        self.det_times.inference_time.start()
+        self.det_times.inference_time_s.start()
         for i in range(repeats):
             self.predictor.run()
             output_names = self.predictor.get_output_names()
@@ -148,9 +149,9 @@ class Detector(object):
             if self.pred_config.mask:
                 masks_tensor = self.predictor.get_output_handle(output_names[2])
                 np_masks = masks_tensor.copy_to_cpu()
-        self.det_times.inference_time.end(repeats=repeats)
+        self.det_times.inference_time_s.end(repeats=repeats)
 
-        self.det_times.postprocess_time.start()
+        self.det_times.postprocess_time_s.start()
         results = []
         if reduce(lambda x, y: x * y, np_boxes.shape) < 6:
             print('[WARNNING] No object detected.')
@@ -158,7 +159,7 @@ class Detector(object):
         else:
             results = self.postprocess(
                 np_boxes, np_masks, inputs, threshold=threshold)
-        self.det_times.postprocess_time.end()
+        self.det_times.postprocess_time_s.end()
         self.det_times.img_num += 1
         return results
 
@@ -190,7 +191,7 @@ class DetectorSOLOv2(Detector):
                  cpu_threads=1,
                  enable_mkldnn=False):
         self.pred_config = pred_config
-        self.predictor = load_predictor(
+        self.predictor, self.config = load_predictor(
             model_dir,
             run_mode=run_mode,
             min_subgraph_size=self.pred_config.min_subgraph_size,
@@ -214,14 +215,14 @@ class DetectorSOLOv2(Detector):
                 'cate_label': label of segm, shape:[N]
                 'cate_score': confidence score of segm, shape:[N]
         '''
-        self.det_times.preprocess_time.start()
+        self.det_times.preprocess_time_s.start()
         inputs = self.preprocess(image)
         np_label, np_score, np_segms = None, None, None
         input_names = self.predictor.get_input_names()
         for i in range(len(input_names)):
             input_tensor = self.predictor.get_input_handle(input_names[i])
             input_tensor.copy_from_cpu(inputs[input_names[i]])
-        self.det_times.preprocess_time.end()
+        self.det_times.preprocess_time_s.end()
         for i in range(warmup):
             self.predictor.run()
             output_names = self.predictor.get_output_names()
@@ -232,7 +233,7 @@ class DetectorSOLOv2(Detector):
             np_segms = self.predictor.get_output_handle(output_names[
                 3]).copy_to_cpu()
 
-        self.det_times.inference_time.start()
+        self.det_times.inference_time_s.start()
         for i in range(repeats):
             self.predictor.run()
             output_names = self.predictor.get_output_names()
@@ -242,7 +243,7 @@ class DetectorSOLOv2(Detector):
                 2]).copy_to_cpu()
             np_segms = self.predictor.get_output_handle(output_names[
                 3]).copy_to_cpu()
-        self.det_times.inference_time.end(repeats=repeats)
+        self.det_times.inference_time_s.end(repeats=repeats)
         self.det_times.img_num += 1
 
         return dict(segm=np_segms, label=np_label, score=np_score)
@@ -391,7 +392,7 @@ def load_predictor(model_dir,
     # disable feed, fetch OP, needed by zero_copy_run
     config.switch_use_feed_fetch_ops(False)
     predictor = create_predictor(config)
-    return predictor
+    return predictor, config
 
 
 def get_test_images(infer_dir, infer_img):
@@ -544,9 +545,22 @@ def main():
            'gpu_rss': detector.gpu_mem / len(img_list),
            'gpu_util': detector.gpu_util * 100 / len(img_list)
        }
-        det_logger = LoggerHelper(
-            FLAGS, detector.det_times.report(average=True), mems)
-        det_logger.report()
+
+        perf_info = detector.det_times.report(average=True)
+        model_dir = FLAGS.model_dir
+        mode = FLAGS.run_mode
+        model_info = {
+            'model_name': model_dir.strip('/').split('/')[-1],
+            'precision': mode.split('_')[-1]
+        }
+        data_info = {
+            'batch_size': 1,
+            'shape': "dynamic_shape",
+            'data_num': perf_info['img_num']
+        }
+        det_log = PaddleInferBenchmark(
+            detector.config, model_info, data_info, perf_info, mems)
+        det_log('Det')
 
 
 if __name__ == '__main__':
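Note: load_predictor now returns a (predictor, config) pair, so any external caller must unpack both values. A minimal sketch under assumed arguments (the model directory is a placeholder, and the keyword values mirror the plain fp32 defaults above):

    # Hypothetical exported-model directory, for illustration only.
    predictor, config = load_predictor(
        './output/yolov3_darknet', run_mode='fluid', use_gpu=False)
    # config is the paddle.inference.Config that PaddleInferBenchmark later
    # introspects (ir_optim, TensorRT, MKLDNN, thread count).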
diff --git a/deploy/python/utils.py b/deploy/python/utils.py
index 66dbdbd0a6dff8302806b52297d036b56bb5e051..ab41bef34e436cb99593de53a574aa0a7196304c 100644
--- a/deploy/python/utils.py
+++ b/deploy/python/utils.py
@@ -131,25 +131,26 @@ class Times(object):
 class Timer(Times):
     def __init__(self):
         super(Timer, self).__init__()
-        self.preprocess_time = Times()
-        self.inference_time = Times()
-        self.postprocess_time = Times()
+        self.preprocess_time_s = Times()
+        self.inference_time_s = Times()
+        self.postprocess_time_s = Times()
         self.img_num = 0
 
     def info(self, average=False):
-        total_time = self.preprocess_time.value() + self.inference_time.value(
-        ) + self.postprocess_time.value()
+        total_time = self.preprocess_time_s.value(
+        ) + self.inference_time_s.value() + self.postprocess_time_s.value()
         total_time = round(total_time, 4)
         print("------------------ Inference Time Info ----------------------")
         print("total_time(ms): {}, img_num: {}".format(total_time * 1000,
                                                        self.img_num))
-        preprocess_time = round(self.preprocess_time.value() / self.img_num,
-                                4) if average else self.preprocess_time.value()
+        preprocess_time = round(
+            self.preprocess_time_s.value() / self.img_num,
+            4) if average else self.preprocess_time_s.value()
         postprocess_time = round(
-            self.postprocess_time.value() / self.img_num,
-            4) if average else self.postprocess_time.value()
-        inference_time = round(self.inference_time.value() / self.img_num,
-                               4) if average else self.inference_time.value()
+            self.postprocess_time_s.value() / self.img_num,
+            4) if average else self.postprocess_time_s.value()
+        inference_time = round(self.inference_time_s.value() / self.img_num,
+                               4) if average else self.inference_time_s.value()
 
         average_latency = total_time / self.img_num
         print("average latency time(ms): {:.2f}, QPS: {:2f}".format(
@@ -161,19 +162,19 @@ class Timer(Times):
 
     def report(self, average=False):
         dic = {}
-        dic['preprocess_time'] = round(
-            self.preprocess_time.value() / self.img_num,
-            4) if average else self.preprocess_time.value()
-        dic['postprocess_time'] = round(
-            self.postprocess_time.value() / self.img_num,
-            4) if average else self.postprocess_time.value()
-        dic['inference_time'] = round(
-            self.inference_time.value() / self.img_num,
-            4) if average else self.inference_time.value()
+        dic['preprocess_time_s'] = round(
+            self.preprocess_time_s.value() / self.img_num,
+            4) if average else self.preprocess_time_s.value()
+        dic['postprocess_time_s'] = round(
+            self.postprocess_time_s.value() / self.img_num,
+            4) if average else self.postprocess_time_s.value()
+        dic['inference_time_s'] = round(
+            self.inference_time_s.value() / self.img_num,
+            4) if average else self.inference_time_s.value()
         dic['img_num'] = self.img_num
-        total_time = self.preprocess_time.value() + self.inference_time.value(
-        ) + self.postprocess_time.value()
-        dic['total_time'] = round(total_time, 4)
+        total_time = self.preprocess_time_s.value(
+        ) + self.inference_time_s.value() + self.postprocess_time_s.value()
+        dic['total_time_s'] = round(total_time, 4)
         return dic
 
 
@@ -185,7 +186,7 @@ def get_current_memory_mb():
     import pynvml
     import psutil
     import GPUtil
-    gpu_id = os.environ.get('CUDA_VISIBLE_DEVICES', 0)
+    gpu_id = int(os.environ.get('CUDA_VISIBLE_DEVICES', 0))
 
     pid = os.getpid()
     p = psutil.Process(pid)
@@ -201,62 +202,3 @@ def get_current_memory_mb():
         meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
         gpu_mem = meminfo.used / 1024. / 1024.
     return round(cpu_mem, 4), round(gpu_mem, 4), round(gpu_percent, 4)
-
-
-class LoggerHelper(object):
-    def __init__(self, args, times, mem_info=None):
-        """
-        args: utility.parse_args()
-        times: The Timer class
-        """
-        self.args = args
-        self.times = times
-        self.model_name = args.model_dir.strip('/').split('/')[-1]
-        self.batch_size = 1
-        self.shape = "dynamic shape"
-        if args.run_mode == 'fluid':
-            self.precision = "fp32"
-            self.use_tensorrt = False
-        else:
-            self.precision = args.run_mode.split('_')[-1]
-            self.use_tensorrt = True
-
-        self.device = "gpu" if args.use_gpu else "cpu"
-        self.preprocess_time = round(times['preprocess_time'], 4)
-        self.inference_time = round(times['inference_time'], 4)
-        self.postprocess_time = round(times['postprocess_time'], 4)
-        self.data_num = times['img_num']
-        self.total_time = round(times['total_time'], 4)
-        self.mem_info = {"cpu_rss": 0, "gpu_rss": 0, "gpu_util": 0}
-        if mem_info is not None:
-            self.mem_info = mem_info
-
-    def report(self):
-        print("\n")
-        print("----------------------- Config info -----------------------")
-        print("runtime_device:", self.device)
-        print("ir_optim:", True)
-        print("enable_memory_optim:", True)
-        print("enable_tensorrt:", self.use_tensorrt)
-        print("precision:", self.precision)
-        print("enable_mkldnn:", self.args.enable_mkldnn)
-        print("cpu_math_library_num_threads:", self.args.cpu_threads)
-
-        print("----------------------- Model info ----------------------")
-        print("model_name:", self.model_name)
-
-        print("------------------------ Data info ----------------------")
-        print("batch_size:", self.batch_size)
-        print("input_shape:", self.shape)
-
-        print("----------------------- Perf info -----------------------")
-        print("[cpu_rss(MB): {} gpu_rss(MB): {}, gpu_util: {}%".format(
-            round(self.mem_info['cpu_rss'], 4),
-            round(self.mem_info['gpu_rss'], 4),
-            round(self.mem_info['gpu_util'], 2)))
-        print("total number of predicted data: {} and total time spent(s): {}".
-              format(self.data_num, self.total_time))
-        print(
-            "preproce_time(ms): {}, inference_time(ms): {}, postprocess_time(ms): {}".
-            format(self.preprocess_time * 1000, self.inference_time * 1000,
-                   self.postprocess_time * 1000))
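Note: a quick sanity check of the renamed Timer fields, as a minimal sketch; it assumes deploy/python is on the import path and uses a sleep as a stand-in for real work:

    import time
    from utils import Timer

    timer = Timer()
    timer.preprocess_time_s.start()
    time.sleep(0.05)  # stand-in for preprocessing
    timer.preprocess_time_s.end()
    timer.img_num += 1

    perf_info = timer.report(average=True)
    # keys now carry an explicit _s (seconds) suffix: preprocess_time_s,
    # inference_time_s, postprocess_time_s, total_time_s, plus img_num
    print(perf_info)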