diff --git a/python/examples/pipeline/PaddleClas/DarkNet53/benchmark.sh b/python/examples/pipeline/PaddleClas/DarkNet53/benchmark.sh
index ee20eccf64c1e08dff16c7427ae5107525281dee..9a27670fd2e4fb89192fc1eab5f16650c0e1a7c1 100644
--- a/python/examples/pipeline/PaddleClas/DarkNet53/benchmark.sh
+++ b/python/examples/pipeline/PaddleClas/DarkNet53/benchmark.sh
@@ -15,7 +15,7 @@ for thread_num in 1 2 4 8 12 16
 do
 for batch_size in 1
 do
-    echo "----${modelname} thread num: ${thread_num} batch size: ${batch_size} mode:http ----" >>profile_log_$modelname
+    echo "#----${modelname} thread num: ${thread_num} batch size: ${batch_size} mode:http ----" >>profile_log_$modelname
     # Start one web service, If you start the service yourself, you can ignore it here.
     #python3 web_service.py >web.log 2>&1 &
     #sleep 3
@@ -23,15 +23,15 @@ do
     # --id is the serial number of the GPU card, Must be the same as the gpu id used by the server.
     nvidia-smi --id=3 --query-gpu=memory.used --format=csv -lms 1000 > gpu_use.log 2>&1 &
     nvidia-smi --id=3 --query-gpu=utilization.gpu --format=csv -lms 1000 > gpu_utilization.log 2>&1 &
-    echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
+    echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTIL:', cpu_utilization)\n" > cpu_utilization.py
     # Start http client
     python3 benchmark.py run http $thread_num $batch_size > profile 2>&1
 
     # Collect CPU metrics, Filter data that is zero momentarily, Record the maximum value of GPU memory and the average value of GPU utilization
     python3 cpu_utilization.py >> profile_log_$modelname
     grep -av '^0 %' gpu_utilization.log > gpu_utilization.log.tmp
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "MAX_GPU_MEMORY:", max}' gpu_use.log >> profile_log_$modelname
-    awk -F' ' '{sum+=$1} END {print "GPU_UTILIZATION:", sum/NR, sum, NR }' gpu_utilization.log.tmp >> profile_log_$modelname
+    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
+    awk -F' ' '{sum+=$1} END {print "GPU_UTIL:", sum/NR, sum, NR }' gpu_utilization.log.tmp >> profile_log_$modelname
 
     # Show profiles
     python3 ../../../util/show_profile.py profile $thread_num >> profile_log_$modelname
diff --git a/python/paddle_serving_client/utils/__init__.py b/python/paddle_serving_client/utils/__init__.py
index 8af434cc7d08ca14aef7df2329e8656da930c0ce..8b3bb8c0f584505f6920647c3798c10722fe8ecf 100644
--- a/python/paddle_serving_client/utils/__init__.py
+++ b/python/paddle_serving_client/utils/__init__.py
@@ -41,9 +41,9 @@ def show_latency(latency_list):
     info = "latency:\n"
     info += "mean: {}ms\n".format(np.mean(latency_array))
     info += "median: {}ms\n".format(np.median(latency_array))
-    info += "80 percent: {}ms\n".format(np.percentile(latency_array, 80))
-    info += "90 percent: {}ms\n".format(np.percentile(latency_array, 90))
-    info += "99 percent: {}ms\n".format(np.percentile(latency_array, 99))
+    info += "80_percent: {}ms\n".format(np.percentile(latency_array, 80))
+    info += "90_percent: {}ms\n".format(np.percentile(latency_array, 90))
+    info += "99_percent: {}ms\n".format(np.percentile(latency_array, 99))
     sys.stderr.write(info)
 
 
diff --git a/python/paddle_serving_server/benchmark_utils.py b/python/paddle_serving_server/benchmark_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..b327dcc0f09194e231d30b3838687d920523c888
--- /dev/null
+++ b/python/paddle_serving_server/benchmark_utils.py
@@ -0,0 +1,281 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import os
+import time
+import logging
+
+import paddle
+import paddle.inference as paddle_infer
+
+from pathlib import Path
+
+CUR_DIR = os.path.dirname(os.path.abspath(__file__))
+#LOG_PATH_ROOT = f"{CUR_DIR}/../../tools/output"
+LOG_PATH_ROOT = f"."
+
+
+class PaddleInferBenchmark(object):
+    def __init__(self,
+                 config,
+                 model_info: dict={},
+                 data_info: dict={},
+                 perf_info: dict={},
+                 resource_info: dict={},
+                 **kwargs):
+        """
+        Construct PaddleInferBenchmark Class to format logs.
+        args:
+            config(paddle.inference.Config): paddle inference config
+            model_info(dict): basic model info
+                {'model_name': 'resnet50',
+                 'precision': 'fp32'}
+            data_info(dict): input data info
+                {'batch_size': 1,
+                 'shape': '3,224,224',
+                 'data_num': 1000}
+            perf_info(dict): performance result
+                {'preprocess_time_s': 1.0,
+                 'inference_time_s': 2.0,
+                 'postprocess_time_s': 1.0,
+                 'total_time_s': 4.0}
+            resource_info(dict): cpu and gpu resources
+                {'cpu_rss': 100,
+                 'gpu_rss': 100,
+                 'gpu_util': 60}
+        """
+        # PaddleInferBenchmark Log Version
+        self.log_version = "1.0.3"
+
+        # Paddle Version
+        self.paddle_version = paddle.__version__
+        self.paddle_commit = paddle.__git_commit__
+        paddle_infer_info = paddle_infer.get_version()
+        self.paddle_branch = paddle_infer_info.strip().split(': ')[-1]
+
+        # model info
+        self.model_info = model_info
+
+        # data info
+        self.data_info = data_info
+
+        # perf info
+        self.perf_info = perf_info
+
+        try:
+            # required value
+            self.model_name = model_info['model_name']
+            self.precision = model_info['precision']
+
+            self.batch_size = data_info['batch_size']
+            self.shape = data_info['shape']
+            self.data_num = data_info['data_num']
+
+            self.inference_time_s = round(perf_info['inference_time_s'], 4)
+        except:
+            self.print_help()
+            raise ValueError(
+                "Set argument wrong, please check input argument and its type")
+
+        self.preprocess_time_s = perf_info.get('preprocess_time_s', 0)
+        self.postprocess_time_s = perf_info.get('postprocess_time_s', 0)
+        self.total_time_s = perf_info.get('total_time_s', 0)
+
+        self.inference_time_s_90 = perf_info.get("inference_time_s_90", 0)
+        self.inference_time_s_99 = perf_info.get("inference_time_s_99", 0)
+        self.succ_rate = perf_info.get("succ_rate", "")
+        self.qps = perf_info.get("qps", "")
+
+        # conf info
+        self.config_status = self.parse_config(config)
+
+        # mem info
+        if isinstance(resource_info, dict):
+            self.cpu_rss_mb = int("-1" if 'cpu_rss_mb' not in resource_info or resource_info.get('cpu_rss_mb').strip()=="" else resource_info.get('cpu_rss_mb', 0))
+            self.cpu_vms_mb = int("-1" if 'cpu_vms_mb' not in resource_info or resource_info.get('cpu_vms_mb').strip()=="" else resource_info.get('cpu_vms_mb', 0))
+            self.cpu_shared_mb = int("-1" if 'cpu_shared_mb' not in resource_info or resource_info.get('cpu_shared_mb').strip()=="" else resource_info.get('cpu_shared_mb', 0))
+            self.cpu_dirty_mb = int("-1" if 'cpu_dirty_mb' not in resource_info or resource_info.get('cpu_dirty_mb').strip()=="" else resource_info.get('cpu_dirty_mb', 0))
+            self.cpu_util = round(resource_info.get('cpu_util', 0), 2)
+
+            self.gpu_rss_mb = int("-1" if 'gpu_rss_mb' not in resource_info or resource_info.get('gpu_rss_mb').strip()=="" else resource_info.get('gpu_rss_mb', 0))
+            #self.gpu_util = round(resource_info.get('gpu_util', 0), 2)
+            self.gpu_util = resource_info.get('gpu_util', 0)
+            #self.gpu_mem_util = round(resource_info.get('gpu_mem_util', 0), 2)
+            self.gpu_mem_util = resource_info.get('gpu_mem_util', 0)
+        else:
+            self.cpu_rss_mb = 0
+            self.cpu_vms_mb = 0
+            self.cpu_shared_mb = 0
+            self.cpu_dirty_mb = 0
+            self.cpu_util = 0
+
+            self.gpu_rss_mb = 0
+            self.gpu_util = 0
+            self.gpu_mem_util = 0
+
+        # init benchmark logger
+        self.benchmark_logger()
+
+    def benchmark_logger(self):
+        """
+        benchmark logger
+        """
+        # remove other logging handler
+        for handler in logging.root.handlers[:]:
+            logging.root.removeHandler(handler)
+
+        # Init logger
+        FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+        log_output = f"{LOG_PATH_ROOT}/{self.model_name}.log"
+        Path(f"{LOG_PATH_ROOT}").mkdir(parents=True, exist_ok=True)
+        logging.basicConfig(
+            level=logging.INFO,
+            format=FORMAT,
+            handlers=[
+                logging.FileHandler(
+                    filename=log_output, mode='w'),
+                logging.StreamHandler(),
+            ])
+        self.logger = logging.getLogger(__name__)
+        self.logger.info(
+            f"Paddle Inference benchmark log will be saved to {log_output}")
+
+    def parse_config(self, config) -> dict:
+        """
+        parse paddle predictor config
+        args:
+            config(paddle.inference.Config): paddle inference config
+        return:
+            config_status(dict): dict style config info
+        """
+        if isinstance(config, paddle_infer.Config):
+            config_status = {}
+            config_status['runtime_device'] = "gpu" if config.use_gpu(
+            ) else "cpu"
+            config_status['ir_optim'] = config.ir_optim()
+            config_status['enable_tensorrt'] = config.tensorrt_engine_enabled()
+            config_status['precision'] = self.precision
+            config_status['enable_mkldnn'] = config.mkldnn_enabled()
+            config_status[
+                'cpu_math_library_num_threads'] = config.cpu_math_library_num_threads(
+                )
+        elif isinstance(config, dict):
+            config_status = {}
+            config_status['runtime_device'] = config.get('runtime_device', "")
+            config_status['ir_optim'] = config.get('ir_optim', "")
+            config_status['enable_tensorrt'] = config.get('enable_tensorrt', "")
+            config_status['precision'] = config.get('precision', "")
+            config_status['enable_mkldnn'] = config.get('enable_mkldnn', "")
+            config_status['cpu_math_library_num_threads'] = config.get(
+                'cpu_math_library_num_threads', "")
+        else:
+            self.print_help()
+            raise ValueError(
+                "Set argument config wrong, please check input argument and its type"
+            )
+        return config_status
+
+    def report(self, identifier=None):
+        """
+        print log report
+        args:
+            identifier(string): identify log
+        """
+        if identifier:
+            identifier = f"[{identifier}]"
+        else:
+            identifier = ""
+
+        self.logger.info("\n")
+        self.logger.info(
+            "---------------------- Paddle info ----------------------")
+        self.logger.info(f"{identifier} paddle_version: {self.paddle_version}")
+        self.logger.info(f"{identifier} paddle_commit: {self.paddle_commit}")
+        self.logger.info(f"{identifier} paddle_branch: {self.paddle_branch}")
+        self.logger.info(f"{identifier} log_api_version: {self.log_version}")
+        self.logger.info(
+            "----------------------- Conf info -----------------------")
+        self.logger.info(
+            f"{identifier} runtime_device: {self.config_status['runtime_device']}"
+        )
+        self.logger.info(
+            f"{identifier} ir_optim: {self.config_status['ir_optim']}")
+        self.logger.info(f"{identifier} enable_memory_optim: {True}")
+        self.logger.info(
+            f"{identifier} enable_tensorrt: {self.config_status['enable_tensorrt']}"
+        )
+        self.logger.info(
+            f"{identifier} enable_mkldnn: {self.config_status['enable_mkldnn']}")
+        self.logger.info(
+            f"{identifier} cpu_math_library_num_threads: {self.config_status['cpu_math_library_num_threads']}"
+        )
+        self.logger.info(
+            "----------------------- Model info ----------------------")
+        self.logger.info(f"{identifier} model_name: {self.model_name}")
+        self.logger.info(f"{identifier} precision: {self.precision}")
+        self.logger.info(
+            "----------------------- Data info -----------------------")
+        self.logger.info(f"{identifier} batch_size: {self.batch_size}")
+        self.logger.info(f"{identifier} input_shape: {self.shape}")
+        self.logger.info(f"{identifier} data_num: {self.data_num}")
+        self.logger.info(
+            "----------------------- Perf info -----------------------")
+        self.logger.info(
+            f"{identifier} cpu_rss(MB): {self.cpu_rss_mb}, cpu_vms: {self.cpu_vms_mb}, cpu_shared_mb: {self.cpu_shared_mb}, cpu_dirty_mb: {self.cpu_dirty_mb}, cpu_util: {self.cpu_util}%"
+        )
+        self.logger.info(
+            f"{identifier} gpu_rss(MB): {self.gpu_rss_mb}, gpu_util: {self.gpu_util}%, gpu_mem_util: {self.gpu_mem_util}%"
+        )
+        self.logger.info(
+            f"{identifier} total time spent(s): {self.total_time_s}")
+        self.logger.info(
+            f"{identifier} preprocess_time(ms): {self.preprocess_time_s}, inference_time(ms): {self.inference_time_s}, postprocess_time(ms): {self.postprocess_time_s}"
+        )
+        if self.inference_time_s_90:
+            self.logger.info(
+                f"{identifier} 90%_cost: {self.inference_time_s_90}, 99%_cost: {self.inference_time_s_99}, succ_rate: {self.succ_rate}"
+            )
+        if self.qps:
+            self.logger.info(f"{identifier} QPS: {self.qps}")
+
+    def print_help(self):
+        """
+        print function help
+        """
+        print("""Usage:
+            ==== Print inference benchmark logs. ====
+            config = paddle.inference.Config()
+            model_info = {'model_name': 'resnet50',
+                          'precision': 'fp32'}
+            data_info = {'batch_size': 1,
+                         'shape': '3,224,224',
+                         'data_num': 1000}
+            perf_info = {'preprocess_time_s': 1.0,
+                         'inference_time_s': 2.0,
+                         'postprocess_time_s': 1.0,
+                         'total_time_s': 4.0}
+            resource_info = {'cpu_rss_mb': 100,
+                             'gpu_rss_mb': 100,
+                             'gpu_util': 60}
+            log = PaddleInferBenchmark(config, model_info, data_info, perf_info, resource_info)
+            log('Test')
+            """)
+
+    def __call__(self, identifier=None):
+        """
+        __call__
+        args:
+            identifier(string): identify log
+        """
+        self.report(identifier)
diff --git a/python/paddle_serving_server/parse_profile.py b/python/paddle_serving_server/parse_profile.py
new file mode 100644
index 0000000000000000000000000000000000000000..37e801c255272778c6926642beabdcf2f3f92cf0
--- /dev/null
+++ b/python/paddle_serving_server/parse_profile.py
@@ -0,0 +1,115 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import os
+import yaml
+import argparse
+
+from .benchmark_utils import PaddleInferBenchmark
+"""
+{'CPU_UTILIZATION': 0.8, 'MAX_GPU_MEMORY': 0, 'GPU_UTILIZATION': '0 %', 'DAG': {'50': 670.256, '60': 670.256, '70': 670.765, '80': 671.23, '90': 687.546, '95': 687.546, '99': 687.546, 'avg': 670.755625, 'qps': 0.8, 'query_count': 8, 'succ': 1.0}, 'demo': {'midp': 669.484375, 'postp': 0.184875, 'prep': 1.001875}}
+"""
+
+
+class LogHandler(object):
+    def __init__(self):
+        self.fstr = ""
+
+    def print(self):
+        print(self.fstr)
+
+    def dump(self, filename):
+        with open(filename, 'w') as fout:
+            fout.write(self.fstr)
+
+    def append(self, new_str):
+        self.fstr += new_str + "\n"
+
+
+def handle_benchmark(benchmark_config, benchmark_raw, identifier):
+    model_info = {
+        'model_name': benchmark_config["model_name"],
+        'precision': benchmark_config["precision"]
+    }
+    data_info = {
+        'batch_size': benchmark_config["batch_size"],
+        'shape': benchmark_config["input_shape"],
+        'data_num': benchmark_config["num_of_samples"]
+    }
+    perf_info = {
+        'preprocess_time_s': "",
+        'inference_time_s': float(benchmark_raw["median"][0:-2]) / 1000,  # *** ms
+        'postprocess_time_s': "",
+        'total_time_s': "",
+        'inference_time_s_90': float(benchmark_raw["90_percent"][0:-2]) / 1000,  # *** ms
+        'inference_time_s_99': float(benchmark_raw["99_percent"][0:-2]) / 1000,  # *** ms
+        'qps': benchmark_raw["AVG_QPS"]
+    }
+    resource_info = {
+        'cpu_rss_mb': "",
+        'cpu_vms_mb': "",
+        'cpu_shared_mb': "",
+        'cpu_dirty_mb': "",
+        'cpu_util': benchmark_raw["CPU_UTIL"],
+        'gpu_rss_mb': "",
+        'gpu_util': benchmark_raw["GPU_UTIL"],
+        'gpu_mem_util': benchmark_raw["GPU_MEM"]
+    }
+
+    server_log = PaddleInferBenchmark(
+        benchmark_config, model_info, data_info, perf_info, resource_info)
+    server_log(identifier)
+
+
+def parse_args():  # pylint: disable=doc-string-missing
+    parser = argparse.ArgumentParser("serve")
+    parser.add_argument(
+        "--benchmark_cfg",
+        type=str,
+        required=True,
+        help="benchmark config yaml. including general info, model info, data info, conf info"
+    )
+    parser.add_argument(
+        "--benchmark_log",
+        type=str,
+        required=True,
+        help="benchmark log, generated by a web service or pipeline.")
+    parser.add_argument(
+        "--output",
+        type=str,
+        default="std_benchmark.log",
+        help="the output filename, default std_benchmark.log")
+    return parser.parse_args()
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    benchmark_cfg_filename = args.benchmark_cfg
+    f = open(benchmark_cfg_filename, 'r')
+    benchmark_config = yaml.safe_load(f)
+    f.close()
+    benchmark_log_filename = args.benchmark_log
+    f = open(benchmark_log_filename, 'r')
+    lines = f.readlines()
+    line_no = 0
+    while line_no < len(lines):
+        if len(lines[line_no]) > 5 and lines[line_no].startswith("#---"):
+            iden = lines[line_no][5:-5]
+            line_no += 1
+            sub_log = lines[line_no:line_no + 13]
+            sub_dict = yaml.safe_load("".join(sub_log))
+            handle_benchmark(benchmark_config, sub_dict, iden)
+            line_no += 13
+        else:
+            line_no += 1
+
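
Usage note (not part of the patch): the snippet below is a minimal sketch of how the new parse_profile.py can be driven, inferred from the code above. The YAML file name and field values are hypothetical placeholders; the keys (model_name, precision, batch_size, input_shape, num_of_samples, runtime_device) are the ones handle_benchmark() and parse_config() read. Because parse_profile.py imports benchmark_utils with a relative import, running it directly as a script would fail; invoking it with -m (assuming the paddle_serving_server package is importable) avoids that.

    # benchmark_cfg.yaml -- hypothetical example values
    model_name: DarkNet53
    precision: fp32
    batch_size: 1
    input_shape: 3,224,224
    num_of_samples: 1000
    runtime_device: gpu

    # profile_log_DarkNet53 is written by benchmark.sh above; each "#----" header
    # line is followed by the 13 metric lines (median/90_percent/99_percent latency,
    # AVG_QPS, CPU_UTIL, GPU_UTIL, GPU_MEM, ...) that parse_profile.py loads as YAML
    # and feeds to PaddleInferBenchmark.
    python3 -m paddle_serving_server.parse_profile \
        --benchmark_cfg benchmark_cfg.yaml \
        --benchmark_log profile_log_DarkNet53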