From 6e6433127c90d17093658a1124d8b07238a650e7 Mon Sep 17 00:00:00 2001 From: bjjwwang Date: Thu, 22 Apr 2021 16:01:08 +0000 Subject: [PATCH] fix imagenet --- python/examples/bert/bert_web_service.py | 2 +- python/examples/bert/new_benchmark.py | 4 +- python/examples/bert/new_benchmark.sh | 12 +- python/examples/bert/parse_profile.py | 44 ++++-- .../examples/pipeline/imagenet/benchmark.py | 98 ++++++++++++++ .../examples/pipeline/imagenet/benchmark.sh | 36 +++++ .../pipeline/imagenet/benchmark_config.yaml | 32 +++++ python/examples/pipeline/imagenet/config.yml | 4 +- .../pipeline/imagenet/pipeline_http_client.py | 19 +++ .../pipeline/imagenet/pipeline_rpc_client.py | 2 +- .../pipeline/imagenet/resnet50_web_service.py | 3 +- python/paddle_serving_server/parse_profile.py | 126 ++++++++++++++++++ python/paddle_serving_server/profiler.py | 44 ++++++ python/paddle_serving_server/version.py | 2 +- .../parse_profile.py | 126 ++++++++++++++++++ python/paddle_serving_server_gpu/profiler.py | 42 ++++++ python/paddle_serving_server_gpu/version.py | 2 +- 17 files changed, 573 insertions(+), 25 deletions(-) create mode 100644 python/examples/pipeline/imagenet/benchmark.py create mode 100644 python/examples/pipeline/imagenet/benchmark.sh create mode 100644 python/examples/pipeline/imagenet/benchmark_config.yaml create mode 100644 python/examples/pipeline/imagenet/pipeline_http_client.py create mode 100644 python/paddle_serving_server/parse_profile.py create mode 100644 python/paddle_serving_server_gpu/parse_profile.py diff --git a/python/examples/bert/bert_web_service.py b/python/examples/bert/bert_web_service.py index a5674e13..4b61d0e6 100644 --- a/python/examples/bert/bert_web_service.py +++ b/python/examples/bert/bert_web_service.py @@ -44,7 +44,7 @@ class BertService(WebService): return feed_dict, fetch, is_batch bert_service = BertService(name="bert") -bert_service.setup_profile(30) +bert_service.setup_profile(10) bert_service.load() bert_service.load_model_config(sys.argv[1]) bert_service.prepare_server( diff --git a/python/examples/bert/new_benchmark.py b/python/examples/bert/new_benchmark.py index ab367ce0..cd838b31 100644 --- a/python/examples/bert/new_benchmark.py +++ b/python/examples/bert/new_benchmark.py @@ -25,7 +25,7 @@ def run_http(idx, batch_size): {"feed":[{"words": "hello"}], "fetch":["pooled_output"]} """ print("start thread ({})".format(idx)) - url = "http://127.0.0.1:9292/bert/prediction" + url = "http://127.0.0.1:9696/bert/prediction" start = time.time() with open("data-c.txt", 'r') as fin: start = time.time() @@ -39,7 +39,7 @@ def run_http(idx, batch_size): r = requests.post(url=url, data=json.dumps(data), headers={"Content-Type": "application/json"}) start_idx += batch_size end = time.time() - if end - start > 40: + if end - start > 15: break end = time.time() return [[end - start]] diff --git a/python/examples/bert/new_benchmark.sh b/python/examples/bert/new_benchmark.sh index 9373be30..59c33326 100644 --- a/python/examples/bert/new_benchmark.sh +++ b/python/examples/bert/new_benchmark.sh @@ -1,20 +1,22 @@ +export FLAGS_profile_pipeline=1 modelname="bert" # HTTP ps -ef | grep web_service | awk '{print $2}' | xargs kill -9 sleep 3 rm -rf profile_log_$modelname -for thread_num in 1 8 16 +for thread_num in 1 do - for batch_size in 1 10 100 + for batch_size in 1 do - python3.7 bert_web_service.py bert_seq128_model/ 9292 & + python3.7 bert_web_service.py bert_seq128_model/ 9696 & sleep 3 - echo "----Bert thread num: $thread_num batch size: $batch_size mode:http ----" 
>>profile_log_$modelname + echo "#----Bert thread num: $thread_num batch size: $batch_size mode:http ----" >>profile_log_$modelname nvidia-smi --id=2 --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 & nvidia-smi --id=2 --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 & echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py python3.7 new_benchmark.py run $thread_num $batch_size - python3.7 cpu_utilization.py >>profile_log_$modelname + #python3.7 cpu_utilization.py >>profile_log_$modelname + python3.7 -m paddle_serving_server_gpu.profiler --use_gpu --gpu_id 0 >>profile_log_$modelname ps -ef | grep web_service | awk '{print $2}' | xargs kill -9 python3.7 new_benchmark.py dump benchmark.log benchmark.tmp mv benchmark.tmp benchmark.log diff --git a/python/examples/bert/parse_profile.py b/python/examples/bert/parse_profile.py index 12484d6f..7b47a375 100644 --- a/python/examples/bert/parse_profile.py +++ b/python/examples/bert/parse_profile.py @@ -1,7 +1,10 @@ import sys import os import yaml - +import argparse +""" +{'CPU_UTILIZATION': 0.8, 'MAX_GPU_MEMORY': 0, 'GPU_UTILIZATION': '0 %', 'DAG': {'50': 670.256, '60': 670.256, '70': 670.765, '80': 671.23, '90': 687.546, '95': 687.546, '99': 687.546, 'avg': 670.755625, 'qps': 0.8, 'query_count': 8, 'succ': 1.0}, 'demo': {'midp': 669.484375, 'postp': 0.184875, 'prep': 1.001875}} +""" class LogHandler(object): def __init__(self): self.fstr = "" @@ -9,24 +12,45 @@ class LogHandler(object): def print(self): print(self.fstr) - def dump(self): - with open("inference_profile.log",'w') as fout: + def dump(self, filename): + with open(filename,'w') as fout: fout.write(self.fstr) def append(self, new_str): self.fstr += new_str + "\n" +def parse_args(): # pylint: disable=doc-string-missing + parser = argparse.ArgumentParser("serve") + parser.add_argument( + "--benchmark_cfg", type=str, required=True, help="benchmark config yaml. 
including general info, model info, data info, conf info") + parser.add_argument( + "--benchmark_log", + type=str, required=True, + help="benchmark log, generated by a web service or pipeline.") + parser.add_argument( + "--output", + type=str, + default="std_benchmark.log", + help="the output filename, default std_benchmark.log") + return parser.parse_args() + if __name__ == "__main__": - filename = sys.argv[1] - f = open(filename, 'r') + args = parse_args() + benchmark_cfg_filename = args.benchmark_cfg + f = open(benchmark_cfg_filename, 'r') config = yaml.load(f) + f.close() + benchmark_raw_filename = args.benchmark_log + f = open(benchmark_raw_filename, 'r') + benchmark_raw = yaml.load(f) + f.close() ## general info cuda_version = config["cuda_version"] cudnn_version = config["cudnn_version"] trt_version = config["cudnn_version"] python_version = config["python_version"] gcc_version = config["gcc_version"] - paddle_version = config["paddle_servion"] + paddle_version = config["paddle_version"] cpu = config["cpu"] gpu = config["gpu"] xpu = config["xpu"] @@ -53,10 +77,10 @@ if __name__ == "__main__": acc1 = "Nan" acc5 = "Nan" ## perf info - average_latency, QPS = "", "" + average_latency, QPS = benchmark_raw["DAG"]["avg"], benchmark_raw["DAG"]["qps"] process_latency = "" - cpu_rss, vms, shared, dirty, cpu_usage = "", "", "", "", "" - gpu_id, total, free, used, gpu_utilization_rate, gpu_mem_utilization_rate = "","","","","", "" + cpu_rss, vms, shared, dirty, cpu_usage = "", "", "", "", benchmark_raw["CPU_UTILIZATION"] + gpu_id, total, free, used, gpu_utilization_rate, gpu_mem_utilization_rate = "","","","", benchmark_raw["GPU_UTILIZATION"], benchmark_raw["MAX_GPU_MEMORY"] fh = LogHandler() @@ -97,4 +121,4 @@ if __name__ == "__main__": fh.append("process_name: clas_benchmark, cpu_rss(MB): {}, vms(MB): {}, shared(MB): {}, dirty(MB): {}, cpu_usage(%): {}".format(cpu_rss, vms, shared, dirty, cpu_usage)) fh.append("gpu_id: {}, total(MB): {}, free(MB): {}, used(MB): {}, gpu_utilization_rate(%): {}, gpu_mem_utilization_rate(%): {}".format(gpu_id, total, free, used, gpu_utilization_rate, gpu_mem_utilization_rate)) -fh.dump() + fh.dump(args.output) diff --git a/python/examples/pipeline/imagenet/benchmark.py b/python/examples/pipeline/imagenet/benchmark.py new file mode 100644 index 00000000..2419a9cb --- /dev/null +++ b/python/examples/pipeline/imagenet/benchmark.py @@ -0,0 +1,98 @@ +import sys +import os +import base64 +import yaml +import requests +import time +import json +try: + from paddle_serving_server_gpu.pipeline import PipelineClient +except ImportError: + from paddle_serving_server.pipeline import PipelineClient +import numpy as np +from paddle_serving_client.utils import MultiThreadRunner +from paddle_serving_client.utils import benchmark_args, show_latency +def parse_benchmark(filein, fileout): + with open(filein, "r") as fin: + res = yaml.load(fin) + del_list = [] + for key in res["DAG"].keys(): + if "call" in key: + del_list.append(key) + for key in del_list: + del res["DAG"][key] + with open(fileout, "w") as fout: + yaml.dump(res, fout, default_flow_style=False) + +def gen_yml(device, gpu_id): + fin = open("config.yml", "r") + config = yaml.load(fin) + fin.close() + config["dag"]["tracer"] = {"interval_s": 10} + if device == "gpu": + config["op"]["imagenet"]["local_service_conf"]["device_type"] = 1 + config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id + with open("config2.yml", "w") as fout: + yaml.dump(config, fout, default_flow_style=False) + +def 
cv2_to_base64(image): + return base64.b64encode(image).decode('utf8') + +def run_http(idx, batch_size): + print("start thread ({})".format(idx)) + url = "http://127.0.0.1:18080/imagenet/prediction" + start = time.time() + + with open(os.path.join(".", "daisy.jpg"), 'rb') as file: + image_data1 = file.read() + image = cv2_to_base64(image_data1) + data = {"key": ["image"], "value": [image]} + for i in range(100): + r = requests.post(url=url, data=json.dumps(data)) + end = time.time() + return [[end - start]] + +def multithread_http(thread, batch_size): + multi_thread_runner = MultiThreadRunner() + result = multi_thread_runner.run(run_http , thread, batch_size) + +def run_rpc(thread, batch_size): + client = PipelineClient() + client.connect(['127.0.0.1:18090']) + start = time.time() + test_img_dir = "imgs/" + for img_file in os.listdir(test_img_dir): + with open(os.path.join(test_img_dir, img_file), 'rb') as file: + image_data = file.read() + image = cv2_to_base64(image_data) + + for i in range(100): + ret = client.predict(feed_dict={"image": image}, fetch=["res"]) + end = time.time() + return [[end - start]] + + +def multithread_rpc(thraed, batch_size): + multi_thread_runner = MultiThreadRunner() + result = multi_thread_runner.run(run_rpc , thread, batch_size) + +if __name__ == "__main__": + if sys.argv[1] == "yaml": + mode = sys.argv[2] # brpc/ local predictor + thread = int(sys.argv[3]) + device = sys.argv[4] + gpu_id = sys.argv[5] + gen_yml(device, gpu_id) + elif sys.argv[1] == "run": + mode = sys.argv[2] # http/ rpc + thread = int(sys.argv[3]) + batch_size = int(sys.argv[4]) + if mode == "http": + multithread_http(thread, batch_size) + elif mode == "rpc": + multithread_rpc(thread, batch_size) + elif sys.argv[1] == "dump": + filein = sys.argv[2] + fileout = sys.argv[3] + parse_benchmark(filein, fileout) + diff --git a/python/examples/pipeline/imagenet/benchmark.sh b/python/examples/pipeline/imagenet/benchmark.sh new file mode 100644 index 00000000..572c029e --- /dev/null +++ b/python/examples/pipeline/imagenet/benchmark.sh @@ -0,0 +1,36 @@ +export FLAGS_profile_pipeline=1 +alias python3="python3.7" +modelname="imagenet" +gpu_id="0" +benchmark_config_filename="benchmark_config.yaml" + +# HTTP +ps -ef | grep web_service | awk '{print $2}' | xargs kill -9 +sleep 3 +python3 benchmark.py yaml local_predictor 1 gpu $gpu_id +rm -rf profile_log_$modelname +for thread_num in 1 +do + for batch_size in 1 + do + echo "#----imagenet thread num: $thread_num batch size: $batch_size mode:http ----" >>profile_log_$modelname + rm -rf PipelineServingLogs + rm -rf cpu_utilization.py + python3 resnet50_web_service.py >web.log 2>&1 & + sleep 3 + nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 & + nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 & + echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py + python3 benchmark.py run http $thread_num $batch_size + python3 cpu_utilization.py >>profile_log_$modelname + python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname + ps -ef | grep web_service | awk '{print $2}' | xargs kill -9 + python3 benchmark.py dump benchmark.log benchmark.tmp + mv benchmark.tmp benchmark.log + awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname + awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) 
max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname + cat benchmark.log >> profile_log_$modelname + python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname + #rm -rf gpu_use.log gpu_utilization.log + done +done diff --git a/python/examples/pipeline/imagenet/benchmark_config.yaml b/python/examples/pipeline/imagenet/benchmark_config.yaml new file mode 100644 index 00000000..6b41400c --- /dev/null +++ b/python/examples/pipeline/imagenet/benchmark_config.yaml @@ -0,0 +1,32 @@ + +cuda_version: "10.1" +cudnn_version: "7.6" +trt_version: "6.0" +python_version: "3.7" +gcc_version: "8.2" +paddle_version: "2.0.2" + +cpu: "Xeon 6148" +gpu: "P4" +xpu: "None" +api: "" +owner: "wangjiawei04" + +model_name: "imagenet" +model_type: "static" +model_source: "paddleclas" +model_url: "" + +batch_size: 1 +num_of_samples: 1000 +input_shape: "128,1" + +runtime_device: "gpu" +ir_optim: true +enable_memory_optim: true +enable_tensorrt: false +precision: "fp32" +enable_mkldnn: true +cpu_math_library_num_threads: "" + + diff --git a/python/examples/pipeline/imagenet/config.yml b/python/examples/pipeline/imagenet/config.yml index 6e48018f..ea865359 100644 --- a/python/examples/pipeline/imagenet/config.yml +++ b/python/examples/pipeline/imagenet/config.yml @@ -3,8 +3,8 @@ worker_num: 1 #http端口, rpc_port和http_port不允许同时为空。当rpc_port可用且http_port为空时,不自动生成http_port -http_port: 18082 -rpc_port: 9999 +http_port: 18080 +rpc_port: 9993 dag: #op资源类型, True, 为线程模型;False,为进程模型 diff --git a/python/examples/pipeline/imagenet/pipeline_http_client.py b/python/examples/pipeline/imagenet/pipeline_http_client.py new file mode 100644 index 00000000..bc3fab25 --- /dev/null +++ b/python/examples/pipeline/imagenet/pipeline_http_client.py @@ -0,0 +1,19 @@ +import numpy as np +import requests +import json +import cv2 +import base64 +import os + +def cv2_to_base64(image): + return base64.b64encode(image).decode('utf8') + +if __name__ == "__main__": + url = "http://127.0.0.1:18080/imagenet/prediction" + with open(os.path.join(".", "daisy.jpg"), 'rb') as file: + image_data1 = file.read() + image = cv2_to_base64(image_data1) + data = {"key": ["image"], "value": [image]} + for i in range(100): + r = requests.post(url=url, data=json.dumps(data)) + print(r.json()) diff --git a/python/examples/pipeline/imagenet/pipeline_rpc_client.py b/python/examples/pipeline/imagenet/pipeline_rpc_client.py index 77157359..34a08f4b 100644 --- a/python/examples/pipeline/imagenet/pipeline_rpc_client.py +++ b/python/examples/pipeline/imagenet/pipeline_rpc_client.py @@ -23,7 +23,7 @@ import base64 import os client = PipelineClient() -client.connect(['127.0.0.1:9999']) +client.connect(['127.0.0.1:9993']) def cv2_to_base64(image): diff --git a/python/examples/pipeline/imagenet/resnet50_web_service.py b/python/examples/pipeline/imagenet/resnet50_web_service.py index ece3befe..635636cd 100644 --- a/python/examples/pipeline/imagenet/resnet50_web_service.py +++ b/python/examples/pipeline/imagenet/resnet50_web_service.py @@ -46,7 +46,6 @@ class ImagenetOp(Op): return {"image": img[np.newaxis, :].copy()}, False, None, "" def postprocess(self, input_dicts, fetch_dict, log_id): - print(fetch_dict) score_list = fetch_dict["score"] result = {"label": [], "prob": []} for score in score_list: @@ -67,5 +66,5 @@ class ImageService(WebService): uci_service = ImageService(name="imagenet") -uci_service.prepare_pipeline_config("config.yml") 
+uci_service.prepare_pipeline_config("config2.yml") uci_service.run_service() diff --git a/python/paddle_serving_server/parse_profile.py b/python/paddle_serving_server/parse_profile.py new file mode 100644 index 00000000..2a409c8a --- /dev/null +++ b/python/paddle_serving_server/parse_profile.py @@ -0,0 +1,126 @@ +import sys +import os +import yaml +import argparse +""" +{'CPU_UTILIZATION': 0.8, 'MAX_GPU_MEMORY': 0, 'GPU_UTILIZATION': '0 %', 'DAG': {'50': 670.256, '60': 670.256, '70': 670.765, '80': 671.23, '90': 687.546, '95': 687.546, '99': 687.546, 'avg': 670.755625, 'qps': 0.8, 'query_count': 8, 'succ': 1.0}, 'demo': {'midp': 669.484375, 'postp': 0.184875, 'prep': 1.001875}} +""" +class LogHandler(object): + def __init__(self): + self.fstr = "" + + def print(self): + print(self.fstr) + + def dump(self, filename): + with open(filename,'w') as fout: + fout.write(self.fstr) + + def append(self, new_str): + self.fstr += new_str + "\n" + +def parse_args(): # pylint: disable=doc-string-missing + parser = argparse.ArgumentParser("serve") + parser.add_argument( + "--benchmark_cfg", type=str, required=True, help="benchmark config yaml. including general info, model info, data info, conf info") + parser.add_argument( + "--benchmark_log", + type=str, required=True, + help="benchmark log, generated by a web service or pipeline.") + parser.add_argument( + "--output", + type=str, + default="std_benchmark.log", + help="the output filename, default std_benchmark.log") + return parser.parse_args() + +if __name__ == "__main__": + args = parse_args() + benchmark_cfg_filename = args.benchmark_cfg + f = open(benchmark_cfg_filename, 'r') + config = yaml.load(f) + f.close() + benchmark_raw_filename = args.benchmark_log + f = open(benchmark_raw_filename, 'r') + benchmark_raw = yaml.load(f) + f.close() + ## general info + cuda_version = config["cuda_version"] + cudnn_version = config["cudnn_version"] + trt_version = config["cudnn_version"] + python_version = config["python_version"] + gcc_version = config["gcc_version"] + paddle_version = config["paddle_version"] + cpu = config["cpu"] + gpu = config["gpu"] + xpu = config["xpu"] + api = config["api"] + owner = config["owner"] + ## model info + model_name = config["model_name"] + model_type = config["model_type"] + model_source = config["model_source"] + model_url = config["model_url"] + ## data info + batch_size = config["batch_size"] + num_of_samples = config["num_of_samples"] + input_shape = config["input_shape"] + ## conf info + runtime_device = config["runtime_device"] + ir_optim = config["ir_optim"] + enable_memory_optim = config["enable_memory_optim"] + enable_tensorrt = config["enable_tensorrt"] + precision = config["precision"] + enable_mkldnn = config["enable_mkldnn"] + cpu_math_library_num_threads = config["cpu_math_library_num_threads"] + ## acc info + acc1 = "Nan" + acc5 = "Nan" + ## perf info + average_latency, QPS = benchmark_raw["DAG"]["avg"], benchmark_raw["DAG"]["qps"] + cost_90, cost_99, succ_rate = benchmark_raw["DAG"]["90"], benchmark_raw["DAG"]["99"], benchmark_raw["DAG"]["succ"] + process_latency = "" + cpu_rss, vms, shared, dirty, cpu_usage = "", "", "", "", benchmark_raw["CPU_MEM"] + gpu_id, total, free, used, gpu_utilization_rate, gpu_mem_utilization_rate = "","","","", benchmark_raw["GPU_UTIL"], benchmark_raw["GPU_MEM"] + + fh = LogHandler() + + fh.append("cuda_version: {}".format(cuda_version)) + fh.append("cudnn_version: {}".format(cudnn_version)) + fh.append("trt_version: {} ".format(trt_version)) + fh.append("python_version: 
{}".format(python_version)) + fh.append("gcc_version: {}".format(gcc_version)) + fh.append("paddle_version: {}".format(paddle_version)) + fh.append("cpu: {}".format(cpu)) + fh.append("gpu: {}".format(gpu)) # p4, v100, 1080 + fh.append("xpu: {}".format(xpu)) + fh.append("api: {}".format(api)) + fh.append("owner: {}".format(owner)) + fh.append("----------------------- Model info ----------------------") + fh.append("model_name: {}".format(model_name)) + fh.append("model_type: {}".format(model_type)) + fh.append("model_source: {}".format(model_source)) + fh.append("model_url: {}".format(model_url)) + fh.append("----------------------- Data info -----------------------") + fh.append("batch_size: {}".format(batch_size)) + fh.append("num_of_samples: {}".format(num_of_samples)) + fh.append("input_shape: {}".format(input_shape)) + fh.append("----------------------- Conf info -----------------------") + fh.append("runtime_device: {}".format(runtime_device)) + fh.append("ir_optim: {}".format(ir_optim)) + fh.append("enable_memory_optim: {}".format(enable_memory_optim)) + fh.append("enable_tensorrt: {}".format(enable_tensorrt)) + fh.append("precision: {}".format(precision)) # fp32, fp16, int8 + fh.append("enable_mkldnn: {}".format(enable_mkldnn)) + fh.append("cpu_math_library_num_threads: {}".format(cpu_math_library_num_threads)) + fh.append("----------------------- Acc info ------------------------") + fh.append("acc1:".format(acc1)) + fh.append("acc5:".format(acc5)) + fh.append("----------------------- Perf info -----------------------") + fh.append("average_latency(ms): {}, QPS: {}".format(average_latency, QPS)) + fh.append("process_latency(ms): {}".format(process_latency)) + fh.append("90%_cost: {}, 99%_cost: {}, succ_rate: {}".format(cost_90, cost_99, succ_rate)) + fh.append("process_name: clas_benchmark, cpu_rss(MB): {}, vms(MB): {}, shared(MB): {}, dirty(MB): {}, cpu_usage(%): {}".format(cpu_rss, vms, shared, dirty, cpu_usage)) + fh.append("gpu_id: {}, total(MB): {}, free(MB): {}, used(MB): {}, gpu_utilization_rate(%): {}, gpu_mem_utilization_rate(%): {}".format(gpu_id, total, free, used, gpu_utilization_rate, gpu_mem_utilization_rate)) + + fh.dump(args.output) diff --git a/python/paddle_serving_server/profiler.py b/python/paddle_serving_server/profiler.py index 8464cfa7..715afa23 100644 --- a/python/paddle_serving_server/profiler.py +++ b/python/paddle_serving_server/profiler.py @@ -31,6 +31,29 @@ _LOGGER = logging.getLogger(__name__) _LOGGER.propagate = False _is_profile = int(os.environ.get('FLAGS_profile_pipeline', 0)) +import pynvml +import psutil +import GPUtil +import argparse + +def get_mem(gpu_id=None): + pid = os.getpid() + p = psutil.Process(pid) + info = p.memory_full_info() + cpu_mem = info.uss / 1024. / 1024. + gpu_mem = 0 + if gpu_id is not None: + pynvml.nvmlInit() + handle = pynvml.nvmlDeviceGetHandleByIndex(0) + meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle) + gpu_mem = meminfo.used / 1024./ 1024. 
+ return cpu_mem, gpu_mem + +def get_gpu_util(gpu_id): + GPUs = GPUtil.getGPUs() + gpu_load = GPUs[gpu_id].load + return gpu_load + class PerformanceTracer(object): def __init__(self, is_thread_mode, interval_s, server_worker_num): @@ -245,3 +268,24 @@ class TimeProfiler(object): tag, timestamp = item self._time_record.put((name, tag, timestamp)) return print_str + +def parse_args(): # pylint: disable=doc-string-missing + parser = argparse.ArgumentParser("serve") + parser.add_argument( + "--use_gpu", default=False, action="store_true", help="use gpu or not") + parser.add_argument( + "--gpu_id", + type=int, + default=0, + help="gpu id") + return parser.parse_args() + +if __name__ == "__main__": + args = parse_args() + if args.use_gpu: + cm, gm = get_mem(args.gpu_id) + gpu_util = get_gpu_util(args.gpu_id) + print("CPU_MEM: {}\nGPU_MEM: {}\nGPU_UTIL:{}\n".format(cm, gm, gpu_util)) + else: + cm, _ = get_mem(args.gpu_id) + print("CPU_MEM: {}".format(cm)) diff --git a/python/paddle_serving_server/version.py b/python/paddle_serving_server/version.py index 490ba962..85a14d32 100644 --- a/python/paddle_serving_server/version.py +++ b/python/paddle_serving_server/version.py @@ -13,6 +13,6 @@ # limitations under the License. """ Paddle Serving Client version string """ serving_client_version = "0.0.0" -serving_server_version = "0.0.0" +serving_server_version = "0.5.0" module_proto_version = "0.0.0" commit_id = "" diff --git a/python/paddle_serving_server_gpu/parse_profile.py b/python/paddle_serving_server_gpu/parse_profile.py new file mode 100644 index 00000000..2a409c8a --- /dev/null +++ b/python/paddle_serving_server_gpu/parse_profile.py @@ -0,0 +1,126 @@ +import sys +import os +import yaml +import argparse +""" +{'CPU_UTILIZATION': 0.8, 'MAX_GPU_MEMORY': 0, 'GPU_UTILIZATION': '0 %', 'DAG': {'50': 670.256, '60': 670.256, '70': 670.765, '80': 671.23, '90': 687.546, '95': 687.546, '99': 687.546, 'avg': 670.755625, 'qps': 0.8, 'query_count': 8, 'succ': 1.0}, 'demo': {'midp': 669.484375, 'postp': 0.184875, 'prep': 1.001875}} +""" +class LogHandler(object): + def __init__(self): + self.fstr = "" + + def print(self): + print(self.fstr) + + def dump(self, filename): + with open(filename,'w') as fout: + fout.write(self.fstr) + + def append(self, new_str): + self.fstr += new_str + "\n" + +def parse_args(): # pylint: disable=doc-string-missing + parser = argparse.ArgumentParser("serve") + parser.add_argument( + "--benchmark_cfg", type=str, required=True, help="benchmark config yaml. 
including general info, model info, data info, conf info") + parser.add_argument( + "--benchmark_log", + type=str, required=True, + help="benchmark log, generated by a web service or pipeline.") + parser.add_argument( + "--output", + type=str, + default="std_benchmark.log", + help="the output filename, default std_benchmark.log") + return parser.parse_args() + +if __name__ == "__main__": + args = parse_args() + benchmark_cfg_filename = args.benchmark_cfg + f = open(benchmark_cfg_filename, 'r') + config = yaml.load(f) + f.close() + benchmark_raw_filename = args.benchmark_log + f = open(benchmark_raw_filename, 'r') + benchmark_raw = yaml.load(f) + f.close() + ## general info + cuda_version = config["cuda_version"] + cudnn_version = config["cudnn_version"] + trt_version = config["cudnn_version"] + python_version = config["python_version"] + gcc_version = config["gcc_version"] + paddle_version = config["paddle_version"] + cpu = config["cpu"] + gpu = config["gpu"] + xpu = config["xpu"] + api = config["api"] + owner = config["owner"] + ## model info + model_name = config["model_name"] + model_type = config["model_type"] + model_source = config["model_source"] + model_url = config["model_url"] + ## data info + batch_size = config["batch_size"] + num_of_samples = config["num_of_samples"] + input_shape = config["input_shape"] + ## conf info + runtime_device = config["runtime_device"] + ir_optim = config["ir_optim"] + enable_memory_optim = config["enable_memory_optim"] + enable_tensorrt = config["enable_tensorrt"] + precision = config["precision"] + enable_mkldnn = config["enable_mkldnn"] + cpu_math_library_num_threads = config["cpu_math_library_num_threads"] + ## acc info + acc1 = "Nan" + acc5 = "Nan" + ## perf info + average_latency, QPS = benchmark_raw["DAG"]["avg"], benchmark_raw["DAG"]["qps"] + cost_90, cost_99, succ_rate = benchmark_raw["DAG"]["90"], benchmark_raw["DAG"]["99"], benchmark_raw["DAG"]["succ"] + process_latency = "" + cpu_rss, vms, shared, dirty, cpu_usage = "", "", "", "", benchmark_raw["CPU_MEM"] + gpu_id, total, free, used, gpu_utilization_rate, gpu_mem_utilization_rate = "","","","", benchmark_raw["GPU_UTIL"], benchmark_raw["GPU_MEM"] + + fh = LogHandler() + + fh.append("cuda_version: {}".format(cuda_version)) + fh.append("cudnn_version: {}".format(cudnn_version)) + fh.append("trt_version: {} ".format(trt_version)) + fh.append("python_version: {}".format(python_version)) + fh.append("gcc_version: {}".format(gcc_version)) + fh.append("paddle_version: {}".format(paddle_version)) + fh.append("cpu: {}".format(cpu)) + fh.append("gpu: {}".format(gpu)) # p4, v100, 1080 + fh.append("xpu: {}".format(xpu)) + fh.append("api: {}".format(api)) + fh.append("owner: {}".format(owner)) + fh.append("----------------------- Model info ----------------------") + fh.append("model_name: {}".format(model_name)) + fh.append("model_type: {}".format(model_type)) + fh.append("model_source: {}".format(model_source)) + fh.append("model_url: {}".format(model_url)) + fh.append("----------------------- Data info -----------------------") + fh.append("batch_size: {}".format(batch_size)) + fh.append("num_of_samples: {}".format(num_of_samples)) + fh.append("input_shape: {}".format(input_shape)) + fh.append("----------------------- Conf info -----------------------") + fh.append("runtime_device: {}".format(runtime_device)) + fh.append("ir_optim: {}".format(ir_optim)) + fh.append("enable_memory_optim: {}".format(enable_memory_optim)) + fh.append("enable_tensorrt: {}".format(enable_tensorrt)) + 
fh.append("precision: {}".format(precision)) # fp32, fp16, int8 + fh.append("enable_mkldnn: {}".format(enable_mkldnn)) + fh.append("cpu_math_library_num_threads: {}".format(cpu_math_library_num_threads)) + fh.append("----------------------- Acc info ------------------------") + fh.append("acc1:".format(acc1)) + fh.append("acc5:".format(acc5)) + fh.append("----------------------- Perf info -----------------------") + fh.append("average_latency(ms): {}, QPS: {}".format(average_latency, QPS)) + fh.append("process_latency(ms): {}".format(process_latency)) + fh.append("90%_cost: {}, 99%_cost: {}, succ_rate: {}".format(cost_90, cost_99, succ_rate)) + fh.append("process_name: clas_benchmark, cpu_rss(MB): {}, vms(MB): {}, shared(MB): {}, dirty(MB): {}, cpu_usage(%): {}".format(cpu_rss, vms, shared, dirty, cpu_usage)) + fh.append("gpu_id: {}, total(MB): {}, free(MB): {}, used(MB): {}, gpu_utilization_rate(%): {}, gpu_mem_utilization_rate(%): {}".format(gpu_id, total, free, used, gpu_utilization_rate, gpu_mem_utilization_rate)) + + fh.dump(args.output) diff --git a/python/paddle_serving_server_gpu/profiler.py b/python/paddle_serving_server_gpu/profiler.py index 8464cfa7..5d1d947a 100644 --- a/python/paddle_serving_server_gpu/profiler.py +++ b/python/paddle_serving_server_gpu/profiler.py @@ -31,6 +31,27 @@ _LOGGER = logging.getLogger(__name__) _LOGGER.propagate = False _is_profile = int(os.environ.get('FLAGS_profile_pipeline', 0)) +import pynvml +import psutil +import GPUtil +import argparse +def get_mem(gpu_id=None): + pid = os.getpid() + p = psutil.Process(pid) + info = p.memory_full_info() + cpu_mem = info.uss / 1024. / 1024. + gpu_mem = 0 + if gpu_id is not None: + pynvml.nvmlInit() + handle = pynvml.nvmlDeviceGetHandleByIndex(0) + meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle) + gpu_mem = meminfo.used / 1024./ 1024. + return cpu_mem, gpu_mem + +def get_gpu_util(gpu_id): + GPUs = GPUtil.getGPUs() + gpu_load = GPUs[gpu_id].load + return gpu_load class PerformanceTracer(object): def __init__(self, is_thread_mode, interval_s, server_worker_num): @@ -245,3 +266,24 @@ class TimeProfiler(object): tag, timestamp = item self._time_record.put((name, tag, timestamp)) return print_str + +def parse_args(): # pylint: disable=doc-string-missing + parser = argparse.ArgumentParser("serve") + parser.add_argument( + "--use_gpu", default=False, action="store_true", help="use gpu or not") + parser.add_argument( + "--gpu_id", + type=int, + default=0, + help="gpu id") + return parser.parse_args() + +if __name__ == "__main__": + args = parse_args() + if args.use_gpu is True: + cm, gm = get_mem(args.gpu_id) + gpu_util = get_gpu_util(args.gpu_id) + print("CPU_MEM: {}\nGPU_MEM: {}\nGPU_UTIL:{}".format(cm, gm, gpu_util)) + else: + cm, _ = get_mem(args.gpu_id) + print("CPU_MEM: {}".format(cm)) diff --git a/python/paddle_serving_server_gpu/version.py b/python/paddle_serving_server_gpu/version.py index b774c223..ae8c7b53 100644 --- a/python/paddle_serving_server_gpu/version.py +++ b/python/paddle_serving_server_gpu/version.py @@ -13,7 +13,7 @@ # limitations under the License. """ Paddle Serving Client version string """ serving_client_version = "0.0.0" -serving_server_version = "0.0.0" +serving_server_version = "0.5.0" module_proto_version = "0.0.0" cuda_version = "9" commit_id = "" -- GitLab