Commit 6e643312 authored by bjjwwang

fix imagenet

Parent 99aaae1d
......@@ -44,7 +44,7 @@ class BertService(WebService):
return feed_dict, fetch, is_batch
bert_service = BertService(name="bert")
bert_service.setup_profile(30)
bert_service.setup_profile(10)
bert_service.load()
bert_service.load_model_config(sys.argv[1])
bert_service.prepare_server(
......
......@@ -25,7 +25,7 @@ def run_http(idx, batch_size):
{"feed":[{"words": "hello"}], "fetch":["pooled_output"]}
"""
print("start thread ({})".format(idx))
url = "http://127.0.0.1:9292/bert/prediction"
url = "http://127.0.0.1:9696/bert/prediction"
start = time.time()
with open("data-c.txt", 'r') as fin:
start = time.time()
......@@ -39,7 +39,7 @@ def run_http(idx, batch_size):
r = requests.post(url=url, data=json.dumps(data), headers={"Content-Type": "application/json"})
start_idx += batch_size
end = time.time()
if end - start > 40:
if end - start > 15:
break
end = time.time()
return [[end - start]]
......
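The docstring at the top of run_http shows the payload the BERT service expects. For reference, a minimal single-request sketch (an illustration, not part of the benchmark), assuming the service is already listening on the port configured below (9696) and that the requests package is installed:
import json
import requests

# One request with the payload format documented in the run_http docstring.
url = "http://127.0.0.1:9696/bert/prediction"
data = {"feed": [{"words": "hello"}], "fetch": ["pooled_output"]}
r = requests.post(url=url, data=json.dumps(data),
                  headers={"Content-Type": "application/json"})
print(r.json())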
export FLAGS_profile_pipeline=1
modelname="bert"
# HTTP
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
sleep 3
rm -rf profile_log_$modelname
for thread_num in 1 8 16
for thread_num in 1
do
for batch_size in 1 10 100
for batch_size in 1
do
python3.7 bert_web_service.py bert_seq128_model/ 9292 &
python3.7 bert_web_service.py bert_seq128_model/ 9696 &
sleep 3
echo "----Bert thread num: $thread_num batch size: $batch_size mode:http ----" >>profile_log_$modelname
echo "#----Bert thread num: $thread_num batch size: $batch_size mode:http ----" >>profile_log_$modelname
nvidia-smi --id=2 --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
nvidia-smi --id=2 --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
echo -e "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
python3.7 new_benchmark.py run $thread_num $batch_size
python3.7 cpu_utilization.py >>profile_log_$modelname
#python3.7 cpu_utilization.py >>profile_log_$modelname
python3.7 -m paddle_serving_server_gpu.profiler --use_gpu --gpu_id 0 >>profile_log_$modelname
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
python3.7 new_benchmark.py dump benchmark.log benchmark.tmp
mv benchmark.tmp benchmark.log
......
import sys
import os
import yaml
import argparse
"""
{'CPU_UTILIZATION': 0.8, 'MAX_GPU_MEMORY': 0, 'GPU_UTILIZATION': '0 %', 'DAG': {'50': 670.256, '60': 670.256, '70': 670.765, '80': 671.23, '90': 687.546, '95': 687.546, '99': 687.546, 'avg': 670.755625, 'qps': 0.8, 'query_count': 8, 'succ': 1.0}, 'demo': {'midp': 669.484375, 'postp': 0.184875, 'prep': 1.001875}}
"""
class LogHandler(object):
def __init__(self):
self.fstr = ""
......@@ -9,24 +12,45 @@ class LogHandler(object):
def print(self):
print(self.fstr)
def dump(self):
with open("inference_profile.log",'w') as fout:
def dump(self, filename):
with open(filename,'w') as fout:
fout.write(self.fstr)
def append(self, new_str):
self.fstr += new_str + "\n"
def parse_args(): # pylint: disable=doc-string-missing
parser = argparse.ArgumentParser("serve")
parser.add_argument(
"--benchmark_cfg", type=str, required=True, help="benchmark config yaml. including general info, model info, data info, conf info")
parser.add_argument(
"--benchmark_log",
type=str, required=True,
help="benchmark log, generated by a web service or pipeline.")
parser.add_argument(
"--output",
type=str,
default="std_benchmark.log",
help="the output filename, default std_benchmark.log")
return parser.parse_args()
if __name__ == "__main__":
filename = sys.argv[1]
f = open(filename, 'r')
args = parse_args()
benchmark_cfg_filename = args.benchmark_cfg
f = open(benchmark_cfg_filename, 'r')
config = yaml.load(f)
f.close()
benchmark_raw_filename = args.benchmark_log
f = open(benchmark_raw_filename, 'r')
benchmark_raw = yaml.load(f)
f.close()
## general info
cuda_version = config["cuda_version"]
cudnn_version = config["cudnn_version"]
trt_version = config["trt_version"]
python_version = config["python_version"]
gcc_version = config["gcc_version"]
paddle_version = config["paddle_servion"]
paddle_version = config["paddle_version"]
cpu = config["cpu"]
gpu = config["gpu"]
xpu = config["xpu"]
......@@ -53,10 +77,10 @@ if __name__ == "__main__":
acc1 = "Nan"
acc5 = "Nan"
## perf info
average_latency, QPS = "", ""
average_latency, QPS = benchmark_raw["DAG"]["avg"], benchmark_raw["DAG"]["qps"]
process_latency = ""
cpu_rss, vms, shared, dirty, cpu_usage = "", "", "", "", ""
gpu_id, total, free, used, gpu_utilization_rate, gpu_mem_utilization_rate = "","","","","", ""
cpu_rss, vms, shared, dirty, cpu_usage = "", "", "", "", benchmark_raw["CPU_UTILIZATION"]
gpu_id, total, free, used, gpu_utilization_rate, gpu_mem_utilization_rate = "","","","", benchmark_raw["GPU_UTILIZATION"], benchmark_raw["MAX_GPU_MEMORY"]
fh = LogHandler()
......@@ -97,4 +121,4 @@ if __name__ == "__main__":
fh.append("process_name: clas_benchmark, cpu_rss(MB): {}, vms(MB): {}, shared(MB): {}, dirty(MB): {}, cpu_usage(%): {}".format(cpu_rss, vms, shared, dirty, cpu_usage))
fh.append("gpu_id: {}, total(MB): {}, free(MB): {}, used(MB): {}, gpu_utilization_rate(%): {}, gpu_mem_utilization_rate(%): {}".format(gpu_id, total, free, used, gpu_utilization_rate, gpu_mem_utilization_rate))
fh.dump()
fh.dump(args.output)
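For reference, a minimal sketch of how the raw profile log described in the module docstring can be inspected by hand; it assumes the log is the YAML-parsable dict shown above, that PyYAML is installed, and that the filename follows the profile_log_$modelname convention of the benchmark script (profile_log_bert here is an assumption):
import yaml

# Read the raw profile log and pull out the fields this script reports.
with open("profile_log_bert", "r") as f:   # filename is an assumption
    raw = yaml.safe_load(f)

print("avg latency (ms):", raw["DAG"]["avg"])
print("qps:", raw["DAG"]["qps"])
print("cpu utilization (%):", raw["CPU_UTILIZATION"])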
import sys
import os
import base64
import yaml
import requests
import time
import json
try:
from paddle_serving_server_gpu.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency
def parse_benchmark(filein, fileout):
with open(filein, "r") as fin:
res = yaml.load(fin)
del_list = []
for key in res["DAG"].keys():
if "call" in key:
del_list.append(key)
for key in del_list:
del res["DAG"][key]
with open(fileout, "w") as fout:
yaml.dump(res, fout, default_flow_style=False)
def gen_yml(device, gpu_id):
fin = open("config.yml", "r")
config = yaml.load(fin)
fin.close()
config["dag"]["tracer"] = {"interval_s": 10}
if device == "gpu":
config["op"]["imagenet"]["local_service_conf"]["device_type"] = 1
config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id
with open("config2.yml", "w") as fout:
yaml.dump(config, fout, default_flow_style=False)
def cv2_to_base64(image):
return base64.b64encode(image).decode('utf8')
def run_http(idx, batch_size):
print("start thread ({})".format(idx))
url = "http://127.0.0.1:18080/imagenet/prediction"
start = time.time()
with open(os.path.join(".", "daisy.jpg"), 'rb') as file:
image_data1 = file.read()
image = cv2_to_base64(image_data1)
data = {"key": ["image"], "value": [image]}
for i in range(100):
r = requests.post(url=url, data=json.dumps(data))
end = time.time()
return [[end - start]]
def multithread_http(thread, batch_size):
multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_http, thread, batch_size)
def run_rpc(thread, batch_size):
client = PipelineClient()
client.connect(['127.0.0.1:18090'])
start = time.time()
test_img_dir = "imgs/"
for img_file in os.listdir(test_img_dir):
with open(os.path.join(test_img_dir, img_file), 'rb') as file:
image_data = file.read()
image = cv2_to_base64(image_data)
for i in range(100):
ret = client.predict(feed_dict={"image": image}, fetch=["res"])
end = time.time()
return [[end - start]]
def multithread_rpc(thread, batch_size):
multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_rpc, thread, batch_size)
if __name__ == "__main__":
if sys.argv[1] == "yaml":
mode = sys.argv[2] # brpc / local_predictor
thread = int(sys.argv[3])
device = sys.argv[4]
gpu_id = sys.argv[5]
gen_yml(device, gpu_id)
elif sys.argv[1] == "run":
mode = sys.argv[2] # http/ rpc
thread = int(sys.argv[3])
batch_size = int(sys.argv[4])
if mode == "http":
multithread_http(thread, batch_size)
elif mode == "rpc":
multithread_rpc(thread, batch_size)
elif sys.argv[1] == "dump":
filein = sys.argv[2]
fileout = sys.argv[3]
parse_benchmark(filein, fileout)
export FLAGS_profile_pipeline=1
alias python3="python3.7"
modelname="imagenet"
gpu_id="0"
benchmark_config_filename="benchmark_config.yaml"
# HTTP
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
sleep 3
python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
rm -rf profile_log_$modelname
for thread_num in 1
do
for batch_size in 1
do
echo "#----imagenet thread num: $thread_num batch size: $batch_size mode:http ----" >>profile_log_$modelname
rm -rf PipelineServingLogs
rm -rf cpu_utilization.py
python3 resnet50_web_service.py >web.log 2>&1 &
sleep 3
nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
echo -e "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
python3 benchmark.py run http $thread_num $batch_size
python3 cpu_utilization.py >>profile_log_$modelname
python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
python3 benchmark.py dump benchmark.log benchmark.tmp
mv benchmark.tmp benchmark.log
awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
cat benchmark.log >> profile_log_$modelname
python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
#rm -rf gpu_use.log gpu_utilization.log
done
done
cuda_version: "10.1"
cudnn_version: "7.6"
trt_version: "6.0"
python_version: "3.7"
gcc_version: "8.2"
paddle_version: "2.0.2"
cpu: "Xeon 6148"
gpu: "P4"
xpu: "None"
api: ""
owner: "wangjiawei04"
model_name: "imagenet"
model_type: "static"
model_source: "paddleclas"
model_url: ""
batch_size: 1
num_of_samples: 1000
input_shape: "128,1"
runtime_device: "gpu"
ir_optim: true
enable_memory_optim: true
enable_tensorrt: false
precision: "fp32"
enable_mkldnn: true
cpu_math_library_num_threads: ""
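The parse_profile script reads this config file key by key; below is a minimal sanity check (a sketch, assuming PyYAML is installed) that every key it expects is present:
import yaml

# Keys read by parse_profile, grouped as general / model / data / conf info.
REQUIRED = [
    "cuda_version", "cudnn_version", "trt_version", "python_version",
    "gcc_version", "paddle_version", "cpu", "gpu", "xpu", "api", "owner",
    "model_name", "model_type", "model_source", "model_url",
    "batch_size", "num_of_samples", "input_shape",
    "runtime_device", "ir_optim", "enable_memory_optim", "enable_tensorrt",
    "precision", "enable_mkldnn", "cpu_math_library_num_threads",
]

with open("benchmark_config.yaml", "r") as f:
    cfg = yaml.safe_load(f)

missing = [k for k in REQUIRED if k not in cfg]
print("missing keys:", missing or "none")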
......@@ -3,8 +3,8 @@
worker_num: 1
#HTTP port; rpc_port and http_port must not both be empty. When rpc_port is set and http_port is empty, http_port is not generated automatically
http_port: 18082
rpc_port: 9999
http_port: 18080
rpc_port: 9993
dag:
#op resource type: True for the thread model, False for the process model
......
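The comments above tie the two ports together: the HTTP clients in this example post to http_port (18080) while the PipelineClient connects to rpc_port (9993). A minimal check (a sketch, assuming PyYAML) that config.yml matches what the clients expect:
import yaml

# Load the pipeline config and confirm the ports used by the client scripts.
with open("config.yml", "r") as f:
    conf = yaml.safe_load(f)

assert conf["http_port"] == 18080, conf["http_port"]
assert conf["rpc_port"] == 9993, conf["rpc_port"]
print("ports ok:", conf["http_port"], conf["rpc_port"])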
import numpy as np
import requests
import json
import cv2
import base64
import os
def cv2_to_base64(image):
return base64.b64encode(image).decode('utf8')
if __name__ == "__main__":
url = "http://127.0.0.1:18080/imagenet/prediction"
with open(os.path.join(".", "daisy.jpg"), 'rb') as file:
image_data1 = file.read()
image = cv2_to_base64(image_data1)
data = {"key": ["image"], "value": [image]}
for i in range(100):
r = requests.post(url=url, data=json.dumps(data))
print(r.json())
......@@ -23,7 +23,7 @@ import base64
import os
client = PipelineClient()
client.connect(['127.0.0.1:9999'])
client.connect(['127.0.0.1:9993'])
def cv2_to_base64(image):
......
......@@ -46,7 +46,6 @@ class ImagenetOp(Op):
return {"image": img[np.newaxis, :].copy()}, False, None, ""
def postprocess(self, input_dicts, fetch_dict, log_id):
print(fetch_dict)
score_list = fetch_dict["score"]
result = {"label": [], "prob": []}
for score in score_list:
......@@ -67,5 +66,5 @@ class ImageService(WebService):
uci_service = ImageService(name="imagenet")
uci_service.prepare_pipeline_config("config.yml")
uci_service.prepare_pipeline_config("config2.yml")
uci_service.run_service()
import sys
import os
import yaml
import argparse
"""
{'CPU_UTILIZATION': 0.8, 'MAX_GPU_MEMORY': 0, 'GPU_UTILIZATION': '0 %', 'DAG': {'50': 670.256, '60': 670.256, '70': 670.765, '80': 671.23, '90': 687.546, '95': 687.546, '99': 687.546, 'avg': 670.755625, 'qps': 0.8, 'query_count': 8, 'succ': 1.0}, 'demo': {'midp': 669.484375, 'postp': 0.184875, 'prep': 1.001875}}
"""
class LogHandler(object):
def __init__(self):
self.fstr = ""
def print(self):
print(self.fstr)
def dump(self, filename):
with open(filename,'w') as fout:
fout.write(self.fstr)
def append(self, new_str):
self.fstr += new_str + "\n"
def parse_args(): # pylint: disable=doc-string-missing
parser = argparse.ArgumentParser("serve")
parser.add_argument(
"--benchmark_cfg", type=str, required=True, help="benchmark config yaml. including general info, model info, data info, conf info")
parser.add_argument(
"--benchmark_log",
type=str, required=True,
help="benchmark log, generated by a web service or pipeline.")
parser.add_argument(
"--output",
type=str,
default="std_benchmark.log",
help="the output filename, default std_benchmark.log")
return parser.parse_args()
if __name__ == "__main__":
args = parse_args()
benchmark_cfg_filename = args.benchmark_cfg
f = open(benchmark_cfg_filename, 'r')
config = yaml.load(f)
f.close()
benchmark_raw_filename = args.benchmark_log
f = open(benchmark_raw_filename, 'r')
benchmark_raw = yaml.load(f)
f.close()
## general info
cuda_version = config["cuda_version"]
cudnn_version = config["cudnn_version"]
trt_version = config["trt_version"]
python_version = config["python_version"]
gcc_version = config["gcc_version"]
paddle_version = config["paddle_version"]
cpu = config["cpu"]
gpu = config["gpu"]
xpu = config["xpu"]
api = config["api"]
owner = config["owner"]
## model info
model_name = config["model_name"]
model_type = config["model_type"]
model_source = config["model_source"]
model_url = config["model_url"]
## data info
batch_size = config["batch_size"]
num_of_samples = config["num_of_samples"]
input_shape = config["input_shape"]
## conf info
runtime_device = config["runtime_device"]
ir_optim = config["ir_optim"]
enable_memory_optim = config["enable_memory_optim"]
enable_tensorrt = config["enable_tensorrt"]
precision = config["precision"]
enable_mkldnn = config["enable_mkldnn"]
cpu_math_library_num_threads = config["cpu_math_library_num_threads"]
## acc info
acc1 = "Nan"
acc5 = "Nan"
## perf info
average_latency, QPS = benchmark_raw["DAG"]["avg"], benchmark_raw["DAG"]["qps"]
cost_90, cost_99, succ_rate = benchmark_raw["DAG"]["90"], benchmark_raw["DAG"]["99"], benchmark_raw["DAG"]["succ"]
process_latency = ""
cpu_rss, vms, shared, dirty, cpu_usage = "", "", "", "", benchmark_raw["CPU_MEM"]
gpu_id, total, free, used, gpu_utilization_rate, gpu_mem_utilization_rate = "","","","", benchmark_raw["GPU_UTIL"], benchmark_raw["GPU_MEM"]
fh = LogHandler()
fh.append("cuda_version: {}".format(cuda_version))
fh.append("cudnn_version: {}".format(cudnn_version))
fh.append("trt_version: {} ".format(trt_version))
fh.append("python_version: {}".format(python_version))
fh.append("gcc_version: {}".format(gcc_version))
fh.append("paddle_version: {}".format(paddle_version))
fh.append("cpu: {}".format(cpu))
fh.append("gpu: {}".format(gpu)) # p4, v100, 1080
fh.append("xpu: {}".format(xpu))
fh.append("api: {}".format(api))
fh.append("owner: {}".format(owner))
fh.append("----------------------- Model info ----------------------")
fh.append("model_name: {}".format(model_name))
fh.append("model_type: {}".format(model_type))
fh.append("model_source: {}".format(model_source))
fh.append("model_url: {}".format(model_url))
fh.append("----------------------- Data info -----------------------")
fh.append("batch_size: {}".format(batch_size))
fh.append("num_of_samples: {}".format(num_of_samples))
fh.append("input_shape: {}".format(input_shape))
fh.append("----------------------- Conf info -----------------------")
fh.append("runtime_device: {}".format(runtime_device))
fh.append("ir_optim: {}".format(ir_optim))
fh.append("enable_memory_optim: {}".format(enable_memory_optim))
fh.append("enable_tensorrt: {}".format(enable_tensorrt))
fh.append("precision: {}".format(precision)) # fp32, fp16, int8
fh.append("enable_mkldnn: {}".format(enable_mkldnn))
fh.append("cpu_math_library_num_threads: {}".format(cpu_math_library_num_threads))
fh.append("----------------------- Acc info ------------------------")
fh.append("acc1: {}".format(acc1))
fh.append("acc5: {}".format(acc5))
fh.append("----------------------- Perf info -----------------------")
fh.append("average_latency(ms): {}, QPS: {}".format(average_latency, QPS))
fh.append("process_latency(ms): {}".format(process_latency))
fh.append("90%_cost: {}, 99%_cost: {}, succ_rate: {}".format(cost_90, cost_99, succ_rate))
fh.append("process_name: clas_benchmark, cpu_rss(MB): {}, vms(MB): {}, shared(MB): {}, dirty(MB): {}, cpu_usage(%): {}".format(cpu_rss, vms, shared, dirty, cpu_usage))
fh.append("gpu_id: {}, total(MB): {}, free(MB): {}, used(MB): {}, gpu_utilization_rate(%): {}, gpu_mem_utilization_rate(%): {}".format(gpu_id, total, free, used, gpu_utilization_rate, gpu_mem_utilization_rate))
fh.dump(args.output)
......@@ -31,6 +31,29 @@ _LOGGER = logging.getLogger(__name__)
_LOGGER.propagate = False
_is_profile = int(os.environ.get('FLAGS_profile_pipeline', 0))
import pynvml
import psutil
import GPUtil
import argparse
def get_mem(gpu_id=None):
pid = os.getpid()
p = psutil.Process(pid)
info = p.memory_full_info()
cpu_mem = info.uss / 1024. / 1024.
gpu_mem = 0
if gpu_id is not None:
pynvml.nvmlInit()
handle = pynvml.nvmlDeviceGetHandleByIndex(gpu_id)
meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
gpu_mem = meminfo.used / 1024./ 1024.
return cpu_mem, gpu_mem
def get_gpu_util(gpu_id):
GPUs = GPUtil.getGPUs()
gpu_load = GPUs[gpu_id].load
return gpu_load
class PerformanceTracer(object):
def __init__(self, is_thread_mode, interval_s, server_worker_num):
......@@ -245,3 +268,24 @@ class TimeProfiler(object):
tag, timestamp = item
self._time_record.put((name, tag, timestamp))
return print_str
def parse_args(): # pylint: disable=doc-string-missing
parser = argparse.ArgumentParser("serve")
parser.add_argument(
"--use_gpu", default=False, action="store_true", help="use gpu or not")
parser.add_argument(
"--gpu_id",
type=int,
default=0,
help="gpu id")
return parser.parse_args()
if __name__ == "__main__":
args = parse_args()
if args.use_gpu:
cm, gm = get_mem(args.gpu_id)
gpu_util = get_gpu_util(args.gpu_id)
print("CPU_MEM: {}\nGPU_MEM: {}\nGPU_UTIL: {}\n".format(cm, gm, gpu_util))
else:
cm, _ = get_mem(args.gpu_id)
print("CPU_MEM: {}".format(cm))
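For reference, a minimal usage sketch of the helpers added above; the import path paddle_serving_server_gpu.profiler is taken from the shell script's "python3.7 -m" invocation and is an assumption here, as are the installed psutil, pynvml and GPUtil packages and a visible GPU 0:
# Usage sketch for get_mem / get_gpu_util (module path assumed from the
# benchmark script's "-m paddle_serving_server_gpu.profiler" call).
from paddle_serving_server_gpu.profiler import get_mem, get_gpu_util

cpu_mem, gpu_mem = get_mem(gpu_id=0)   # process USS and GPU memory in use, MB
gpu_load = get_gpu_util(0)             # load fraction (0.0-1.0) from GPUtil
print("CPU_MEM: {}\nGPU_MEM: {}\nGPU_UTIL: {}".format(cpu_mem, gpu_mem, gpu_load))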
......@@ -13,6 +13,6 @@
# limitations under the License.
""" Paddle Serving Client version string """
serving_client_version = "0.0.0"
serving_server_version = "0.0.0"
serving_server_version = "0.5.0"
module_proto_version = "0.0.0"
commit_id = ""
import sys
import os
import yaml
import argparse
"""
{'CPU_UTILIZATION': 0.8, 'MAX_GPU_MEMORY': 0, 'GPU_UTILIZATION': '0 %', 'DAG': {'50': 670.256, '60': 670.256, '70': 670.765, '80': 671.23, '90': 687.546, '95': 687.546, '99': 687.546, 'avg': 670.755625, 'qps': 0.8, 'query_count': 8, 'succ': 1.0}, 'demo': {'midp': 669.484375, 'postp': 0.184875, 'prep': 1.001875}}
"""
class LogHandler(object):
def __init__(self):
self.fstr = ""
def print(self):
print(self.fstr)
def dump(self, filename):
with open(filename,'w') as fout:
fout.write(self.fstr)
def append(self, new_str):
self.fstr += new_str + "\n"
def parse_args(): # pylint: disable=doc-string-missing
parser = argparse.ArgumentParser("serve")
parser.add_argument(
"--benchmark_cfg", type=str, required=True, help="benchmark config yaml. including general info, model info, data info, conf info")
parser.add_argument(
"--benchmark_log",
type=str, required=True,
help="benchmark log, generated by a web service or pipeline.")
parser.add_argument(
"--output",
type=str,
default="std_benchmark.log",
help="the output filename, default std_benchmark.log")
return parser.parse_args()
if __name__ == "__main__":
args = parse_args()
benchmark_cfg_filename = args.benchmark_cfg
f = open(benchmark_cfg_filename, 'r')
config = yaml.load(f)
f.close()
benchmark_raw_filename = args.benchmark_log
f = open(benchmark_raw_filename, 'r')
benchmark_raw = yaml.load(f)
f.close()
## general info
cuda_version = config["cuda_version"]
cudnn_version = config["cudnn_version"]
trt_version = config["trt_version"]
python_version = config["python_version"]
gcc_version = config["gcc_version"]
paddle_version = config["paddle_version"]
cpu = config["cpu"]
gpu = config["gpu"]
xpu = config["xpu"]
api = config["api"]
owner = config["owner"]
## model info
model_name = config["model_name"]
model_type = config["model_type"]
model_source = config["model_source"]
model_url = config["model_url"]
## data info
batch_size = config["batch_size"]
num_of_samples = config["num_of_samples"]
input_shape = config["input_shape"]
## conf info
runtime_device = config["runtime_device"]
ir_optim = config["ir_optim"]
enable_memory_optim = config["enable_memory_optim"]
enable_tensorrt = config["enable_tensorrt"]
precision = config["precision"]
enable_mkldnn = config["enable_mkldnn"]
cpu_math_library_num_threads = config["cpu_math_library_num_threads"]
## acc info
acc1 = "Nan"
acc5 = "Nan"
## perf info
average_latency, QPS = benchmark_raw["DAG"]["avg"], benchmark_raw["DAG"]["qps"]
cost_90, cost_99, succ_rate = benchmark_raw["DAG"]["90"], benchmark_raw["DAG"]["99"], benchmark_raw["DAG"]["succ"]
process_latency = ""
cpu_rss, vms, shared, dirty, cpu_usage = "", "", "", "", benchmark_raw["CPU_MEM"]
gpu_id, total, free, used, gpu_utilization_rate, gpu_mem_utilization_rate = "","","","", benchmark_raw["GPU_UTIL"], benchmark_raw["GPU_MEM"]
fh = LogHandler()
fh.append("cuda_version: {}".format(cuda_version))
fh.append("cudnn_version: {}".format(cudnn_version))
fh.append("trt_version: {} ".format(trt_version))
fh.append("python_version: {}".format(python_version))
fh.append("gcc_version: {}".format(gcc_version))
fh.append("paddle_version: {}".format(paddle_version))
fh.append("cpu: {}".format(cpu))
fh.append("gpu: {}".format(gpu)) # p4, v100, 1080
fh.append("xpu: {}".format(xpu))
fh.append("api: {}".format(api))
fh.append("owner: {}".format(owner))
fh.append("----------------------- Model info ----------------------")
fh.append("model_name: {}".format(model_name))
fh.append("model_type: {}".format(model_type))
fh.append("model_source: {}".format(model_source))
fh.append("model_url: {}".format(model_url))
fh.append("----------------------- Data info -----------------------")
fh.append("batch_size: {}".format(batch_size))
fh.append("num_of_samples: {}".format(num_of_samples))
fh.append("input_shape: {}".format(input_shape))
fh.append("----------------------- Conf info -----------------------")
fh.append("runtime_device: {}".format(runtime_device))
fh.append("ir_optim: {}".format(ir_optim))
fh.append("enable_memory_optim: {}".format(enable_memory_optim))
fh.append("enable_tensorrt: {}".format(enable_tensorrt))
fh.append("precision: {}".format(precision)) # fp32, fp16, int8
fh.append("enable_mkldnn: {}".format(enable_mkldnn))
fh.append("cpu_math_library_num_threads: {}".format(cpu_math_library_num_threads))
fh.append("----------------------- Acc info ------------------------")
fh.append("acc1: {}".format(acc1))
fh.append("acc5: {}".format(acc5))
fh.append("----------------------- Perf info -----------------------")
fh.append("average_latency(ms): {}, QPS: {}".format(average_latency, QPS))
fh.append("process_latency(ms): {}".format(process_latency))
fh.append("90%_cost: {}, 99%_cost: {}, succ_rate: {}".format(cost_90, cost_99, succ_rate))
fh.append("process_name: clas_benchmark, cpu_rss(MB): {}, vms(MB): {}, shared(MB): {}, dirty(MB): {}, cpu_usage(%): {}".format(cpu_rss, vms, shared, dirty, cpu_usage))
fh.append("gpu_id: {}, total(MB): {}, free(MB): {}, used(MB): {}, gpu_utilization_rate(%): {}, gpu_mem_utilization_rate(%): {}".format(gpu_id, total, free, used, gpu_utilization_rate, gpu_mem_utilization_rate))
fh.dump(args.output)
......@@ -31,6 +31,27 @@ _LOGGER = logging.getLogger(__name__)
_LOGGER.propagate = False
_is_profile = int(os.environ.get('FLAGS_profile_pipeline', 0))
import pynvml
import psutil
import GPUtil
import argparse
def get_mem(gpu_id=None):
pid = os.getpid()
p = psutil.Process(pid)
info = p.memory_full_info()
cpu_mem = info.uss / 1024. / 1024.
gpu_mem = 0
if gpu_id is not None:
pynvml.nvmlInit()
handle = pynvml.nvmlDeviceGetHandleByIndex(gpu_id)
meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
gpu_mem = meminfo.used / 1024./ 1024.
return cpu_mem, gpu_mem
def get_gpu_util(gpu_id):
GPUs = GPUtil.getGPUs()
gpu_load = GPUs[gpu_id].load
return gpu_load
class PerformanceTracer(object):
def __init__(self, is_thread_mode, interval_s, server_worker_num):
......@@ -245,3 +266,24 @@ class TimeProfiler(object):
tag, timestamp = item
self._time_record.put((name, tag, timestamp))
return print_str
def parse_args(): # pylint: disable=doc-string-missing
parser = argparse.ArgumentParser("serve")
parser.add_argument(
"--use_gpu", default=False, action="store_true", help="use gpu or not")
parser.add_argument(
"--gpu_id",
type=int,
default=0,
help="gpu id")
return parser.parse_args()
if __name__ == "__main__":
args = parse_args()
if args.use_gpu:
cm, gm = get_mem(args.gpu_id)
gpu_util = get_gpu_util(args.gpu_id)
print("CPU_MEM: {}\nGPU_MEM: {}\nGPU_UTIL: {}".format(cm, gm, gpu_util))
else:
cm, _ = get_mem(args.gpu_id)
print("CPU_MEM: {}".format(cm))
......@@ -13,7 +13,7 @@
# limitations under the License.
""" Paddle Serving Client version string """
serving_client_version = "0.0.0"
serving_server_version = "0.0.0"
serving_server_version = "0.5.0"
module_proto_version = "0.0.0"
cuda_version = "9"
commit_id = ""