Commit 31f75d66 authored by wangjiawei04

add parse script

Parent 543e2069
import sys
import os
## general info
cuda_version = ""
cudnn_version = ""
trt_version = ""
python_version = ""
gcc_version = ""
paddle_version = ""
cpu = ""
gpu = ""
xpu = ""
api = ""
owner = ""
## model info
model_name = ""
model_type = ""
model_source = ""
model_url = ""
## data info
batch_size = ""
num_of_samples = ""
input_shape = ""
## conf info
runtime_device = ""
ir_optim = ""
enable_memory_optim = ""
enable_tensorrt = ""
precision = ""
enable_mkldnn = ""
cpu_math_library_num_threads = ""
## acc info
acc1 = ""
acc5 = ""
## perf info
average_latency, QPS = "", ""
process_latency = ""
cpu_rss, vms, shared, dirty, cpu_usage = "", "", "", "", ""
gpu_id, total, free, used, gpu_utilization_rate, gpu_mem_utilization_rate = "", "", "", "", "", ""
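# The fields above are empty placeholders that a caller is expected to fill in
# before the report is dumped. A hypothetical helper for populating them from
# a "key: value" benchmark log (the log format is an assumption, not something
# this commit defines):
def parse_kv_line(line):
    """Split one 'key: value' log line into a stripped (key, value) pair."""
    key, _, value = line.partition(":")
    return key.strip(), value.strip()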
class LogHandler(object):
    def __init__(self):
        self.fstr = ""

    def print(self):
        print(self.fstr)

    def dump(self):
        # Write the accumulated report to a fixed-name log file.
        with open("inference_profile.log", 'w') as fout:
            fout.write(self.fstr)

    def append(self, new_str):
        self.fstr += new_str + "\n"
fh = LogHandler()
fh.append("cuda_version: {}".format(cuda_version))
fh.append("cudnn_version: {}".format(cudnn_version))
fh.append("trt_version: {} ".format(trt_version))
fh.append("python_version: {}".format(python_version))
fh.append("gcc_version: {}".format(gcc_version))
fh.append("paddle_version: {}".format(paddle_version))
fh.append("cpu: {}".format(cpu))
fh.append("gpu: {}".format(gpu)) # p4, v100, 1080
fh.append("xpu: {}".format(xpu))
fh.append("api: {}".format(api))
fh.append("owner: {}".format(owner))
fh.append("----------------------- Model info ----------------------")
fh.append("model_name: {}".format(model_name))
fh.append("model_type: {}".format(model_type))
fh.append("model_source: {}".format(model_source))
fh.append("model_url: {}".format(model_url))
fh.append("----------------------- Data info -----------------------")
fh.append("batch_size: {}".format(batch_size))
fh.append("num_of_samples: {}".format(num_of_samples))
fh.append("input_shape: {}".format(input_shape))
fh.append("----------------------- Conf info -----------------------")
fh.append("runtime_device: {}".format(runtime_device))
fh.append("ir_optim: {}".format(ir_optim))
fh.append("enable_memory_optim: {}".format(enable_memory_optim))
fh.append("enable_tensorrt: {}".format(enable_tensorrt))
fh.append("precision: {}".format(precision)) # fp32, fp16, int8
fh.append("enable_mkldnn: {}".format(enable_mkldnn))
fh.append("cpu_math_library_num_threads: {}".format(cpu_math_library_num_threads))
fh.append("----------------------- Acc info ------------------------")
fh.append("acc1:".format(acc1))
fh.append("acc5:".format(acc5))
fh.append("----------------------- Perf info -----------------------")
fh.append("average_latency(ms): {}, QPS: {}".format(average_latency, QPS))
fh.append("process_latency(ms): {}".format(process_latency))
fh.append("process_name: clas_benchmark, cpu_rss(MB): {}, vms(MB): {}, shared(MB): {}, dirty(MB): {}, cpu_usage(%): {}".format(cpu_rss, vms, shared, dirty, cpu_usage))
fh.append("gpu_id: {}, total(MB): {}, free(MB): {}, used(MB): {}, gpu_utilization_rate(%): {}, gpu_mem_utilization_rate(%): {}".format(gpu_id, total, free, used, gpu_utilization_rate, gpu_mem_utilization_rate))
fh.dump()
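As committed, the script writes an empty skeleton report; the first lines of the resulting inference_profile.log look like this (a later parser would presumably fill the fields before dump() is called):

    cuda_version:
    cudnn_version:
    trt_version:
    python_version: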
@@ -14,6 +14,7 @@
#!flask/bin/python
# pylint: disable=doc-string-missing
from time import time as _time
from flask import Flask, request, abort
from contextlib import closing
from multiprocessing import Pool, Process, Queue
@@ -24,7 +25,8 @@ import socket
import sys
import numpy as np
import paddle_serving_server_gpu as serving
import collections
import queue  # queue.Full is raised by put_nowait() on a bounded queue
from .profiler import TimeProfiler, PerformanceTracer
from paddle_serving_server_gpu import pipeline
from paddle_serving_server_gpu.pipeline import Op
@@ -51,6 +53,15 @@ class WebService(object):
    def get_pipeline_response(self, read_op):
        return None
    def setup_profile(self, trace_interval=10, thread_num=1):
        self.is_profile = True
        if self.is_profile:
            # Pass the arguments through instead of hard-coding 10 and 1.
            self._tracer = PerformanceTracer(True, trace_interval, thread_num)
            self.trace_buffer = self._tracer.data_buffer()
            self._profiler = TimeProfiler()
            self._profiler.enable(True)
        self.data_id = 0
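    # Hedged usage sketch (not part of this commit): profiling is opt-in, so a
    # caller would enable it before starting the service, e.g.
    #   service = WebService(name="demo")
    #   service.setup_profile(trace_interval=10, thread_num=1)
    #   service.run_web_service()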
    def prepare_pipeline_config(self, yaml_file):
        # build dag
        read_op = pipeline.RequestOp()
@@ -208,20 +219,62 @@ class WebService(object):
            abort(400)
        if "fetch" not in request.json:
            abort(400)
        start_call, end_call = None, None
        if self.is_profile:
            trace_que = collections.deque()
            start_call = self._profiler.record("call_{}".format(self.data_id))
        try:
            # Time the three stages (preprocess / predict / postprocess)
            # in microseconds.
            start = int(round(_time() * 1000000))
            feed, fetch, is_batch = self.preprocess(request.json["feed"],
                                                    request.json["fetch"])
            if isinstance(feed, dict) and "fetch" in feed:
                del feed["fetch"]
            if len(feed) == 0:
                raise ValueError("empty input")
            end = int(round(_time() * 1000000))
            prep_time = end - start

            start = int(round(_time() * 1000000))
            fetch_map = self.client.predict(
                feed=feed, fetch=fetch, batch=is_batch)
            end = int(round(_time() * 1000000))
            midp_time = end - start

            start = int(round(_time() * 1000000))
            result = self.postprocess(
                feed=request.json["feed"], fetch=fetch, fetch_map=fetch_map)
            result = {"result": result}
            end = int(round(_time() * 1000000))
            postp_time = end - start
            succ = 1
        except ValueError as err:
            succ = 0
            result = {"result": str(err)}
        if self.is_profile:
            end_call = self._profiler.record("call_{}".format(self.data_id))
            if self.trace_buffer is not None:
                self.trace_buffer.put({
                    "name": "DAG",
                    "id": self.data_id,
                    "succ": succ,
                    "actions": {
                        "call_{}".format(self.data_id): end_call - start_call,
                    },
                })
            # Increment after reporting so the id matches the recorded
            # start_call/end_call pair.
            self.data_id += 1
            if succ:
                # Stage timings only exist when the try block succeeded.
                trace_que.append({
                    "name": "demo",
                    "actions": {
                        "prep": prep_time,
                        "midp": midp_time,
                        "postp": postp_time
                    }
                })
            # Drain the local queue without blocking; stop when the shared
            # buffer is full (remaining items are dropped).
            while trace_que:
                info = trace_que[0]
                try:
                    self.trace_buffer.put_nowait(info)
                    trace_que.popleft()
                except queue.Full:
                    break
        return result
    def run_rpc_service(self):
@@ -281,6 +334,8 @@ class WebService(object):
"{}".format(self.model_config), use_gpu=True, gpu_id=self.gpus[0])
    def run_web_service(self):
        if self.is_profile:
            self._tracer.start()
        print("This API will be deprecated later. Please do not use it")
        self.app_instance.run(host="0.0.0.0", port=self.port, threaded=True)