Commit 31f75d66 authored by wangjiawei04

add parse script

Parent 543e2069
## general info
cuda_version = ""
cudnn_version = ""
trt_version = ""
python_version = ""
gcc_version = ""
paddle_version = ""
cpu = ""
gpu = ""
xpu = ""
api = ""
owner = ""
## model info
model_name = ""
model_type = ""
model_source = ""
model_url = ""
## data info
batch_size = ""
num_of_samples = ""
input_shape = ""
## conf info
runtime_device = ""
ir_optim = ""
enable_memory_optim = ""
enable_tensorrt = ""
precision = ""
enable_mkldnn = ""
cpu_math_library_num_threads = ""
## acc info
acc1 = ""
acc5 = ""
## perf info
average_latency, QPS = "", ""
process_latency = ""
cpu_rss, vms, shared, dirty, cpu_usage = "", "", "", "", ""
gpu_id, total, free, used, gpu_utilization_rate, gpu_mem_utilization_rate = "", "", "", "", "", ""


class LogHandler(object):
    """Accumulates report lines, then prints them or dumps them to a log file."""

    def __init__(self):
        self.fstr = ""

    def print(self):
        print(self.fstr)

    def dump(self):
        # write the accumulated report to inference_profile.log
        with open("inference_profile.log", 'w') as fout:
            fout.write(self.fstr)

    def append(self, new_str):
        self.fstr += new_str + "\n"


fh = LogHandler()
fh.append("cuda_version: {}".format(cuda_version))
fh.append("cudnn_version: {}".format(cudnn_version))
fh.append("trt_version: {} ".format(trt_version))
fh.append("python_version: {}".format(python_version))
fh.append("gcc_version: {}".format(gcc_version))
fh.append("paddle_version: {}".format(paddle_version))
fh.append("cpu: {}".format(cpu))
fh.append("gpu: {}".format(gpu)) # p4, v100, 1080
fh.append("xpu: {}".format(xpu))
fh.append("api: {}".format(api))
fh.append("owner: {}".format(owner))
fh.append("----------------------- Model info ----------------------")
fh.append("model_name: {}".format(model_name))
fh.append("model_type: {}".format(model_type))
fh.append("model_source: {}".format(model_source))
fh.append("model_url: {}".format(model_url))
fh.append("----------------------- Data info -----------------------")
fh.append("batch_size: {}".format(batch_size))
fh.append("num_of_samples: {}".format(num_of_samples))
fh.append("input_shape: {}".format(input_shape))
fh.append("----------------------- Conf info -----------------------")
fh.append("runtime_device: {}".format(runtime_device))
fh.append("ir_optim: {}".format(ir_optim))
fh.append("enable_memory_optim: {}".format(enable_memory_optim))
fh.append("enable_tensorrt: {}".format(enable_tensorrt))
fh.append("precision: {}".format(precision)) # fp32, fp16, int8
fh.append("enable_mkldnn: {}".format(enable_mkldnn))
fh.append("cpu_math_library_num_threads: {}".format(cpu_math_library_num_threads))
fh.append("----------------------- Acc info ------------------------")
fh.append("acc1:".format(acc1))
fh.append("acc5:".format(acc5))
fh.append("----------------------- Perf info -----------------------")
fh.append("average_latency(ms): {}, QPS: {}".format(average_latency, QPS))
fh.append("process_latency(ms): {}".format(process_latency))
fh.append("process_name: clas_benchmark, cpu_rss(MB): {}, vms(MB): {}, shared(MB): {}, dirty(MB): {}, cpu_usage(%): {}".format(cpu_rss, vms, shared, dirty, cpu_usage))
fh.append("gpu_id: {}, total(MB): {}, free(MB): {}, used(MB): {}, gpu_utilization_rate(%): {}, gpu_mem_utilization_rate(%): {}".format(gpu_id, total, free, used, gpu_utilization_rate, gpu_mem_utilization_rate))
fh.dump()
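
# The commit adds only the report-formatting half; nothing above actually
# parses a raw benchmark log into these fields. A minimal sketch of how they
# might be populated, assuming the benchmark tool emits "key: value" lines.
# The file name benchmark_raw.log and that line format are assumptions, not
# part of this commit.
import re

def parse_kv_log(path):
    """Collect 'key: value' pairs from a raw benchmark log into a dict."""
    info = {}
    with open(path) as fin:
        for line in fin:
            m = re.match(r"^\s*([A-Za-z0-9_]+)\s*:\s*(.+?)\s*$", line)
            if m:
                info[m.group(1)] = m.group(2)
    return info

# Hypothetical usage: fill a few of the report fields from the raw log.
raw = parse_kv_log("benchmark_raw.log")
cuda_version = raw.get("cuda_version", "")
batch_size = raw.get("batch_size", "")
average_latency, QPS = raw.get("average_latency", ""), raw.get("QPS", "")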
@@ -14,6 +14,7 @@
#!flask/bin/python
# pylint: disable=doc-string-missing
from time import time as _time
from flask import Flask, request, abort
from contextlib import closing
from multiprocessing import Pool, Process, Queue
@@ -24,7 +25,8 @@ import socket
import sys
import numpy as np
import paddle_serving_server_gpu as serving
import collections
import queue  # queue.Full is what a full multiprocessing queue raises on put_nowait
from .profiler import TimeProfiler, PerformanceTracer
from paddle_serving_server_gpu import pipeline
from paddle_serving_server_gpu.pipeline import Op
@@ -51,6 +53,15 @@ class WebService(object):
    def get_pipeline_response(self, read_op):
        return None
    def setup_profile(self, trace_interval=10, thread_num=1):
        # turn on profiling and start collecting per-request traces
        self.is_profile = True
        self._tracer = PerformanceTracer(True, trace_interval, thread_num)
        self.trace_buffer = self._tracer.data_buffer()
        self._profiler = TimeProfiler()
        self._profiler.enable(True)
        self.data_id = 0
    def prepare_pipeline_config(self, yaml_file):
        # build dag
        read_op = pipeline.RequestOp()
@@ -208,20 +219,62 @@
            abort(400)
        if "fetch" not in request.json:
            abort(400)
        start_call, end_call = None, None
        # default the stage timings so the trace block below cannot hit a
        # NameError when preprocess raises before they are measured
        prep_time = midp_time = postp_time = 0
        if getattr(self, "is_profile", False):
            trace_que = collections.deque()
            start_call = self._profiler.record("call_{}".format(self.data_id))
        try:
            start = int(round(_time() * 1000000))
            feed, fetch, is_batch = self.preprocess(request.json["feed"],
                                                    request.json["fetch"])
            if isinstance(feed, dict) and "fetch" in feed:
                del feed["fetch"]
            if len(feed) == 0:
                raise ValueError("empty input")
            end = int(round(_time() * 1000000))
            prep_time = end - start  # preprocess latency in microseconds
            start = int(round(_time() * 1000000))
            fetch_map = self.client.predict(
                feed=feed, fetch=fetch, batch=is_batch)
            end = int(round(_time() * 1000000))
            midp_time = end - start  # prediction latency in microseconds
            start = int(round(_time() * 1000000))
            result = self.postprocess(
                feed=request.json["feed"], fetch=fetch, fetch_map=fetch_map)
            result = {"result": result}
            end = int(round(_time() * 1000000))
            postp_time = end - start  # postprocess latency in microseconds
            succ = 1
        except ValueError as err:
            succ = 0
            result = {"result": str(err)}
        if getattr(self, "is_profile", False):
            end_call = self._profiler.record("call_{}".format(self.data_id))
            if self.trace_buffer is not None:
                self.trace_buffer.put({
                    "name": "DAG",
                    "id": self.data_id,
                    "succ": succ,
                    "actions": {
                        "call_{}".format(self.data_id): end_call - start_call,
                    },
                })
                trace_que.append({
                    "name": "demo",
                    "actions": {
                        "prep": prep_time,
                        "midp": midp_time,
                        "postp": postp_time
                    }
                })
                # drain queued trace records; stop early if the buffer is full
                while trace_que:
                    info = trace_que[0]
                    try:
                        self.trace_buffer.put_nowait(info)
                        trace_que.popleft()
                    except queue.Full:
                        break
            # increment after reporting so the action key above matches the
            # "call_{id}" label recorded by start_call/end_call
            self.data_id += 1
        return result
    def run_rpc_service(self):
@@ -281,6 +334,8 @@ class WebService(object):
            "{}".format(self.model_config), use_gpu=True, gpu_id=self.gpus[0])
    def run_web_service(self):
        if getattr(self, "is_profile", False):
            # start the background tracer before serving requests
            self._tracer.start()
        print("This API will be deprecated later. Please do not use it")
        self.app_instance.run(host="0.0.0.0", port=self.port, threaded=True)
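# A hypothetical end-to-end use of the new hooks, following the usual
# WebService launch flow; the service name, model path, GPU id, and port
# below are placeholders, not part of this diff.
from paddle_serving_server_gpu.web_service import WebService

service = WebService(name="demo")            # placeholder service name
service.load_model_config("serving_server")  # placeholder model path
service.set_gpus("0")                        # placeholder GPU id
service.prepare_server(workdir="workdir", port=9393, device="gpu")
service.setup_profile(trace_interval=10, thread_num=1)  # enable the tracer added here
service.run_rpc_service()
service.run_web_service()  # starts the tracer, then serves requests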