Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Serving
提交
49f1d810
S
Serving
项目概览
PaddlePaddle
/
Serving
接近 2 年 前同步成功
通知
186
Star
833
Fork
253
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
105
列表
看板
标记
里程碑
合并请求
10
Wiki
2
Wiki
分析
仓库
DevOps
项目成员
Pages
S
Serving
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
105
Issue
105
列表
看板
标记
里程碑
合并请求
10
合并请求
10
Pages
分析
分析
仓库分析
DevOps
Wiki
2
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
49f1d810
编写于
1月 28, 2022
作者:
B
bjjwwang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add prometheus python cli
上级
bc9a65c0
变更
3
隐藏空白更改
内联
并排
Showing 3 changed files with 46 additions and 17 deletions
+46
-17
python/pipeline/dag.py
python/pipeline/dag.py
+34
-13
python/pipeline/operator.py
python/pipeline/operator.py
+4
-0
python/pipeline/profiler.py
python/pipeline/profiler.py
+8
-4
未找到文件。
python/pipeline/dag.py
浏览文件 @
49f1d810
...
@@ -62,6 +62,10 @@ class DAGExecutor(object):
...
@@ -62,6 +62,10 @@ class DAGExecutor(object):
self
.
_retry
=
dag_conf
[
"retry"
]
self
.
_retry
=
dag_conf
[
"retry"
]
self
.
_server_use_profile
=
dag_conf
[
"use_profile"
]
self
.
_server_use_profile
=
dag_conf
[
"use_profile"
]
if
"prometheus_port"
in
dag_conf
:
self
.
_prometheus_port
=
dag_conf
[
"prometheus_port"
]
else
:
self
.
_prometheus_port
=
None
channel_size
=
dag_conf
[
"channel_size"
]
channel_size
=
dag_conf
[
"channel_size"
]
channel_recv_frist_arrive
=
dag_conf
[
"channel_recv_frist_arrive"
]
channel_recv_frist_arrive
=
dag_conf
[
"channel_recv_frist_arrive"
]
self
.
_is_thread_op
=
dag_conf
[
"is_thread_op"
]
self
.
_is_thread_op
=
dag_conf
[
"is_thread_op"
]
...
@@ -78,7 +82,7 @@ class DAGExecutor(object):
...
@@ -78,7 +82,7 @@ class DAGExecutor(object):
self
.
_tracer
=
PerformanceTracer
(
self
.
_tracer
=
PerformanceTracer
(
self
.
_is_thread_op
,
tracer_interval_s
,
server_worker_num
)
self
.
_is_thread_op
,
tracer_interval_s
,
server_worker_num
)
self
.
_dag
=
DAG
(
self
.
name
,
response_op
,
self
.
_server_use_profile
,
self
.
_dag
=
DAG
(
self
.
name
,
response_op
,
self
.
_server_use_profile
,
self
.
_prometheus_port
,
self
.
_is_thread_op
,
channel_size
,
build_dag_each_worker
,
self
.
_is_thread_op
,
channel_size
,
build_dag_each_worker
,
self
.
_tracer
,
channel_recv_frist_arrive
)
self
.
_tracer
,
channel_recv_frist_arrive
)
(
in_channel
,
out_channel
,
pack_rpc_func
,
(
in_channel
,
out_channel
,
pack_rpc_func
,
...
@@ -480,10 +484,10 @@ class DAG(object):
...
@@ -480,10 +484,10 @@ class DAG(object):
"""
"""
Directed Acyclic Graph(DAG) engine, builds one DAG topology.
Directed Acyclic Graph(DAG) engine, builds one DAG topology.
"""
"""
def
__init__
(
self
,
request_name
,
response_op
,
use_profile
,
is_thread_op
,
def
__init__
(
self
,
request_name
,
response_op
,
use_profile
,
prometheus_port
,
is_thread_op
,
channel_size
,
build_dag_each_worker
,
tracer
,
channel_size
,
build_dag_each_worker
,
tracer
,
channel_recv_frist_arrive
):
channel_recv_frist_arrive
):
_LOGGER
.
info
(
"{}, {}, {}, {}, {}
,{} ,{} ,{}"
.
format
(
request_name
,
response_op
,
use_profile
,
is_thread_op
,
_LOGGER
.
info
(
"{}, {}, {}, {}, {}
, {} ,{} ,{} ,{}"
.
format
(
request_name
,
response_op
,
use_profile
,
prometheus_port
,
is_thread_op
,
channel_size
,
build_dag_each_worker
,
tracer
,
channel_size
,
build_dag_each_worker
,
tracer
,
channel_recv_frist_arrive
))
channel_recv_frist_arrive
))
@
ErrorCatch
@
ErrorCatch
...
@@ -491,6 +495,7 @@ class DAG(object):
...
@@ -491,6 +495,7 @@ class DAG(object):
def
init_helper
(
self
,
request_name
:
str
,
def
init_helper
(
self
,
request_name
:
str
,
response_op
,
response_op
,
use_profile
:
[
bool
,
None
],
use_profile
:
[
bool
,
None
],
prometheus_port
:
[
int
,
None
],
is_thread_op
:
bool
,
is_thread_op
:
bool
,
channel_size
,
channel_size
,
build_dag_each_worker
:
[
bool
,
None
],
build_dag_each_worker
:
[
bool
,
None
],
...
@@ -499,6 +504,8 @@ class DAG(object):
...
@@ -499,6 +504,8 @@ class DAG(object):
self
.
_request_name
=
request_name
self
.
_request_name
=
request_name
self
.
_response_op
=
response_op
self
.
_response_op
=
response_op
self
.
_use_profile
=
use_profile
self
.
_use_profile
=
use_profile
self
.
_prometheus_port
=
prometheus_port
self
.
_use_prometheus
=
(
self
.
_prometheus_port
is
not
None
)
self
.
_is_thread_op
=
is_thread_op
self
.
_is_thread_op
=
is_thread_op
self
.
_channel_size
=
channel_size
self
.
_channel_size
=
channel_size
self
.
_build_dag_each_worker
=
build_dag_each_worker
self
.
_build_dag_each_worker
=
build_dag_each_worker
...
@@ -506,7 +513,7 @@ class DAG(object):
...
@@ -506,7 +513,7 @@ class DAG(object):
self
.
_channel_recv_frist_arrive
=
channel_recv_frist_arrive
self
.
_channel_recv_frist_arrive
=
channel_recv_frist_arrive
if
not
self
.
_is_thread_op
:
if
not
self
.
_is_thread_op
:
self
.
_manager
=
PipelineProcSyncManager
()
self
.
_manager
=
PipelineProcSyncManager
()
init_helper
(
self
,
request_name
,
response_op
,
use_profile
,
is_thread_op
,
init_helper
(
self
,
request_name
,
response_op
,
use_profile
,
prometheus_port
,
is_thread_op
,
channel_size
,
build_dag_each_worker
,
tracer
,
channel_size
,
build_dag_each_worker
,
tracer
,
channel_recv_frist_arrive
)
channel_recv_frist_arrive
)
print
(
"[DAG] Succ init"
)
print
(
"[DAG] Succ init"
)
...
@@ -828,27 +835,40 @@ class DAG(object):
...
@@ -828,27 +835,40 @@ class DAG(object):
return
self
.
_input_channel
,
self
.
_output_channel
,
self
.
_pack_func
,
self
.
_unpack_func
return
self
.
_input_channel
,
self
.
_output_channel
,
self
.
_pack_func
,
self
.
_unpack_func
def
start_prom
(
self
):
def
start_prom
(
self
,
prometheus_port
):
import
prometheus_client
import
prometheus_client
from
prometheus_client
import
Counter
from
prometheus_client
import
Counter
from
prometheus_client.core
import
CollectorRegistry
from
prometheus_client.core
import
CollectorRegistry
from
flask
import
Response
,
Flask
from
flask
import
Response
,
Flask
from
.prometheus_metrics
import
registry
from
.prometheus_metrics
import
registry
from
.prometheus_metrics
import
metric_query_success
,
metric_query_failure
,
metric_inf_count
,
metric_query_duration_us
,
metric_inf_duration_us
app
=
Flask
(
__name__
)
app
=
Flask
(
__name__
)
requests_total
=
Counter
(
'c1'
,
'A counter'
)
requests_total
=
Counter
(
'c1'
,
'A counter'
)
@
app
.
route
(
"/metrics/"
)
@
app
.
route
(
"/metrics/"
)
def
requests_count
():
def
requests_count
():
requests_total
.
inc
(
1
)
item
=
self
.
_tracer
.
profile_dict
# requests_total.inc(2)
_LOGGER
.
info
(
"metrics: {}"
.
format
(
item
))
# {'uci': {'in': 727.443, 'prep': 0.5525833333333333, 'midp': 2.21375, 'postp': 1.32375, 'out': 0.9396666666666667}, 'DAG': {'call_0': 29.479, 'call_1': 8.176, 'call_2': 8.045, 'call_3': 7.988, 'call_4': 7.609, 'call_5': 7.629, 'call_6': 7.625, 'call_7': 8.32, 'call_8': 8.57, 'call_9': 8.055, 'call_10': 7.915, 'call_11': 7.873, 'query_count': 12, 'qps': 1.2, 'succ': 1.0, 'avg': 9.773666666666667, '50': 8.045, '60': 8.055, '70': 8.176, '80': 8.32, '90': 8.57, '95': 29.479, '99': 29.479}}
if
"DAG"
in
item
:
total
=
item
[
"DAG"
][
"query_count"
]
succ
=
total
*
item
[
"DAG"
][
"succ"
]
fail
=
total
*
(
1
-
item
[
"DAG"
][
"succ"
])
inf_cnt
=
total
query_duration
=
total
*
item
[
"DAG"
][
"avg"
]
metric_query_success
.
_value
.
set
(
succ
)
metric_query_failure
.
_value
.
set
(
fail
)
metric_inf_count
.
_value
.
set
(
total
)
metric_query_duration_us
.
_value
.
set
(
query_duration
)
#return str(item)
return
Response
(
prometheus_client
.
generate_latest
(
registry
),
mimetype
=
"text/plain"
)
return
Response
(
prometheus_client
.
generate_latest
(
registry
),
mimetype
=
"text/plain"
)
def
prom_run
():
def
prom_run
():
app
.
run
(
host
=
"0.0.0.0"
,
port
=
8581
)
app
.
run
(
host
=
"0.0.0.0"
,
port
=
prometheus_port
)
p
=
multiprocessing
.
Process
(
p
=
threading
.
Thread
(
target
=
prom_run
,
target
=
prom_run
,
args
=
())
args
=
())
_LOGGER
.
info
(
"Prometheus Start 2"
)
_LOGGER
.
info
(
"Prometheus Start 2"
)
...
@@ -869,13 +889,14 @@ class DAG(object):
...
@@ -869,13 +889,14 @@ class DAG(object):
for
op
in
self
.
_actual_ops
:
for
op
in
self
.
_actual_ops
:
op
.
use_profiler
(
self
.
_use_profile
)
op
.
use_profiler
(
self
.
_use_profile
)
op
.
set_tracer
(
self
.
_tracer
)
op
.
set_tracer
(
self
.
_tracer
)
op
.
set_use_prometheus
(
self
.
_use_prometheus
)
if
self
.
_is_thread_op
:
if
self
.
_is_thread_op
:
self
.
_threads_or_proces
.
extend
(
op
.
start_with_thread
())
self
.
_threads_or_proces
.
extend
(
op
.
start_with_thread
())
else
:
else
:
self
.
_threads_or_proces
.
extend
(
op
.
start_with_process
())
self
.
_threads_or_proces
.
extend
(
op
.
start_with_process
())
_LOGGER
.
info
(
"[DAG] start"
)
_LOGGER
.
info
(
"[DAG] start"
)
_LOGGER
.
info
(
"Prometheus Start 1"
)
_LOGGER
.
info
(
"Prometheus Start 1"
)
self
.
start_prom
()
self
.
start_prom
(
self
.
_prometheus_port
)
# not join yet
# not join yet
return
self
.
_threads_or_proces
return
self
.
_threads_or_proces
...
...
python/pipeline/operator.py
浏览文件 @
49f1d810
...
@@ -349,6 +349,9 @@ class Op(object):
...
@@ -349,6 +349,9 @@ class Op(object):
def
set_tracer
(
self
,
tracer
):
def
set_tracer
(
self
,
tracer
):
self
.
_tracer
=
tracer
self
.
_tracer
=
tracer
def
set_use_prometheus
(
self
,
use_prometheus
):
self
.
_use_prometheus
=
use_prometheus
def
init_client
(
self
,
client_config
,
server_endpoints
):
def
init_client
(
self
,
client_config
,
server_endpoints
):
"""
"""
Initialize the client object. There are three types of clients, brpc,
Initialize the client object. There are three types of clients, brpc,
...
@@ -1403,6 +1406,7 @@ class Op(object):
...
@@ -1403,6 +1406,7 @@ class Op(object):
midped_data_dict
,
err_channeldata_dict
\
midped_data_dict
,
err_channeldata_dict
\
=
self
.
_run_process
(
preped_data_dict
,
op_info_prefix
,
skip_process_dict
,
logid_dict
)
=
self
.
_run_process
(
preped_data_dict
,
op_info_prefix
,
skip_process_dict
,
logid_dict
)
end
=
profiler
.
record
(
"midp#{}_1"
.
format
(
op_info_prefix
))
end
=
profiler
.
record
(
"midp#{}_1"
.
format
(
op_info_prefix
))
_LOGGER
.
info
(
"prometheus inf count +1"
)
midp_time
=
end
-
start
midp_time
=
end
-
start
_LOGGER
.
debug
(
"op:{} process_end:{}, cost:{}"
.
format
(
_LOGGER
.
debug
(
"op:{} process_end:{}, cost:{}"
.
format
(
op_info_prefix
,
time
.
time
(),
midp_time
))
op_info_prefix
,
time
.
time
(),
midp_time
))
...
...
python/pipeline/profiler.py
浏览文件 @
49f1d810
...
@@ -49,13 +49,17 @@ class PerformanceTracer(object):
...
@@ -49,13 +49,17 @@ class PerformanceTracer(object):
self
.
_channels
=
[]
self
.
_channels
=
[]
# The size of data in Channel will not exceed server_worker_num
# The size of data in Channel will not exceed server_worker_num
self
.
_server_worker_num
=
server_worker_num
self
.
_server_worker_num
=
server_worker_num
if
_is_profile
:
self
.
profile_dict
=
{}
self
.
profile_dict
=
{}
def
data_buffer
(
self
):
def
data_buffer
(
self
):
return
self
.
_data_buffer
return
self
.
_data_buffer
def
start
(
self
):
def
start
(
self
):
self
.
_thrd
=
threading
.
Thread
(
target
=
self
.
_trace_func
,
args
=
(
self
.
_channels
,
))
self
.
_thrd
.
daemon
=
True
self
.
_thrd
.
start
()
"""
if self._is_thread_mode:
if self._is_thread_mode:
self._thrd = threading.Thread(
self._thrd = threading.Thread(
target=self._trace_func, args=(self._channels, ))
target=self._trace_func, args=(self._channels, ))
...
@@ -66,6 +70,7 @@ class PerformanceTracer(object):
...
@@ -66,6 +70,7 @@ class PerformanceTracer(object):
target=self._trace_func, args=(self._channels, ))
target=self._trace_func, args=(self._channels, ))
self._proc.daemon = True
self._proc.daemon = True
self._proc.start()
self._proc.start()
"""
def
set_channels
(
self
,
channels
):
def
set_channels
(
self
,
channels
):
self
.
_channels
=
channels
self
.
_channels
=
channels
...
@@ -121,8 +126,7 @@ class PerformanceTracer(object):
...
@@ -121,8 +126,7 @@ class PerformanceTracer(object):
calcu_cost
+=
op_cost
[
name
][
action
]
calcu_cost
+=
op_cost
[
name
][
action
]
_LOGGER
.
info
(
"
\t
idle[{}]"
.
format
(
1
-
1.0
*
calcu_cost
/
_LOGGER
.
info
(
"
\t
idle[{}]"
.
format
(
1
-
1.0
*
calcu_cost
/
tot_cost
))
tot_cost
))
if
_is_profile
:
self
.
profile_dict
=
copy
.
deepcopy
(
op_cost
)
self
.
profile_dict
=
copy
.
deepcopy
(
op_cost
)
if
"DAG"
in
op_cost
:
if
"DAG"
in
op_cost
:
calls
=
list
(
op_cost
[
"DAG"
].
values
())
calls
=
list
(
op_cost
[
"DAG"
].
values
())
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录