Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
milvus
提交
dc7359f9
milvus
项目概览
BaiXuePrincess
/
milvus
与 Fork 源项目一致
从无法访问的项目Fork
通知
7
Star
4
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
milvus
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
dc7359f9
编写于
8月 06, 2019
作者:
K
kun yu
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add temperature and pcpu
Former-commit-id: fbf75e467368466833d64f768563c83ecce7dda3
上级
33fa75b3
变更
7
显示空白变更内容
内联
并排
Showing
7 changed file
with
83 addition
and
17 deletion
+83
-17
cpp/CHANGELOG.md
cpp/CHANGELOG.md
+2
-0
cpp/src/db/DBImpl.cpp
cpp/src/db/DBImpl.cpp
+2
-1
cpp/src/metrics/MetricBase.h
cpp/src/metrics/MetricBase.h
+2
-0
cpp/src/metrics/PrometheusMetrics.cpp
cpp/src/metrics/PrometheusMetrics.cpp
+26
-9
cpp/src/metrics/PrometheusMetrics.h
cpp/src/metrics/PrometheusMetrics.h
+13
-2
cpp/src/metrics/SystemInfo.cpp
cpp/src/metrics/SystemInfo.cpp
+34
-4
cpp/src/metrics/SystemInfo.h
cpp/src/metrics/SystemInfo.h
+4
-1
未找到文件。
cpp/CHANGELOG.md
浏览文件 @
dc7359f9
...
...
@@ -40,6 +40,8 @@ Please mark all change in change log and use the ticket from JIRA.
-
MS-266 - Improve topk reduce time by using multi-threads
-
MS-275 - Avoid sqlite logic error exception
-
MS-278 - add IndexStatsHelper
-
MS-305 - add CPU core percent metric
-
MS-310 - add milvus CPU utilization ratio and CPU/GPU temperature metrics
## New Feature
-
MS-180 - Add new mem manager
...
...
cpp/src/db/DBImpl.cpp
浏览文件 @
dc7359f9
...
...
@@ -319,7 +319,8 @@ void DBImpl::StartMetricTask() {
server
::
Metrics
::
GetInstance
().
OctetsSet
();
server
::
Metrics
::
GetInstance
().
CPUCoreUsagePercentSet
();
server
::
Metrics
::
GetInstance
().
GPUTemperature
();
server
::
Metrics
::
GetInstance
().
CPUTemperature
();
ENGINE_LOG_TRACE
<<
"Metric task finished"
;
}
...
...
cpp/src/metrics/MetricBase.h
浏览文件 @
dc7359f9
...
...
@@ -66,6 +66,8 @@ class MetricsBase{
virtual
void
OctetsSet
()
{};
virtual
void
CPUCoreUsagePercentSet
()
{};
virtual
void
GPUTemperature
()
{};
virtual
void
CPUTemperature
()
{};
};
...
...
cpp/src/metrics/PrometheusMetrics.cpp
浏览文件 @
dc7359f9
...
...
@@ -34,8 +34,6 @@ PrometheusMetrics::Init() {
return
SERVER_UNEXPECTED_ERROR
;
}
//
return
SERVER_SUCCESS
;
}
...
...
@@ -44,8 +42,6 @@ PrometheusMetrics::Init() {
/**
 * Publishes the CPU usage percentage reported by SystemInfo::CPUPercent()
 * to the CPU_usage_percent_ gauge.
 *
 * Does nothing while startup_ is false.
 */
void
PrometheusMetrics::CPUUsagePercentSet() {
    if (!startup_) return;
    // The processor count used to be fetched here but was never read, so the
    // dead local has been dropped; only the percentage is needed.
    const double usage_percent = server::SystemInfo::GetInstance().CPUPercent();
    CPU_usage_percent_.Set(usage_percent);
}
...
...
@@ -64,13 +60,11 @@ PrometheusMetrics::GPUPercentGaugeSet() {
std
::
vector
<
unsigned
long
long
>
used_total
=
server
::
SystemInfo
::
GetInstance
().
GPUMemoryTotal
();
std
::
vector
<
unsigned
long
long
>
used_memory
=
server
::
SystemInfo
::
GetInstance
().
GPUMemoryUsed
();
for
(
int
i
=
0
;
i
<
numDevice
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numDevice
;
++
i
)
{
prometheus
::
Gauge
&
GPU_percent
=
GPU_percent_
.
Add
({{
"DeviceNum"
,
std
::
to_string
(
i
)}});
double
percent
=
(
double
)
used_memory
[
i
]
/
(
double
)
used_total
[
i
];
GPU_percent
.
Set
(
percent
*
100
);
}
}
void
PrometheusMetrics
::
GPUMemoryUsageGaugeSet
()
{
...
...
@@ -79,7 +73,7 @@ void PrometheusMetrics::GPUMemoryUsageGaugeSet() {
constexpr
unsigned
long
long
MtoB
=
1024
*
1024
;
int
numDevice
=
server
::
SystemInfo
::
GetInstance
().
num_device
();
for
(
int
i
=
0
;
i
<
numDevice
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numDevice
;
++
i
)
{
prometheus
::
Gauge
&
GPU_memory
=
GPU_memory_usage_
.
Add
({{
"DeviceNum"
,
std
::
to_string
(
i
)}});
GPU_memory
.
Set
(
values
[
i
]
/
MtoB
);
}
...
...
@@ -142,12 +136,35 @@ void PrometheusMetrics::CPUCoreUsagePercentSet() {
std
::
vector
<
double
>
cpu_core_percent
=
server
::
SystemInfo
::
GetInstance
().
CPUCorePercent
();
for
(
int
i
=
0
;
i
<
cpu_core_percent
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
cpu_core_percent
.
size
();
++
i
)
{
prometheus
::
Gauge
&
core_percent
=
CPU_
.
Add
({{
"CPU"
,
std
::
to_string
(
i
)}});
core_percent
.
Set
(
cpu_core_percent
[
i
]);
}
}
void
PrometheusMetrics
::
GPUTemperature
()
{
if
(
!
startup_
)
return
;
std
::
vector
<
unsigned
int
>
GPU_temperatures
=
server
::
SystemInfo
::
GetInstance
().
GPUTemperature
();
for
(
int
i
=
0
;
i
<
GPU_temperatures
.
size
();
++
i
)
{
prometheus
::
Gauge
&
gpu_temp
=
GPU_temperature_
.
Add
({{
"GPU"
,
std
::
to_string
(
i
)}});
gpu_temp
.
Set
(
GPU_temperatures
[
i
]);
}
}
void
PrometheusMetrics
::
CPUTemperature
()
{
if
(
!
startup_
)
return
;
std
::
vector
<
float
>
CPU_temperatures
=
server
::
SystemInfo
::
GetInstance
().
CPUTemperature
();
for
(
int
i
=
0
;
i
<
CPU_temperatures
.
size
();
++
i
)
{
prometheus
::
Gauge
&
cpu_temp
=
CPU_temperature_
.
Add
({{
"CPU"
,
std
::
to_string
(
i
)}});
cpu_temp
.
Set
(
CPU_temperatures
[
i
]);
}
}
}
}
...
...
cpp/src/metrics/PrometheusMetrics.h
浏览文件 @
dc7359f9
...
...
@@ -79,7 +79,6 @@ class PrometheusMetrics: public MetricsBase {
void
QueryVectorResponseSummaryObserve
(
double
value
,
int
count
=
1
)
override
{
if
(
startup_
)
for
(
int
i
=
0
;
i
<
count
;
++
i
)
query_vector_response_summary_
.
Observe
(
value
);};
void
QueryVectorResponsePerSecondGaugeSet
(
double
value
)
override
{
if
(
startup_
)
query_vector_response_per_second_gauge_
.
Set
(
value
);};
void
CPUUsagePercentSet
()
override
;
void
CPUCoreUsagePercentSet
()
override
;
void
RAMUsagePercentSet
()
override
;
...
...
@@ -93,6 +92,9 @@ class PrometheusMetrics: public MetricsBase {
void
KeepingAliveCounterIncrement
(
double
value
=
1
)
override
{
if
(
startup_
)
keeping_alive_counter_
.
Increment
(
value
);};
void
OctetsSet
()
override
;
void
GPUTemperature
()
override
;
void
CPUTemperature
()
override
;
...
...
@@ -396,7 +398,7 @@ class PrometheusMetrics: public MetricsBase {
.
Name
(
"CPU_usage_percent"
)
.
Help
(
"CPU usage percent by this this process"
)
.
Register
(
*
registry_
);
prometheus
::
Gauge
&
CPU_usage_percent_
=
CPU_
.
Add
({{
"CPU"
,
"
0
"
}});
prometheus
::
Gauge
&
CPU_usage_percent_
=
CPU_
.
Add
({{
"CPU"
,
"
avg
"
}});
prometheus
::
Family
<
prometheus
::
Gauge
>
&
RAM_
=
prometheus
::
BuildGauge
()
...
...
@@ -444,6 +446,15 @@ class PrometheusMetrics: public MetricsBase {
prometheus
::
Gauge
&
outoctets_gauge_
=
octets_
.
Add
({{
"type"
,
"outoctets"
}});
prometheus
::
Family
<
prometheus
::
Gauge
>
&
GPU_temperature_
=
prometheus
::
BuildGauge
()
.
Name
(
"GPU_temperature"
)
.
Help
(
"GPU temperature"
)
.
Register
(
*
registry_
);
prometheus
::
Family
<
prometheus
::
Gauge
>
&
CPU_temperature_
=
prometheus
::
BuildGauge
()
.
Name
(
"CPU_temperature"
)
.
Help
(
"CPU temperature"
)
.
Register
(
*
registry_
);
};
...
...
cpp/src/metrics/SystemInfo.cpp
浏览文件 @
dc7359f9
...
...
@@ -36,6 +36,9 @@ void SystemInfo::Init() {
num_processors_
=
0
;
while
(
fgets
(
line
,
128
,
file
)
!=
NULL
){
if
(
strncmp
(
line
,
"processor"
,
9
)
==
0
)
num_processors_
++
;
if
(
strncmp
(
line
,
"physical"
,
8
)
==
0
)
{
num_physical_processors_
=
ParseLine
(
line
);
}
}
total_ram_
=
GetPhysicalMemory
();
fclose
(
file
);
...
...
@@ -108,8 +111,6 @@ SystemInfo::MemoryPercent() {
return
(
double
)(
GetProcessUsedMemory
()
*
100
)
/
(
double
)
total_ram_
;
}
std
::
vector
<
double
>
SystemInfo
::
CPUCorePercent
()
{
std
::
vector
<
unsigned
long
long
>
prev_work_time_array
;
...
...
@@ -119,7 +120,7 @@ SystemInfo::CPUCorePercent() {
std
::
vector
<
unsigned
long
long
>
cur_total_time_array
=
getTotalCpuTime
(
cur_work_time_array
);
std
::
vector
<
double
>
cpu_core_percent
;
for
(
int
i
=
0
;
i
<
num_processors_
;
i
++
)
{
for
(
int
i
=
1
;
i
<
num_processors_
;
i
++
)
{
double
total_cpu_time
=
cur_total_time_array
[
i
]
-
prev_total_time_array
[
i
];
double
cpu_work_time
=
cur_work_time_array
[
i
]
-
prev_work_time_array
[
i
];
cpu_core_percent
.
push_back
((
cpu_work_time
/
total_cpu_time
)
*
100
);
...
...
@@ -181,7 +182,6 @@ SystemInfo::CPUPercent() {
percent
=
(
time_sample
.
tms_stime
-
last_sys_cpu_
)
+
(
time_sample
.
tms_utime
-
last_user_cpu_
);
percent
/=
(
now
-
last_cpu_
);
percent
/=
num_processors_
;
percent
*=
100
;
}
last_cpu_
=
now
;
...
...
@@ -207,6 +207,36 @@ SystemInfo::GPUMemoryTotal() {
return
result
;
}
std
::
vector
<
unsigned
int
>
SystemInfo
::
GPUTemperature
(){
if
(
!
initialized_
)
Init
();
std
::
vector
<
unsigned
int
>
result
;
for
(
int
i
=
0
;
i
<
num_device_
;
i
++
)
{
nvmlDevice_t
device
;
nvmlDeviceGetHandleByIndex
(
i
,
&
device
);
unsigned
int
temp
;
nvmlDeviceGetTemperature
(
device
,
NVML_TEMPERATURE_GPU
,
&
temp
);
result
.
push_back
(
temp
);
}
return
result
;
}
/**
 * Reads temperatures from the thermal sysfs files
 * /sys/class/thermal/thermal_zone<i>/temp for i in [0, num_physical_processors_].
 *
 * @return one entry per zone successfully read, in zone order, each being the
 *         raw file value divided by 1000 (presumably converting millidegrees
 *         to degrees — confirm against the kernel thermal documentation).
 *         Reading stops at the first zone that cannot be opened or parsed and
 *         the readings gathered so far are returned.
 */
std::vector<float>
SystemInfo::CPUTemperature() {
    std::vector<float> result;
    // NOTE(review): the inclusive upper bound is kept from the original;
    // presumably num_physical_processors_ holds the highest 0-based physical
    // id parsed in Init() — confirm against ParseLine().
    for (int i = 0; i <= num_physical_processors_; ++i) {
        const std::string path = "/sys/class/thermal/thermal_zone" + std::to_string(i) + "/temp";
        FILE *file = fopen(path.c_str(), "r");
        if (file == nullptr) {
            perror("Could not open thermal file");
            return result;
        }

        float temp = 0.0f;
        const int parsed = fscanf(file, "%f", &temp);
        // Close on every path: the handle used to be leaked on each call.
        fclose(file);
        if (parsed != 1) {
            // Unparseable content: stop rather than publish an undefined value,
            // mirroring the open-failure path above.
            break;
        }
        result.push_back(temp / 1000);
    }
    // The original fell off the end of this non-void function (undefined behavior).
    return result;
}
std
::
vector
<
unsigned
long
long
>
SystemInfo
::
GPUMemoryUsed
()
{
// get GPU memory used
...
...
cpp/src/metrics/SystemInfo.h
浏览文件 @
dc7359f9
...
...
@@ -32,6 +32,7 @@ class SystemInfo {
clock_t
last_user_cpu_
=
clock_t
();
std
::
chrono
::
system_clock
::
time_point
net_time_
=
std
::
chrono
::
system_clock
::
now
();
int
num_processors_
=
0
;
int
num_physical_processors_
=
0
;
//number of GPU
unsigned
int
num_device_
=
0
;
unsigned
long
long
in_octets_
=
0
;
...
...
@@ -47,6 +48,7 @@ class SystemInfo {
void
Init
();
int
num_processor
()
const
{
return
num_processors_
;};
int
num_physical_processors
()
const
{
return
num_physical_processors_
;
};
int
num_device
()
const
{
return
num_device_
;};
unsigned
long
long
get_inoctets
()
{
return
in_octets_
;};
unsigned
long
long
get_octets
()
{
return
out_octets_
;};
...
...
@@ -65,7 +67,8 @@ class SystemInfo {
std
::
vector
<
double
>
CPUCorePercent
();
std
::
vector
<
unsigned
long
long
>
getTotalCpuTime
(
std
::
vector
<
unsigned
long
long
>
&
workTime
);
std
::
vector
<
unsigned
int
>
GPUTemperature
();
std
::
vector
<
float
>
CPUTemperature
();
};
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录