Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
milvus
提交
79ea7ecb
milvus
项目概览
BaiXuePrincess
/
milvus
与 Fork 源项目一致
从无法访问的项目Fork
通知
7
Star
4
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
milvus
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
79ea7ecb
编写于
8月 07, 2019
作者:
J
jinhai
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'branch-0.3.1' into branch-0.4.0
Former-commit-id: b479db2044d96bd9cd84e3ed6d5a33c8df2442a6
上级
9edd391f
2de15145
变更
12
隐藏空白更改
内联
并排
Showing
12 changed file
with
140 addition
and
27 deletion
+140
-27
ci/jenkinsfile/dev_test.groovy
ci/jenkinsfile/dev_test.groovy
+1
-1
cpp/CHANGELOG.md
cpp/CHANGELOG.md
+5
-1
cpp/conf/server_config.template
cpp/conf/server_config.template
+2
-1
cpp/src/db/DBImpl.cpp
cpp/src/db/DBImpl.cpp
+39
-7
cpp/src/db/DBMetaImpl.cpp
cpp/src/db/DBMetaImpl.cpp
+1
-1
cpp/src/metrics/MetricBase.h
cpp/src/metrics/MetricBase.h
+2
-0
cpp/src/metrics/PrometheusMetrics.cpp
cpp/src/metrics/PrometheusMetrics.cpp
+26
-9
cpp/src/metrics/PrometheusMetrics.h
cpp/src/metrics/PrometheusMetrics.h
+13
-2
cpp/src/metrics/SystemInfo.cpp
cpp/src/metrics/SystemInfo.cpp
+34
-4
cpp/src/metrics/SystemInfo.h
cpp/src/metrics/SystemInfo.h
+4
-1
cpp/src/server/DBWrapper.cpp
cpp/src/server/DBWrapper.cpp
+12
-0
cpp/src/server/ServerConfig.h
cpp/src/server/ServerConfig.h
+1
-0
未找到文件。
ci/jenkinsfile/dev_test.groovy
浏览文件 @
79ea7ecb
timeout
(
time:
3
0
,
unit:
'MINUTES'
)
{
timeout
(
time:
4
0
,
unit:
'MINUTES'
)
{
try
{
dir
(
"${PROJECT_NAME}_test"
)
{
checkout
([
$class
:
'GitSCM'
,
branches:
[[
name:
"${SEMVER}"
]],
doGenerateSubmoduleConfigurations:
false
,
extensions:
[],
submoduleCfg:
[],
userRemoteConfigs:
[[
credentialsId:
"${params.GIT_USER}"
,
url:
"git@192.168.1.105:Test/milvus_test.git"
,
name:
'origin'
,
refspec:
"+refs/heads/${SEMVER}:refs/remotes/origin/${SEMVER}"
]]])
...
...
cpp/CHANGELOG.md
浏览文件 @
79ea7ecb
...
...
@@ -50,7 +50,11 @@ Please mark all change in change log and use the ticket from JIRA.
-
MS-261 - Update faiss version to 1.5.3 and add BUILD_FAISS_WITH_MKL as an option
-
MS-266 - Improve topk reduce time by using multi-threads
-
MS-275 - Avoid sqlite logic error exception
-
MS-278 - add IndexStatsHelper
-
MS-278 - Add IndexStatsHelper
-
MS-312 - Set openmp thread number by config
-
MS-305 - Add CPU core percent metric
-
MS-310 - Add milvus CPU utilization ratio and CPU/GPU temperature metrics
-
MS-324 - Show error when there is not enough gpu memory to build index
## New Feature
-
MS-180 - Add new mem manager
...
...
cpp/conf/server_config.template
浏览文件 @
79ea7ecb
...
...
@@ -43,4 +43,5 @@ engine_config:
nprobe: 10
nlist: 16384
use_blas_threshold: 20
metric_type: L2 # compare vectors by euclidean distance(L2) or inner product(IP), optional: L2 or IP
metric_type: L2 # compare vectors by euclidean distance(L2) or inner product(IP), optional: L2 or IP
omp_thread_num: 0 # number of compute threads used by the engine; 0 means use all CPU cores
cpp/src/db/DBImpl.cpp
浏览文件 @
79ea7ecb
...
...
@@ -326,7 +326,8 @@ void DBImpl::StartMetricTask() {
server
::
Metrics
::
GetInstance
().
OctetsSet
();
server
::
Metrics
::
GetInstance
().
CPUCoreUsagePercentSet
();
server
::
Metrics
::
GetInstance
().
GPUTemperature
();
server
::
Metrics
::
GetInstance
().
CPUTemperature
();
ENGINE_LOG_TRACE
<<
"Metric task finished"
;
}
...
...
@@ -541,11 +542,27 @@ Status DBImpl::BuildIndex(const meta::TableFileSchema& file) {
}
//step 3: build index
auto
start_time
=
METRICS_NOW_TIME
;
auto
index
=
to_index
->
BuildIndex
(
table_file
.
location_
);
auto
end_time
=
METRICS_NOW_TIME
;
auto
total_time
=
METRICS_MICROSECONDS
(
start_time
,
end_time
);
server
::
Metrics
::
GetInstance
().
BuildIndexDurationSecondsHistogramObserve
(
total_time
);
std
::
shared_ptr
<
ExecutionEngine
>
index
;
try
{
auto
start_time
=
METRICS_NOW_TIME
;
index
=
to_index
->
BuildIndex
(
table_file
.
location_
);
auto
end_time
=
METRICS_NOW_TIME
;
auto
total_time
=
METRICS_MICROSECONDS
(
start_time
,
end_time
);
server
::
Metrics
::
GetInstance
().
BuildIndexDurationSecondsHistogramObserve
(
total_time
);
}
catch
(
std
::
exception
&
ex
)
{
//typical error: out of gpu memory
std
::
string
msg
=
"BuildIndex encounter exception"
+
std
::
string
(
ex
.
what
());
ENGINE_LOG_ERROR
<<
msg
;
table_file
.
file_type_
=
meta
::
TableFileSchema
::
TO_DELETE
;
status
=
meta_ptr_
->
UpdateTableFile
(
table_file
);
ENGINE_LOG_DEBUG
<<
"Failed to update file to index, mark file: "
<<
table_file
.
file_id_
<<
" to to_delete"
;
std
::
cout
<<
"ERROR: failed to build index, index file is too large or gpu memory is not enough"
<<
std
::
endl
;
return
Status
::
Error
(
msg
);
}
//step 4: if table has been deleted, dont save index file
bool
has_table
=
false
;
...
...
@@ -556,7 +573,22 @@ Status DBImpl::BuildIndex(const meta::TableFileSchema& file) {
}
//step 5: save index file
index
->
Serialize
();
try
{
index
->
Serialize
();
}
catch
(
std
::
exception
&
ex
)
{
//typical error: out of disk space or permition denied
std
::
string
msg
=
"Serialize index encounter exception"
+
std
::
string
(
ex
.
what
());
ENGINE_LOG_ERROR
<<
msg
;
table_file
.
file_type_
=
meta
::
TableFileSchema
::
TO_DELETE
;
status
=
meta_ptr_
->
UpdateTableFile
(
table_file
);
ENGINE_LOG_DEBUG
<<
"Failed to update file to index, mark file: "
<<
table_file
.
file_id_
<<
" to to_delete"
;
std
::
cout
<<
"ERROR: failed to persist index file: "
<<
table_file
.
location_
<<
", possible out of disk space"
<<
std
::
endl
;
return
Status
::
Error
(
msg
);
}
//step 6: update meta
table_file
.
file_type_
=
meta
::
TableFileSchema
::
INDEX
;
...
...
cpp/src/db/DBMetaImpl.cpp
浏览文件 @
79ea7ecb
...
...
@@ -1005,7 +1005,7 @@ Status DBMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) {
table_file
.
date_
=
std
::
get
<
3
>
(
file
);
utils
::
DeleteTableFilePath
(
options_
,
table_file
);
ENGINE_LOG_DEBUG
<<
"Removing file id:"
<<
table_file
.
id_
<<
" location:"
<<
table_file
.
location_
;
ENGINE_LOG_DEBUG
<<
"Removing file id:"
<<
table_file
.
file_
id_
<<
" location:"
<<
table_file
.
location_
;
ConnectorPtr
->
remove
<
TableFileSchema
>
(
table_file
.
id_
);
}
...
...
cpp/src/metrics/MetricBase.h
浏览文件 @
79ea7ecb
...
...
@@ -66,6 +66,8 @@ class MetricsBase{
virtual
void
OctetsSet
()
{};
virtual
void
CPUCoreUsagePercentSet
()
{};
virtual
void
GPUTemperature
()
{};
virtual
void
CPUTemperature
()
{};
};
...
...
cpp/src/metrics/PrometheusMetrics.cpp
浏览文件 @
79ea7ecb
...
...
@@ -34,8 +34,6 @@ PrometheusMetrics::Init() {
return
SERVER_UNEXPECTED_ERROR
;
}
//
return
SERVER_SUCCESS
;
}
...
...
@@ -44,8 +42,6 @@ PrometheusMetrics::Init() {
void
PrometheusMetrics
::
CPUUsagePercentSet
()
{
if
(
!
startup_
)
return
;
int
numProcessor
=
server
::
SystemInfo
::
GetInstance
().
num_processor
();
double
usage_percent
=
server
::
SystemInfo
::
GetInstance
().
CPUPercent
();
CPU_usage_percent_
.
Set
(
usage_percent
);
}
...
...
@@ -64,13 +60,11 @@ PrometheusMetrics::GPUPercentGaugeSet() {
std
::
vector
<
unsigned
long
long
>
used_total
=
server
::
SystemInfo
::
GetInstance
().
GPUMemoryTotal
();
std
::
vector
<
unsigned
long
long
>
used_memory
=
server
::
SystemInfo
::
GetInstance
().
GPUMemoryUsed
();
for
(
int
i
=
0
;
i
<
numDevice
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numDevice
;
++
i
)
{
prometheus
::
Gauge
&
GPU_percent
=
GPU_percent_
.
Add
({{
"DeviceNum"
,
std
::
to_string
(
i
)}});
double
percent
=
(
double
)
used_memory
[
i
]
/
(
double
)
used_total
[
i
];
GPU_percent
.
Set
(
percent
*
100
);
}
}
void
PrometheusMetrics
::
GPUMemoryUsageGaugeSet
()
{
...
...
@@ -79,7 +73,7 @@ void PrometheusMetrics::GPUMemoryUsageGaugeSet() {
constexpr
unsigned
long
long
MtoB
=
1024
*
1024
;
int
numDevice
=
server
::
SystemInfo
::
GetInstance
().
num_device
();
for
(
int
i
=
0
;
i
<
numDevice
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numDevice
;
++
i
)
{
prometheus
::
Gauge
&
GPU_memory
=
GPU_memory_usage_
.
Add
({{
"DeviceNum"
,
std
::
to_string
(
i
)}});
GPU_memory
.
Set
(
values
[
i
]
/
MtoB
);
}
...
...
@@ -142,12 +136,35 @@ void PrometheusMetrics::CPUCoreUsagePercentSet() {
std
::
vector
<
double
>
cpu_core_percent
=
server
::
SystemInfo
::
GetInstance
().
CPUCorePercent
();
for
(
int
i
=
0
;
i
<
cpu_core_percent
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
cpu_core_percent
.
size
();
++
i
)
{
prometheus
::
Gauge
&
core_percent
=
CPU_
.
Add
({{
"CPU"
,
std
::
to_string
(
i
)}});
core_percent
.
Set
(
cpu_core_percent
[
i
]);
}
}
void
PrometheusMetrics
::
GPUTemperature
()
{
if
(
!
startup_
)
return
;
std
::
vector
<
unsigned
int
>
GPU_temperatures
=
server
::
SystemInfo
::
GetInstance
().
GPUTemperature
();
for
(
int
i
=
0
;
i
<
GPU_temperatures
.
size
();
++
i
)
{
prometheus
::
Gauge
&
gpu_temp
=
GPU_temperature_
.
Add
({{
"GPU"
,
std
::
to_string
(
i
)}});
gpu_temp
.
Set
(
GPU_temperatures
[
i
]);
}
}
void
PrometheusMetrics
::
CPUTemperature
()
{
if
(
!
startup_
)
return
;
std
::
vector
<
float
>
CPU_temperatures
=
server
::
SystemInfo
::
GetInstance
().
CPUTemperature
();
for
(
int
i
=
0
;
i
<
CPU_temperatures
.
size
();
++
i
)
{
prometheus
::
Gauge
&
cpu_temp
=
CPU_temperature_
.
Add
({{
"CPU"
,
std
::
to_string
(
i
)}});
cpu_temp
.
Set
(
CPU_temperatures
[
i
]);
}
}
}
}
...
...
cpp/src/metrics/PrometheusMetrics.h
浏览文件 @
79ea7ecb
...
...
@@ -79,7 +79,6 @@ class PrometheusMetrics: public MetricsBase {
void
QueryVectorResponseSummaryObserve
(
double
value
,
int
count
=
1
)
override
{
if
(
startup_
)
for
(
int
i
=
0
;
i
<
count
;
++
i
)
query_vector_response_summary_
.
Observe
(
value
);};
void
QueryVectorResponsePerSecondGaugeSet
(
double
value
)
override
{
if
(
startup_
)
query_vector_response_per_second_gauge_
.
Set
(
value
);};
void
CPUUsagePercentSet
()
override
;
void
CPUCoreUsagePercentSet
()
override
;
void
RAMUsagePercentSet
()
override
;
...
...
@@ -93,6 +92,9 @@ class PrometheusMetrics: public MetricsBase {
void
KeepingAliveCounterIncrement
(
double
value
=
1
)
override
{
if
(
startup_
)
keeping_alive_counter_
.
Increment
(
value
);};
void
OctetsSet
()
override
;
void
GPUTemperature
()
override
;
void
CPUTemperature
()
override
;
...
...
@@ -396,7 +398,7 @@ class PrometheusMetrics: public MetricsBase {
.
Name
(
"CPU_usage_percent"
)
.
Help
(
"CPU usage percent by this this process"
)
.
Register
(
*
registry_
);
prometheus
::
Gauge
&
CPU_usage_percent_
=
CPU_
.
Add
({{
"CPU"
,
"
0
"
}});
prometheus
::
Gauge
&
CPU_usage_percent_
=
CPU_
.
Add
({{
"CPU"
,
"
avg
"
}});
prometheus
::
Family
<
prometheus
::
Gauge
>
&
RAM_
=
prometheus
::
BuildGauge
()
...
...
@@ -444,6 +446,15 @@ class PrometheusMetrics: public MetricsBase {
prometheus
::
Gauge
&
outoctets_gauge_
=
octets_
.
Add
({{
"type"
,
"outoctets"
}});
prometheus
::
Family
<
prometheus
::
Gauge
>
&
GPU_temperature_
=
prometheus
::
BuildGauge
()
.
Name
(
"GPU_temperature"
)
.
Help
(
"GPU temperature"
)
.
Register
(
*
registry_
);
prometheus
::
Family
<
prometheus
::
Gauge
>
&
CPU_temperature_
=
prometheus
::
BuildGauge
()
.
Name
(
"CPU_temperature"
)
.
Help
(
"CPU temperature"
)
.
Register
(
*
registry_
);
};
...
...
cpp/src/metrics/SystemInfo.cpp
浏览文件 @
79ea7ecb
...
...
@@ -36,6 +36,9 @@ void SystemInfo::Init() {
num_processors_
=
0
;
while
(
fgets
(
line
,
128
,
file
)
!=
NULL
){
if
(
strncmp
(
line
,
"processor"
,
9
)
==
0
)
num_processors_
++
;
if
(
strncmp
(
line
,
"physical"
,
8
)
==
0
)
{
num_physical_processors_
=
ParseLine
(
line
);
}
}
total_ram_
=
GetPhysicalMemory
();
fclose
(
file
);
...
...
@@ -108,8 +111,6 @@ SystemInfo::MemoryPercent() {
return
(
double
)(
GetProcessUsedMemory
()
*
100
)
/
(
double
)
total_ram_
;
}
std
::
vector
<
double
>
SystemInfo
::
CPUCorePercent
()
{
std
::
vector
<
unsigned
long
long
>
prev_work_time_array
;
...
...
@@ -119,7 +120,7 @@ SystemInfo::CPUCorePercent() {
std
::
vector
<
unsigned
long
long
>
cur_total_time_array
=
getTotalCpuTime
(
cur_work_time_array
);
std
::
vector
<
double
>
cpu_core_percent
;
for
(
int
i
=
0
;
i
<
num_processors_
;
i
++
)
{
for
(
int
i
=
1
;
i
<
num_processors_
;
i
++
)
{
double
total_cpu_time
=
cur_total_time_array
[
i
]
-
prev_total_time_array
[
i
];
double
cpu_work_time
=
cur_work_time_array
[
i
]
-
prev_work_time_array
[
i
];
cpu_core_percent
.
push_back
((
cpu_work_time
/
total_cpu_time
)
*
100
);
...
...
@@ -181,7 +182,6 @@ SystemInfo::CPUPercent() {
percent
=
(
time_sample
.
tms_stime
-
last_sys_cpu_
)
+
(
time_sample
.
tms_utime
-
last_user_cpu_
);
percent
/=
(
now
-
last_cpu_
);
percent
/=
num_processors_
;
percent
*=
100
;
}
last_cpu_
=
now
;
...
...
@@ -207,6 +207,36 @@ SystemInfo::GPUMemoryTotal() {
return
result
;
}
std
::
vector
<
unsigned
int
>
SystemInfo
::
GPUTemperature
(){
if
(
!
initialized_
)
Init
();
std
::
vector
<
unsigned
int
>
result
;
for
(
int
i
=
0
;
i
<
num_device_
;
i
++
)
{
nvmlDevice_t
device
;
nvmlDeviceGetHandleByIndex
(
i
,
&
device
);
unsigned
int
temp
;
nvmlDeviceGetTemperature
(
device
,
NVML_TEMPERATURE_GPU
,
&
temp
);
result
.
push_back
(
temp
);
}
return
result
;
}
std::vector<float> SystemInfo::CPUTemperature() {
    // Reads /sys/class/thermal/thermal_zone<i>/temp for i in
    // [0, num_physical_processors_] and returns each value divided by 1000
    // (sysfs thermal zones report millidegrees Celsius).
    //
    // Reading stops at the first zone that cannot be opened or parsed, and the
    // values gathered so far are returned, so element i always corresponds to
    // thermal zone i.
    std::vector<float> result;
    for (int i = 0; i <= num_physical_processors_; ++i) {
        const std::string path = "/sys/class/thermal/thermal_zone" + std::to_string(i) + "/temp";
        FILE *file = fopen(path.c_str(), "r");
        if (file == nullptr) {
            perror("Could not open thermal file");
            return result;
        }

        float temp = 0;
        const int parsed = fscanf(file, "%f", &temp);
        // Always release the handle; this function runs on every metrics tick.
        fclose(file);

        if (parsed != 1) {
            // Unreadable zone: treat like an unopenable one instead of
            // publishing an uninitialised value.
            return result;
        }
        result.push_back(temp / 1000);
    }
    // The original fell off the end of a non-void function (undefined behaviour).
    return result;
}
std
::
vector
<
unsigned
long
long
>
SystemInfo
::
GPUMemoryUsed
()
{
// get GPU memory used
...
...
cpp/src/metrics/SystemInfo.h
浏览文件 @
79ea7ecb
...
...
@@ -32,6 +32,7 @@ class SystemInfo {
clock_t
last_user_cpu_
=
clock_t
();
std
::
chrono
::
system_clock
::
time_point
net_time_
=
std
::
chrono
::
system_clock
::
now
();
int
num_processors_
=
0
;
int
num_physical_processors_
=
0
;
//number of GPU
unsigned
int
num_device_
=
0
;
unsigned
long
long
in_octets_
=
0
;
...
...
@@ -47,6 +48,7 @@ class SystemInfo {
void
Init
();
int
num_processor
()
const
{
return
num_processors_
;};
int
num_physical_processors
()
const
{
return
num_physical_processors_
;
};
int
num_device
()
const
{
return
num_device_
;};
unsigned
long
long
get_inoctets
()
{
return
in_octets_
;};
unsigned
long
long
get_octets
()
{
return
out_octets_
;};
...
...
@@ -65,7 +67,8 @@ class SystemInfo {
std
::
vector
<
double
>
CPUCorePercent
();
std
::
vector
<
unsigned
long
long
>
getTotalCpuTime
(
std
::
vector
<
unsigned
long
long
>
&
workTime
);
std
::
vector
<
unsigned
int
>
GPUTemperature
();
std
::
vector
<
float
>
CPUTemperature
();
};
...
...
cpp/src/server/DBWrapper.cpp
浏览文件 @
79ea7ecb
...
...
@@ -10,11 +10,14 @@
#include "utils/Log.h"
#include "utils/StringHelpFunctions.h"
#include <omp.h>
namespace
zilliz
{
namespace
milvus
{
namespace
server
{
DBWrapper
::
DBWrapper
()
{
//db config
zilliz
::
milvus
::
engine
::
Options
opt
;
ConfigNode
&
db_config
=
ServerConfig
::
GetInstance
().
GetConfig
(
CONFIG_DB
);
opt
.
meta
.
backend_uri
=
db_config
.
GetValue
(
CONFIG_DB_URL
);
...
...
@@ -37,6 +40,7 @@ DBWrapper::DBWrapper() {
kill
(
0
,
SIGUSR1
);
}
// cache config
ConfigNode
&
cache_config
=
ServerConfig
::
GetInstance
().
GetConfig
(
CONFIG_CACHE
);
opt
.
insert_cache_immediately_
=
cache_config
.
GetBoolValue
(
CONFIG_INSERT_CACHE_IMMEDIATELY
,
false
);
...
...
@@ -56,6 +60,14 @@ DBWrapper::DBWrapper() {
kill
(
0
,
SIGUSR1
);
}
// engine config
ConfigNode
&
engine_config
=
ServerConfig
::
GetInstance
().
GetConfig
(
CONFIG_ENGINE
);
int32_t
omp_thread
=
engine_config
.
GetInt32Value
(
CONFIG_OMP_THREAD_NUM
,
0
);
if
(
omp_thread
>
0
)
{
omp_set_num_threads
(
omp_thread
);
SERVER_LOG_DEBUG
<<
"Specify openmp thread number: "
<<
omp_thread
;
}
//set archive config
engine
::
ArchiveConf
::
CriteriaT
criterial
;
int64_t
disk
=
db_config
.
GetInt64Value
(
CONFIG_DB_ARCHIVE_DISK
,
0
);
...
...
cpp/src/server/ServerConfig.h
浏览文件 @
79ea7ecb
...
...
@@ -53,6 +53,7 @@ static const std::string CONFIG_NPROBE = "nprobe";
static
const
std
::
string
CONFIG_NLIST
=
"nlist"
;
static
const
std
::
string
CONFIG_DCBT
=
"use_blas_threshold"
;
static
const
std
::
string
CONFIG_METRICTYPE
=
"metric_type"
;
static
const
std
::
string
CONFIG_OMP_THREAD_NUM
=
"omp_thread_num"
;
class
ServerConfig
{
public:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录