Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
milvus
提交
24863f63
milvus
项目概览
BaiXuePrincess
/
milvus
与 Fork 源项目一致
从无法访问的项目Fork
通知
7
Star
4
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
milvus
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
24863f63
编写于
6月 05, 2019
作者:
Y
yu yunfeng
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
all metrics are done
Former-commit-id: 68275f6545ac62a28899f4b6bbb687365c52d52e
上级
e9dac752
变更
15
隐藏空白更改
内联
并排
Showing
15 changed file
with
485 addition
and
18 deletion
+485
-18
cpp/src/CMakeLists.txt
cpp/src/CMakeLists.txt
+5
-2
cpp/src/db/DBImpl.inl
cpp/src/db/DBImpl.inl
+12
-5
cpp/src/db/FaissExecutionEngine.inl
cpp/src/db/FaissExecutionEngine.inl
+4
-1
cpp/src/db/MemManager.inl
cpp/src/db/MemManager.inl
+5
-0
cpp/src/metrics/MetricBase.h
cpp/src/metrics/MetricBase.h
+11
-0
cpp/src/metrics/PrometheusMetrics.cpp
cpp/src/metrics/PrometheusMetrics.cpp
+76
-1
cpp/src/metrics/PrometheusMetrics.h
cpp/src/metrics/PrometheusMetrics.h
+86
-4
cpp/src/metrics/SystemInfo.cpp
cpp/src/metrics/SystemInfo.cpp
+204
-0
cpp/src/metrics/SystemInfo.h
cpp/src/metrics/SystemInfo.h
+26
-3
cpp/src/server/MegasearchServer.cpp
cpp/src/server/MegasearchServer.cpp
+2
-1
cpp/src/server/MegasearchThreadPoolServer.cpp
cpp/src/server/MegasearchThreadPoolServer.cpp
+19
-0
cpp/src/server/MegasearchThreadPoolServer.h
cpp/src/server/MegasearchThreadPoolServer.h
+32
-0
cpp/src/server/Server.cpp
cpp/src/server/Server.cpp
+1
-0
cpp/src/thrift/megasearch.thrift
cpp/src/thrift/megasearch.thrift
+1
-1
cpp/unittest/CMakeLists.txt
cpp/unittest/CMakeLists.txt
+1
-0
未找到文件。
cpp/src/CMakeLists.txt
浏览文件 @
24863f63
...
...
@@ -33,7 +33,7 @@ set(service_files
thrift/gen-cpp/MegasearchService.cpp
thrift/gen-cpp/megasearch_constants.cpp
thrift/gen-cpp/megasearch_types.cpp
)
metrics/SystemInfo.cpp metrics/SystemInfo.h server/MegasearchThreadPoolServer.cpp server/MegasearchThreadPoolServer.h
)
set
(
vecwise_engine_files
${
CMAKE_CURRENT_SOURCE_DIR
}
/main.cpp
...
...
@@ -51,6 +51,7 @@ include_directories(/usr/include)
include_directories
(
/usr/local/cuda/include
)
include_directories
(
thrift/gen-cpp
)
#target_link_libraries(megasearch boost_system_static)
#target_link_libraries(megasearch boost_filesystem_static)
#target_link_libraries(megasearch boost_serialization_static)
...
...
@@ -92,6 +93,7 @@ set(third_party_libs
snappy
zlib
zstd
${
CUDA_TOOLKIT_ROOT_DIR
}
/lib64/stubs/libnvidia-ml.so
)
if
(
GPU_VERSION STREQUAL
"ON"
)
...
...
@@ -109,6 +111,7 @@ if (GPU_VERSION STREQUAL "ON")
libprometheus-cpp-push.a
libprometheus-cpp-pull.a
libprometheus-cpp-core.a
${
CUDA_TOOLKIT_ROOT_DIR
}
/lib64/stubs/libnvidia-ml.so
)
else
()
set
(
engine_libs
...
...
@@ -122,6 +125,7 @@ else()
libprometheus-cpp-push.a
libprometheus-cpp-pull.a
libprometheus-cpp-core.a
${
CUDA_TOOLKIT_ROOT_DIR
}
/lib64/stubs/libnvidia-ml.so
)
endif
()
...
...
@@ -175,7 +179,6 @@ set(server_libs
liblz4.a
dl
metrics
)
add_executable
(
vecwise_server
...
...
cpp/src/db/DBImpl.inl
浏览文件 @
24863f63
...
...
@@ -63,7 +63,7 @@ Status DBImpl<EngineT>::InsertVectors(const std::string& table_id_,
// double average_time = double(time_span.count()) / n;
double total_time = METRICS_MICROSECONDS(start_time,end_time);
double avg_time = total_time /
n
;
double avg_time = total_time /
double(n)
;
for (int i = 0; i < n; ++i) {
server::Metrics::GetInstance().AddVectorsDurationHistogramOberve(avg_time);
}
...
...
@@ -85,13 +85,14 @@ Status DBImpl<EngineT>::Query(const std::string &table_id, size_t k, size_t nq,
meta::DatesT dates = {meta::Meta::GetDate()};
Status result = Query(table_id, k, nq, vectors, dates, results);
auto end_time = METRICS_NOW_TIME;
auto total_time = METRICS_MICROSECONDS(start_time,end_time);
auto total_time = METRICS_MICROSECONDS(start_time,
end_time);
auto average_time = total_time / nq;
for (int i = 0; i < nq; ++i) {
server::Metrics::GetInstance().QueryResponseSummaryObserve(total_time);
}
server::Metrics::GetInstance().QueryVectorResponseSummaryObserve(average_time, nq);
server::Metrics::GetInstance().QueryVectorResponsePerSecondGaugeSet(double (nq) / total_time);
server::Metrics::GetInstance().QueryResponsePerSecondGaugeSet(1.0 / total_time);
return result;
}
...
...
@@ -256,17 +257,23 @@ void DBImpl<EngineT>::StartTimerTasks(int interval) {
template<typename EngineT>
void DBImpl<EngineT>::BackgroundTimerTask(int interval) {
Status status;
server::SystemInfo::GetInstance().Init();
while (true) {
if (!bg_error_.ok()) break;
if (shutting_down_.load(std::memory_order_acquire)) break;
std::this_thread::sleep_for(std::chrono::seconds(interval));
int64_t cache_total = cache::CpuCacheMgr::GetInstance()->CacheUsage();
LOG(DEBUG) << "Cache usage " << cache_total;
server::Metrics::GetInstance().CacheUsageGaugeSet(static_cast<double>(cache_total));
server::Metrics::GetInstance().KeepingAliveCounterIncrement(interval);
int64_t cache_usage = cache::CpuCacheMgr::GetInstance()->CacheUsage();
int64_t cache_total = cache::CpuCacheMgr::GetInstance()->CacheCapacity();
server::Metrics::GetInstance().CacheUsageGaugeSet(cache_usage*100/cache_total);
long size;
Size(size);
server::Metrics::GetInstance().DataFileSizeGaugeSet(size);
server::Metrics::GetInstance().CPUUsagePercentSet();
server::Metrics::GetInstance().RAMUsagePercentSet();
server::Metrics::GetInstance().GPUPercentGaugeSet();
server::Metrics::GetInstance().GPUMemoryUsageGaugeSet();
TrySchedule();
}
}
...
...
cpp/src/db/FaissExecutionEngine.inl
浏览文件 @
24863f63
...
...
@@ -130,8 +130,11 @@ Status FaissExecutionEngine<IndexTrait>::Search(long n,
long k,
float *distances,
long *labels) const {
auto start_time = METRICS_NOW_TIME;
pIndex_->search(n, data, k, distances, labels);
auto end_time = METRICS_NOW_TIME;
auto total_time = METRICS_MICROSECONDS(start_time,end_time);
server::Metrics::GetInstance().QueryIndexTypePerSecondSet(IndexTrait::BuildIndexType, double(n)/double(total_time));
return Status::OK();
}
...
...
cpp/src/db/MemManager.inl
浏览文件 @
24863f63
...
...
@@ -31,8 +31,12 @@ MemVectors<EngineT>::MemVectors(const std::shared_ptr<meta::Meta>& meta_ptr,
/**
 * Assigns ids to `n_` vectors, appends them to the underlying engine and
 * reports the observed insertion throughput to the metrics singleton.
 *
 * @param n_          number of vectors in `vectors_`
 * @param vectors_    vector data handed to the engine
 * @param vector_ids_ receives the generated ids
 */
template<typename EngineT>
void MemVectors<EngineT>::Add(size_t n_, const float* vectors_, IDNumbers& vector_ids_) {
    const auto began_at = METRICS_NOW_TIME;

    pIdGenerator_->GetNextIDNumbers(n_, vector_ids_);
    pEE_->AddWithIds(n_, vectors_, vector_ids_.data());

    const auto finished_at = METRICS_NOW_TIME;
    const auto elapsed = METRICS_MICROSECONDS(began_at, finished_at);

    const int vector_count = static_cast<int>(n_);
    const int dimension = static_cast<int>(schema_.dimension);
    server::Metrics::GetInstance().AddVectorsPerSecondGaugeSet(vector_count, dimension, elapsed);
}
template<typename EngineT>
...
...
@@ -107,6 +111,7 @@ Status MemManager<EngineT>::InsertVectors(const std::string& table_id_,
const float* vectors_,
IDNumbers& vector_ids_) {
std::unique_lock<std::mutex> lock(mutex_);
return InsertVectorsNoLock(table_id_, n_, vectors_, vector_ids_);
}
...
...
cpp/src/metrics/MetricBase.h
浏览文件 @
24863f63
...
...
@@ -8,6 +8,7 @@
#include "utils/Error.h"
#include "server/ServerConfig.h"
#include "SystemInfo.h"
namespace
zilliz
{
namespace
vecwise
{
...
...
@@ -71,6 +72,16 @@ class MetricsBase{
/// No-op default for recording the "add vectors failed" value; PrometheusMetrics overrides it.
virtual void AddVectorsFailGaugeSet(double value) {}
/// No-op default for observing a per-vector query response value `count` times; PrometheusMetrics overrides it.
virtual void QueryVectorResponseSummaryObserve(double value, int count = 1) {}
/// No-op default for publishing the query-vector response rate; PrometheusMetrics overrides it.
virtual void QueryVectorResponsePerSecondGaugeSet(double value) {}
/// No-op default for publishing CPU usage; PrometheusMetrics overrides it.
virtual void CPUUsagePercentSet() {}
/// No-op default for publishing RAM usage; PrometheusMetrics overrides it.
virtual void RAMUsagePercentSet() {}
/// No-op default for publishing the query response rate; PrometheusMetrics overrides it.
virtual void QueryResponsePerSecondGaugeSet(double value) {}
/// No-op default for publishing per-device GPU utilisation; PrometheusMetrics overrides it.
virtual void GPUPercentGaugeSet() {}
/// No-op default for publishing per-device GPU memory usage; PrometheusMetrics overrides it.
virtual void GPUMemoryUsageGaugeSet() {}
/// No-op default for publishing insertion throughput from a vector count, dimension and elapsed time;
/// PrometheusMetrics overrides it.
virtual void AddVectorsPerSecondGaugeSet(int num_vector, int dim, double time) {}
/// No-op default for publishing a query rate keyed by index type name; PrometheusMetrics overrides it.
virtual void QueryIndexTypePerSecondSet(std::string type, double value) {}
/// No-op default invoked when a connection is opened; PrometheusMetrics overrides it.
virtual void ConnectionGaugeIncrement() {}
/// No-op default invoked when a connection is closed; PrometheusMetrics overrides it.
virtual void ConnectionGaugeDecrement() {}
/// No-op default for advancing the keep-alive counter by `value`; PrometheusMetrics overrides it.
virtual void KeepingAliveCounterIncrement(double value = 1) {}
};
...
...
cpp/src/metrics/PrometheusMetrics.cpp
浏览文件 @
24863f63
...
...
@@ -5,6 +5,7 @@
******************************************************************************/
#include "PrometheusMetrics.h"
#include "SystemInfo.h"
namespace
zilliz
{
...
...
@@ -25,9 +26,83 @@ PrometheusMetrics::Init() {
// Exposer Registry
exposer_ptr_
->
RegisterCollectable
(
registry_
);
return
SERVER_SUCCESS
;
}
void
PrometheusMetrics
::
CPUUsagePercentSet
()
{
if
(
!
startup_
)
return
;
double
usage_percent
=
server
::
SystemInfo
::
GetInstance
().
CPUPercent
();
CPU_usage_percent_
.
Set
(
usage_percent
);
}
void
PrometheusMetrics
::
RAMUsagePercentSet
()
{
if
(
!
startup_
)
return
;
double
usage_percent
=
server
::
SystemInfo
::
GetInstance
().
MemoryPercent
();
RAM_usage_percent_
.
Set
(
usage_percent
);
}
/**
 * Publishes the per-device GPU utilisation returned by
 * SystemInfo::GPUPercent() to the GPU0..GPU7 gauges.
 * Does nothing while metrics have not been started.
 *
 * The number of gauges updated is bounded by the number of samples actually
 * returned (the same approach GPUMemoryUsageGaugeSet uses), so the vector is
 * never indexed past its end even if the separately reported device count
 * disagrees with it. Only the first eight devices have gauges.
 */
void PrometheusMetrics::GPUPercentGaugeSet() {
    if (!startup_) return;

    std::vector<unsigned int> values = server::SystemInfo::GetInstance().GPUPercent();
    const auto sampled = values.size();

    if (sampled >= 1) GPU0_percent_gauge_.Set(static_cast<double>(values[0]));
    if (sampled >= 2) GPU1_percent_gauge_.Set(static_cast<double>(values[1]));
    if (sampled >= 3) GPU2_percent_gauge_.Set(static_cast<double>(values[2]));
    if (sampled >= 4) GPU3_percent_gauge_.Set(static_cast<double>(values[3]));
    if (sampled >= 5) GPU4_percent_gauge_.Set(static_cast<double>(values[4]));
    if (sampled >= 6) GPU5_percent_gauge_.Set(static_cast<double>(values[5]));
    if (sampled >= 7) GPU6_percent_gauge_.Set(static_cast<double>(values[6]));
    if (sampled >= 8) GPU7_percent_gauge_.Set(static_cast<double>(values[7]));
}
/**
 * Publishes the used memory of each GPU, as returned by
 * SystemInfo::GPUMemoryUsed(), to the GPU0..GPU7 memory gauges.
 * Each value is divided by 1024*1024 using integer division before it is
 * stored. Does nothing while metrics have not been started.
 */
void PrometheusMetrics::GPUMemoryUsageGaugeSet() {
    if (!startup_) return;

    std::vector<unsigned long long> used = server::SystemInfo::GetInstance().GPUMemoryUsed();
    const unsigned long long divisor = 1024 * 1024;
    const int device_total = used.size();

    // One gauge per device index; only the first eight devices are exported.
    if (device_total >= 1) GPU0_memory_usage_gauge_.Set(used[0] / divisor);
    if (device_total >= 2) GPU1_memory_usage_gauge_.Set(used[1] / divisor);
    if (device_total >= 3) GPU2_memory_usage_gauge_.Set(used[2] / divisor);
    if (device_total >= 4) GPU3_memory_usage_gauge_.Set(used[3] / divisor);
    if (device_total >= 5) GPU4_memory_usage_gauge_.Set(used[4] / divisor);
    if (device_total >= 6) GPU5_memory_usage_gauge_.Set(used[5] / divisor);
    if (device_total >= 7) GPU6_memory_usage_gauge_.Set(used[6] / divisor);
    if (device_total >= 8) GPU7_memory_usage_gauge_.Set(used[7] / divisor);
}
/**
 * Publishes insertion throughput to the add-vectors throughput gauge.
 * Does nothing while metrics have not been started.
 *
 * @param num_vector number of vectors inserted
 * @param dim        number of components per vector
 * @param time       elapsed time for the insertion; the caller visible in
 *                   MemVectors::Add passes a METRICS_MICROSECONDS value, so
 *                   the published figure is MiB per microsecond
 *
 * The payload size is computed in 64-bit arithmetic: multiplying the two
 * `int` arguments directly overflows for large batches (for example
 * 1,000,000 vectors of dimension 1024) before the result is widened.
 */
void PrometheusMetrics::AddVectorsPerSecondGaugeSet(int num_vector, int dim, double time) {
    if (!startup_) return;

    const long long MtoB = 1024 * 1024;
    // 4 bytes per component; widen before multiplying to avoid int overflow.
    const long long size = static_cast<long long>(num_vector) * dim * 4;
    add_vectors_per_second_gauge_.Set(size / time / MtoB);
}
void
PrometheusMetrics
::
QueryIndexTypePerSecondSet
(
std
::
string
type
,
double
value
)
{
if
(
type
==
"IVF"
){
query_index_IVF_type_per_second_gauge_
.
Set
(
value
);
}
else
if
(
type
==
"IDMap"
){
query_index_IDMAP_type_per_second_gauge_
.
Set
(
value
);
}
}
/// Adds one to the connection gauge once metrics have been started.
void PrometheusMetrics::ConnectionGaugeIncrement() {
    if (startup_) {
        connection_gauge_.Increment();
    }
}
/// Subtracts one from the connection gauge once metrics have been started.
void PrometheusMetrics::ConnectionGaugeDecrement() {
    if (startup_) {
        connection_gauge_.Decrement();
    }
}
}
}
...
...
cpp/src/metrics/PrometheusMetrics.h
浏览文件 @
24863f63
...
...
@@ -104,9 +104,16 @@ class PrometheusMetrics: public MetricsBase {
/// Stores `value` in the add-vectors "fail" gauge once metrics have been started.
void AddVectorsFailGaugeSet(double value) override {
    if (!startup_) return;
    add_vectors_fail_gauge_.Set(value);
}
/// Feeds `value` into the query-vector response summary `count` times
/// (not at all when `count` is zero or negative) once metrics have been started.
void QueryVectorResponseSummaryObserve(double value, int count = 1) override {
    if (!startup_) return;
    for (int left = count; left > 0; --left) {
        query_vector_response_summary_.Observe(value);
    }
}
/// Stores `value` in the query-vector response rate gauge once metrics have been started.
void QueryVectorResponsePerSecondGaugeSet(double value) override {
    if (!startup_) return;
    query_vector_response_per_second_gauge_.Set(value);
}
void
CPUUsagePercentSet
()
override
;
void
RAMUsagePercentSet
()
override
;
/// Stores `value` in the query response rate gauge once metrics have been started.
void QueryResponsePerSecondGaugeSet(double value) override {
    if (!startup_) return;
    query_response_per_second_gauge.Set(value);
}
void
GPUPercentGaugeSet
()
override
;
void
GPUMemoryUsageGaugeSet
()
override
;
void
AddVectorsPerSecondGaugeSet
(
int
num_vector
,
int
dim
,
double
time
)
override
;
void
QueryIndexTypePerSecondSet
(
std
::
string
type
,
double
value
)
override
;
void
ConnectionGaugeIncrement
()
override
;
void
ConnectionGaugeDecrement
()
override
;
/// Advances the keep-alive counter by `value` once metrics have been started.
void KeepingAliveCounterIncrement(double value = 1) override {
    if (!startup_) return;
    keeping_alive_counter_.Increment(value);
}
// prometheus::Counter &connection_total() {return connection_total_; }
//
...
...
@@ -273,7 +280,7 @@ class PrometheusMetrics: public MetricsBase {
.
Name
(
"build_index_duration_microseconds"
)
.
Help
(
"histogram of processing time for building index"
)
.
Register
(
*
registry_
);
prometheus
::
Histogram
&
build_index_duration_seconds_histogram_
=
build_index_duration_seconds_
.
Add
({},
BucketBoundaries
{
2e6
,
4e6
,
6e6
,
8e6
,
1e7
});
prometheus
::
Histogram
&
build_index_duration_seconds_histogram_
=
build_index_duration_seconds_
.
Add
({},
BucketBoundaries
{
5e5
,
2e6
,
4e6
,
6e6
,
8e6
,
1e7
});
//record processing time for all building index
...
...
@@ -414,6 +421,12 @@ class PrometheusMetrics: public MetricsBase {
.
Register
(
*
registry_
);
prometheus
::
Gauge
&
query_vector_response_per_second_gauge_
=
query_vector_response_per_second_
.
Add
({});
prometheus
::
Family
<
prometheus
::
Gauge
>
&
query_response_per_second_
=
prometheus
::
BuildGauge
()
.
Name
(
"query_response_per_microsecond"
)
.
Help
(
"the number of queries can be processed every microsecond"
)
.
Register
(
*
registry_
);
prometheus
::
Gauge
&
query_response_per_second_gauge
=
query_response_per_second_
.
Add
({});
prometheus
::
Family
<
prometheus
::
Gauge
>
&
disk_store_IO_speed_
=
prometheus
::
BuildGauge
()
.
Name
(
"disk_store_IO_speed_bytes_per_microseconds"
)
.
Help
(
"disk_store_IO_speed"
)
...
...
@@ -433,6 +446,75 @@ class PrometheusMetrics: public MetricsBase {
prometheus
::
Gauge
&
add_vectors_success_gauge_
=
add_vectors_
.
Add
({{
"outcome"
,
"success"
}});
prometheus
::
Gauge
&
add_vectors_fail_gauge_
=
add_vectors_
.
Add
({{
"outcome"
,
"fail"
}});
prometheus
::
Family
<
prometheus
::
Gauge
>
&
add_vectors_per_second_
=
prometheus
::
BuildGauge
()
.
Name
(
"add_vectors_throughput_per_microsecond"
)
.
Help
(
"add vectors throughput per microsecond"
)
.
Register
(
*
registry_
);
prometheus
::
Gauge
&
add_vectors_per_second_gauge_
=
add_vectors_per_second_
.
Add
({});
prometheus
::
Family
<
prometheus
::
Gauge
>
&
CPU_
=
prometheus
::
BuildGauge
()
.
Name
(
"CPU_usage_percent"
)
.
Help
(
"CPU usage percent by this this process"
)
.
Register
(
*
registry_
);
prometheus
::
Gauge
&
CPU_usage_percent_
=
CPU_
.
Add
({});
prometheus
::
Family
<
prometheus
::
Gauge
>
&
RAM_
=
prometheus
::
BuildGauge
()
.
Name
(
"RAM_usage_percent"
)
.
Help
(
"RAM usage percent by this process"
)
.
Register
(
*
registry_
);
prometheus
::
Gauge
&
RAM_usage_percent_
=
RAM_
.
Add
({});
//GPU Usage Percent
prometheus
::
Family
<
prometheus
::
Gauge
>
&
GPU_percent_
=
prometheus
::
BuildGauge
()
.
Name
(
"Gpu_usage_percent"
)
.
Help
(
"GPU_usage_percent "
)
.
Register
(
*
registry_
);
prometheus
::
Gauge
&
GPU0_percent_gauge_
=
GPU_percent_
.
Add
({{
"DeviceNum"
,
"0"
}});
prometheus
::
Gauge
&
GPU1_percent_gauge_
=
GPU_percent_
.
Add
({{
"DeviceNum"
,
"1"
}});
prometheus
::
Gauge
&
GPU2_percent_gauge_
=
GPU_percent_
.
Add
({{
"DeviceNum"
,
"2"
}});
prometheus
::
Gauge
&
GPU3_percent_gauge_
=
GPU_percent_
.
Add
({{
"DeviceNum"
,
"3"
}});
prometheus
::
Gauge
&
GPU4_percent_gauge_
=
GPU_percent_
.
Add
({{
"DeviceNum"
,
"4"
}});
prometheus
::
Gauge
&
GPU5_percent_gauge_
=
GPU_percent_
.
Add
({{
"DeviceNum"
,
"5"
}});
prometheus
::
Gauge
&
GPU6_percent_gauge_
=
GPU_percent_
.
Add
({{
"DeviceNum"
,
"6"
}});
prometheus
::
Gauge
&
GPU7_percent_gauge_
=
GPU_percent_
.
Add
({{
"DeviceNum"
,
"7"
}});
//GPU Mempry used
prometheus
::
Family
<
prometheus
::
Gauge
>
&
GPU_memory_usage_
=
prometheus
::
BuildGauge
()
.
Name
(
"GPU_memory_usage_total"
)
.
Help
(
"GPU memory usage total "
)
.
Register
(
*
registry_
);
prometheus
::
Gauge
&
GPU0_memory_usage_gauge_
=
GPU_memory_usage_
.
Add
({{
"DeviceNum"
,
"0"
}});
prometheus
::
Gauge
&
GPU1_memory_usage_gauge_
=
GPU_memory_usage_
.
Add
({{
"DeviceNum"
,
"1"
}});
prometheus
::
Gauge
&
GPU2_memory_usage_gauge_
=
GPU_memory_usage_
.
Add
({{
"DeviceNum"
,
"2"
}});
prometheus
::
Gauge
&
GPU3_memory_usage_gauge_
=
GPU_memory_usage_
.
Add
({{
"DeviceNum"
,
"3"
}});
prometheus
::
Gauge
&
GPU4_memory_usage_gauge_
=
GPU_memory_usage_
.
Add
({{
"DeviceNum"
,
"4"
}});
prometheus
::
Gauge
&
GPU5_memory_usage_gauge_
=
GPU_memory_usage_
.
Add
({{
"DeviceNum"
,
"5"
}});
prometheus
::
Gauge
&
GPU6_memory_usage_gauge_
=
GPU_memory_usage_
.
Add
({{
"DeviceNum"
,
"6"
}});
prometheus
::
Gauge
&
GPU7_memory_usage_gauge_
=
GPU_memory_usage_
.
Add
({{
"DeviceNum"
,
"7"
}});
prometheus
::
Family
<
prometheus
::
Gauge
>
&
query_index_type_per_second_
=
prometheus
::
BuildGauge
()
.
Name
(
"query_index_throughtout_per_microsecond"
)
.
Help
(
"query index throughtout per microsecond"
)
.
Register
(
*
registry_
);
prometheus
::
Gauge
&
query_index_IVF_type_per_second_gauge_
=
query_index_type_per_second_
.
Add
({{
"IndexType"
,
"IVF"
}});
prometheus
::
Gauge
&
query_index_IDMAP_type_per_second_gauge_
=
query_index_type_per_second_
.
Add
({{
"IndexType"
,
"IDMAP"
}});
prometheus
::
Family
<
prometheus
::
Gauge
>
&
connection_
=
prometheus
::
BuildGauge
()
.
Name
(
"connection_number"
)
.
Help
(
"the number of connections"
)
.
Register
(
*
registry_
);
prometheus
::
Gauge
&
connection_gauge_
=
connection_
.
Add
({});
prometheus
::
Family
<
prometheus
::
Counter
>
&
keeping_alive_
=
prometheus
::
BuildCounter
()
.
Name
(
"keeping_alive_seconds_total"
)
.
Help
(
"total seconds of the serve alive"
)
.
Register
(
*
registry_
);
prometheus
::
Counter
&
keeping_alive_counter_
=
keeping_alive_
.
Add
({});
};
...
...
cpp/src/metrics/SystemInfo.cpp
浏览文件 @
24863f63
...
...
@@ -5,3 +5,207 @@
******************************************************************************/
#include "SystemInfo.h"
#include <sys/types.h>
#include <unistd.h>
#include <iostream>
#include <fstream>
#include "nvml.h"
//#include <mutex>
//
//std::mutex mutex;
namespace
zilliz
{
namespace
vecwise
{
namespace
server
{
/**
 * One-time initialisation of the CPU, RAM and GPU bookkeeping.
 * Subsequent calls return immediately.
 *
 * - Records the current process CPU times as the baseline for CPUPercent().
 * - Counts the "processor" entries of /proc/cpuinfo. If the file cannot be
 *   opened or lists none, one processor is assumed so that CPUPercent()
 *   never divides by zero.
 * - Caches the amount of physical memory.
 * - Initialises NVML and queries the device count. On any NVML failure the
 *   device count is left at zero, so the GPU getters iterate over nothing
 *   instead of over an uninitialised count.
 */
void SystemInfo::Init() {
    if (initialized) return;
    initialized = true;

    // CPU baseline for the first CPUPercent() call.
    struct tms timeSample;
    lastCPU_ = times(&timeSample);
    lastSysCPU_ = timeSample.tms_stime;
    lastUserCPU_ = timeSample.tms_utime;

    numProcessors = 0;
    FILE *file = fopen("/proc/cpuinfo", "r");
    if (file != NULL) {
        char line[128];
        while (fgets(line, 128, file) != NULL) {
            if (strncmp(line, "processor", 9) == 0) numProcessors++;
        }
        fclose(file);
    }
    if (numProcessors <= 0) numProcessors = 1;

    total_RAM_ = GetPhysicalMemory();

    // GPU information; start from a well-defined "no devices" state.
    numDevice = 0;
    nvmlReturn_t nvmlresult = nvmlInit();
    if (NVML_SUCCESS != nvmlresult) {
        printf("System information initilization failed");
        return;
    }
    nvmlresult = nvmlDeviceGetCount(&numDevice);
    if (NVML_SUCCESS != nvmlresult) {
        printf("Unable to get devidce number");
        numDevice = 0;  // the out-parameter is not trusted after a failure
        return;
    }
}
/**
 * Extracts the first decimal number from a "/proc/<pid>/status"-style line
 * such as "VmRSS:     1234 kB\n".
 *
 * @param line NUL-terminated, writable buffer. As before, its last three
 *             characters are cut off in place when the line is at least
 *             three characters long.
 * @return the parsed number, or 0 when the line contains no digit.
 *
 * The digit search stops at the terminator instead of running past the end
 * of the buffer, lines shorter than three characters are no longer written
 * out of bounds, and the value is parsed as `long long` to match the return
 * type rather than being narrowed through `int`.
 */
long long SystemInfo::parseLine(char *line) {
    const size_t len = strlen(line);

    const char *p = line;
    while (*p != '\0' && (*p < '0' || *p > '9')) p++;

    if (len >= 3) line[len - 3] = '\0';

    return atoll(p);
}
/// Returns the machine's total RAM as reported by sysinfo(2):
/// `totalram` scaled by `mem_unit`.
unsigned long SystemInfo::GetPhysicalMemory() {
    struct sysinfo info;
    sysinfo(&info);
    // Widen first so the multiplication is carried out in unsigned long.
    return static_cast<unsigned long>(info.totalram) * info.mem_unit;
}
/**
 * Returns the resident set size of the current process in bytes, read from
 * the "VmRSS:" line of /proc/self/status (which is expressed in kB).
 *
 * @return bytes in use, or 0 when the file cannot be opened or contains no
 *         "VmRSS:" line. Previously a missing file dereferenced a null
 *         FILE*, and a missing line returned the -1 sentinel multiplied by
 *         1024 and converted to an enormous unsigned value.
 */
unsigned long SystemInfo::GetProcessUsedMemory() {
    FILE *file = fopen("/proc/self/status", "r");
    if (file == NULL) return 0;

    long long resident_kb = 0;
    char line[128];
    while (fgets(line, 128, file) != NULL) {
        if (strncmp(line, "VmRSS:", 6) == 0) {
            resident_kb = parseLine(line);
            break;
        }
    }
    fclose(file);

    // kB -> bytes
    return static_cast<unsigned long>(resident_kb * 1024);
}
/**
 * Returns the share of physical RAM used by this process, in percent.
 * Initialises the object on first use.
 *
 * The ratio is computed in floating point: the former integer expression
 * truncated the result to a whole percent (so small processes always
 * reported 0) and divided by zero when the total RAM was unknown.
 *
 * @return percentage in [0, 100], or 0 when the total RAM is reported as 0.
 */
double SystemInfo::MemoryPercent() {
    if (!initialized) Init();
    if (total_RAM_ == 0) return 0.0;

    const double used_bytes = static_cast<double>(GetProcessUsedMemory());
    return used_bytes * 100.0 / static_cast<double>(total_RAM_);
}
/**
 * Returns the CPU usage of this process since the previous call, in percent
 * of all processors: (system + user ticks consumed) / elapsed ticks /
 * processor count * 100. Initialises the object on first use.
 *
 * @return the percentage, or -1.0 when the clock has not advanced or any of
 *         the tick counters went backwards since the last sample.
 */
double SystemInfo::CPUPercent() {
    if (!initialized) Init();

    struct tms sample;
    const clock_t current = times(&sample);

    const bool unusable = current <= lastCPU_
                          || sample.tms_stime < lastSysCPU_
                          || sample.tms_utime < lastUserCPU_;

    double percent = -1.0;
    if (!unusable) {
        percent = (sample.tms_stime - lastSysCPU_) + (sample.tms_utime - lastUserCPU_);
        percent /= (current - lastCPU_);
        percent /= numProcessors;
        percent *= 100;
    }

    // The current sample becomes the baseline for the next call.
    lastCPU_ = current;
    lastSysCPU_ = sample.tms_stime;
    lastUserCPU_ = sample.tms_utime;
    return percent;
}
/**
 * Runs `nvidia-smi pmon -c 1`, redirecting its output to GPUInfo.txt in the
 * working directory, and collects the rows that belong to this process.
 *
 * Columns of a pmon row: gpu index, pid, type, sm, mem, enc, dec, command.
 *
 * @return map from GPU index to {sm percent, mem percent} for this process;
 *         empty when the command produced no matching row.
 *         NOTE(review): the original function fell off the end without
 *         returning anything, so the intended layout of the vector is a
 *         guess based on the values it parsed - confirm with callers.
 *
 * Rows with fewer than five columns and rows whose gpu/pid columns are not
 * numeric (such as the '#' header lines) are skipped instead of throwing.
 * A non-numeric sm/mem field (pmon prints "-") is recorded as 0.
 */
std::unordered_map<int, std::vector<double>> SystemInfo::GetGPUMemPercent() {
    std::unordered_map<int, std::vector<double>> usage;

    // write GPU info to a file
    system("nvidia-smi pmon -c 1 > GPUInfo.txt");
    const long self_pid = static_cast<long>(getpid());

    std::ifstream read_file;
    read_file.open("GPUInfo.txt");
    std::string line;
    while (getline(read_file, line)) {
        std::vector<std::string> words = split(line);
        if (words.size() < 5) continue;

        char *end = NULL;
        const long row_pid = strtol(words[1].c_str(), &end, 10);
        if (end == words[1].c_str() || row_pid != self_pid) continue;

        const long gpu_index = strtol(words[0].c_str(), &end, 10);
        if (end == words[0].c_str()) continue;

        std::vector<double> &entry = usage[static_cast<int>(gpu_index)];
        entry.clear();
        entry.push_back(strtod(words[3].c_str(), NULL));  // sm %
        entry.push_back(strtod(words[4].c_str(), NULL));  // mem %
    }
    return usage;
}
/**
 * Splits `input` into the words separated by one or more space characters.
 * Leading, trailing and repeated spaces produce no empty words.
 *
 * The previous loop executed `continue` for both space and non-space
 * characters, so it never reached `push_back` and always returned an empty
 * vector.
 *
 * @param input text to split
 * @return the words in order of appearance; empty for an empty or
 *         all-space input
 */
std::vector<std::string> SystemInfo::split(std::string input) {
    std::vector<std::string> words;
    const std::string::size_type len = input.size();

    std::string::size_type pos = 0;
    while (pos < len) {
        // skip the separator run
        while (pos < len && input[pos] == ' ') ++pos;
        if (pos >= len) break;

        // consume one word
        std::string::size_type word_end = pos;
        while (word_end < len && input[word_end] != ' ') ++word_end;

        words.push_back(input.substr(pos, word_end - pos));
        pos = word_end;
    }
    return words;
}
std
::
vector
<
unsigned
int
>
SystemInfo
::
GPUPercent
()
{
// get GPU usage percent
if
(
!
initialized
)
Init
();
std
::
vector
<
unsigned
int
>
result
;
nvmlUtilization_t
utilization
;
for
(
int
i
=
0
;
i
<
numDevice
;
++
i
)
{
nvmlDevice_t
device
;
nvmlDeviceGetHandleByIndex
(
i
,
&
device
);
nvmlDeviceGetUtilizationRates
(
device
,
&
utilization
);
result
.
push_back
(
utilization
.
gpu
);
}
return
result
;
// nvmlDevice_t device;
// nvmlUtilization_t utilization;
// nvmlDeviceGetHandleByIndex(device_index, &device);
// nvmlDeviceGetUtilizationRates(device, &utilization);
// return utilization.gpu;
}
std
::
vector
<
unsigned
long
long
>
SystemInfo
::
GPUMemoryUsed
()
{
// get GPU memory used
if
(
!
initialized
)
Init
();
std
::
vector
<
unsigned
long
long
int
>
result
;
nvmlMemory_t
nvmlMemory
;
for
(
int
i
=
0
;
i
<
numDevice
;
++
i
)
{
nvmlDevice_t
device
;
nvmlDeviceGetHandleByIndex
(
i
,
&
device
);
nvmlDeviceGetMemoryInfo
(
device
,
&
nvmlMemory
);
result
.
push_back
(
nvmlMemory
.
used
);
}
return
result
;
}
}
}
}
\ No newline at end of file
cpp/src/metrics/SystemInfo.h
浏览文件 @
24863f63
...
...
@@ -8,6 +8,15 @@
#include "sys/types.h"
#include "sys/sysinfo.h"
#include "stdlib.h"
#include "stdio.h"
#include "string.h"
#include "sys/times.h"
#include "sys/vtimes.h"
#include <unordered_map>
#include <vector>
namespace
zilliz
{
...
...
@@ -16,6 +25,12 @@ namespace server {
class
SystemInfo
{
private:
unsigned
long
total_RAM_
;
clock_t
lastCPU_
,
lastSysCPU_
,
lastUserCPU_
;
int
numProcessors
;
//number of GPU
unsigned
int
numDevice
;
bool
initialized
=
false
;
public:
static
SystemInfo
&
...
...
@@ -24,9 +39,17 @@ class SystemInfo {
return
instance
;
}
long
long
GetPhysicalMemory
();
void
Init
();
int
NumDevice
()
{
return
numDevice
;};
long
long
parseLine
(
char
*
line
);
unsigned
long
GetPhysicalMemory
();
unsigned
long
GetProcessUsedMemory
();
double
MemoryPercent
();
double
CPUPercent
();
std
::
unordered_map
<
int
,
std
::
vector
<
double
>>
GetGPUMemPercent
();
std
::
vector
<
std
::
string
>
split
(
std
::
string
input
);
std
::
vector
<
unsigned
int
>
GPUPercent
();
std
::
vector
<
unsigned
long
long
>
GPUMemoryUsed
();
};
...
...
cpp/src/server/MegasearchServer.cpp
浏览文件 @
24863f63
...
...
@@ -8,6 +8,7 @@
#include "megasearch_types.h"
#include "megasearch_constants.h"
#include "ServerConfig.h"
#include "MegasearchThreadPoolServer.h"
#include <thrift/protocol/TBinaryProtocol.h>
#include <thrift/protocol/TJSONProtocol.h>
...
...
@@ -76,7 +77,7 @@ MegasearchServer::StartService() {
threadManager
->
threadFactory
(
threadFactory
);
threadManager
->
start
();
s_server
.
reset
(
new
T
ThreadPoolServer
(
processor
,
s_server
.
reset
(
new
Megasearch
ThreadPoolServer
(
processor
,
server_transport
,
transport_factory
,
protocol_factory
,
...
...
cpp/src/server/MegasearchThreadPoolServer.cpp
0 → 100644
浏览文件 @
24863f63
/*******************************************************************************
* Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#include "metrics/Metrics.h"
#include "MegasearchThreadPoolServer.h"
/// Counts the new connection in the metrics singleton, then lets the Thrift
/// thread-pool server handle the client as usual.
void zilliz::vecwise::server::MegasearchThreadPoolServer::onClientConnected(
        const std::shared_ptr<apache::thrift::server::TConnectedClient> &pClient) {
    auto &&metrics = server::Metrics::GetInstance();
    metrics.ConnectionGaugeIncrement();

    TThreadPoolServer::onClientConnected(pClient);
}
/// Removes the connection from the metrics singleton's count, then lets the
/// Thrift thread-pool server finish the disconnect as usual.
void zilliz::vecwise::server::MegasearchThreadPoolServer::onClientDisconnected(
        apache::thrift::server::TConnectedClient *pClient) {
    auto &&metrics = server::Metrics::GetInstance();
    metrics.ConnectionGaugeDecrement();

    TThreadPoolServer::onClientDisconnected(pClient);
}
cpp/src/server/MegasearchThreadPoolServer.h
0 → 100644
浏览文件 @
24863f63
/*******************************************************************************
* Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#pragma once
#include <thrift/server/TThreadPoolServer.h>
namespace
zilliz
{
namespace
vecwise
{
namespace
server
{
/**
 * Thrift thread-pool server that hooks client connect/disconnect events so
 * the open-connection gauge can be maintained.
 */
class MegasearchThreadPoolServer : public apache::thrift::server::TThreadPoolServer {
public:
    /**
     * Forwards every argument unchanged to TThreadPoolServer.
     *
     * Defined inline: the constructor was declared here and is used when the
     * service is started, but the accompanying .cpp only defines the two
     * connection hooks, which left it undefined at link time.
     */
    MegasearchThreadPoolServer(
            const std::shared_ptr<apache::thrift::TProcessor>& processor,
            const std::shared_ptr<apache::thrift::transport::TServerTransport>& serverTransport,
            const std::shared_ptr<apache::thrift::transport::TTransportFactory>& transportFactory,
            const std::shared_ptr<apache::thrift::protocol::TProtocolFactory>& protocolFactory,
            const std::shared_ptr<apache::thrift::concurrency::ThreadManager>& threadManager
            = apache::thrift::concurrency::ThreadManager::newSimpleThreadManager())
        : apache::thrift::server::TThreadPoolServer(processor,
                                                    serverTransport,
                                                    transportFactory,
                                                    protocolFactory,
                                                    threadManager) {
    }

protected:
    /// Called by the base server when a client connects.
    void onClientConnected(const std::shared_ptr<apache::thrift::server::TConnectedClient>& pClient) override;

    /// Called by the base server when a client disconnects.
    void onClientDisconnected(apache::thrift::server::TConnectedClient* pClient) override;
};
}
}
}
\ No newline at end of file
cpp/src/server/Server.cpp
浏览文件 @
24863f63
...
...
@@ -175,6 +175,7 @@ Server::Start() {
signal
(
SIGHUP
,
SignalUtil
::
HandleSignal
);
signal
(
SIGTERM
,
SignalUtil
::
HandleSignal
);
server
::
Metrics
::
GetInstance
().
Init
();
server
::
SystemInfo
::
GetInstance
().
Init
();
SERVER_LOG_INFO
<<
"Vecwise server is running..."
;
StartService
();
...
...
cpp/src/thrift/megasearch.thrift
浏览文件 @
24863f63
...
...
@@ -34,7 +34,7 @@ exception Exception {
* @brief Table column description
*/
struct Column {
1: required i32 type; ///< Column Type: 0:inv
e
alid/1:int8/2:int16/3:int32/4:int64/5:float32/6:float64/7:date/8:vector
1: required i32 type; ///< Column Type: 0:invalid/1:int8/2:int16/3:int32/4:int64/5:float32/6:float64/7:date/8:vector
2: required string name; ///< Column name
}
...
...
cpp/unittest/CMakeLists.txt
浏览文件 @
24863f63
...
...
@@ -32,6 +32,7 @@ set(unittest_libs
civetweb
dl
z
${
CUDA_TOOLKIT_ROOT_DIR
}
/lib64/stubs/libnvidia-ml.so
)
add_subdirectory
(
server
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录