提交 2d7b67a0 编写于 作者: Z zhiru

fix merge conflicts


Former-commit-id: a716e47d046e3cc738f7f4b6a7b7201f57fa0d3a
......@@ -15,6 +15,9 @@ Please mark all change in change log and use the ticket from JIRA.
- MS-64 - Different table can have different index type
- MS-52 - Return search score
- MS-66 - Support time range query
- MS-68 - Remove rocksdb from third-party
- MS-70 - cmake: remove redundant libs in src
- MS-71 - cmake: fix faiss dependency
- MS-72 - cmake: change prometheus source to git
## Task
......@@ -45,6 +48,7 @@ Please mark all change in change log and use the ticket from JIRA.
- MS-37 - Add query, cache usage, disk write speed and file data size metrics
- MS-30 - Use faiss v1.5.2
- MS-54 - cmake: Change Thrift third party URL to github.com
- MS-69 - prometheus: add all proposed metrics
## Task
......
......@@ -38,6 +38,40 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/version.h.macro ${CMAKE_CURRENT_SOURC
project(megasearch VERSION "${MEGASEARCH_VERSION}")
project(vecwise_engine LANGUAGES CUDA CXX)
# Ensure that a default make is set
if("${MAKE}" STREQUAL "")
    if(NOT MSVC)
        find_program(MAKE make)
    endif()
endif()

# Configure-time bootstrap of a private GCC toolchain.
#   GCC_DIR       install prefix for the built compiler
#   GCC_BUILD_DIR scratch directory holding the tarball and the unpacked sources
#   GCC_TAR_NAME  absolute local path of the downloaded archive; its presence
#                 is what suppresses the bootstrap on later configure runs
#   _status       result of the last step that ran (0 on success)
set(GCC_VERSION 5.3.0)
set(GCC_DIR "${CMAKE_CURRENT_BINARY_DIR}/gcc")
set(GCC_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/gcc/gcc_build")
set(GCC_TAR_NAME "${GCC_BUILD_DIR}/gcc-${GCC_VERSION}.tar.gz")

if(NOT EXISTS "${GCC_TAR_NAME}")
    file(MAKE_DIRECTORY "${GCC_BUILD_DIR}")

    # The URL must end in the archive's file name only; GCC_TAR_NAME is an
    # absolute local path and cannot be appended to the server directory.
    file(DOWNLOAD
         "http://ftp.tsukuba.wide.ad.jp/software/gcc/releases/gcc-${GCC_VERSION}/gcc-${GCC_VERSION}.tar.gz"
         "${GCC_TAR_NAME}"
         STATUS _gcc_download_status)
    list(GET _gcc_download_status 0 _status)

    if(NOT _status EQUAL 0)
        # Drop the partial/empty file so the next configure run retries.
        file(REMOVE "${GCC_TAR_NAME}")
        message(WARNING "Downloading GCC-${GCC_VERSION} failed: ${_gcc_download_status}")
    elseif(NOT MAKE)
        message(WARNING "No make program found; GCC-${GCC_VERSION} will not be built")
    else()
        message(STATUS "Building GCC-${GCC_VERSION} from source")
        set(_gcc_source_dir "${GCC_BUILD_DIR}/gcc-${GCC_VERSION}")

        # Multiple COMMANDs inside one execute_process() run concurrently as a
        # pipeline, and a quoted "prog arg arg" string is taken as a single
        # executable name. Each step therefore gets its own call with the
        # program and every argument passed as separate items.
        execute_process(
            COMMAND "${CMAKE_COMMAND}" -E tar -xf "${GCC_TAR_NAME}"
            RESULT_VARIABLE _status
            WORKING_DIRECTORY "${GCC_BUILD_DIR}")

        if(_status EQUAL 0)
            # configure lives inside the unpacked gcc-<version> directory.
            # "c++" is GCC's name for the C++ front end ("cxx" is rejected).
            execute_process(
                COMMAND "${_gcc_source_dir}/configure"
                        "--prefix=${GCC_DIR}"
                        "--with-local-prefix=${GCC_DIR}"
                        --enable-languages=c,c++,fortran
                        --enable-checking=release
                        --disable-bootstrap
                RESULT_VARIABLE _status
                WORKING_DIRECTORY "${_gcc_source_dir}")
        endif()

        if(_status EQUAL 0)
            execute_process(
                COMMAND "${MAKE}" -j4
                RESULT_VARIABLE _status
                WORKING_DIRECTORY "${_gcc_source_dir}")
        endif()

        if(_status EQUAL 0)
            execute_process(
                COMMAND "${MAKE}" install
                RESULT_VARIABLE _status
                WORKING_DIRECTORY "${_gcc_source_dir}")
        endif()

        # Report rather than abort: the rest of the configuration does not
        # reference GCC_DIR in this section.
        if(NOT _status EQUAL 0)
            message(WARNING "Building GCC-${GCC_VERSION} failed: ${_status}")
        endif()
    endif()
endif()
set(MEGASEARCH_VERSION_MAJOR "${megasearch_VERSION_MAJOR}")
set(MEGASEARCH_VERSION_MINOR "${megasearch_VERSION_MINOR}")
set(MEGASEARCH_VERSION_PATCH "${megasearch_VERSION_PATCH}")
......
......@@ -81,7 +81,7 @@ define_option(MEGASEARCH_WITH_OPENBLAS "Build with OpenBLAS library" ON)
define_option(MEGASEARCH_WITH_PROMETHEUS "Build with PROMETHEUS library" ON)
define_option(MEGASEARCH_WITH_ROCKSDB "Build with RocksDB library" ON)
define_option(MEGASEARCH_WITH_ROCKSDB "Build with RocksDB library" OFF)
define_option(MEGASEARCH_WITH_SNAPPY "Build with Snappy compression" ON)
......
......@@ -717,17 +717,23 @@ macro(build_faiss)
${FAISS_STATIC_LIB})
# DEPENDS
# ${faiss_dependencies})
ExternalProject_Add_StepDependencies(faiss_ep build openblas_ep)
ExternalProject_Add_StepDependencies(faiss_ep build lapack_ep)
ExternalProject_Add_StepDependencies(faiss_ep build openblas_ep lapack_ep)
file(MAKE_DIRECTORY "${FAISS_INCLUDE_DIR}")
add_library(faiss STATIC IMPORTED)
set_target_properties(
faiss
PROPERTIES IMPORTED_LOCATION "${FAISS_STATIC_LIB}"
INTERFACE_INCLUDE_DIRECTORIES "${FAISS_INCLUDE_DIR}")
INTERFACE_INCLUDE_DIRECTORIES "${FAISS_INCLUDE_DIR}"
INTERFACE_LINK_LIBRARIES "openblas;lapack" )
add_dependencies(faiss faiss_ep)
#add_dependencies(faiss openblas_ep)
#add_dependencies(faiss lapack_ep)
#target_link_libraries(faiss ${OPENBLAS_PREFIX}/lib)
#target_link_libraries(faiss ${LAPACK_PREFIX}/lib)
endmacro()
if(MEGASEARCH_WITH_FAISS)
......
......@@ -34,6 +34,10 @@ set(service_files
thrift/gen-cpp/MegasearchService.cpp
thrift/gen-cpp/megasearch_constants.cpp
thrift/gen-cpp/megasearch_types.cpp
metrics/SystemInfo.cpp
metrics/SystemInfo.h
server/MegasearchThreadPoolServer.cpp
server/MegasearchThreadPoolServer.h
)
set(vecwise_engine_files
......@@ -50,32 +54,12 @@ set(get_sys_info_files
license/GetSysInfo.cpp)
include_directories(/usr/include)
include_directories(/usr/local/cuda/include)
include_directories("${CUDA_TOOLKIT_ROOT_DIR}/include")
include_directories(thrift/gen-cpp)
#target_link_libraries(megasearch boost_system_static)
#target_link_libraries(megasearch boost_filesystem_static)
#target_link_libraries(megasearch boost_serialization_static)
#target_link_libraries(megasearch bzip2)
#target_link_libraries(megasearch easyloggingpp)
#target_link_libraries(megasearch faiss)
#target_link_libraries(megasearch gtest)
#target_link_libraries(megasearch lapack)
#target_link_libraries(megasearch lz4)
#target_link_libraries(megasearch openblas)
#target_link_libraries(megasearch rocksdb)
#target_link_libraries(megasearch snappy)
#target_link_libraries(megasearch sqlite)
#target_link_libraries(megasearch sqlite_orm)
#target_link_libraries(megasearch thrift)
#target_link_libraries(megasearch yaml-cpp)
#target_link_libraries(megasearch zlib)
#target_link_libraries(megasearch zstd)
set(third_party_libs
easyloggingpp
sqlite
# sqlite_orm
thrift
yaml-cpp
faiss
......@@ -92,48 +76,35 @@ set(third_party_libs
snappy
zlib
zstd
${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so
)
if (GPU_VERSION STREQUAL "ON")
link_directories(/usr/local/cuda/lib64)
link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64")
set(engine_libs
pthread
libfaiss.a
libgpufaiss.a
libgomp.a
libopenblas.a
libgfortran.a
libquadmath.a
cudart
cublas
libsqlite3.a
libprometheus-cpp-push.a
libprometheus-cpp-pull.a
libprometheus-cpp-core.a
${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so
)
else()
set(engine_libs
pthread
libfaiss.a
libgomp.a
libopenblas.a
libgfortran.a
libquadmath.a
libsqlite3.a
libprometheus-cpp-push.a
libprometheus-cpp-pull.a
libprometheus-cpp-core.a
${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so
)
endif ()
if (ENABLE_LICENSE STREQUAL "ON")
link_directories(/usr/local/cuda/lib64/stubs)
link_directories(/usr/local/cuda/lib64)
link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs")
link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64")
set(license_libs
nvidia-ml
libboost_system.a
libboost_filesystem.a
libboost_serialization.a
crypto
cudart
cublas
......@@ -163,19 +134,9 @@ target_link_libraries(metrics ${metrics_lib})
set(server_libs
vecwise_engine
libthrift.a
pthread
libyaml-cpp.a
libboost_system.a
libboost_filesystem.a
libsnappy.a
libbz2.a
libz.a
libzstd.a
liblz4.a
dl
metrics
)
add_executable(vecwise_server
......
......@@ -108,6 +108,7 @@ Status DBImpl::InsertVectors(const std::string& table_id_,
CollectInsertMetrics(total_time, n, status.ok());
return status;
}
Status DBImpl::Query(const std::string &table_id, size_t k, size_t nq,
......@@ -119,6 +120,7 @@ Status DBImpl::Query(const std::string &table_id, size_t k, size_t nq,
auto total_time = METRICS_MICROSECONDS(start_time,end_time);
CollectQueryMetrics(total_time, nq);
return result;
}
......@@ -304,17 +306,23 @@ void DBImpl::StartTimerTasks(int interval) {
/**
 * Periodic housekeeping loop run by the background timer.
 *
 * Every `interval` seconds it publishes the keep-alive counter, cache usage,
 * data file size, CPU/RAM and GPU metrics, then calls TrySchedule(). The loop
 * exits as soon as a background error is recorded or shutdown is requested.
 *
 * @param interval  Sleep time between iterations, in seconds; also the amount
 *                  added to the keep-alive counter on each pass.
 */
void DBImpl::BackgroundTimerTask(int interval) {
    server::SystemInfo::GetInstance().Init();
    while (true) {
        if (!bg_error_.ok()) break;
        if (shutting_down_.load(std::memory_order_acquire)) break;

        std::this_thread::sleep_for(std::chrono::seconds(interval));

        server::Metrics::GetInstance().KeepingAliveCounterIncrement(interval);

        // Cache usage is reported as a percentage of capacity. Compute it in
        // floating point and guard against a zero capacity, which would
        // otherwise be an integer division by zero.
        int64_t cache_usage = cache::CpuCacheMgr::GetInstance()->CacheUsage();
        int64_t cache_total = cache::CpuCacheMgr::GetInstance()->CacheCapacity();
        double cache_percent = 0.0;
        if (cache_total > 0) {
            cache_percent = static_cast<double>(cache_usage) * 100.0 / static_cast<double>(cache_total);
        }
        server::Metrics::GetInstance().CacheUsageGaugeSet(cache_percent);

        // Initialised so a failing Size() cannot publish an indeterminate value.
        long size = 0;
        Size(size);
        server::Metrics::GetInstance().DataFileSizeGaugeSet(size);

        server::Metrics::GetInstance().CPUUsagePercentSet();
        server::Metrics::GetInstance().RAMUsagePercentSet();
        server::Metrics::GetInstance().GPUPercentGaugeSet();
        server::Metrics::GetInstance().GPUMemoryUsageGaugeSet();

        TrySchedule();
    }
}
......
......@@ -131,8 +131,11 @@ Status FaissExecutionEngine::Search(long n,
long k,
float *distances,
long *labels) const {
auto start_time = METRICS_NOW_TIME;
pIndex_->search(n, data, k, distances, labels);
auto end_time = METRICS_NOW_TIME;
auto total_time = METRICS_MICROSECONDS(start_time,end_time);
server::Metrics::GetInstance().QueryIndexTypePerSecondSet(build_index_type_, double(n)/double(total_time));
return Status::OK();
}
......
......@@ -27,9 +27,14 @@ MemVectors::MemVectors(const std::shared_ptr<meta::Meta>& meta_ptr,
pEE_(EngineFactory::Build(schema_.dimension_, schema_.location_, (EngineType)schema_.engine_type_)) {
}
/**
 * Assigns IDs to `n_` vectors, appends them to the execution engine and
 * reports the elapsed time to the add-vectors throughput metric.
 *
 * @param n_          Number of vectors in `vectors_`.
 * @param vectors_    Vector data handed to the engine unchanged.
 * @param vector_ids_ Filled by the ID generator, then passed to the engine.
 */
void MemVectors::Add(size_t n_, const float* vectors_, IDNumbers& vector_ids_) {
    // ID generation and the engine insert are timed as one unit.
    const auto begin = METRICS_NOW_TIME;
    pIdGenerator_->GetNextIDNumbers(n_, vector_ids_);
    pEE_->AddWithIds(n_, vectors_, vector_ids_.data());
    const auto finish = METRICS_NOW_TIME;

    const auto elapsed = METRICS_MICROSECONDS(begin, finish);
    const int vector_count = static_cast<int>(n_);
    const int dimension = static_cast<int>(schema_.dimension_);
    server::Metrics::GetInstance().AddVectorsPerSecondGaugeSet(vector_count, dimension, elapsed);
}
size_t MemVectors::Total() const {
......@@ -97,6 +102,7 @@ Status MemManager::InsertVectors(const std::string& table_id_,
const float* vectors_,
IDNumbers& vector_ids_) {
std::unique_lock<std::mutex> lock(mutex_);
return InsertVectorsNoLock(table_id_, n_, vectors_, vector_ids_);
}
......
......@@ -8,6 +8,7 @@
#include "utils/Error.h"
#include "server/ServerConfig.h"
#include "SystemInfo.h"
namespace zilliz {
namespace vecwise {
......@@ -71,6 +72,16 @@ class MetricsBase{
virtual void AddVectorsFailGaugeSet(double value) {};
virtual void QueryVectorResponseSummaryObserve(double value, int count = 1) {};
virtual void QueryVectorResponsePerSecondGaugeSet(double value) {};
virtual void CPUUsagePercentSet() {};
virtual void RAMUsagePercentSet() {};
virtual void QueryResponsePerSecondGaugeSet(double value) {};
virtual void GPUPercentGaugeSet() {};
virtual void GPUMemoryUsageGaugeSet() {};
virtual void AddVectorsPerSecondGaugeSet(int num_vector, int dim, double time) {};
virtual void QueryIndexTypePerSecondSet(std::string type, double value) {};
virtual void ConnectionGaugeIncrement() {};
virtual void ConnectionGaugeDecrement() {};
virtual void KeepingAliveCounterIncrement(double value = 1) {};
};
......
......@@ -6,6 +6,8 @@
#include "PrometheusMetrics.h"
#include "utils/Log.h"
#include "SystemInfo.h"
namespace zilliz {
namespace vecwise {
......@@ -32,8 +34,108 @@ PrometheusMetrics::Init() {
}
return SERVER_SUCCESS;
}
void
PrometheusMetrics::CPUUsagePercentSet() {
if(!startup_) return ;
double usage_percent = server::SystemInfo::GetInstance().CPUPercent();
CPU_usage_percent_.Set(usage_percent);
}
void
PrometheusMetrics::RAMUsagePercentSet() {
if(!startup_) return ;
double usage_percent = server::SystemInfo::GetInstance().MemoryPercent();
RAM_usage_percent_.Set(usage_percent);
}
void
PrometheusMetrics::GPUPercentGaugeSet() {
if(!startup_) return;
int numDevide = server::SystemInfo::GetInstance().num_device();
std::vector<unsigned int> values = server::SystemInfo::GetInstance().GPUPercent();
// for (int i = 0; i < numDevide; ++i) {
// GPU_percent_gauges_[i].Set(static_cast<double>(values[i]));
// }
if(numDevide >= 1) GPU0_percent_gauge_.Set(static_cast<double>(values[0]));
if(numDevide >= 2) GPU1_percent_gauge_.Set(static_cast<double>(values[1]));
if(numDevide >= 3) GPU2_percent_gauge_.Set(static_cast<double>(values[2]));
if(numDevide >= 4) GPU3_percent_gauge_.Set(static_cast<double>(values[3]));
if(numDevide >= 5) GPU4_percent_gauge_.Set(static_cast<double>(values[4]));
if(numDevide >= 6) GPU5_percent_gauge_.Set(static_cast<double>(values[5]));
if(numDevide >= 7) GPU6_percent_gauge_.Set(static_cast<double>(values[6]));
if(numDevide >= 8) GPU7_percent_gauge_.Set(static_cast<double>(values[7]));
// to do
}
void PrometheusMetrics::GPUMemoryUsageGaugeSet() {
if(!startup_) return;
int numDevide = server::SystemInfo::GetInstance().num_device();
std::vector<unsigned long long> values = server::SystemInfo::GetInstance().GPUMemoryUsed();
constexpr unsigned long long MtoB = 1024*1024;
int numDevice = values.size();
// for (int i = 0; i < numDevice; ++i) {
// GPU_memory_usage_gauges_[i].Set(values[i]/MtoB);
// }
if(numDevice >=1) GPU0_memory_usage_gauge_.Set(values[0]/MtoB);
if(numDevice >=2) GPU1_memory_usage_gauge_.Set(values[1]/MtoB);
if(numDevice >=3) GPU2_memory_usage_gauge_.Set(values[2]/MtoB);
if(numDevice >=4) GPU3_memory_usage_gauge_.Set(values[3]/MtoB);
if(numDevice >=5) GPU4_memory_usage_gauge_.Set(values[4]/MtoB);
if(numDevice >=6) GPU5_memory_usage_gauge_.Set(values[5]/MtoB);
if(numDevice >=7) GPU6_memory_usage_gauge_.Set(values[6]/MtoB);
if(numDevice >=8) GPU7_memory_usage_gauge_.Set(values[7]/MtoB);
// to do
}
// Publishes insert throughput as mebibytes of vector data per unit of `time`.
//
// @param num_vector  Number of vectors inserted.
// @param dim         Components per vector; each is counted as sizeof(float) bytes.
// @param time        Elapsed time of the insert. The unit is whatever the
//                    caller measured — presumably microseconds, given the
//                    metric's registered name; confirm against callers.
//                    Non-positive values are ignored so the gauge never
//                    receives inf or NaN.
void PrometheusMetrics::AddVectorsPerSecondGaugeSet(int num_vector, int dim, double time) {
    if(!startup_) return;
    if(time <= 0.0) return;

    const long long MtoB = 1024*1024;
    // Widen before multiplying: num_vector * dim * 4 overflows int for large batches.
    const long long size = static_cast<long long>(num_vector) * dim * static_cast<long long>(sizeof(float));
    add_vectors_per_second_gauge_.Set(size/time/MtoB);
}
// Routes a query-throughput sample to the gauge matching the index type.
// Recognised type strings are "IVF" and "IDMap"; any other value is ignored.
void PrometheusMetrics::QueryIndexTypePerSecondSet(std::string type, double value) {
    if (startup_) {
        if (type == "IDMap") {
            query_index_IDMAP_type_per_second_gauge_.Set(value);
        } else if (type == "IVF") {
            query_index_IVF_type_per_second_gauge_.Set(value);
        }
    }
}
// Adds one to the open-connection gauge once metrics have been started.
void PrometheusMetrics::ConnectionGaugeIncrement() {
    if (startup_) {
        connection_gauge_.Increment();
    }
}
// Subtracts one from the open-connection gauge once metrics have been started.
void PrometheusMetrics::ConnectionGaugeDecrement() {
    if (startup_) {
        connection_gauge_.Decrement();
    }
}
//void PrometheusMetrics::GpuPercentInit() {
// int num_device = SystemInfo::GetInstance().num_device();
// constexpr char device_number[] = "DeviceNum";
// for(int i = 0; i < num_device; ++ i) {
// GPU_percent_gauges_.emplace_back(GPU_percent_.Add({{device_number,std::to_string(i)}}));
// }
//
//}
//void PrometheusMetrics::GpuMemoryInit() {
// int num_device = SystemInfo::GetInstance().num_device();
// constexpr char device_number[] = "DeviceNum";
// for(int i = 0; i < num_device; ++ i) {
// GPU_memory_usage_gauges_.emplace_back(GPU_memory_usage_.Add({{device_number,std::to_string(i)}}));
// }
//}
}
}
}
......@@ -49,6 +49,8 @@ class PrometheusMetrics: public MetricsBase {
std::shared_ptr<prometheus::Exposer> exposer_ptr_;
std::shared_ptr<prometheus::Registry> registry_ = std::make_shared<prometheus::Registry>();
bool startup_ = false;
// void GpuPercentInit();
// void GpuMemoryInit();
public:
void AddGroupSuccessTotalIncrement(double value = 1.0) override { if(startup_) add_group_success_total_.Increment(value);};
......@@ -104,51 +106,20 @@ class PrometheusMetrics: public MetricsBase {
void AddVectorsFailGaugeSet(double value) override { if(startup_) add_vectors_fail_gauge_.Set(value);};
void QueryVectorResponseSummaryObserve(double value, int count = 1) override { if (startup_) for(int i = 0 ; i < count ; ++i) query_vector_response_summary_.Observe(value);};
void QueryVectorResponsePerSecondGaugeSet(double value) override {if (startup_) query_vector_response_per_second_gauge_.Set(value);};
void CPUUsagePercentSet() override ;
void RAMUsagePercentSet() override ;
void QueryResponsePerSecondGaugeSet(double value) override {if(startup_) query_response_per_second_gauge.Set(value);};
void GPUPercentGaugeSet() override ;
void GPUMemoryUsageGaugeSet() override ;
void AddVectorsPerSecondGaugeSet(int num_vector, int dim, double time) override ;
void QueryIndexTypePerSecondSet(std::string type, double value) override ;
void ConnectionGaugeIncrement() override ;
void ConnectionGaugeDecrement() override ;
void KeepingAliveCounterIncrement(double value = 1) override {if(startup_) keeping_alive_counter_.Increment(value);};
// prometheus::Counter &connection_total() {return connection_total_; }
//
// prometheus::Counter &add_group_success_total() { return add_group_success_total_; }
// prometheus::Counter &add_group_fail_total() { return add_group_fail_total_; }
//
// prometheus::Counter &get_group_success_total() { return get_group_success_total_;}
// prometheus::Counter &get_group_fail_total() { return get_group_fail_total_;}
//
// prometheus::Counter &has_group_success_total() { return has_group_success_total_;}
// prometheus::Counter &has_group_fail_total() { return has_group_fail_total_;}
//
// prometheus::Counter &get_group_files_success_total() { return get_group_files_success_total_;};
// prometheus::Counter &get_group_files_fail_total() { return get_group_files_fail_total_;}
//
// prometheus::Counter &add_vectors_success_total() { return add_vectors_success_total_; }
// prometheus::Counter &add_vectors_fail_total() { return add_vectors_fail_total_; }
//
// prometheus::Histogram &add_vectors_duration_histogram() { return add_vectors_duration_histogram_;}
//
// prometheus::Counter &search_success_total() { return search_success_total_; }
// prometheus::Counter &search_fail_total() { return search_fail_total_; }
//
// prometheus::Histogram &search_duration_histogram() { return search_duration_histogram_; }
// prometheus::Histogram &raw_files_size_histogram() { return raw_files_size_histogram_; }
// prometheus::Histogram &index_files_size_histogram() { return index_files_size_histogram_; }
//
// prometheus::Histogram &build_index_duration_seconds_histogram() { return build_index_duration_seconds_histogram_; }
//
// prometheus::Histogram &all_build_index_duration_seconds_histogram() { return all_build_index_duration_seconds_histogram_; }
//
// prometheus::Gauge &cache_usage_gauge() { return cache_usage_gauge_; }
//
// prometheus::Counter &meta_visit_total() { return meta_visit_total_; }
//
// prometheus::Histogram &meta_visit_duration_seconds_histogram() { return meta_visit_duration_seconds_histogram_; }
//
// prometheus::Gauge &mem_usage_percent_gauge() { return mem_usage_percent_gauge_; }
//
// prometheus::Gauge &mem_usage_total_gauge() { return mem_usage_total_gauge_; }
std::shared_ptr<prometheus::Exposer> &exposer_ptr() {return exposer_ptr_; }
......@@ -273,7 +244,7 @@ class PrometheusMetrics: public MetricsBase {
.Name("build_index_duration_microseconds")
.Help("histogram of processing time for building index")
.Register(*registry_);
prometheus::Histogram &build_index_duration_seconds_histogram_ = build_index_duration_seconds_.Add({}, BucketBoundaries{2e6, 4e6, 6e6, 8e6, 1e7});
prometheus::Histogram &build_index_duration_seconds_histogram_ = build_index_duration_seconds_.Add({}, BucketBoundaries{5e5, 2e6, 4e6, 6e6, 8e6, 1e7});
//record processing time for all building index
......@@ -414,6 +385,12 @@ class PrometheusMetrics: public MetricsBase {
.Register(*registry_);
prometheus::Gauge &query_vector_response_per_second_gauge_ = query_vector_response_per_second_.Add({});
prometheus::Family<prometheus::Gauge> &query_response_per_second_ = prometheus::BuildGauge()
.Name("query_response_per_microsecond")
.Help("the number of queries can be processed every microsecond")
.Register(*registry_);
prometheus::Gauge &query_response_per_second_gauge = query_response_per_second_.Add({});
prometheus::Family<prometheus::Gauge> &disk_store_IO_speed_ = prometheus::BuildGauge()
.Name("disk_store_IO_speed_bytes_per_microseconds")
.Help("disk_store_IO_speed")
......@@ -433,6 +410,77 @@ class PrometheusMetrics: public MetricsBase {
prometheus::Gauge &add_vectors_success_gauge_ = add_vectors_.Add({{"outcome", "success"}});
prometheus::Gauge &add_vectors_fail_gauge_ = add_vectors_.Add({{"outcome", "fail"}});
prometheus::Family<prometheus::Gauge> &add_vectors_per_second_ = prometheus::BuildGauge()
.Name("add_vectors_throughput_per_microsecond")
.Help("add vectors throughput per microsecond")
.Register(*registry_);
prometheus::Gauge &add_vectors_per_second_gauge_ = add_vectors_per_second_.Add({});
prometheus::Family<prometheus::Gauge> &CPU_ = prometheus::BuildGauge()
.Name("CPU_usage_percent")
.Help("CPU usage percent by this this process")
.Register(*registry_);
prometheus::Gauge &CPU_usage_percent_ = CPU_.Add({});
prometheus::Family<prometheus::Gauge> &RAM_ = prometheus::BuildGauge()
.Name("RAM_usage_percent")
.Help("RAM usage percent by this process")
.Register(*registry_);
prometheus::Gauge &RAM_usage_percent_ = RAM_.Add({});
//GPU Usage Percent
prometheus::Family<prometheus::Gauge> &GPU_percent_ = prometheus::BuildGauge()
.Name("Gpu_usage_percent")
.Help("GPU_usage_percent ")
.Register(*registry_);
prometheus::Gauge &GPU0_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "0"}});
prometheus::Gauge &GPU1_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "1"}});
prometheus::Gauge &GPU2_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "2"}});
prometheus::Gauge &GPU3_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "3"}});
prometheus::Gauge &GPU4_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "4"}});
prometheus::Gauge &GPU5_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "5"}});
prometheus::Gauge &GPU6_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "6"}});
prometheus::Gauge &GPU7_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "7"}});
// std::vector<prometheus::Gauge> GPU_percent_gauges_;
//GPU Mempry used
prometheus::Family<prometheus::Gauge> &GPU_memory_usage_ = prometheus::BuildGauge()
.Name("GPU_memory_usage_total")
.Help("GPU memory usage total ")
.Register(*registry_);
prometheus::Gauge &GPU0_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "0"}});
prometheus::Gauge &GPU1_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "1"}});
prometheus::Gauge &GPU2_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "2"}});
prometheus::Gauge &GPU3_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "3"}});
prometheus::Gauge &GPU4_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "4"}});
prometheus::Gauge &GPU5_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "5"}});
prometheus::Gauge &GPU6_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "6"}});
prometheus::Gauge &GPU7_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "7"}});
// std::vector<prometheus::Gauge> GPU_memory_usage_gauges_;
prometheus::Family<prometheus::Gauge> &query_index_type_per_second_ = prometheus::BuildGauge()
.Name("query_index_throughtout_per_microsecond")
.Help("query index throughtout per microsecond")
.Register(*registry_);
prometheus::Gauge &query_index_IVF_type_per_second_gauge_ = query_index_type_per_second_.Add({{"IndexType","IVF"}});
prometheus::Gauge &query_index_IDMAP_type_per_second_gauge_ = query_index_type_per_second_.Add({{"IndexType","IDMAP"}});
prometheus::Family<prometheus::Gauge> &connection_ = prometheus::BuildGauge()
.Name("connection_number")
.Help("the number of connections")
.Register(*registry_);
prometheus::Gauge &connection_gauge_ = connection_.Add({});
prometheus::Family<prometheus::Counter> &keeping_alive_ = prometheus::BuildCounter()
.Name("keeping_alive_seconds_total")
.Help("total seconds of the serve alive")
.Register(*registry_);
prometheus::Counter &keeping_alive_counter_ = keeping_alive_.Add({});
};
......
/*******************************************************************************
* Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#include "SystemInfo.h"
#include <sys/types.h>
#include <unistd.h>
#include <iostream>
#include <fstream>
#include "nvml.h"
//#include <mutex>
//
//std::mutex mutex;
namespace zilliz {
namespace vecwise {
namespace server {
// One-time initialisation of the CPU, RAM and GPU bookkeeping.
//
// Records the baseline CPU tick counters, counts logical processors, caches
// the total physical memory and initialises NVML to learn the GPU count.
// Later calls return immediately. NVML failures are reported on stdout and
// leave the GPU count at zero; the CPU and RAM data remain usable.
void SystemInfo::Init() {
    if(initialized_) return;
    initialized_ = true;

    // initialize CPU information
    struct tms time_sample;
    last_cpu_ = times(&time_sample);
    last_sys_cpu_ = time_sample.tms_stime;
    last_user_cpu_ = time_sample.tms_utime;

    // Count "processor" entries; /proc/cpuinfo may be missing or unreadable.
    num_processors_ = 0;
    FILE* file = fopen("/proc/cpuinfo", "r");
    if (file != NULL) {
        char line[128];
        while(fgets(line, sizeof(line), file) != NULL){
            if (strncmp(line, "processor", 9) == 0) num_processors_++;
        }
        fclose(file);
    }
    if (num_processors_ <= 0) {
        // CPUPercent() divides by this count, so it must never stay at zero.
        const long online = sysconf(_SC_NPROCESSORS_ONLN);
        num_processors_ = (online > 0) ? static_cast<int>(online) : 1;
    }

    total_ram_ = GetPhysicalMemory();

    //initialize GPU information
    nvmlReturn_t nvmlresult;
    nvmlresult = nvmlInit();
    if(NVML_SUCCESS != nvmlresult) {
        printf("System information initilization failed");
        return ;
    }
    nvmlresult = nvmlDeviceGetCount(&num_device_);
    if(NVML_SUCCESS != nvmlresult) {
        printf("Unable to get devidce number");
        // Make sure the GPU loops see no devices after a failed query.
        num_device_ = 0;
        return ;
    }
}
// Extracts the first unsigned decimal number found in `line`.
//
// Written for /proc/self/status entries such as "VmRSS:   12345 kB".
// Parsing stops at the first non-digit after the number, so the unit suffix
// needs no trimming and the buffer is left unmodified.
//
// @param line  NUL-terminated text; may be NULL.
// @return The parsed number, or 0 when `line` is NULL or holds no digit.
long long
SystemInfo::ParseLine(char *line) {
    if (line == NULL) return 0;

    // Stop at the terminator so a digit-free line cannot be overrun.
    const char *p = line;
    while (*p != '\0' && (*p < '0' || *p > '9')) p++;
    if (*p == '\0') return 0;

    // strtoll keeps the full 64-bit range; atoi would truncate to int.
    return strtoll(p, NULL, 10);
}
// Returns the total physical RAM reported by sysinfo(2): the `totalram`
// field scaled by `mem_unit`.
unsigned long
SystemInfo::GetPhysicalMemory() {
    struct sysinfo mem_snapshot;
    sysinfo(&mem_snapshot);

    // Start from the unsigned long field so the product is computed at
    // that width rather than in mem_unit's narrower type.
    unsigned long total_bytes = mem_snapshot.totalram;
    total_bytes *= mem_snapshot.mem_unit;
    return total_bytes;
}
// Returns the resident set size of this process in bytes.
//
// Reads the "VmRSS:" entry of /proc/self/status, which ParseLine() yields as
// a count of 1024-byte units, and converts it to bytes. Returns 0 when the
// file cannot be opened or has no VmRSS entry.
unsigned long
SystemInfo::GetProcessUsedMemory() {
    FILE* file = fopen("/proc/self/status", "r");
    if (file == NULL) return 0;

    constexpr int64_t line_length = 128;
    constexpr int64_t KB_SIZE = 1024;
    long long result = 0;
    char line[line_length];

    while (fgets(line, line_length, file) != NULL){
        if (strncmp(line, "VmRSS:", 6) == 0){
            result = ParseLine(line);
            break;
        }
    }
    fclose(file);

    // A negative value must not wrap into a huge unsigned byte count.
    if (result < 0) result = 0;

    // return value in Byte
    return static_cast<unsigned long>(result) * KB_SIZE;
}
// Returns this process's resident memory as a percentage of total physical
// RAM. The ratio is computed in floating point so fractional percentages are
// kept. Returns 0.0 when the total RAM is unknown (zero).
double
SystemInfo::MemoryPercent() {
    if (!initialized_) Init();
    if (total_ram_ == 0) return 0.0;
    return static_cast<double>(GetProcessUsedMemory()) * 100.0 / static_cast<double>(total_ram_);
}
// Returns the CPU usage of this process since the previous call, as a
// percentage averaged over the processor count.
//
// Usage is (system + user ticks consumed) / (wall-clock ticks elapsed),
// divided by the processor count. Returns -1.0 when no valid sample can be
// produced: the tick counters did not advance or went backwards, or the
// processor count is not positive. The stored baseline is refreshed on
// every call, valid sample or not.
double
SystemInfo::CPUPercent() {
    if (!initialized_) Init();
    struct tms time_sample;
    clock_t now;
    double percent;

    now = times(&time_sample);
    if (now <= last_cpu_ || time_sample.tms_stime < last_sys_cpu_ ||
        time_sample.tms_utime < last_user_cpu_ || num_processors_ <= 0){
        //Overflow detection (or no usable processor count). Just skip this value.
        percent = -1.0;
    }
    else{
        percent = (time_sample.tms_stime - last_sys_cpu_) +
                  (time_sample.tms_utime - last_user_cpu_);
        percent /= (now - last_cpu_);
        percent /= num_processors_;
        percent *= 100;
    }

    last_cpu_ = now;
    last_sys_cpu_ = time_sample.tms_stime;
    last_user_cpu_ = time_sample.tms_utime;
    return percent;
}
//std::unordered_map<int,std::vector<double>>
//SystemInfo::GetGPUMemPercent(){
// // return GPUID: MEM%
//
// //write GPU info to a file
// system("nvidia-smi pmon -c 1 > GPUInfo.txt");
// int pid = (int)getpid();
//
// //parse line
// std::ifstream read_file;
// read_file.open("GPUInfo.txt");
// std::string line;
// while(getline(read_file, line)){
// std::vector<std::string> words = split(line);
// // 0 1 2 3 4 5 6 7
// //words stand for gpuindex, pid, type, sm, mem, enc, dec, command respectively
// if(std::stoi(words[1]) != pid) continue;
// int GPUindex = std::stoi(words[0]);
// double sm_percent = std::stod(words[3]);
// double mem_percent = std::stod(words[4]);
//
// }
//
//}
//std::vector<std::string>
//SystemInfo::split(std::string input) {
// std::vector<std::string> words;
// input += " ";
// int word_start = 0;
// for (int i = 0; i < input.size(); ++i) {
// if(input[i] != ' ') continue;
// if(input[i] == ' ') {
// word_start = i + 1;
// continue;
// }
// words.push_back(input.substr(word_start,i-word_start));
// }
// return words;
//}
std::vector<unsigned int>
SystemInfo::GPUPercent() {
// get GPU usage percent
if(!initialized_) Init();
std::vector<unsigned int> result;
nvmlUtilization_t utilization;
for (int i = 0; i < num_device_; ++i) {
nvmlDevice_t device;
nvmlDeviceGetHandleByIndex(i, &device);
nvmlDeviceGetUtilizationRates(device, &utilization);
result.push_back(utilization.gpu);
}
return result;
}
std::vector<unsigned long long>
SystemInfo::GPUMemoryUsed() {
// get GPU memory used
if(!initialized_) Init();
std::vector<unsigned long long int> result;
nvmlMemory_t nvmlMemory;
for (int i = 0; i < num_device_; ++i) {
nvmlDevice_t device;
nvmlDeviceGetHandleByIndex(i, &device);
nvmlDeviceGetMemoryInfo(device, &nvmlMemory);
result.push_back(nvmlMemory.used);
}
return result;
}
}
}
}
\ No newline at end of file
/*******************************************************************************
* Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#pragma once
#include "sys/types.h"
#include "sys/sysinfo.h"
#include "stdlib.h"
#include "stdio.h"
#include "string.h"
#include "sys/times.h"
#include "sys/vtimes.h"
#include <unordered_map>
#include <vector>
namespace zilliz {
namespace vecwise {
namespace server {
// Process-wide collector of CPU, RAM and GPU statistics used by the metrics
// code. Obtained through GetInstance(); Init() must run (explicitly or via
// the first query) before the figures are meaningful.
class SystemInfo {
private:
// Total physical memory, cached by Init() from GetPhysicalMemory().
unsigned long total_ram_ = 0;
// Tick counters captured from times() at the previous sample; CPUPercent()
// reports usage relative to these and then refreshes them.
clock_t last_cpu_ = clock_t();
clock_t last_sys_cpu_ = clock_t();
clock_t last_user_cpu_ = clock_t();
// Processor count determined by Init(); divisor in CPUPercent().
int num_processors_ = 0;
//number of GPU
unsigned int num_device_ = 0;
// Set by Init() so the initialisation work runs only once.
bool initialized_ = false;
public:
// Returns the single shared instance (function-local static).
static SystemInfo &
GetInstance(){
static SystemInfo instance;
return instance;
}
// Performs the one-time CPU/RAM/GPU setup; later calls are no-ops.
void Init();
// Number of GPUs found by Init(); 0 before Init() or when none were found.
int num_device() const {return num_device_;};
// Parses the number out of a /proc status line such as "VmRSS:  123 kB".
long long ParseLine(char* line);
// Total physical memory as reported by sysinfo().
unsigned long GetPhysicalMemory();
// Resident memory of this process, in bytes.
unsigned long GetProcessUsedMemory();
// Resident memory of this process as a percentage of total physical memory.
double MemoryPercent();
// CPU usage of this process since the previous call, as a percentage
// averaged over the processor count; -1.0 when no valid sample exists.
double CPUPercent();
// std::unordered_map<int,std::vector<double>> GetGPUMemPercent() {};
// std::vector<std::string> split(std::string input) {};
// Utilisation figure per GPU, indexed by device number.
std::vector<unsigned int> GPUPercent();
// Used-memory figure per GPU as reported by NVML, indexed by device number.
std::vector<unsigned long long> GPUMemoryUsed();
};
}
}
}
......@@ -26,7 +26,6 @@ add_library(megasearch_sdk STATIC
${service_files}
)
link_directories(../../third_party/build/lib)
target_link_libraries(megasearch_sdk
${third_party_libs}
)
......
......@@ -8,6 +8,7 @@
#include "megasearch_types.h"
#include "megasearch_constants.h"
#include "ServerConfig.h"
#include "MegasearchThreadPoolServer.h"
#include <thrift/protocol/TBinaryProtocol.h>
#include <thrift/protocol/TJSONProtocol.h>
......@@ -76,7 +77,7 @@ MegasearchServer::StartService() {
threadManager->threadFactory(threadFactory);
threadManager->start();
s_server.reset(new TThreadPoolServer(processor,
s_server.reset(new MegasearchThreadPoolServer(processor,
server_transport,
transport_factory,
protocol_factory,
......
/*******************************************************************************
* Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#include "metrics/Metrics.h"
#include "MegasearchThreadPoolServer.h"
namespace zilliz {
namespace vecwise {
namespace server {
// Connection hook: bumps the open-connection gauge, then hands the client
// to the stock thread-pool server implementation.
void
MegasearchThreadPoolServer::onClientConnected(const std::shared_ptr<apache::thrift::server::TConnectedClient> &pClient) {
    server::Metrics::GetInstance().ConnectionGaugeIncrement();
    apache::thrift::server::TThreadPoolServer::onClientConnected(pClient);
}
// Disconnection hook: lowers the open-connection gauge, then lets the stock
// thread-pool server implementation finish the teardown.
void
MegasearchThreadPoolServer::onClientDisconnected(apache::thrift::server::TConnectedClient *pClient) {
    server::Metrics::GetInstance().ConnectionGaugeDecrement();
    apache::thrift::server::TThreadPoolServer::onClientDisconnected(pClient);
}
zilliz::vecwise::server::MegasearchThreadPoolServer::MegasearchThreadPoolServer(const std::shared_ptr<apache::thrift::TProcessor> &processor,
const std::shared_ptr<apache::thrift::transport::TServerTransport> &serverTransport,
const std::shared_ptr<apache::thrift::transport::TTransportFactory> &transportFactory,
const std::shared_ptr<apache::thrift::protocol::TProtocolFactory> &protocolFactory,
const std::shared_ptr<apache::thrift::concurrency::ThreadManager> &threadManager)
: TThreadPoolServer(processor, serverTransport, transportFactory, protocolFactory, threadManager) {
}
}
}
}
\ No newline at end of file
/*******************************************************************************
* Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#pragma once
#include <thrift/server/TThreadPoolServer.h>
namespace zilliz {
namespace vecwise {
namespace server {
// Thrift thread-pool server that additionally tracks the number of open
// client connections through the metrics subsystem.
class MegasearchThreadPoolServer : public apache::thrift::server::TThreadPoolServer {
public:
// Takes the same arguments as the TThreadPoolServer base class and passes
// them through; threadManager defaults to a new simple thread manager.
MegasearchThreadPoolServer(
const std::shared_ptr<apache::thrift::TProcessor>& processor,
const std::shared_ptr<apache::thrift::transport::TServerTransport>& serverTransport,
const std::shared_ptr<apache::thrift::transport::TTransportFactory>& transportFactory,
const std::shared_ptr<apache::thrift::protocol::TProtocolFactory>& protocolFactory,
const std::shared_ptr<apache::thrift::concurrency::ThreadManager>& threadManager
= apache::thrift::concurrency::ThreadManager::newSimpleThreadManager());
protected:
// Increments the connection gauge, then defers to the base implementation.
void onClientConnected(const std::shared_ptr<apache::thrift::server::TConnectedClient>& pClient) override ;
// Decrements the connection gauge, then defers to the base implementation.
void onClientDisconnected(apache::thrift::server::TConnectedClient* pClient) override ;
};
}
}
}
\ No newline at end of file
......@@ -173,6 +173,7 @@ Server::Start() {
signal(SIGHUP, SignalUtil::HandleSignal);
signal(SIGTERM, SignalUtil::HandleSignal);
server::Metrics::GetInstance().Init();
server::SystemInfo::GetInstance().Init();
SERVER_LOG_INFO << "Vecwise server is running...";
StartService();
......
......@@ -32,6 +32,7 @@ set(unittest_libs
civetweb
dl
z
${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so
)
add_subdirectory(server)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册