提交 7268cfb3 编写于 作者: P peng.xu

Merge branch 'branch-0.5.0' into 'branch-0.5.0'

format metric code

See merge request megasearch/milvus!629

Former-commit-id: b3a568226c079750930027bfb617063736ed0cfd
...@@ -5,3 +5,4 @@ ...@@ -5,3 +5,4 @@
*src/core/thirdparty* *src/core/thirdparty*
*src/grpc* *src/grpc*
*easylogging++* *easylogging++*
*SqliteMetaImpl.cpp
\ No newline at end of file
...@@ -21,67 +21,149 @@ ...@@ -21,67 +21,149 @@
#include "utils/Error.h" #include "utils/Error.h"
#include "SystemInfo.h" #include "SystemInfo.h"
#include <string>
namespace zilliz { namespace zilliz {
namespace milvus { namespace milvus {
namespace server { namespace server {
class MetricsBase{ class MetricsBase {
public: public:
static MetricsBase& static MetricsBase &
GetInstance(){ GetInstance() {
static MetricsBase instance; static MetricsBase instance;
return instance; return instance;
} }
virtual ErrorCode Init() {}; virtual ErrorCode Init() {
}
virtual void AddVectorsSuccessTotalIncrement(double value = 1) {};
virtual void AddVectorsFailTotalIncrement(double value = 1) {}; virtual void AddVectorsSuccessTotalIncrement(double value = 1) {
virtual void AddVectorsDurationHistogramOberve(double value) {}; }
virtual void RawFileSizeHistogramObserve(double value) {}; virtual void AddVectorsFailTotalIncrement(double value = 1) {
virtual void IndexFileSizeHistogramObserve(double value) {}; }
virtual void BuildIndexDurationSecondsHistogramObserve(double value) {};
virtual void AddVectorsDurationHistogramOberve(double value) {
virtual void CpuCacheUsageGaugeSet(double value) {}; }
virtual void GpuCacheUsageGaugeSet() {};
virtual void RawFileSizeHistogramObserve(double value) {
virtual void MetaAccessTotalIncrement(double value = 1) {}; }
virtual void MetaAccessDurationSecondsHistogramObserve(double value) {};
virtual void FaissDiskLoadDurationSecondsHistogramObserve(double value) {}; virtual void IndexFileSizeHistogramObserve(double value) {
virtual void FaissDiskLoadSizeBytesHistogramObserve(double value) {}; }
virtual void CacheAccessTotalIncrement(double value = 1) {};
virtual void MemTableMergeDurationSecondsHistogramObserve(double value) {}; virtual void BuildIndexDurationSecondsHistogramObserve(double value) {
virtual void SearchIndexDataDurationSecondsHistogramObserve(double value) {}; }
virtual void SearchRawDataDurationSecondsHistogramObserve(double value) {};
virtual void IndexFileSizeTotalIncrement(double value = 1) {}; virtual void CpuCacheUsageGaugeSet(double value) {
virtual void RawFileSizeTotalIncrement(double value = 1) {}; }
virtual void IndexFileSizeGaugeSet(double value) {};
virtual void RawFileSizeGaugeSet(double value) {}; virtual void GpuCacheUsageGaugeSet() {
virtual void FaissDiskLoadIOSpeedGaugeSet(double value) {}; }
virtual void QueryResponseSummaryObserve(double value) {};
virtual void DiskStoreIOSpeedGaugeSet(double value) {}; virtual void MetaAccessTotalIncrement(double value = 1) {
virtual void DataFileSizeGaugeSet(double value) {}; }
virtual void AddVectorsSuccessGaugeSet(double value) {};
virtual void AddVectorsFailGaugeSet(double value) {}; virtual void MetaAccessDurationSecondsHistogramObserve(double value) {
virtual void QueryVectorResponseSummaryObserve(double value, int count = 1) {}; }
virtual void QueryVectorResponsePerSecondGaugeSet(double value) {};
virtual void CPUUsagePercentSet() {}; virtual void FaissDiskLoadDurationSecondsHistogramObserve(double value) {
virtual void RAMUsagePercentSet() {}; }
virtual void QueryResponsePerSecondGaugeSet(double value) {};
virtual void GPUPercentGaugeSet() {}; virtual void FaissDiskLoadSizeBytesHistogramObserve(double value) {
virtual void GPUMemoryUsageGaugeSet() {}; }
virtual void AddVectorsPerSecondGaugeSet(int num_vector, int dim, double time) {};
virtual void QueryIndexTypePerSecondSet(std::string type, double value) {}; virtual void CacheAccessTotalIncrement(double value = 1) {
virtual void ConnectionGaugeIncrement() {}; }
virtual void ConnectionGaugeDecrement() {};
virtual void KeepingAliveCounterIncrement(double value = 1) {}; virtual void MemTableMergeDurationSecondsHistogramObserve(double value) {
virtual void OctetsSet() {}; }
virtual void CPUCoreUsagePercentSet() {}; virtual void SearchIndexDataDurationSecondsHistogramObserve(double value) {
virtual void GPUTemperature() {}; }
virtual void CPUTemperature() {};
virtual void SearchRawDataDurationSecondsHistogramObserve(double value) {
}
virtual void IndexFileSizeTotalIncrement(double value = 1) {
}
virtual void RawFileSizeTotalIncrement(double value = 1) {
}
virtual void IndexFileSizeGaugeSet(double value) {
}
virtual void RawFileSizeGaugeSet(double value) {
}
virtual void FaissDiskLoadIOSpeedGaugeSet(double value) {
}
virtual void QueryResponseSummaryObserve(double value) {
}
virtual void DiskStoreIOSpeedGaugeSet(double value) {
}
virtual void DataFileSizeGaugeSet(double value) {
}
virtual void AddVectorsSuccessGaugeSet(double value) {
}
virtual void AddVectorsFailGaugeSet(double value) {
}
virtual void QueryVectorResponseSummaryObserve(double value, int count = 1) {
}
virtual void QueryVectorResponsePerSecondGaugeSet(double value) {
}
virtual void CPUUsagePercentSet() {
}
virtual void RAMUsagePercentSet() {
}
virtual void QueryResponsePerSecondGaugeSet(double value) {
}
virtual void GPUPercentGaugeSet() {
}
virtual void GPUMemoryUsageGaugeSet() {
}
virtual void AddVectorsPerSecondGaugeSet(int num_vector, int dim, double time) {
}
virtual void QueryIndexTypePerSecondSet(std::string type, double value) {
}
virtual void ConnectionGaugeIncrement() {
}
virtual void ConnectionGaugeDecrement() {
}
virtual void KeepingAliveCounterIncrement(double value = 1) {
}
virtual void OctetsSet() {
}
virtual void CPUCoreUsagePercentSet() {
}
virtual void GPUTemperature() {
}
virtual void CPUTemperature() {
}
}; };
} } // namespace server
} } // namespace milvus
} } // namespace zilliz
\ No newline at end of file
...@@ -15,10 +15,11 @@ ...@@ -15,10 +15,11 @@
// specific language governing permissions and limitations // specific language governing permissions and limitations
// under the License. // under the License.
#include "Metrics.h" #include "metrics/Metrics.h"
#include "server/Config.h" #include "server/Config.h"
#include "PrometheusMetrics.h" #include "PrometheusMetrics.h"
#include <string>
namespace zilliz { namespace zilliz {
namespace milvus { namespace milvus {
...@@ -44,6 +45,6 @@ Metrics::CreateMetricsCollector() { ...@@ -44,6 +45,6 @@ Metrics::CreateMetricsCollector() {
} }
} }
} } // namespace server
} } // namespace milvus
} } // namespace zilliz
...@@ -21,7 +21,6 @@ ...@@ -21,7 +21,6 @@
#include "MetricBase.h" #include "MetricBase.h"
#include "db/meta/MetaTypes.h" #include "db/meta/MetaTypes.h"
namespace zilliz { namespace zilliz {
namespace milvus { namespace milvus {
namespace server { namespace server {
...@@ -44,7 +43,7 @@ class Metrics { ...@@ -44,7 +43,7 @@ class Metrics {
}; };
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class CollectMetricsBase { class CollectMetricsBase {
protected: protected:
CollectMetricsBase() { CollectMetricsBase() {
start_time_ = METRICS_NOW_TIME; start_time_ = METRICS_NOW_TIME;
} }
...@@ -56,19 +55,19 @@ protected: ...@@ -56,19 +55,19 @@ protected:
return METRICS_MICROSECONDS(start_time_, end_time); return METRICS_MICROSECONDS(start_time_, end_time);
} }
protected: protected:
using TIME_POINT = std::chrono::system_clock::time_point; using TIME_POINT = std::chrono::system_clock::time_point;
TIME_POINT start_time_; TIME_POINT start_time_;
}; };
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class CollectInsertMetrics : CollectMetricsBase { class CollectInsertMetrics : CollectMetricsBase {
public: public:
CollectInsertMetrics(size_t n, Status& status) : n_(n), status_(status) { CollectInsertMetrics(size_t n, Status &status) : n_(n), status_(status) {
} }
~CollectInsertMetrics() { ~CollectInsertMetrics() {
if(n_ > 0) { if (n_ > 0) {
auto total_time = TimeFromBegine(); auto total_time = TimeFromBegine();
double avg_time = total_time / n_; double avg_time = total_time / n_;
for (int i = 0; i < n_; ++i) { for (int i = 0; i < n_; ++i) {
...@@ -86,19 +85,19 @@ public: ...@@ -86,19 +85,19 @@ public:
} }
} }
private: private:
size_t n_; size_t n_;
Status& status_; Status &status_;
}; };
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class CollectQueryMetrics : CollectMetricsBase { class CollectQueryMetrics : CollectMetricsBase {
public: public:
CollectQueryMetrics(size_t nq) : nq_(nq) { explicit CollectQueryMetrics(size_t nq) : nq_(nq) {
} }
~CollectQueryMetrics() { ~CollectQueryMetrics() {
if(nq_ > 0) { if (nq_ > 0) {
auto total_time = TimeFromBegine(); auto total_time = TimeFromBegine();
for (int i = 0; i < nq_; ++i) { for (int i = 0; i < nq_; ++i) {
server::Metrics::GetInstance().QueryResponseSummaryObserve(total_time); server::Metrics::GetInstance().QueryResponseSummaryObserve(total_time);
...@@ -109,13 +108,13 @@ public: ...@@ -109,13 +108,13 @@ public:
} }
} }
private: private:
size_t nq_; size_t nq_;
}; };
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class CollectMergeFilesMetrics : CollectMetricsBase { class CollectMergeFilesMetrics : CollectMetricsBase {
public: public:
CollectMergeFilesMetrics() { CollectMergeFilesMetrics() {
} }
...@@ -127,7 +126,7 @@ public: ...@@ -127,7 +126,7 @@ public:
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class CollectBuildIndexMetrics : CollectMetricsBase { class CollectBuildIndexMetrics : CollectMetricsBase {
public: public:
CollectBuildIndexMetrics() { CollectBuildIndexMetrics() {
} }
...@@ -139,8 +138,8 @@ public: ...@@ -139,8 +138,8 @@ public:
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class CollectExecutionEngineMetrics : CollectMetricsBase { class CollectExecutionEngineMetrics : CollectMetricsBase {
public: public:
CollectExecutionEngineMetrics(double physical_size) : physical_size_(physical_size) { explicit CollectExecutionEngineMetrics(double physical_size) : physical_size_(physical_size) {
} }
~CollectExecutionEngineMetrics() { ~CollectExecutionEngineMetrics() {
...@@ -151,27 +150,28 @@ public: ...@@ -151,27 +150,28 @@ public:
server::Metrics::GetInstance().FaissDiskLoadIOSpeedGaugeSet(physical_size_ / double(total_time)); server::Metrics::GetInstance().FaissDiskLoadIOSpeedGaugeSet(physical_size_ / double(total_time));
} }
private: private:
double physical_size_; double physical_size_;
}; };
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class CollectSerializeMetrics : CollectMetricsBase { class CollectSerializeMetrics : CollectMetricsBase {
public: public:
CollectSerializeMetrics(size_t size) : size_(size) { explicit CollectSerializeMetrics(size_t size) : size_(size) {
} }
~CollectSerializeMetrics() { ~CollectSerializeMetrics() {
auto total_time = TimeFromBegine(); auto total_time = TimeFromBegine();
server::Metrics::GetInstance().DiskStoreIOSpeedGaugeSet((double) size_ / total_time); server::Metrics::GetInstance().DiskStoreIOSpeedGaugeSet((double) size_ / total_time);
} }
private:
private:
size_t size_; size_t size_;
}; };
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class CollectAddMetrics : CollectMetricsBase { class CollectAddMetrics : CollectMetricsBase {
public: public:
CollectAddMetrics(size_t n, uint16_t dimension) : n_(n), dimension_(dimension) { CollectAddMetrics(size_t n, uint16_t dimension) : n_(n), dimension_(dimension) {
} }
...@@ -181,15 +181,16 @@ public: ...@@ -181,15 +181,16 @@ public:
static_cast<int>(dimension_), static_cast<int>(dimension_),
total_time); total_time);
} }
private:
private:
size_t n_; size_t n_;
uint16_t dimension_; uint16_t dimension_;
}; };
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class CollectDurationMetrics : CollectMetricsBase { class CollectDurationMetrics : CollectMetricsBase {
public: public:
CollectDurationMetrics(int index_type) : index_type_(index_type) { explicit CollectDurationMetrics(int index_type) : index_type_(index_type) {
} }
~CollectDurationMetrics() { ~CollectDurationMetrics() {
...@@ -209,19 +210,20 @@ public: ...@@ -209,19 +210,20 @@ public:
} }
} }
} }
private:
private:
int index_type_; int index_type_;
}; };
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class CollectSearchTaskMetrics : CollectMetricsBase { class CollectSearchTaskMetrics : CollectMetricsBase {
public: public:
CollectSearchTaskMetrics(int index_type) : index_type_(index_type) { explicit CollectSearchTaskMetrics(int index_type) : index_type_(index_type) {
} }
~CollectSearchTaskMetrics() { ~CollectSearchTaskMetrics() {
auto total_time = TimeFromBegine(); auto total_time = TimeFromBegine();
switch(index_type_) { switch (index_type_) {
case engine::meta::TableFileSchema::RAW: { case engine::meta::TableFileSchema::RAW: {
server::Metrics::GetInstance().SearchRawDataDurationSecondsHistogramObserve(total_time); server::Metrics::GetInstance().SearchRawDataDurationSecondsHistogramObserve(total_time);
break; break;
...@@ -237,13 +239,13 @@ public: ...@@ -237,13 +239,13 @@ public:
} }
} }
private: private:
int index_type_; int index_type_;
}; };
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class MetricCollector : CollectMetricsBase { class MetricCollector : CollectMetricsBase {
public: public:
MetricCollector() { MetricCollector() {
server::Metrics::GetInstance().MetaAccessTotalIncrement(); server::Metrics::GetInstance().MetaAccessTotalIncrement();
} }
...@@ -254,11 +256,6 @@ public: ...@@ -254,11 +256,6 @@ public:
} }
}; };
} // namespace server
} // namespace milvus
} } // namespace zilliz
}
}
...@@ -16,12 +16,14 @@ ...@@ -16,12 +16,14 @@
// under the License. // under the License.
#include "metrics/PrometheusMetrics.h"
#include "cache/GpuCacheMgr.h" #include "cache/GpuCacheMgr.h"
#include "PrometheusMetrics.h"
#include "server/Config.h" #include "server/Config.h"
#include "utils/Log.h" #include "utils/Log.h"
#include "SystemInfo.h" #include "SystemInfo.h"
#include <string>
#include <utility>
namespace zilliz { namespace zilliz {
namespace milvus { namespace milvus {
...@@ -47,93 +49,96 @@ PrometheusMetrics::Init() { ...@@ -47,93 +49,96 @@ PrometheusMetrics::Init() {
// Exposer Registry // Exposer Registry
exposer_ptr_->RegisterCollectable(registry_); exposer_ptr_->RegisterCollectable(registry_);
} catch (std::exception& ex) { } catch (std::exception &ex) {
SERVER_LOG_ERROR << "Failed to connect prometheus server: " << std::string(ex.what()); SERVER_LOG_ERROR << "Failed to connect prometheus server: " << std::string(ex.what());
return SERVER_UNEXPECTED_ERROR; return SERVER_UNEXPECTED_ERROR;
} }
return SERVER_SUCCESS; return SERVER_SUCCESS;
} }
void void
PrometheusMetrics::CPUUsagePercentSet() { PrometheusMetrics::CPUUsagePercentSet() {
if(!startup_) return ; if (!startup_) return;
double usage_percent = server::SystemInfo::GetInstance().CPUPercent(); double usage_percent = server::SystemInfo::GetInstance().CPUPercent();
CPU_usage_percent_.Set(usage_percent); CPU_usage_percent_.Set(usage_percent);
} }
void void
PrometheusMetrics::RAMUsagePercentSet() { PrometheusMetrics::RAMUsagePercentSet() {
if(!startup_) return ; if (!startup_) return;
double usage_percent = server::SystemInfo::GetInstance().MemoryPercent(); double usage_percent = server::SystemInfo::GetInstance().MemoryPercent();
RAM_usage_percent_.Set(usage_percent); RAM_usage_percent_.Set(usage_percent);
} }
void void
PrometheusMetrics::GPUPercentGaugeSet() { PrometheusMetrics::GPUPercentGaugeSet() {
if(!startup_) return; if (!startup_) return;
int numDevice = server::SystemInfo::GetInstance().num_device(); int numDevice = server::SystemInfo::GetInstance().num_device();
std::vector<unsigned long long > used_total = server::SystemInfo::GetInstance().GPUMemoryTotal(); std::vector<uint64_t> used_total = server::SystemInfo::GetInstance().GPUMemoryTotal();
std::vector<unsigned long long > used_memory = server::SystemInfo::GetInstance().GPUMemoryUsed(); std::vector<uint64_t> used_memory = server::SystemInfo::GetInstance().GPUMemoryUsed();
for (int i = 0; i < numDevice; ++i) { for (int i = 0; i < numDevice; ++i) {
prometheus::Gauge &GPU_percent = GPU_percent_.Add({{"DeviceNum", std::to_string(i)}}); prometheus::Gauge &GPU_percent = GPU_percent_.Add({{"DeviceNum", std::to_string(i)}});
double percent = (double)used_memory[i] / (double)used_total[i]; double percent = (double) used_memory[i] / (double) used_total[i];
GPU_percent.Set(percent * 100); GPU_percent.Set(percent * 100);
} }
} }
void PrometheusMetrics::GPUMemoryUsageGaugeSet() { void
if(!startup_) return; PrometheusMetrics::GPUMemoryUsageGaugeSet() {
std::vector<unsigned long long> values = server::SystemInfo::GetInstance().GPUMemoryUsed(); if (!startup_) return;
constexpr unsigned long long MtoB = 1024*1024; std::vector<uint64_t> values = server::SystemInfo::GetInstance().GPUMemoryUsed();
constexpr uint64_t MtoB = 1024 * 1024;
int numDevice = server::SystemInfo::GetInstance().num_device(); int numDevice = server::SystemInfo::GetInstance().num_device();
for (int i = 0; i < numDevice; ++i) { for (int i = 0; i < numDevice; ++i) {
prometheus::Gauge &GPU_memory = GPU_memory_usage_.Add({{"DeviceNum", std::to_string(i)}}); prometheus::Gauge &GPU_memory = GPU_memory_usage_.Add({{"DeviceNum", std::to_string(i)}});
GPU_memory.Set(values[i] / MtoB); GPU_memory.Set(values[i] / MtoB);
} }
} }
void PrometheusMetrics::AddVectorsPerSecondGaugeSet(int num_vector, int dim, double time) {
// MB/s
if(!startup_) return;
long long MtoB = 1024*1024; void
long long size = num_vector * dim * 4; PrometheusMetrics::AddVectorsPerSecondGaugeSet(int num_vector, int dim, double time) {
add_vectors_per_second_gauge_.Set(size/time/MtoB); // MB/s
if (!startup_) return;
int64_t MtoB = 1024 * 1024;
int64_t size = num_vector * dim * 4;
add_vectors_per_second_gauge_.Set(size / time / MtoB);
} }
void PrometheusMetrics::QueryIndexTypePerSecondSet(std::string type, double value) {
if(!startup_) return; void
if(type == "IVF"){ PrometheusMetrics::QueryIndexTypePerSecondSet(std::string type, double value) {
if (!startup_) return;
if (type == "IVF") {
query_index_IVF_type_per_second_gauge_.Set(value); query_index_IVF_type_per_second_gauge_.Set(value);
} else if(type == "IDMap"){ } else if (type == "IDMap") {
query_index_IDMAP_type_per_second_gauge_.Set(value); query_index_IDMAP_type_per_second_gauge_.Set(value);
} }
} }
void PrometheusMetrics::ConnectionGaugeIncrement() { void
if(!startup_) return; PrometheusMetrics::ConnectionGaugeIncrement() {
if (!startup_) return;
connection_gauge_.Increment(); connection_gauge_.Increment();
} }
void PrometheusMetrics::ConnectionGaugeDecrement() { void
if(!startup_) return; PrometheusMetrics::ConnectionGaugeDecrement() {
if (!startup_) return;
connection_gauge_.Decrement(); connection_gauge_.Decrement();
} }
void PrometheusMetrics::OctetsSet() { void
if(!startup_) return; PrometheusMetrics::OctetsSet() {
if (!startup_) return;
// get old stats and reset them // get old stats and reset them
unsigned long long old_inoctets = SystemInfo::GetInstance().get_inoctets(); uint64_t old_inoctets = SystemInfo::GetInstance().get_inoctets();
unsigned long long old_outoctets = SystemInfo::GetInstance().get_octets(); uint64_t old_outoctets = SystemInfo::GetInstance().get_octets();
auto old_time = SystemInfo::GetInstance().get_nettime(); auto old_time = SystemInfo::GetInstance().get_nettime();
std::pair<unsigned long long, unsigned long long> in_and_out_octets = SystemInfo::GetInstance().Octets(); std::pair<uint64_t, uint64_t> in_and_out_octets = SystemInfo::GetInstance().Octets();
SystemInfo::GetInstance().set_inoctets(in_and_out_octets.first); SystemInfo::GetInstance().set_inoctets(in_and_out_octets.first);
SystemInfo::GetInstance().set_outoctets(in_and_out_octets.second); SystemInfo::GetInstance().set_outoctets(in_and_out_octets.second);
SystemInfo::GetInstance().set_nettime(); SystemInfo::GetInstance().set_nettime();
...@@ -142,13 +147,14 @@ void PrometheusMetrics::OctetsSet() { ...@@ -142,13 +147,14 @@ void PrometheusMetrics::OctetsSet() {
constexpr double micro_to_second = 1e-6; constexpr double micro_to_second = 1e-6;
auto now_time = std::chrono::system_clock::now(); auto now_time = std::chrono::system_clock::now();
auto total_microsecond = METRICS_MICROSECONDS(old_time, now_time); auto total_microsecond = METRICS_MICROSECONDS(old_time, now_time);
auto total_second = total_microsecond*micro_to_second; auto total_second = total_microsecond * micro_to_second;
if(total_second == 0) return; if (total_second == 0) return;
inoctets_gauge_.Set((in_and_out_octets.first-old_inoctets)/total_second); inoctets_gauge_.Set((in_and_out_octets.first - old_inoctets) / total_second);
outoctets_gauge_.Set((in_and_out_octets.second-old_outoctets)/total_second); outoctets_gauge_.Set((in_and_out_octets.second - old_outoctets) / total_second);
} }
void PrometheusMetrics::CPUCoreUsagePercentSet() { void
PrometheusMetrics::CPUCoreUsagePercentSet() {
if (!startup_) if (!startup_)
return; return;
...@@ -160,11 +166,12 @@ void PrometheusMetrics::CPUCoreUsagePercentSet() { ...@@ -160,11 +166,12 @@ void PrometheusMetrics::CPUCoreUsagePercentSet() {
} }
} }
void PrometheusMetrics::GPUTemperature() { void
PrometheusMetrics::GPUTemperature() {
if (!startup_) if (!startup_)
return; return;
std::vector<unsigned int> GPU_temperatures = server::SystemInfo::GetInstance().GPUTemperature(); std::vector<uint64_t> GPU_temperatures = server::SystemInfo::GetInstance().GPUTemperature();
for (int i = 0; i < GPU_temperatures.size(); ++i) { for (int i = 0; i < GPU_temperatures.size(); ++i) {
prometheus::Gauge &gpu_temp = GPU_temperature_.Add({{"GPU", std::to_string(i)}}); prometheus::Gauge &gpu_temp = GPU_temperature_.Add({{"GPU", std::to_string(i)}});
...@@ -172,7 +179,8 @@ void PrometheusMetrics::GPUTemperature() { ...@@ -172,7 +179,8 @@ void PrometheusMetrics::GPUTemperature() {
} }
} }
void PrometheusMetrics::CPUTemperature() { void
PrometheusMetrics::CPUTemperature() {
if (!startup_) if (!startup_)
return; return;
...@@ -184,7 +192,8 @@ void PrometheusMetrics::CPUTemperature() { ...@@ -184,7 +192,8 @@ void PrometheusMetrics::CPUTemperature() {
} }
} }
void PrometheusMetrics::GpuCacheUsageGaugeSet() { void
PrometheusMetrics::GpuCacheUsageGaugeSet() {
// std::vector<uint64_t > gpu_ids = {0}; // std::vector<uint64_t > gpu_ids = {0};
// for(auto i = 0; i < gpu_ids.size(); ++i) { // for(auto i = 0; i < gpu_ids.size(); ++i) {
// uint64_t cache_usage = cache::GpuCacheMgr::GetInstance(gpu_ids[i])->CacheUsage(); // uint64_t cache_usage = cache::GpuCacheMgr::GetInstance(gpu_ids[i])->CacheUsage();
...@@ -194,6 +203,6 @@ void PrometheusMetrics::GpuCacheUsageGaugeSet() { ...@@ -194,6 +203,6 @@ void PrometheusMetrics::GpuCacheUsageGaugeSet() {
// } // }
} }
} } // namespace server
} } // namespace milvus
} } // namespace zilliz
此差异已折叠。
...@@ -16,29 +16,28 @@ ...@@ -16,29 +16,28 @@
// under the License. // under the License.
#include "SystemInfo.h" #include "metrics/SystemInfo.h"
#include <sys/types.h> #include <sys/types.h>
#include <unistd.h> #include <unistd.h>
#include <iostream> #include <iostream>
#include <fstream> #include <fstream>
#include "nvml.h" #include <nvml.h>
//#include <mutex> #include <string>
// #include <utility>
//std::mutex mutex;
namespace zilliz { namespace zilliz {
namespace milvus { namespace milvus {
namespace server { namespace server {
void SystemInfo::Init() { void
if(initialized_) return; SystemInfo::Init() {
if (initialized_) return;
initialized_ = true; initialized_ = true;
// initialize CPU information // initialize CPU information
FILE* file; FILE *file;
struct tms time_sample; struct tms time_sample;
char line[128]; char line[128];
last_cpu_ = times(&time_sample); last_cpu_ = times(&time_sample);
...@@ -46,7 +45,7 @@ void SystemInfo::Init() { ...@@ -46,7 +45,7 @@ void SystemInfo::Init() {
last_user_cpu_ = time_sample.tms_utime; last_user_cpu_ = time_sample.tms_utime;
file = fopen("/proc/cpuinfo", "r"); file = fopen("/proc/cpuinfo", "r");
num_processors_ = 0; num_processors_ = 0;
while(fgets(line, 128, file) != NULL){ while (fgets(line, 128, file) != NULL) {
if (strncmp(line, "processor", 9) == 0) num_processors_++; if (strncmp(line, "processor", 9) == 0) num_processors_++;
if (strncmp(line, "physical", 8) == 0) { if (strncmp(line, "physical", 8) == 0) {
num_physical_processors_ = ParseLine(line); num_physical_processors_ = ParseLine(line);
...@@ -58,24 +57,24 @@ void SystemInfo::Init() { ...@@ -58,24 +57,24 @@ void SystemInfo::Init() {
//initialize GPU information //initialize GPU information
nvmlReturn_t nvmlresult; nvmlReturn_t nvmlresult;
nvmlresult = nvmlInit(); nvmlresult = nvmlInit();
if(NVML_SUCCESS != nvmlresult) { if (NVML_SUCCESS != nvmlresult) {
printf("System information initilization failed"); printf("System information initilization failed");
return ; return;
} }
nvmlresult = nvmlDeviceGetCount(&num_device_); nvmlresult = nvmlDeviceGetCount(&num_device_);
if(NVML_SUCCESS != nvmlresult) { if (NVML_SUCCESS != nvmlresult) {
printf("Unable to get devidce number"); printf("Unable to get devidce number");
return ; return;
} }
//initialize network traffic information //initialize network traffic information
std::pair<unsigned long long, unsigned long long> in_and_out_octets = Octets(); std::pair<uint64_t, uint64_t> in_and_out_octets = Octets();
in_octets_ = in_and_out_octets.first; in_octets_ = in_and_out_octets.first;
out_octets_ = in_and_out_octets.second; out_octets_ = in_and_out_octets.second;
net_time_ = std::chrono::system_clock::now(); net_time_ = std::chrono::system_clock::now();
} }
long long uint64_t
SystemInfo::ParseLine(char *line) { SystemInfo::ParseLine(char *line) {
// This assumes that a digit will be found and the line ends in " Kb". // This assumes that a digit will be found and the line ends in " Kb".
int i = strlen(line); int i = strlen(line);
...@@ -83,53 +82,52 @@ SystemInfo::ParseLine(char *line) { ...@@ -83,53 +82,52 @@ SystemInfo::ParseLine(char *line) {
while (*p < '0' || *p > '9') p++; while (*p < '0' || *p > '9') p++;
line[i - 3] = '\0'; line[i - 3] = '\0';
i = atoi(p); i = atoi(p);
return static_cast<long long>(i); return static_cast<uint64_t>(i);
} }
unsigned long uint64_t
SystemInfo::GetPhysicalMemory() { SystemInfo::GetPhysicalMemory() {
struct sysinfo memInfo; struct sysinfo memInfo;
sysinfo (&memInfo); sysinfo(&memInfo);
unsigned long totalPhysMem = memInfo.totalram; uint64_t totalPhysMem = memInfo.totalram;
//Multiply in next statement to avoid int overflow on right hand side... //Multiply in next statement to avoid int overflow on right hand side...
totalPhysMem *= memInfo.mem_unit; totalPhysMem *= memInfo.mem_unit;
return totalPhysMem; return totalPhysMem;
} }
unsigned long uint64_t
SystemInfo::GetProcessUsedMemory() { SystemInfo::GetProcessUsedMemory() {
//Note: this value is in KB! //Note: this value is in KB!
FILE* file = fopen("/proc/self/status", "r"); FILE *file = fopen("/proc/self/status", "r");
constexpr int64_t line_length = 128; constexpr uint64_t line_length = 128;
long long result = -1; uint64_t result = -1;
constexpr int64_t KB_SIZE = 1024; constexpr uint64_t KB_SIZE = 1024;
char line[line_length]; char line[line_length];
while (fgets(line, line_length, file) != NULL){ while (fgets(line, line_length, file) != NULL) {
if (strncmp(line, "VmRSS:", 6) == 0){ if (strncmp(line, "VmRSS:", 6) == 0) {
result = ParseLine(line); result = ParseLine(line);
break; break;
} }
} }
fclose(file); fclose(file);
// return value in Byte // return value in Byte
return (result*KB_SIZE); return (result * KB_SIZE);
} }
double double
SystemInfo::MemoryPercent() { SystemInfo::MemoryPercent() {
if (!initialized_) Init(); if (!initialized_) Init();
return (double)(GetProcessUsedMemory()*100)/(double)total_ram_; return (double) (GetProcessUsedMemory() * 100) / (double) total_ram_;
} }
std::vector<double> std::vector<double>
SystemInfo::CPUCorePercent() { SystemInfo::CPUCorePercent() {
std::vector<unsigned long long> prev_work_time_array; std::vector<uint64_t> prev_work_time_array;
std::vector<unsigned long long> prev_total_time_array = getTotalCpuTime(prev_work_time_array); std::vector<uint64_t> prev_total_time_array = getTotalCpuTime(prev_work_time_array);
usleep(100000); usleep(100000);
std::vector<unsigned long long> cur_work_time_array; std::vector<uint64_t> cur_work_time_array;
std::vector<unsigned long long> cur_total_time_array = getTotalCpuTime(cur_work_time_array); std::vector<uint64_t> cur_total_time_array = getTotalCpuTime(cur_work_time_array);
std::vector<double> cpu_core_percent; std::vector<double> cpu_core_percent;
for (int i = 1; i < num_processors_; i++) { for (int i = 1; i < num_processors_; i++) {
...@@ -140,22 +138,21 @@ SystemInfo::CPUCorePercent() { ...@@ -140,22 +138,21 @@ SystemInfo::CPUCorePercent() {
return cpu_core_percent; return cpu_core_percent;
} }
std::vector<unsigned long long> std::vector<uint64_t>
SystemInfo::getTotalCpuTime(std::vector<unsigned long long> &work_time_array) SystemInfo::getTotalCpuTime(std::vector<uint64_t> &work_time_array) {
{ std::vector<uint64_t> total_time_array;
std::vector<unsigned long long> total_time_array; FILE *file = fopen("/proc/stat", "r");
FILE* file = fopen("/proc/stat", "r");
if (file == NULL) { if (file == NULL) {
perror("Could not open stat file"); perror("Could not open stat file");
return total_time_array; return total_time_array;
} }
unsigned long long user = 0, nice = 0, system = 0, idle = 0; uint64_t user = 0, nice = 0, system = 0, idle = 0;
unsigned long long iowait = 0, irq = 0, softirq = 0, steal = 0, guest = 0, guestnice = 0; uint64_t iowait = 0, irq = 0, softirq = 0, steal = 0, guest = 0, guestnice = 0;
for (int i = 0; i < num_processors_; i++) { for (int i = 0; i < num_processors_; i++) {
char buffer[1024]; char buffer[1024];
char* ret = fgets(buffer, sizeof(buffer) - 1, file); char *ret = fgets(buffer, sizeof(buffer) - 1, file);
if (ret == NULL) { if (ret == NULL) {
perror("Could not read stat file"); perror("Could not read stat file");
fclose(file); fclose(file);
...@@ -163,7 +160,7 @@ SystemInfo::getTotalCpuTime(std::vector<unsigned long long> &work_time_array) ...@@ -163,7 +160,7 @@ SystemInfo::getTotalCpuTime(std::vector<unsigned long long> &work_time_array)
} }
sscanf(buffer, sscanf(buffer,
"cpu %16llu %16llu %16llu %16llu %16llu %16llu %16llu %16llu %16llu %16llu", "cpu %16lu %16lu %16lu %16lu %16lu %16lu %16lu %16lu %16lu %16lu",
&user, &nice, &system, &idle, &iowait, &irq, &softirq, &steal, &guest, &guestnice); &user, &nice, &system, &idle, &iowait, &irq, &softirq, &steal, &guest, &guestnice);
work_time_array.push_back(user + nice + system); work_time_array.push_back(user + nice + system);
...@@ -174,9 +171,6 @@ SystemInfo::getTotalCpuTime(std::vector<unsigned long long> &work_time_array) ...@@ -174,9 +171,6 @@ SystemInfo::getTotalCpuTime(std::vector<unsigned long long> &work_time_array)
return total_time_array; return total_time_array;
} }
double double
SystemInfo::CPUPercent() { SystemInfo::CPUPercent() {
if (!initialized_) Init(); if (!initialized_) Init();
...@@ -186,11 +180,10 @@ SystemInfo::CPUPercent() { ...@@ -186,11 +180,10 @@ SystemInfo::CPUPercent() {
now = times(&time_sample); now = times(&time_sample);
if (now <= last_cpu_ || time_sample.tms_stime < last_sys_cpu_ || if (now <= last_cpu_ || time_sample.tms_stime < last_sys_cpu_ ||
time_sample.tms_utime < last_user_cpu_){ time_sample.tms_utime < last_user_cpu_) {
//Overflow detection. Just skip this value. //Overflow detection. Just skip this value.
percent = -1.0; percent = -1.0;
} } else {
else{
percent = (time_sample.tms_stime - last_sys_cpu_) + percent = (time_sample.tms_stime - last_sys_cpu_) +
(time_sample.tms_utime - last_user_cpu_); (time_sample.tms_utime - last_user_cpu_);
percent /= (now - last_cpu_); percent /= (now - last_cpu_);
...@@ -203,12 +196,11 @@ SystemInfo::CPUPercent() { ...@@ -203,12 +196,11 @@ SystemInfo::CPUPercent() {
return percent; return percent;
} }
std::vector<uint64_t>
std::vector<unsigned long long>
SystemInfo::GPUMemoryTotal() { SystemInfo::GPUMemoryTotal() {
// get GPU usage percent // get GPU usage percent
if(!initialized_) Init(); if (!initialized_) Init();
std::vector<unsigned long long > result; std::vector<uint64_t> result;
nvmlMemory_t nvmlMemory; nvmlMemory_t nvmlMemory;
for (int i = 0; i < num_device_; ++i) { for (int i = 0; i < num_device_; ++i) {
nvmlDevice_t device; nvmlDevice_t device;
...@@ -219,21 +211,22 @@ SystemInfo::GPUMemoryTotal() { ...@@ -219,21 +211,22 @@ SystemInfo::GPUMemoryTotal() {
return result; return result;
} }
std::vector<unsigned int> std::vector<uint64_t>
SystemInfo::GPUTemperature(){ SystemInfo::GPUTemperature() {
if(!initialized_) Init(); if (!initialized_) Init();
std::vector<unsigned int > result; std::vector<uint64_t> result;
for (int i = 0; i < num_device_; i++) { for (int i = 0; i < num_device_; i++) {
nvmlDevice_t device; nvmlDevice_t device;
nvmlDeviceGetHandleByIndex(i, &device); nvmlDeviceGetHandleByIndex(i, &device);
unsigned int temp; unsigned int temp;
nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU,&temp); nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &temp);
result.push_back(temp); result.push_back(temp);
} }
return result; return result;
} }
std::vector<float> std::vector<float>
SystemInfo::CPUTemperature(){ SystemInfo::CPUTemperature() {
std::vector<float> result; std::vector<float> result;
for (int i = 0; i <= num_physical_processors_; ++i) { for (int i = 0; i <= num_physical_processors_; ++i) {
std::string path = "/sys/class/thermal/thermal_zone" + std::to_string(i) + "/temp"; std::string path = "/sys/class/thermal/thermal_zone" + std::to_string(i) + "/temp";
...@@ -247,15 +240,14 @@ SystemInfo::CPUTemperature(){ ...@@ -247,15 +240,14 @@ SystemInfo::CPUTemperature(){
result.push_back(temp / 1000); result.push_back(temp / 1000);
fclose(file); fclose(file);
} }
} }
std::vector<unsigned long long> std::vector<uint64_t>
SystemInfo::GPUMemoryUsed() { SystemInfo::GPUMemoryUsed() {
// get GPU memory used // get GPU memory used
if(!initialized_) Init(); if (!initialized_) Init();
std::vector<unsigned long long int> result; std::vector<uint64_t> result;
nvmlMemory_t nvmlMemory; nvmlMemory_t nvmlMemory;
for (int i = 0; i < num_device_; ++i) { for (int i = 0; i < num_device_; ++i) {
nvmlDevice_t device; nvmlDevice_t device;
...@@ -266,42 +258,41 @@ SystemInfo::GPUMemoryUsed() { ...@@ -266,42 +258,41 @@ SystemInfo::GPUMemoryUsed() {
return result; return result;
} }
std::pair<unsigned long long , unsigned long long > std::pair<uint64_t, uint64_t>
SystemInfo::Octets(){ SystemInfo::Octets() {
pid_t pid = getpid(); pid_t pid = getpid();
// const std::string filename = "/proc/"+std::to_string(pid)+"/net/netstat"; // const std::string filename = "/proc/"+std::to_string(pid)+"/net/netstat";
const std::string filename = "/proc/net/netstat"; const std::string filename = "/proc/net/netstat";
std::ifstream file(filename); std::ifstream file(filename);
std::string lastline = ""; std::string lastline = "";
std::string line = ""; std::string line = "";
while(file){ while (file) {
getline(file, line); getline(file, line);
if(file.fail()){ if (file.fail()) {
break; break;
} }
lastline = line; lastline = line;
} }
std::vector<size_t> space_position; std::vector<size_t> space_position;
size_t space_pos = lastline.find(" "); size_t space_pos = lastline.find(" ");
while(space_pos != std::string::npos){ while (space_pos != std::string::npos) {
space_position.push_back(space_pos); space_position.push_back(space_pos);
space_pos = lastline.find(" ",space_pos+1); space_pos = lastline.find(" ", space_pos + 1);
} }
// InOctets is between 6th and 7th " " and OutOctets is between 7th and 8th " " // InOctets is between 6th and 7th " " and OutOctets is between 7th and 8th " "
size_t inoctets_begin = space_position[6]+1; size_t inoctets_begin = space_position[6] + 1;
size_t inoctets_length = space_position[7]-inoctets_begin; size_t inoctets_length = space_position[7] - inoctets_begin;
size_t outoctets_begin = space_position[7]+1; size_t outoctets_begin = space_position[7] + 1;
size_t outoctets_length = space_position[8]-outoctets_begin; size_t outoctets_length = space_position[8] - outoctets_begin;
std::string inoctets = lastline.substr(inoctets_begin,inoctets_length); std::string inoctets = lastline.substr(inoctets_begin, inoctets_length);
std::string outoctets = lastline.substr(outoctets_begin,outoctets_length); std::string outoctets = lastline.substr(outoctets_begin, outoctets_length);
uint64_t inoctets_bytes = std::stoull(inoctets);
unsigned long long inoctets_bytes = std::stoull(inoctets); uint64_t outoctets_bytes = std::stoull(outoctets);
unsigned long long outoctets_bytes = std::stoull(outoctets); std::pair<uint64_t, uint64_t> res(inoctets_bytes, outoctets_bytes);
std::pair<unsigned long long , unsigned long long > res(inoctets_bytes, outoctets_bytes);
return res; return res;
} }
} } // namespace server
} } // namespace milvus
} } // namespace zilliz
\ No newline at end of file
...@@ -18,19 +18,17 @@ ...@@ -18,19 +18,17 @@
#pragma once #pragma once
#include "sys/types.h" #include <sys/types.h>
#include "sys/sysinfo.h" #include <sys/sysinfo.h>
#include "stdlib.h" #include <sys/times.h>
#include "stdio.h" #include <sys/vtimes.h>
#include "string.h" #include <stdlib.h>
#include "sys/times.h" #include <stdio.h>
#include "sys/vtimes.h" #include <string.h>
#include <chrono> #include <chrono>
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
#include <utility>
namespace zilliz { namespace zilliz {
namespace milvus { namespace milvus {
...@@ -38,7 +36,7 @@ namespace server { ...@@ -38,7 +36,7 @@ namespace server {
class SystemInfo { class SystemInfo {
private: private:
unsigned long total_ram_ = 0; uint64_t total_ram_ = 0;
clock_t last_cpu_ = clock_t(); clock_t last_cpu_ = clock_t();
clock_t last_sys_cpu_ = clock_t(); clock_t last_sys_cpu_ = clock_t();
clock_t last_user_cpu_ = clock_t(); clock_t last_user_cpu_ = clock_t();
...@@ -46,44 +44,71 @@ class SystemInfo { ...@@ -46,44 +44,71 @@ class SystemInfo {
int num_processors_ = 0; int num_processors_ = 0;
int num_physical_processors_ = 0; int num_physical_processors_ = 0;
//number of GPU //number of GPU
unsigned int num_device_ = 0; uint32_t num_device_ = 0;
unsigned long long in_octets_ = 0; uint64_t in_octets_ = 0;
unsigned long long out_octets_ = 0; uint64_t out_octets_ = 0;
bool initialized_ = false; bool initialized_ = false;
public: public:
static SystemInfo & static SystemInfo &
GetInstance(){ GetInstance() {
static SystemInfo instance; static SystemInfo instance;
return instance; return instance;
} }
void Init(); void Init();
int num_processor() const { return num_processors_;};
int num_physical_processors() const { return num_physical_processors_; }; int num_processor() const {
int num_device() const {return num_device_;}; return num_processors_;
unsigned long long get_inoctets() { return in_octets_;}; }
unsigned long long get_octets() { return out_octets_;};
std::chrono::system_clock::time_point get_nettime() { return net_time_;}; int num_physical_processors() const {
void set_inoctets(unsigned long long value) { in_octets_ = value;}; return num_physical_processors_;
void set_outoctets(unsigned long long value) { out_octets_ = value;}; }
void set_nettime() {net_time_ = std::chrono::system_clock::now();};
long long ParseLine(char* line); uint32_t num_device() const {
unsigned long GetPhysicalMemory(); return num_device_;
unsigned long GetProcessUsedMemory(); }
uint64_t get_inoctets() {
return in_octets_;
}
uint64_t get_octets() {
return out_octets_;
}
std::chrono::system_clock::time_point get_nettime() {
return net_time_;
}
void set_inoctets(uint64_t value) {
in_octets_ = value;
}
void set_outoctets(uint64_t value) {
out_octets_ = value;
}
void set_nettime() {
net_time_ = std::chrono::system_clock::now();
}
uint64_t ParseLine(char *line);
uint64_t GetPhysicalMemory();
uint64_t GetProcessUsedMemory();
double MemoryPercent(); double MemoryPercent();
double CPUPercent(); double CPUPercent();
std::pair<unsigned long long , unsigned long long > Octets(); std::pair<uint64_t, uint64_t> Octets();
std::vector<unsigned long long> GPUMemoryTotal(); std::vector<uint64_t> GPUMemoryTotal();
std::vector<unsigned long long> GPUMemoryUsed(); std::vector<uint64_t> GPUMemoryUsed();
std::vector<double> CPUCorePercent(); std::vector<double> CPUCorePercent();
std::vector<unsigned long long> getTotalCpuTime(std::vector<unsigned long long> &workTime); std::vector<uint64_t> getTotalCpuTime(std::vector<uint64_t> &workTime);
std::vector<unsigned int> GPUTemperature(); std::vector<uint64_t> GPUTemperature();
std::vector<float> CPUTemperature(); std::vector<float> CPUTemperature();
}; };
} } // namespace server
} } // namespace milvus
} } // namespace zilliz
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册