From 691df6ed18f80cffd94e1fb7286b68c62a225861 Mon Sep 17 00:00:00 2001 From: kun yu Date: Sat, 27 Jul 2019 16:19:34 +0800 Subject: [PATCH] add CPU_usage_rate Metrics Former-commit-id: a2a914159d759f9724b70954758cd22d7d5c98ff --- cpp/src/metrics/MetricBase.h | 2 + cpp/src/metrics/PrometheusMetrics.cpp | 59 +++++++++++++++++++-------- cpp/src/metrics/PrometheusMetrics.h | 45 +++++++++++--------- cpp/src/metrics/SystemInfo.cpp | 58 +++++++++++++++++++++++++- cpp/src/metrics/SystemInfo.h | 5 +++ 5 files changed, 131 insertions(+), 38 deletions(-) diff --git a/cpp/src/metrics/MetricBase.h b/cpp/src/metrics/MetricBase.h index fe9b2465..61e9e768 100644 --- a/cpp/src/metrics/MetricBase.h +++ b/cpp/src/metrics/MetricBase.h @@ -64,6 +64,8 @@ class MetricsBase{ virtual void ConnectionGaugeDecrement() {}; virtual void KeepingAliveCounterIncrement(double value = 1) {}; virtual void OctetsSet() {}; + + virtual void CPUCoreUsagePercentSet() {}; }; diff --git a/cpp/src/metrics/PrometheusMetrics.cpp b/cpp/src/metrics/PrometheusMetrics.cpp index df051634..c8a09b8e 100644 --- a/cpp/src/metrics/PrometheusMetrics.cpp +++ b/cpp/src/metrics/PrometheusMetrics.cpp @@ -44,6 +44,8 @@ PrometheusMetrics::Init() { void PrometheusMetrics::CPUUsagePercentSet() { if(!startup_) return ; + int numProcessor = server::SystemInfo::GetInstance().num_processor(); + double usage_percent = server::SystemInfo::GetInstance().CPUPercent(); CPU_usage_percent_.Set(usage_percent); } @@ -60,14 +62,20 @@ PrometheusMetrics::GPUPercentGaugeSet() { if(!startup_) return; int numDevide = server::SystemInfo::GetInstance().num_device(); std::vector values = server::SystemInfo::GetInstance().GPUPercent(); - if(numDevide >= 1) GPU0_percent_gauge_.Set(static_cast(values[0])); - if(numDevide >= 2) GPU1_percent_gauge_.Set(static_cast(values[1])); - if(numDevide >= 3) GPU2_percent_gauge_.Set(static_cast(values[2])); - if(numDevide >= 4) GPU3_percent_gauge_.Set(static_cast(values[3])); - if(numDevide >= 5) GPU4_percent_gauge_.Set(static_cast(values[4])); - if(numDevide >= 6) GPU5_percent_gauge_.Set(static_cast(values[5])); - if(numDevide >= 7) GPU6_percent_gauge_.Set(static_cast(values[6])); - if(numDevide >= 8) GPU7_percent_gauge_.Set(static_cast(values[7])); + + for (int i = 0; i < values.size(); i++) { + prometheus::Gauge &GPU_percent = GPU_percent_.Add({{"DeviceNum", std::to_string(i)}}); + GPU_percent.Set(static_cast(values[i])); + } + +// if(numDevide >= 1) GPU0_percent_gauge_.Set(static_cast(values[0])); +// if(numDevide >= 2) GPU1_percent_gauge_.Set(static_cast(values[1])); +// if(numDevide >= 3) GPU2_percent_gauge_.Set(static_cast(values[2])); +// if(numDevide >= 4) GPU3_percent_gauge_.Set(static_cast(values[3])); +// if(numDevide >= 5) GPU4_percent_gauge_.Set(static_cast(values[4])); +// if(numDevide >= 6) GPU5_percent_gauge_.Set(static_cast(values[5])); +// if(numDevide >= 7) GPU6_percent_gauge_.Set(static_cast(values[6])); +// if(numDevide >= 8) GPU7_percent_gauge_.Set(static_cast(values[7])); // to do } @@ -78,16 +86,21 @@ void PrometheusMetrics::GPUMemoryUsageGaugeSet() { constexpr unsigned long long MtoB = 1024*1024; int numDevice = values.size(); - if(numDevice >=1) GPU0_memory_usage_gauge_.Set(values[0]/MtoB); - if(numDevice >=2) GPU1_memory_usage_gauge_.Set(values[1]/MtoB); - if(numDevice >=3) GPU2_memory_usage_gauge_.Set(values[2]/MtoB); - if(numDevice >=4) GPU3_memory_usage_gauge_.Set(values[3]/MtoB); - if(numDevice >=5) GPU4_memory_usage_gauge_.Set(values[4]/MtoB); - if(numDevice >=6) GPU5_memory_usage_gauge_.Set(values[5]/MtoB); - if(numDevice >=7) GPU6_memory_usage_gauge_.Set(values[6]/MtoB); - if(numDevice >=8) GPU7_memory_usage_gauge_.Set(values[7]/MtoB); + for (int i = 0; i < numDevice; i++) { + prometheus::Gauge &GPU_memory = GPU_memory_usage_.Add({{"DeviceNum", std::to_string(i)}}); + GPU_memory.Set(values[i] / MtoB); + } + + +// if(numDevice >=1) GPU0_memory_usage_gauge_.Set(values[0]/MtoB); +// if(numDevice >=2) GPU1_memory_usage_gauge_.Set(values[1]/MtoB); +// if(numDevice >=3) GPU2_memory_usage_gauge_.Set(values[2]/MtoB); +// if(numDevice >=4) GPU3_memory_usage_gauge_.Set(values[3]/MtoB); +// if(numDevice >=5) GPU4_memory_usage_gauge_.Set(values[4]/MtoB); +// if(numDevice >=6) GPU5_memory_usage_gauge_.Set(values[5]/MtoB); +// if(numDevice >=7) GPU6_memory_usage_gauge_.Set(values[6]/MtoB); +// if(numDevice >=8) GPU7_memory_usage_gauge_.Set(values[7]/MtoB); - // to do } void PrometheusMetrics::AddVectorsPerSecondGaugeSet(int num_vector, int dim, double time) { // MB/s @@ -140,6 +153,18 @@ void PrometheusMetrics::OctetsSet() { outoctets_gauge_.Set((in_and_out_octets.second-old_outoctets)/total_second); } +void PrometheusMetrics::CPUCoreUsagePercentSet() { + if (!startup_) + return; + + std::vector cpu_core_percent = server::SystemInfo::GetInstance().CPUCorePercent(); + + for (int i = 0; i < cpu_core_percent.size(); i++) { + prometheus::Gauge &core_percent = CPU_.Add({{"CPU", std::to_string(i)}}); + core_percent.Set(cpu_core_percent[i]); +// std::cout << cpu_core_percent[i] << "+"; + } +} } diff --git a/cpp/src/metrics/PrometheusMetrics.h b/cpp/src/metrics/PrometheusMetrics.h index 5b651ec1..be735853 100644 --- a/cpp/src/metrics/PrometheusMetrics.h +++ b/cpp/src/metrics/PrometheusMetrics.h @@ -12,6 +12,7 @@ #include #include +#include #include "server/ServerConfig.h" #include "MetricBase.h" @@ -78,6 +79,9 @@ class PrometheusMetrics: public MetricsBase { void QueryVectorResponseSummaryObserve(double value, int count = 1) override { if (startup_) for(int i = 0 ; i < count ; ++i) query_vector_response_summary_.Observe(value);}; void QueryVectorResponsePerSecondGaugeSet(double value) override {if (startup_) query_vector_response_per_second_gauge_.Set(value);}; void CPUUsagePercentSet() override ; + + void CPUCoreUsagePercentSet() override; + void RAMUsagePercentSet() override ; void QueryResponsePerSecondGaugeSet(double value) override {if(startup_) query_response_per_second_gauge.Set(value);}; void GPUPercentGaugeSet() override ; @@ -322,7 +326,7 @@ class PrometheusMetrics: public MetricsBase { prometheus::Gauge &faiss_disk_load_IO_speed_gauge_ = faiss_disk_load_IO_speed_.Add({{"DB","Faiss"}}); - ////all from CacheMgr.cpp + ////all from CacheMgr.cpp //record cache access count prometheus::Family &cache_access_ = prometheus::BuildCounter() .Name("cache_access_total") @@ -392,7 +396,9 @@ class PrometheusMetrics: public MetricsBase { .Name("CPU_usage_percent") .Help("CPU usage percent by this this process") .Register(*registry_); - prometheus::Gauge &CPU_usage_percent_ = CPU_.Add({}); + prometheus::Gauge &CPU_usage_percent_ = CPU_.Add({{"CPU", "0"}}); +// std::vector &CPU_usage_percent_array; + prometheus::Family &RAM_ = prometheus::BuildGauge() .Name("RAM_usage_percent") @@ -405,15 +411,14 @@ class PrometheusMetrics: public MetricsBase { .Name("Gpu_usage_percent") .Help("GPU_usage_percent ") .Register(*registry_); - prometheus::Gauge &GPU0_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "0"}}); - prometheus::Gauge &GPU1_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "1"}}); - prometheus::Gauge &GPU2_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "2"}}); - prometheus::Gauge &GPU3_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "3"}}); - prometheus::Gauge &GPU4_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "4"}}); - prometheus::Gauge &GPU5_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "5"}}); - prometheus::Gauge &GPU6_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "6"}}); - prometheus::Gauge &GPU7_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "7"}}); -// std::vector GPU_percent_gauges_; +// prometheus::Gauge &GPU0_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "0"}}); +// prometheus::Gauge &GPU1_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "1"}}); +// prometheus::Gauge &GPU2_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "2"}}); +// prometheus::Gauge &GPU3_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "3"}}); +// prometheus::Gauge &GPU4_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "4"}}); +// prometheus::Gauge &GPU5_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "5"}}); +// prometheus::Gauge &GPU6_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "6"}}); +// prometheus::Gauge &GPU7_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "7"}}); @@ -423,15 +428,15 @@ class PrometheusMetrics: public MetricsBase { .Name("GPU_memory_usage_total") .Help("GPU memory usage total ") .Register(*registry_); - prometheus::Gauge &GPU0_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "0"}}); - prometheus::Gauge &GPU1_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "1"}}); - prometheus::Gauge &GPU2_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "2"}}); - prometheus::Gauge &GPU3_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "3"}}); - prometheus::Gauge &GPU4_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "4"}}); - prometheus::Gauge &GPU5_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "5"}}); - prometheus::Gauge &GPU6_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "6"}}); - prometheus::Gauge &GPU7_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "7"}}); -// std::vector GPU_memory_usage_gauges_; +// prometheus::Gauge &GPU0_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "0"}}); +// prometheus::Gauge &GPU1_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "1"}}); +// prometheus::Gauge &GPU2_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "2"}}); +// prometheus::Gauge &GPU3_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "3"}}); +// prometheus::Gauge &GPU4_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "4"}}); +// prometheus::Gauge &GPU5_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "5"}}); +// prometheus::Gauge &GPU6_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "6"}}); +// prometheus::Gauge &GPU7_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "7"}}); + prometheus::Family &query_index_type_per_second_ = prometheus::BuildGauge() .Name("query_index_throughtout_per_microsecond") diff --git a/cpp/src/metrics/SystemInfo.cpp b/cpp/src/metrics/SystemInfo.cpp index a64cbc49..9348b0d9 100644 --- a/cpp/src/metrics/SystemInfo.cpp +++ b/cpp/src/metrics/SystemInfo.cpp @@ -105,9 +105,65 @@ SystemInfo::GetProcessUsedMemory() { double SystemInfo::MemoryPercent() { if (!initialized_) Init(); - return GetProcessUsedMemory()*100/total_ram_; + return (double)(GetProcessUsedMemory()*100)/(double)total_ram_; } + + +std::vector +SystemInfo::CPUCorePercent() { + std::vector prev_work_time_array; + std::vector prev_total_time_array = getTotalCpuTime(prev_work_time_array); + usleep(100000); + std::vector cur_work_time_array; + std::vector cur_total_time_array = getTotalCpuTime(cur_work_time_array); + + std::vector cpu_core_percent; + for (int i = 0; i < num_processors_; i++) { + double total_cpu_time = cur_total_time_array[i] - prev_total_time_array[i]; + double cpu_work_time = cur_work_time_array[i] - prev_work_time_array[i]; + cpu_core_percent.push_back((cpu_work_time / total_cpu_time) * 100); + } + return cpu_core_percent; +} + +std::vector +SystemInfo::getTotalCpuTime(std::vector &work_time_array) +{ + std::vector total_time_array; + FILE* file = fopen("/proc/stat", "r"); + if (file == NULL) { + perror("Could not open stat file"); + return total_time_array; + } + + unsigned long long user = 0, nice = 0, system = 0, idle = 0; + unsigned long long iowait = 0, irq = 0, softirq = 0, steal = 0, guest = 0, guestnice = 0; + + for (int i = 0; i < num_processors_; i++) { + char buffer[1024]; + char* ret = fgets(buffer, sizeof(buffer) - 1, file); + if (ret == NULL) { + perror("Could not read stat file"); + fclose(file); + return total_time_array; + } + + sscanf(buffer, + "cpu %16llu %16llu %16llu %16llu %16llu %16llu %16llu %16llu %16llu %16llu", + &user, &nice, &system, &idle, &iowait, &irq, &softirq, &steal, &guest, &guestnice); + + work_time_array.push_back(user + nice + system); + total_time_array.push_back(user + nice + system + idle + iowait + irq + softirq + steal); + } + + fclose(file); + return total_time_array; +} + + + + double SystemInfo::CPUPercent() { if (!initialized_) Init(); diff --git a/cpp/src/metrics/SystemInfo.h b/cpp/src/metrics/SystemInfo.h index 2562e316..5ffb2c77 100644 --- a/cpp/src/metrics/SystemInfo.h +++ b/cpp/src/metrics/SystemInfo.h @@ -46,6 +46,7 @@ class SystemInfo { } void Init(); + int num_processor() const { return num_processors_;}; int num_device() const {return num_device_;}; unsigned long long get_inoctets() { return in_octets_;}; unsigned long long get_octets() { return out_octets_;}; @@ -62,6 +63,10 @@ class SystemInfo { std::vector GPUPercent(); std::vector GPUMemoryUsed(); + std::vector CPUCorePercent(); + std::vector getTotalCpuTime(std::vector &workTime); + + }; } -- GitLab