PrometheusMetrics.cpp 5.3 KB
Newer Older
Y
yu yunfeng 已提交
1 2 3 4 5 6 7
/*******************************************************************************
 * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
 * Unauthorized copying of this file, via any medium is strictly prohibited.
 * Proprietary and confidential.
 ******************************************************************************/

#include "PrometheusMetrics.h"
G
groot 已提交
8
#include "utils/Log.h"
Y
yu yunfeng 已提交
9
#include "SystemInfo.h"
Y
yu yunfeng 已提交
10

11

Y
yu yunfeng 已提交
12
namespace zilliz {
J
jinhai 已提交
13
namespace milvus {
Y
yu yunfeng 已提交
14 15 16 17
namespace server {

// Reads the metric section of the server configuration and, when metrics are
// switched on, creates the Prometheus exposer and registers registry_ with it.
//
// Returns SERVER_SUCCESS when metrics are switched off or the exposer was set
// up, and SERVER_UNEXPECTED_ERROR when a std::exception was caught on the way.
ServerError
PrometheusMetrics::Init() {
    try {
        ConfigNode &metric_config = ServerConfig::GetInstance().GetConfig(CONFIG_METRIC);

        // Metrics are active only when the config value is exactly "on".
        startup_ = (metric_config.GetValue(CONFIG_METRIC_IS_STARTUP) == "on");
        if (!startup_) {
            return SERVER_SUCCESS;
        }

        // The value stored under CONFIG_METRIC_PROMETHEUS_PORT is handed to the
        // exposer as its bind address; the URI and thread count are fixed here.
        const std::string bind_address =
            metric_config.GetChild(CONFIG_PROMETHEUS).GetValue(CONFIG_METRIC_PROMETHEUS_PORT);
        const std::string uri("/metrics");
        const std::size_t num_threads = 2;

        // Create the exposer, then attach the registry to it.
        exposer_ptr_ = std::make_shared<prometheus::Exposer>(bind_address, uri, num_threads);
        exposer_ptr_->RegisterCollectable(registry_);
    } catch (const std::exception &ex) {
        SERVER_LOG_ERROR << "Failed to connect prometheus server: " << std::string(ex.what());
        return SERVER_UNEXPECTED_ERROR;
    }

    return SERVER_SUCCESS;
}

Y
fix  
yu yunfeng 已提交
43

Y
yu yunfeng 已提交
44 45 46
void
PrometheusMetrics::CPUUsagePercentSet()  {
    if(!startup_) return ;
K
kun yu 已提交
47 48
    int numProcessor = server::SystemInfo::GetInstance().num_processor();

Y
yu yunfeng 已提交
49 50 51 52 53 54 55 56 57 58 59 60 61 62
    double usage_percent = server::SystemInfo::GetInstance().CPUPercent();
    CPU_usage_percent_.Set(usage_percent);
}

void
PrometheusMetrics::RAMUsagePercentSet() {
    if(!startup_) return ;
    double usage_percent = server::SystemInfo::GetInstance().MemoryPercent();
    RAM_usage_percent_.Set(usage_percent);
}

void
PrometheusMetrics::GPUPercentGaugeSet() {
    if(!startup_) return;
K
kun yu 已提交
63 64 65 66 67
    int numDevice = server::SystemInfo::GetInstance().num_device();
//    std::vector<unsigned int> values = server::SystemInfo::GetInstance().GPUPercent();
    std::vector<unsigned long long > used_memory = server::SystemInfo::GetInstance().GPUMemoryUsed();
    constexpr unsigned long long MtoB = 1024*1024;

K
kun yu 已提交
68

K
kun yu 已提交
69
    for (int i = 0; i < numDevice; i++) {
K
kun yu 已提交
70
        prometheus::Gauge &GPU_percent = GPU_percent_.Add({{"DeviceNum", std::to_string(i)}});
K
kun yu 已提交
71 72 73 74 75
//        std::cout << "nvmlDeviceGetUtilizationRates: " << values[i] << std::endl;
//        GPU_percent.Set(static_cast<double>(values[i]));
        double percent = (double)used_memory[i] / (double)MtoB;
        double res = (percent / 6078) * 100;
        GPU_percent.Set(res);
K
kun yu 已提交
76 77
    }

Y
yu yunfeng 已提交
78 79 80 81 82
}

void PrometheusMetrics::GPUMemoryUsageGaugeSet() {
    if(!startup_) return;
    std::vector<unsigned long long> values = server::SystemInfo::GetInstance().GPUMemoryUsed();
Y
fix  
yu yunfeng 已提交
83
    constexpr unsigned long long MtoB = 1024*1024;
K
kun yu 已提交
84
    int numDevice = server::SystemInfo::GetInstance().num_device();
Y
yu yunfeng 已提交
85

K
kun yu 已提交
86 87 88 89 90
    for (int i = 0; i < numDevice; i++) {
        prometheus::Gauge &GPU_memory = GPU_memory_usage_.Add({{"DeviceNum", std::to_string(i)}});
        GPU_memory.Set(values[i] / MtoB);
    }

Y
yu yunfeng 已提交
91 92 93 94 95 96 97 98 99
}
// Sets add_vectors_per_second_gauge_ to the insert throughput in MB/s.
//
// num_vector: number of vectors added.
// dim:        number of elements per vector.
// time:       value the byte count is divided by; presumably the elapsed time
//             in seconds, given the MB/s unit -- confirm with callers.
//
// Does nothing when metrics are switched off or when time is not positive.
void PrometheusMetrics::AddVectorsPerSecondGaugeSet(int num_vector, int dim, double time) {
    if (!startup_) return;

    // A zero or negative interval cannot yield a meaningful rate; skip the
    // update instead of publishing inf or a negative throughput.
    if (time <= 0) return;

    constexpr long long kBytesPerMB = 1024 * 1024;
    // Each element is counted as 4 bytes (presumably 32-bit floats).
    constexpr long long kBytesPerElement = 4;

    // Widen before multiplying so that large batches do not overflow int.
    const long long size = static_cast<long long>(num_vector) * dim * kBytesPerElement;
    add_vectors_per_second_gauge_.Set(size / time / kBytesPerMB);
}
Y
yu yunfeng 已提交
101
void PrometheusMetrics::QueryIndexTypePerSecondSet(std::string type, double value) {
Y
yu yunfeng 已提交
102
    if(!startup_) return;
Y
yu yunfeng 已提交
103 104 105 106 107 108 109
    if(type == "IVF"){
        query_index_IVF_type_per_second_gauge_.Set(value);
    } else if(type == "IDMap"){
        query_index_IDMAP_type_per_second_gauge_.Set(value);
    }

}
Y
yu yunfeng 已提交
110

Y
yu yunfeng 已提交
111 112 113 114
// Increments connection_gauge_ when metrics are switched on.
void PrometheusMetrics::ConnectionGaugeIncrement() {
    if (startup_) {
        connection_gauge_.Increment();
    }
}
Y
yu yunfeng 已提交
115

Y
yu yunfeng 已提交
116 117 118 119 120
// Decrements connection_gauge_ when metrics are switched on.
void PrometheusMetrics::ConnectionGaugeDecrement() {
    if (startup_) {
        connection_gauge_.Decrement();
    }
}

Y
yu yunfeng 已提交
121 122 123 124
// Publishes inbound/outbound octet rates to inoctets_gauge_ and
// outoctets_gauge_. The rate is the change in the counters returned by
// SystemInfo::Octets() since the values previously stored in SystemInfo,
// divided by the elapsed seconds. The stored counters and timestamp are
// refreshed on every call. Does nothing when metrics are switched off.
void PrometheusMetrics::OctetsSet() {
    if (!startup_) {
        return;
    }

    auto &sys_info = SystemInfo::GetInstance();

    // Snapshot the previously stored counters and timestamp.
    // NOTE(review): the outbound value is read through get_octets() while it is
    // written through set_outoctets(); confirm the getter name is intended.
    const unsigned long long prev_in = sys_info.get_inoctets();
    const unsigned long long prev_out = sys_info.get_octets();
    auto prev_time = sys_info.get_nettime();

    // Fetch the current counters and store them for the next call.
    const std::pair<unsigned long long, unsigned long long> current = sys_info.Octets();
    sys_info.set_inoctets(current.first);
    sys_info.set_outoctets(current.second);
    sys_info.set_nettime();

    // Elapsed time between the previous snapshot and now, in seconds.
    constexpr double micro_to_second = 1e-6;
    auto current_time = std::chrono::system_clock::now();
    auto elapsed_micro = METRICS_MICROSECONDS(prev_time, current_time);
    auto elapsed_second = elapsed_micro * micro_to_second;
    if (elapsed_second == 0) {
        // No measurable interval: leave the gauges as they are.
        return;
    }

    inoctets_gauge_.Set((current.first - prev_in) / elapsed_second);
    outoctets_gauge_.Set((current.second - prev_out) / elapsed_second);
}

K
kun yu 已提交
143 144 145 146 147 148 149 150 151 152 153
// Sets one CPU_ gauge per entry returned by SystemInfo::CPUCorePercent(),
// labelled {"CPU", <index>}. Does nothing when metrics are switched off.
void PrometheusMetrics::CPUCoreUsagePercentSet() {
    if (!startup_)
        return;

    const std::vector<double> cpu_core_percent = server::SystemInfo::GetInstance().CPUCorePercent();

    // Index with std::size_t to match size() and avoid a signed/unsigned
    // comparison; the label text is the same decimal index as before.
    for (std::size_t i = 0; i < cpu_core_percent.size(); ++i) {
        prometheus::Gauge &core_percent = CPU_.Add({{"CPU", std::to_string(i)}});
        core_percent.Set(cpu_core_percent[i]);
    }
}
Y
fix  
yu yunfeng 已提交
154

Y
yu yunfeng 已提交
155 156 157 158

}
}
}