提交 07de6000 编写于 作者: K kun yu

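Fix GPU percent bug: compute each GPU's memory usage against the device's actual total memory instead of a hard-coded 6078 MB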


Former-commit-id: 0afa5d7a6a50c6a545c6fde296702580885d27b5
上级 af86fb92
......@@ -61,18 +61,14 @@ void
PrometheusMetrics::GPUPercentGaugeSet() {
    // Publishes, for every GPU, the share of device memory currently in use
    // (0-100) on the GPU_percent_ gauge family, labelled with the device
    // index under "DeviceNum". Does nothing until startup_ is set.
    if (!startup_) return;

    const int numDevice = server::SystemInfo::GetInstance().num_device();
    const std::vector<unsigned long long> total_memory = server::SystemInfo::GetInstance().GPUMemoryTotal();
    const std::vector<unsigned long long> used_memory = server::SystemInfo::GetInstance().GPUMemoryUsed();

    for (int i = 0; i < numDevice; i++) {
        const std::size_t idx = static_cast<std::size_t>(i);
        // Stop at the first device without a sample in both vectors
        // instead of indexing past their end.
        if (idx >= total_memory.size() || idx >= used_memory.size()) break;

        prometheus::Gauge &GPU_percent = GPU_percent_.Add({{"DeviceNum", std::to_string(i)}});

        // The ratio is taken against the device's own total memory, so no
        // per-card capacity constant is needed. A total of 0 would make the
        // division yield NaN/inf, so it is reported as 0% instead.
        const unsigned long long total = total_memory[idx];
        const double percent =
            (total == 0) ? 0.0
                         : static_cast<double>(used_memory[idx]) / static_cast<double>(total);
        GPU_percent.Set(percent * 100);
    }
}
......
......@@ -192,17 +192,17 @@ SystemInfo::CPUPercent() {
}
std::vector<unsigned int>
SystemInfo::GPUPercent() {
std::vector<unsigned long long>
SystemInfo::GPUMemoryTotal() {
// get GPU usage percent
if(!initialized_) Init();
std::vector<unsigned int> result;
nvmlUtilization_t utilization;
std::vector<unsigned long long > result;
nvmlMemory_t nvmlMemory;
for (int i = 0; i < num_device_; ++i) {
nvmlDevice_t device;
nvmlDeviceGetHandleByIndex(i, &device);
nvmlDeviceGetUtilizationRates(device, &utilization);
result.push_back(utilization.gpu);
nvmlDeviceGetMemoryInfo(device, &nvmlMemory);
result.push_back(nvmlMemory.total);
}
return result;
}
......
......@@ -60,7 +60,7 @@ class SystemInfo {
// Memory usage as a percentage.
// NOTE(review): definition not visible in this view - confirm what it is measured against.
double MemoryPercent();
// CPU usage as a percentage.
// NOTE(review): definition not visible in this view - confirm the sampling window.
double CPUPercent();
// Pair of octet counters.
// NOTE(review): presumably network bytes in/out - verify against the definition.
std::pair<unsigned long long , unsigned long long > Octets();
// Per-device GPU utilisation values (presumably nvmlUtilization_t::gpu for each device - confirm).
// NOTE(review): the accompanying change introduces GPUMemoryTotal(); confirm whether this
// declaration is still needed.
std::vector<unsigned int> GPUPercent();
// Total memory of each GPU, one entry per device index.
std::vector<unsigned long long> GPUMemoryTotal();
// Used memory of each GPU, one entry per device index.
// NOTE(review): definition not visible in this view - presumably nvmlMemory_t::used; confirm.
std::vector<unsigned long long> GPUMemoryUsed();
// Per-core CPU usage percentages.
// NOTE(review): inferred from the name only - confirm against the definition.
std::vector<double> CPUCorePercent();
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册