提交 9a49b9a8 编写于 作者: J jinhai

Merge branch 'fix_thermal_bug' into 'branch-0.5.0'

MS-601 Docker logs error caused by get CPUTemperature error

See merge request megasearch/milvus!666

Former-commit-id: 7d1493ad9cd68686bd284b0c1fbf7a0f40fa1a61
......@@ -10,6 +10,7 @@ Please mark all change in change log and use the ticket from JIRA.
- MS-577 - Unittest Query randomly hung
- MS-587 - Count get wrong result after adding vectors and index built immediately
- MS-599 - search wrong result when table created with metric_type: IP
- MS-601 - Docker logs error caused by get CPUTemperature error
## Improvement
- MS-552 - Add and change the easylogging library
......
......@@ -46,7 +46,7 @@ PrometheusMetrics::Init() {
return s.code();
}
const std::string uri = std::string("/tmp/metrics");
const std::string uri = std::string("/metrics");
const std::size_t num_threads = 2;
// Init Exposer
......
......@@ -16,6 +16,7 @@
// under the License.
#include "metrics/SystemInfo.h"
#include "utils/Log.h"
#include <nvml.h>
#include <sys/types.h>
......@@ -24,6 +25,9 @@
#include <iostream>
#include <string>
#include <utility>
#include<stdlib.h>
#include<dirent.h>
#include<stdio.h>
namespace milvus {
namespace server {
......@@ -60,12 +64,12 @@ SystemInfo::Init() {
nvmlReturn_t nvmlresult;
nvmlresult = nvmlInit();
if (NVML_SUCCESS != nvmlresult) {
printf("System information initilization failed");
SERVER_LOG_ERROR << "System information initilization failed";
return;
}
nvmlresult = nvmlDeviceGetCount(&num_device_);
if (NVML_SUCCESS != nvmlresult) {
printf("Unable to get devidce number");
SERVER_LOG_ERROR << "Unable to get devidce number";
return;
}
......@@ -151,7 +155,7 @@ SystemInfo::getTotalCpuTime(std::vector<uint64_t>& work_time_array) {
std::vector<uint64_t> total_time_array;
FILE* file = fopen("/proc/stat", "r");
if (file == NULL) {
perror("Could not open stat file");
SERVER_LOG_ERROR << "Could not open stat file";
return total_time_array;
}
......@@ -162,7 +166,7 @@ SystemInfo::getTotalCpuTime(std::vector<uint64_t>& work_time_array) {
char buffer[1024];
char* ret = fgets(buffer, sizeof(buffer) - 1, file);
if (ret == NULL) {
perror("Could not read stat file");
SERVER_LOG_ERROR << "Could not read stat file";
fclose(file);
return total_time_array;
}
......@@ -237,18 +241,39 @@ SystemInfo::GPUTemperature() {
/**
 * Read the CPU temperatures exposed through the Linux hwmon sysfs tree.
 *
 * Every entry under /sys/class/hwmon/ is expected to be a symlink to a device
 * directory. Entries whose link target contains "coretemp" are treated as CPU
 * sensors, and their temp1_input file is read. The raw value is divided by
 * 1000 before being stored (hwmon reports millidegrees Celsius).
 *
 * The function is best-effort: a missing hwmon directory, an unreadable
 * sensor file or a malformed value is logged and skipped, so a host or
 * container without sensors simply yields an empty vector.
 *
 * @return one temperature per readable coretemp sensor; empty when none found.
 */
std::vector<float>
SystemInfo::CPUTemperature() {
    std::vector<float> result;
    const std::string path = "/sys/class/hwmon/";

    DIR* dir = opendir(path.c_str());
    if (dir == nullptr) {
        SERVER_LOG_ERROR << "Could not open hwmon directory";
        return result;
    }

    struct dirent* ptr = nullptr;
    while ((ptr = readdir(dir)) != nullptr) {
        std::string filename(path);
        filename.append(ptr->d_name);

        // readlink() does not NUL-terminate its output, so reserve one byte
        // and build the string from the returned length instead.
        char buf[100];
        const ssize_t len = readlink(filename.c_str(), buf, sizeof(buf) - 1);
        if (len == -1) {
            // Not a symlink (or not readable): nothing to inspect here.
            continue;
        }

        const std::string target(buf, static_cast<size_t>(len));
        if (target.find("coretemp") == std::string::npos) {
            continue;
        }

        const std::string object = filename + "/temp1_input";
        FILE* file = fopen(object.c_str(), "r");
        if (file == nullptr) {
            // A single unreadable sensor must not take the whole server down.
            SERVER_LOG_ERROR << "Could not open temperature file";
            continue;
        }

        float temp = 0.0f;
        if (fscanf(file, "%f", &temp) == 1) {
            result.push_back(temp / 1000);
        } else {
            SERVER_LOG_ERROR << "Could not read temperature file";
        }
        fclose(file);
    }

    closedir(dir);
    return result;
}
std::vector<uint64_t>
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册