提交 c4058c38 编写于 作者: J jinhai

MS-515 & MS-516: Fix GPU config issue in index building


Former-commit-id: 14d56eef1f177ebcb24eb6b3186cdc2878dd80b4
上级 7b2a668b
......@@ -61,9 +61,9 @@ ServerConfig::LoadConfigFile(const std::string& config_filename) {
ErrorCode ServerConfig::ValidateConfig() const {
//server config validation
ConfigNode server_config = GetConfig(CONFIG_SERVER);
uint32_t gpu_index = (uint32_t)server_config.GetInt32Value(CONFIG_GPU_INDEX, 0);
if(ValidationUtil::ValidateGpuIndex(gpu_index) != SERVER_SUCCESS) {
std::cout << "Error: invalid gpu_index " << std::to_string(gpu_index) << std::endl;
uint32_t build_index_gpu_index = (uint32_t)server_config.GetInt32Value(CONFIG_GPU_INDEX, 0);
if(ValidationUtil::ValidateGpuIndex(build_index_gpu_index) != SERVER_SUCCESS) {
std::cerr << "Error: invalid gpu_index " << std::to_string(build_index_gpu_index) << std::endl;
return SERVER_INVALID_ARGUMENT;
}
......@@ -75,7 +75,7 @@ ErrorCode ServerConfig::ValidateConfig() const {
uint64_t insert_buffer_size = (uint64_t)db_config.GetInt32Value(CONFIG_DB_INSERT_BUFFER_SIZE, 4);
insert_buffer_size *= GB;
if(insert_buffer_size >= total_mem) {
std::cout << "Error: insert_buffer_size execeed system memory" << std::endl;
std::cerr << "Error: insert_buffer_size execeed system memory" << std::endl;
return SERVER_INVALID_ARGUMENT;
}
......@@ -84,20 +84,50 @@ ErrorCode ServerConfig::ValidateConfig() const {
uint64_t cache_cap = (uint64_t)cache_config.GetInt64Value(CONFIG_CPU_CACHE_CAPACITY, 16);
cache_cap *= GB;
if(cache_cap >= total_mem) {
std::cout << "Error: cpu_cache_capacity execeed system memory" << std::endl;
std::cerr << "Error: cpu_cache_capacity execeed system memory" << std::endl;
return SERVER_INVALID_ARGUMENT;
} if(cache_cap > (double)total_mem*0.9) {
std::cout << "Warnning: cpu_cache_capacity value is too aggressive" << std::endl;
std::cerr << "Warning: cpu_cache_capacity value is too aggressive" << std::endl;
}
if(insert_buffer_size + cache_cap >= total_mem) {
std::cout << "Error: sum of cpu_cache_capacity and insert_buffer_size execeed system memory" << std::endl;
std::cerr << "Error: sum of cpu_cache_capacity and insert_buffer_size execeed system memory" << std::endl;
return SERVER_INVALID_ARGUMENT;
}
double free_percent = cache_config.GetDoubleValue(server::CACHE_FREE_PERCENT, 0.85);
if(free_percent < std::numeric_limits<double>::epsilon() || free_percent > 1.0) {
std::cout << "Error: invalid cache_free_percent " << std::to_string(free_percent) << std::endl;
std::cerr << "Error: invalid cache_free_percent " << std::to_string(free_percent) << std::endl;
return SERVER_INVALID_ARGUMENT;
}
// Resource config validation
server::ConfigNode &config = server::ServerConfig::GetInstance().GetConfig(server::CONFIG_RESOURCE);
if (config.GetChildren().empty()) {
std::cerr << "Error: no context under resource" << std::endl;
return SERVER_INVALID_ARGUMENT;
}
auto resources = config.GetChild(server::CONFIG_RESOURCES).GetChildren();
if (resources.empty()) {
std::cerr << "Children of resource_config null exception" << std::endl;
return SERVER_INVALID_ARGUMENT;
}
bool resource_valid_flag = false;
for (auto &resource : resources) {
auto &resconf = resource.second;
auto type = resconf.GetValue(server::CONFIG_RESOURCE_TYPE);
if(type == "GPU") {
auto device_id = resconf.GetInt64Value(server::CONFIG_RESOURCE_DEVICE_ID, 0);
if(device_id == build_index_gpu_index) {
resource_valid_flag = true;
}
}
}
if(!resource_valid_flag) {
return SERVER_INVALID_ARGUMENT;
}
......
......@@ -78,6 +78,7 @@ constexpr ErrorCode DB_INVALID_PATH = ToDbErrorCode(5);
// Error codes produced by ToKnowhereErrorCode from the sequential ids 1..4.
// Generic failure; write_index returns this for a std::exception whose message
// does not mention a full device.
constexpr ErrorCode KNOWHERE_ERROR = ToKnowhereErrorCode(1);
constexpr ErrorCode KNOWHERE_INVALID_ARGUMENT = ToKnowhereErrorCode(2);
constexpr ErrorCode KNOWHERE_UNEXPECTED_ERROR = ToKnowhereErrorCode(3);
// Returned by write_index when the caught exception's message contains
// "No space left on device".
constexpr ErrorCode KNOWHERE_NO_SPACE = ToKnowhereErrorCode(4);
namespace server {
class ServerException : public std::exception {
......
......@@ -2,6 +2,7 @@
#include "db/meta/MetaTypes.h"
#include "Error.h"
#include "
namespace zilliz {
namespace milvus {
......@@ -38,6 +39,9 @@ public:
static ErrorCode
GetGpuMemory(uint32_t gpu_index, size_t &memory);
static ErrorCode
ValidateConfig();
};
}
......
......@@ -140,7 +140,7 @@ VecIndexPtr read_index(const std::string &location) {
FileIOReader reader(location);
reader.fs.seekg(0, reader.fs.end);
int64_t length = reader.fs.tellg();
if(length <= 0) {
if (length <= 0) {
return nullptr;
}
......@@ -201,7 +201,13 @@ ErrorCode write_index(VecIndexPtr index, const std::string &location) {
return KNOWHERE_UNEXPECTED_ERROR;
} catch (std::exception &e) {
WRAPPER_LOG_ERROR << e.what();
return KNOWHERE_ERROR;
std::string estring(e.what());
if (estring.find("No space left on device") != estring.npos) {
WRAPPER_LOG_ERROR << "No space left on the device";
return KNOWHERE_NO_SPACE;
} else {
return KNOWHERE_ERROR;
}
}
return KNOWHERE_SUCCESS;
}
......@@ -213,7 +219,7 @@ void AutoGenParams(const IndexType &type, const long &size, zilliz::knowhere::Co
if (size <= TYPICAL_COUNT / 16384 + 1) {
//handle less row count, avoid nlist set to 0
cfg["nlist"] = 1;
} else if (int(size / TYPICAL_COUNT) * nlist == 0) {
} else if (int(size / TYPICAL_COUNT) *nlist == 0) {
//calculate a proper nlist if nlist not specified or size less than TYPICAL_COUNT
cfg["nlist"] = int(size / TYPICAL_COUNT * 16384);
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册