diff --git a/CHANGELOG.md b/CHANGELOG.md index 0925fa1a68c6506212cbbc6aaade09a04495ffdc..c4643c0adc0bbf006c9f734c5015e96e47dede52 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ Please mark all change in change log and use the ticket from JIRA. # Milvus 0.5.1 (TODO) ## Bug +- \#90 - The server start error messages could be improved to enhance user experience - \#104 - test_scheduler core dump ## Improvement diff --git a/core/src/server/Config.cpp b/core/src/server/Config.cpp index 7de84cbccc6944b4845c5fceaee751fc27e9d923..b20d6c2436cfa59c4b2bd46ab5585aabd3526f76 100644 --- a/core/src/server/Config.cpp +++ b/core/src/server/Config.cpp @@ -363,7 +363,9 @@ Config::PrintAll() { Status Config::CheckServerConfigAddress(const std::string& value) { if (!ValidationUtil::ValidateIpAddress(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid server config address: " + value); + std::string msg = "Invalid server IP address: " + value + + ". Possible reason: server_config.address is invalid in server_config.yaml."; + return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); } @@ -371,11 +373,15 @@ Config::CheckServerConfigAddress(const std::string& value) { Status Config::CheckServerConfigPort(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid server config port: " + value); + std::string msg = "Port " + value + " is not a number. " + + "Possible reason: server_config.port in server_config.yaml is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } else { int32_t port = std::stoi(value); if (!(port > 1024 && port < 65535)) { - return Status(SERVER_INVALID_ARGUMENT, "Server config port out of range (1024, 65535): " + value); + std::string msg = "Port " + value + " is not in range [1025, 65534]. " + + "Possible reason: server_config.port in server_config.yaml is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } } return Status::OK(); @@ -385,7 +391,8 @@ Status Config::CheckServerConfigDeployMode(const std::string& value) { if (value != "single" && value != "cluster_readonly" && value != "cluster_writable") { return Status(SERVER_INVALID_ARGUMENT, - "Invalid server config mode [single, cluster_readonly, cluster_writable]: " + value); + "Error: server_config.deploy_mode in server_config.yaml is not one of " + "single, cluster_readonly, and cluster_writable."); } return Status::OK(); } @@ -411,7 +418,8 @@ Config::CheckServerConfigTimeZone(const std::string& value) { Status Config::CheckDBConfigPrimaryPath(const std::string& value) { if (value.empty()) { - return Status(SERVER_INVALID_ARGUMENT, "DB config primary_path empty"); + return Status(SERVER_INVALID_ARGUMENT, + "db_path is empty. Possible reason: db_config.db_path in server_config.yaml is empty."); } return Status::OK(); } @@ -424,7 +432,11 @@ Config::CheckDBConfigSecondaryPath(const std::string& value) { Status Config::CheckDBConfigBackendUrl(const std::string& value) { if (!ValidationUtil::ValidateDbURI(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid DB config backend_url: " + value); + std::string msg = + "Invalid db_backend_url: " + value + + ". Possible reason: db_config.db_backend_url is invalid in server_config.yaml. " + + "The correct format should be like sqlite://:@:/ or mysql://root:123456@127.0.0.1:3306/milvus."; + return Status(SERVER_INVALID_ARGUMENT, "invalid db_backend_url: " + value); } return Status::OK(); } @@ -432,7 +444,9 @@ Config::CheckDBConfigBackendUrl(const std::string& value) { Status Config::CheckDBConfigArchiveDiskThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid DB config archive_disk_threshold: " + value); + std::string msg = "Invalid archive disk threshold: " + value + + "Possible reason: db_config.archive_disk_threshold in server_config.yaml is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); } @@ -440,7 +454,9 @@ Config::CheckDBConfigArchiveDiskThreshold(const std::string& value) { Status Config::CheckDBConfigArchiveDaysThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid DB config archive_days_threshold: " + value); + std::string msg = "Invalid archive days threshold: " + value + + "Possible reason: db_config.archive_disk_threshold in server_config.yaml is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); } @@ -448,13 +464,17 @@ Config::CheckDBConfigArchiveDaysThreshold(const std::string& value) { Status Config::CheckDBConfigInsertBufferSize(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid DB config insert_buffer_size: " + value); + std::string msg = "Invalid insert buffer size: " + value + + "Possible reason: db_config.insert_buffer_size in server_config.yaml is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } else { int64_t buffer_size = std::stoi(value) * GB; uint64_t total_mem = 0, free_mem = 0; CommonUtil::GetSystemMemInfo(total_mem, free_mem); if (buffer_size >= total_mem) { - return Status(SERVER_INVALID_ARGUMENT, "DB config insert_buffer_size exceed system memory: " + value); + std::string msg = "Invalid insert buffer size: " + value + + "Possible reason: insert buffer size exceed system memory."; + return Status(SERVER_INVALID_ARGUMENT, msg); } } return Status::OK(); @@ -463,7 +483,9 @@ Config::CheckDBConfigInsertBufferSize(const std::string& value) { Status Config::CheckMetricConfigEnableMonitor(const std::string& value) { if (!ValidationUtil::ValidateStringIsBool(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid metric config auto_bootup: " + value); + std::string msg = "Invalid metric config: " + value + + "Possible reason: metric_config.enable_monitor is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); } @@ -471,7 +493,9 @@ Config::CheckMetricConfigEnableMonitor(const std::string& value) { Status Config::CheckMetricConfigCollector(const std::string& value) { if (value != "prometheus") { - return Status(SERVER_INVALID_ARGUMENT, "Invalid metric config collector: " + value); + std::string msg = "Invalid metric config: " + value + + "Possible reason: metric_config.collector is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); } @@ -479,6 +503,8 @@ Config::CheckMetricConfigCollector(const std::string& value) { Status Config::CheckMetricConfigPrometheusPort(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { + std::string msg = "Invalid metric config: " + value + + "Possible reason: metric_config.prometheus_config.port is invalid."; return Status(SERVER_INVALID_ARGUMENT, "Invalid metric config prometheus_port: " + value); } return Status::OK(); @@ -487,15 +513,19 @@ Config::CheckMetricConfigPrometheusPort(const std::string& value) { Status Config::CheckCacheConfigCpuCacheCapacity(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid cache config cpu_cache_capacity: " + value); + std::string msg = "Invalid cpu cache capacity: " + value + + "Possible reason: cache_config.cpu_cache_capacity is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } else { uint64_t cpu_cache_capacity = std::stoi(value) * GB; uint64_t total_mem = 0, free_mem = 0; CommonUtil::GetSystemMemInfo(total_mem, free_mem); if (cpu_cache_capacity >= total_mem) { - return Status(SERVER_INVALID_ARGUMENT, "Cache config cpu_cache_capacity exceed system memory: " + value); + std::string msg = "Invalid cpu cache capacity: " + value + + "Possible reason: Cache config cpu_cache_capacity exceed system memory."; + return Status(SERVER_INVALID_ARGUMENT, msg); } else if (cpu_cache_capacity > static_cast(total_mem * 0.9)) { - std::cerr << "Warning: cpu_cache_capacity value is too big" << std::endl; + std::cerr << "WARNING: cpu cache capacity value is too big" << std::endl; } int32_t buffer_value; @@ -506,7 +536,10 @@ Config::CheckCacheConfigCpuCacheCapacity(const std::string& value) { int64_t insert_buffer_size = buffer_value * GB; if (insert_buffer_size + cpu_cache_capacity >= total_mem) { - return Status(SERVER_INVALID_ARGUMENT, "Sum of cpu_cache_capacity and buffer_size exceed system memory"); + std::string msg = "Invalid cpu cache capacity: " + value + + "Possible reason: sum of cache_config.cpu_cache_capacity and " + "db_config.insert_buffer_size exceeds system memory."; + return Status(SERVER_INVALID_ARGUMENT, msg); } } return Status::OK(); @@ -515,11 +548,15 @@ Config::CheckCacheConfigCpuCacheCapacity(const std::string& value) { Status Config::CheckCacheConfigCpuCacheThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsFloat(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid cache config cpu_cache_threshold: " + value); + std::string msg = "Invalid cpu cache threshold: " + value + + "Possible reason: cache_config.cpu_cache_threshold is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } else { float cpu_cache_threshold = std::stof(value); if (cpu_cache_threshold <= 0.0 || cpu_cache_threshold >= 1.0) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid cache config cpu_cache_threshold: " + value); + std::string msg = "Invalid cpu cache threshold: " + value + + "Possible reason: cache_config.cpu_cache_threshold is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } } return Status::OK(); @@ -528,7 +565,9 @@ Config::CheckCacheConfigCpuCacheThreshold(const std::string& value) { Status Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid cache config gpu_cache_capacity: " + value); + std::string msg = "Invalid gpu cache capacity: " + value + + "Possible reason: cache_config.gpu_cache_capacity is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } else { uint64_t gpu_cache_capacity = std::stoi(value) * GB; int gpu_index; @@ -539,13 +578,14 @@ Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) { size_t gpu_memory; if (!ValidationUtil::GetGpuMemory(gpu_index, gpu_memory).ok()) { - return Status(SERVER_UNEXPECTED_ERROR, - "Fail to get GPU memory for GPU device: " + std::to_string(gpu_index)); + std::string msg = "Fail to get GPU memory for GPU device: " + std::to_string(gpu_index); + return Status(SERVER_UNEXPECTED_ERROR, msg); } else if (gpu_cache_capacity >= gpu_memory) { - return Status(SERVER_INVALID_ARGUMENT, - "Cache config gpu_cache_capacity exceed GPU memory: " + std::to_string(gpu_memory)); + std::string msg = "Invalid gpu cache capacity: " + value + + "Possible reason: cache_config.gpu_cache_capacity exceed GPU memory."; + return Status(SERVER_INVALID_ARGUMENT, msg); } else if (gpu_cache_capacity > (double)gpu_memory * 0.9) { - std::cerr << "Warning: gpu_cache_capacity value is too big" << std::endl; + std::cerr << "Warning: gpu cache capacity value is too big" << std::endl; } } return Status::OK(); @@ -554,11 +594,15 @@ Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) { Status Config::CheckCacheConfigGpuCacheThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsFloat(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid cache config gpu_cache_threshold: " + value); + std::string msg = "Invalid gpu cache threshold: " + value + + "Possible reason: cache_config.gpu_cache_threshold is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } else { float gpu_cache_threshold = std::stof(value); if (gpu_cache_threshold <= 0.0 || gpu_cache_threshold >= 1.0) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid cache config gpu_cache_threshold: " + value); + std::string msg = "Invalid gpu cache threshold: " + value + + "Possible reason: cache_config.gpu_cache_threshold is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } } return Status::OK(); @@ -567,7 +611,9 @@ Config::CheckCacheConfigGpuCacheThreshold(const std::string& value) { Status Config::CheckCacheConfigCacheInsertData(const std::string& value) { if (!ValidationUtil::ValidateStringIsBool(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid cache config cache_insert_data: " + value); + std::string msg = "Invalid cache insert option: " + value + + "Possible reason: cache_config.cache_insert_data is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); } @@ -575,7 +621,9 @@ Config::CheckCacheConfigCacheInsertData(const std::string& value) { Status Config::CheckEngineConfigUseBlasThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid engine config use_blas_threshold: " + value); + std::string msg = "Invalid blas threshold: " + value + + "Possible reason: engine_config.use_blas_threshold is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); } @@ -583,14 +631,18 @@ Config::CheckEngineConfigUseBlasThreshold(const std::string& value) { Status Config::CheckEngineConfigOmpThreadNum(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid engine config omp_thread_num: " + value); + std::string msg = "Invalid omp thread number: " + value + + "Possible reason: engine_config.omp_thread_num is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } int32_t omp_thread = std::stoi(value); uint32_t sys_thread_cnt = 8; CommonUtil::GetSystemAvailableThreads(sys_thread_cnt); if (omp_thread > static_cast(sys_thread_cnt)) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid engine config omp_thread_num: " + value); + std::string msg = "Invalid omp thread number: " + value + + "Possible reason: engine_config.omp_thread_num is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); } @@ -598,7 +650,9 @@ Config::CheckEngineConfigOmpThreadNum(const std::string& value) { Status Config::CheckResourceConfigMode(const std::string& value) { if (value != "simple") { - return Status(SERVER_INVALID_ARGUMENT, "Invalid resource config mode: " + value); + std::string msg = "Invalid resource mode: " + value + + "Possible reason: resource_config.mode is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); } @@ -608,12 +662,16 @@ CheckGpuDevice(const std::string& value) { const std::regex pat("gpu(\\d+)"); std::cmatch m; if (!std::regex_match(value.c_str(), m, pat)) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid gpu device: " + value); + std::string msg = "Invalid gpu device: " + value + + "Possible reason: resource_config.search_resources is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } int32_t gpu_index = std::stoi(value.substr(3)); if (!ValidationUtil::ValidateGpuIndex(gpu_index).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid gpu device: " + value); + std::string msg = "Invalid gpu device: " + value + + "Possible reason: resource_config.search_resources is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); } @@ -621,11 +679,15 @@ CheckGpuDevice(const std::string& value) { Status Config::CheckResourceConfigSearchResources(const std::vector& value) { if (value.empty()) { - return Status(SERVER_INVALID_ARGUMENT, "Empty resource config search_resources"); + std::string msg = "Invalid search resource. " + "Possible reason: resource_config.search_resources is empty."; + return Status(SERVER_INVALID_ARGUMENT, msg); } for (auto& gpu_device : value) { if (!CheckGpuDevice(gpu_device).ok()) { + std::string msg = "Invalid search resource: " + gpu_device + + "Possible reason: resource_config.search_resources is invalid."; return Status(SERVER_INVALID_ARGUMENT, "Invalid resource config search_resources: " + gpu_device); } } @@ -635,7 +697,9 @@ Config::CheckResourceConfigSearchResources(const std::vector& value Status Config::CheckResourceConfigIndexBuildDevice(const std::string& value) { if (!CheckGpuDevice(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid resource config index_build_device: " + value); + std::string msg = "Invalid index build device: " + value + + "Possible reason: resource_config.index_build_device is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); } diff --git a/core/src/server/DBWrapper.cpp b/core/src/server/DBWrapper.cpp index a5b892ad47350a7b45d1c337500ca1fac84e4da3..2217b29e1c8fe54d36510b55504e15cbfbdc2d9d 100644 --- a/core/src/server/DBWrapper.cpp +++ b/core/src/server/DBWrapper.cpp @@ -54,7 +54,8 @@ DBWrapper::StartService() { std::string db_slave_path; s = config.GetDBConfigSecondaryPath(db_slave_path); if (!s.ok()) { - return s; + std::cerr << s.ToString() << std::endl; + kill(0, SIGUSR1); } StringHelpFunctions::SplitStringByDelimeter(db_slave_path, ";", opt.meta_.slave_paths_); @@ -62,13 +63,15 @@ DBWrapper::StartService() { // cache config s = config.GetCacheConfigCacheInsertData(opt.insert_cache_immediately_); if (!s.ok()) { - return s; + std::cerr << s.ToString() << std::endl; + kill(0, SIGUSR1); } std::string mode; s = config.GetServerConfigDeployMode(mode); if (!s.ok()) { - return s; + std::cerr << s.ToString() << std::endl; + kill(0, SIGUSR1); } if (mode == "single") { @@ -78,7 +81,8 @@ DBWrapper::StartService() { } else if (mode == "cluster_writable") { opt.mode_ = engine::DBOptions::MODE::CLUSTER_WRITABLE; } else { - std::cerr << "ERROR: mode specified in server_config must be ['single', 'cluster_readonly', 'cluster_writable']" + std::cerr << "Error: server_config.deploy_mode in server_config.yaml is not one of " + << "single, cluster_readonly, and cluster_writable." << std::endl; kill(0, SIGUSR1); } @@ -87,7 +91,8 @@ DBWrapper::StartService() { int32_t omp_thread; s = config.GetEngineConfigOmpThreadNum(omp_thread); if (!s.ok()) { - return s; + std::cerr << s.ToString() << std::endl; + kill(0, SIGUSR1); } if (omp_thread > 0) { @@ -105,7 +110,8 @@ DBWrapper::StartService() { int32_t use_blas_threshold; s = config.GetEngineConfigUseBlasThreshold(use_blas_threshold); if (!s.ok()) { - return s; + std::cerr << s.ToString() << std::endl; + kill(0, SIGUSR1); } faiss::distance_compute_blas_threshold = use_blas_threshold; @@ -115,7 +121,8 @@ DBWrapper::StartService() { int32_t disk, days; s = config.GetDBConfigArchiveDiskThreshold(disk); if (!s.ok()) { - return s; + std::cerr << s.ToString() << std::endl; + kill(0, SIGUSR1); } if (disk > 0) { @@ -124,7 +131,8 @@ DBWrapper::StartService() { s = config.GetDBConfigArchiveDaysThreshold(days); if (!s.ok()) { - return s; + std::cerr << s.ToString() << std::endl; + kill(0, SIGUSR1); } if (days > 0) { @@ -133,16 +141,20 @@ DBWrapper::StartService() { opt.meta_.archive_conf_.SetCriterias(criterial); // create db root folder - Status status = CommonUtil::CreateDirectory(opt.meta_.path_); - if (!status.ok()) { - std::cerr << "ERROR! Failed to create database root path: " << opt.meta_.path_ << std::endl; + s = CommonUtil::CreateDirectory(opt.meta_.path_); + if (!s.ok()) { + std::cerr << "Error: Failed to create database primary path: " << path + << ". Possible reason: db_config.primary_path is wrong in server_config.yaml or not available." + << std::endl; kill(0, SIGUSR1); } for (auto& path : opt.meta_.slave_paths_) { - status = CommonUtil::CreateDirectory(path); - if (!status.ok()) { - std::cerr << "ERROR! Failed to create database slave path: " << path << std::endl; + s = CommonUtil::CreateDirectory(path); + if (!s.ok()) { + std::cerr << "Error: Failed to create database secondary path: " << path + << ". Possible reason: db_config.secondary_path is wrong in server_config.yaml or not available." + << std::endl; kill(0, SIGUSR1); } } @@ -151,7 +163,9 @@ DBWrapper::StartService() { try { db_ = engine::DBFactory::Build(opt); } catch (std::exception& ex) { - std::cerr << "ERROR! Failed to open database: " << ex.what() << std::endl; + std::cerr << "Error: failed to open database: " << ex.what() + << ". Possible reason: the meta system does not work." + << std::endl; kill(0, SIGUSR1); } @@ -161,7 +175,8 @@ DBWrapper::StartService() { std::string preload_tables; s = config.GetDBConfigPreloadTable(preload_tables); if (!s.ok()) { - return s; + std::cerr << s.ToString() << std::endl; + kill(0, SIGUSR1); } s = PreloadTables(preload_tables); diff --git a/core/unittest/db/utils.cpp b/core/unittest/db/utils.cpp index 8903ce14ea264ae253a0e45b81f065651732a85d..7cc2f287457a23887a3329d0bce955dd55363efe 100644 --- a/core/unittest/db/utils.cpp +++ b/core/unittest/db/utils.cpp @@ -68,7 +68,7 @@ static const char " blas_threshold: 20\n" "\n" "resource_config:\n" - " resource_pool:\n" + " search_resources:\n" " - gpu0\n" " index_build_device: gpu0 # GPU used for building index"; diff --git a/core/unittest/wrapper/utils.cpp b/core/unittest/wrapper/utils.cpp index 6204ac0c05677832f039caa855c56d006c1c2675..b397a35d7c00d58938a02990603a2101ed5e82b4 100644 --- a/core/unittest/wrapper/utils.cpp +++ b/core/unittest/wrapper/utils.cpp @@ -58,7 +58,7 @@ static const char " blas_threshold: 20\n" "\n" "resource_config:\n" - " resource_pool:\n" + " search_resources:\n" " - gpu0\n" " index_build_device: gpu0 # GPU used for building index";