From fddfd1eb03c067234e6127730da14677c03c7554 Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Mon, 18 Nov 2019 18:47:00 +0800 Subject: [PATCH] #346 update config APIs to support build index with multiple GPUs --- core/conf/server_cpu_config.template | 6 +- core/conf/server_gpu_config.template | 3 +- core/src/server/Config.cpp | 84 +++++++++++++++------------- core/src/server/Config.h | 16 +++--- core/src/utils/ValidationUtil.cpp | 4 +- core/src/utils/ValidationUtil.h | 4 +- core/unittest/server/test_config.cpp | 33 ++++++----- 7 files changed, 81 insertions(+), 69 deletions(-) diff --git a/core/conf/server_cpu_config.template b/core/conf/server_cpu_config.template index 6c9512639..bc8fc3bb3 100644 --- a/core/conf/server_cpu_config.template +++ b/core/conf/server_cpu_config.template @@ -27,7 +27,6 @@ metric_config: port: 8080 # port prometheus uses to fetch metrics, must in range [1025, 65534] cache_config: - cpu_cache_capacity: 16 # GB, CPU memory used for cache, must be a positive integer cpu_cache_threshold: 0.85 # percentage of data that will be kept when cache cleanup is triggered, must be in range (0.0, 1.0] cache_insert_data: false # whether to load inserted data into cache, must be a boolean @@ -38,6 +37,7 @@ engine_config: gpu_search_threshold: 1000 # threshold beyond which the search computation is executed on GPUs only resource_config: - search_resources: # define the device used for search computation + search_resources: # define the devices used for search computation, must be in format: cpu or gpux + - cpu + index_build_resources: # define the devices used for index building, must be in format: cpu or gpux - cpu - index_build_device: cpu # CPU used for building index diff --git a/core/conf/server_gpu_config.template b/core/conf/server_gpu_config.template index 154db5d13..c54ed408d 100644 --- a/core/conf/server_gpu_config.template +++ b/core/conf/server_gpu_config.template @@ -42,4 +42,5 @@ resource_config: search_resources: # define the devices used for search computation, must be in format: cpu or gpux - cpu - gpu0 - index_build_device: gpu0 # CPU / GPU used for building index, must be in format: cpu or gpux + index_build_resources: # define the devices used for index building, must be in format: cpu or gpux + - gpu0 \ No newline at end of file diff --git a/core/src/server/Config.cpp b/core/src/server/Config.cpp index f130e73a8..5672ab52a 100644 --- a/core/src/server/Config.cpp +++ b/core/src/server/Config.cpp @@ -215,8 +215,8 @@ Config::ValidateConfig() { return s; } - int32_t resource_index_build_device; - s = GetResourceConfigIndexBuildDevice(resource_index_build_device); + std::vector index_build_resources; + s = GetResourceConfigIndexBuildResources(index_build_resources); if (!s.ok()) { return s; } @@ -351,7 +351,7 @@ Config::ResetDefaultConfig() { return s; } - s = SetResourceConfigIndexBuildDevice(CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT); + s = SetResourceConfigIndexBuildResources(CONFIG_RESOURCE_INDEX_BUILD_RESOURCES_DEFAULT); if (!s.ok()) { return s; } @@ -599,22 +599,28 @@ Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) { return Status(SERVER_INVALID_ARGUMENT, msg); } else { uint64_t gpu_cache_capacity = std::stoi(value) * GB; - int device_id; - Status s = GetResourceConfigIndexBuildDevice(device_id); + std::vector resources; + Status s = GetResourceConfigIndexBuildResources(resources); if (!s.ok()) { return s; } size_t gpu_memory; - if (!ValidationUtil::GetGpuMemory(device_id, gpu_memory).ok()) { - std::string msg = "Fail to get GPU memory for GPU device: " + std::to_string(device_id); - return Status(SERVER_UNEXPECTED_ERROR, msg); - } else if (gpu_cache_capacity >= gpu_memory) { - std::string msg = "Invalid gpu cache capacity: " + value + - ". Possible reason: cache_config.gpu_cache_capacity exceeds GPU memory."; - return Status(SERVER_INVALID_ARGUMENT, msg); - } else if (gpu_cache_capacity > (double)gpu_memory * 0.9) { - std::cerr << "Warning: gpu cache capacity value is too big" << std::endl; + for (auto& resource : resources) { + if (resource == "cpu") { + continue; + } + int32_t device_id = std::stoi(resource.substr(3)); + if (!ValidationUtil::GetGpuMemory(device_id, gpu_memory).ok()) { + std::string msg = "Fail to get GPU memory for GPU device: " + std::to_string(device_id); + return Status(SERVER_UNEXPECTED_ERROR, msg); + } else if (gpu_cache_capacity >= gpu_memory) { + std::string msg = "Invalid gpu cache capacity: " + value + + ". Possible reason: cache_config.gpu_cache_capacity exceeds GPU memory."; + return Status(SERVER_INVALID_ARGUMENT, msg); + } else if (gpu_cache_capacity > (double) gpu_memory * 0.9) { + std::cerr << "Warning: gpu cache capacity value is too big" << std::endl; + } } } return Status::OK(); @@ -745,10 +751,18 @@ Config::CheckResourceConfigSearchResources(const std::vector& value } Status -Config::CheckResourceConfigIndexBuildDevice(const std::string& value) { - auto status = CheckResource(value); - if (!status.ok()) { - return Status(SERVER_INVALID_ARGUMENT, status.message()); +Config::CheckResourceConfigIndexBuildResources(const std::vector& value) { + if (value.empty()) { + std::string msg = + "Invalid build index resource. " + "Possible reason: resource_config.build_index_resources is empty."; + return Status(SERVER_INVALID_ARGUMENT, msg); + } + for (auto& resource : value) { + auto status = CheckResource(resource); + if (!status.ok()) { + return Status(SERVER_INVALID_ARGUMENT, status.message()); + } } return Status::OK(); } @@ -1030,27 +1044,18 @@ Status Config::GetResourceConfigSearchResources(std::vector& value) { std::string str = GetConfigSequenceStr(CONFIG_RESOURCE, CONFIG_RESOURCE_SEARCH_RESOURCES, - CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER, CONFIG_RESOURCE_SEARCH_RESOURCES_DEFAULT); - server::StringHelpFunctions::SplitStringByDelimeter(str, CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER, value); + CONFIG_RESOURCE_RESOURCES_DELIMITER, CONFIG_RESOURCE_SEARCH_RESOURCES_DEFAULT); + server::StringHelpFunctions::SplitStringByDelimeter(str, CONFIG_RESOURCE_RESOURCES_DELIMITER, value); return CheckResourceConfigSearchResources(value); } Status -Config::GetResourceConfigIndexBuildDevice(int32_t& value) { +Config::GetResourceConfigIndexBuildResources(std::vector& value) { std::string str = - GetConfigStr(CONFIG_RESOURCE, CONFIG_RESOURCE_INDEX_BUILD_DEVICE, CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT); - Status s = CheckResourceConfigIndexBuildDevice(str); - if (!s.ok()) { - return s; - } - - if (str == "cpu") { - value = CPU_DEVICE_ID; - } else { - value = std::stoi(str.substr(3)); - } - - return Status::OK(); + GetConfigSequenceStr(CONFIG_RESOURCE, CONFIG_RESOURCE_INDEX_BUILD_RESOURCES, + CONFIG_RESOURCE_RESOURCES_DELIMITER, CONFIG_RESOURCE_INDEX_BUILD_RESOURCES_DEFAULT); + server::StringHelpFunctions::SplitStringByDelimeter(str, CONFIG_RESOURCE_RESOURCES_DELIMITER, value); + return CheckResourceConfigIndexBuildResources(value); } /////////////////////////////////////////////////////////////////////////////// @@ -1305,7 +1310,7 @@ Config::SetResourceConfigMode(const std::string& value) { Status Config::SetResourceConfigSearchResources(const std::string& value) { std::vector res_vec; - server::StringHelpFunctions::SplitStringByDelimeter(value, CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER, res_vec); + server::StringHelpFunctions::SplitStringByDelimeter(value, CONFIG_RESOURCE_RESOURCES_DELIMITER, res_vec); Status s = CheckResourceConfigSearchResources(res_vec); if (!s.ok()) { @@ -1317,13 +1322,16 @@ Config::SetResourceConfigSearchResources(const std::string& value) { } Status -Config::SetResourceConfigIndexBuildDevice(const std::string& value) { - Status s = CheckResourceConfigIndexBuildDevice(value); +Config::SetResourceConfigIndexBuildResources(const std::string &value) { + std::vector res_vec; + server::StringHelpFunctions::SplitStringByDelimeter(value, CONFIG_RESOURCE_RESOURCES_DELIMITER, res_vec); + + Status s = CheckResourceConfigIndexBuildResources(res_vec); if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_RESOURCE, CONFIG_RESOURCE_INDEX_BUILD_DEVICE, value); + SetConfigValueInMem(CONFIG_RESOURCE, CONFIG_RESOURCE_INDEX_BUILD_RESOURCES, value); return Status::OK(); } diff --git a/core/src/server/Config.h b/core/src/server/Config.h index 3ab0cd805..0378a079f 100644 --- a/core/src/server/Config.h +++ b/core/src/server/Config.h @@ -91,20 +91,18 @@ static const char* CONFIG_ENGINE_GPU_SEARCH_THRESHOLD_DEFAULT = "1000"; static const char* CONFIG_RESOURCE = "resource_config"; static const char* CONFIG_RESOURCE_MODE = "mode"; static const char* CONFIG_RESOURCE_MODE_DEFAULT = "simple"; +static const char* CONFIG_RESOURCE_RESOURCES_DELIMITER = ","; static const char* CONFIG_RESOURCE_SEARCH_RESOURCES = "search_resources"; -static const char* CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER = ","; - #ifdef MILVUS_CPU_VERSION static const char* CONFIG_RESOURCE_SEARCH_RESOURCES_DEFAULT = "cpu"; #else static const char* CONFIG_RESOURCE_SEARCH_RESOURCES_DEFAULT = "cpu,gpu0"; #endif - -static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE = "index_build_device"; +static const char* CONFIG_RESOURCE_INDEX_BUILD_RESOURCES = "index_build_resources"; #ifdef MILVUS_CPU_VERSION -static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT = "cpu"; +static const char* CONFIG_RESOURCE_INDEX_BUILD_RESOURCES_DEFAULT = "cpu"; #else -static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT = "gpu0"; +static const char* CONFIG_RESOURCE_INDEX_BUILD_RESOURCES_DEFAULT = "gpu0"; #endif const int32_t CPU_DEVICE_ID = -1; @@ -190,7 +188,7 @@ class Config { Status CheckResourceConfigSearchResources(const std::vector& value); Status - CheckResourceConfigIndexBuildDevice(const std::string& value); + CheckResourceConfigIndexBuildResources(const std::vector& value); std::string GetConfigStr(const std::string& parent_key, const std::string& child_key, const std::string& default_value = ""); @@ -259,7 +257,7 @@ class Config { Status GetResourceConfigSearchResources(std::vector& value); Status - GetResourceConfigIndexBuildDevice(int32_t& value); + GetResourceConfigIndexBuildResources(std::vector& value); public: /* server config */ @@ -320,7 +318,7 @@ class Config { Status SetResourceConfigSearchResources(const std::string& value); Status - SetResourceConfigIndexBuildDevice(const std::string& value); + SetResourceConfigIndexBuildResources(const std::string& value); private: std::unordered_map> config_map_; diff --git a/core/src/utils/ValidationUtil.cpp b/core/src/utils/ValidationUtil.cpp index ec696ff3e..080de77e1 100644 --- a/core/src/utils/ValidationUtil.cpp +++ b/core/src/utils/ValidationUtil.cpp @@ -182,7 +182,7 @@ ValidationUtil::ValidatePartitionTags(const std::vector& partition_ } Status -ValidationUtil::ValidateGpuIndex(uint32_t gpu_index) { +ValidationUtil::ValidateGpuIndex(int32_t gpu_index) { #ifdef MILVUS_GPU_VERSION int num_devices = 0; auto cuda_err = cudaGetDeviceCount(&num_devices); @@ -203,7 +203,7 @@ ValidationUtil::ValidateGpuIndex(uint32_t gpu_index) { } Status -ValidationUtil::GetGpuMemory(uint32_t gpu_index, size_t& memory) { +ValidationUtil::GetGpuMemory(int32_t gpu_index, size_t& memory) { #ifdef MILVUS_GPU_VERSION cudaDeviceProp deviceProp; diff --git a/core/src/utils/ValidationUtil.h b/core/src/utils/ValidationUtil.h index 01801e295..201ccef3b 100644 --- a/core/src/utils/ValidationUtil.h +++ b/core/src/utils/ValidationUtil.h @@ -59,10 +59,10 @@ class ValidationUtil { ValidatePartitionTags(const std::vector& partition_tags); static Status - ValidateGpuIndex(uint32_t gpu_index); + ValidateGpuIndex(int32_t gpu_index); static Status - GetGpuMemory(uint32_t gpu_index, size_t& memory); + GetGpuMemory(int32_t gpu_index, size_t& memory); static Status ValidateIpAddress(const std::string& ip_address); diff --git a/core/unittest/server/test_config.cpp b/core/unittest/server/test_config.cpp index 637273732..37be36b7e 100644 --- a/core/unittest/server/test_config.cpp +++ b/core/unittest/server/test_config.cpp @@ -272,29 +272,34 @@ TEST_F(ConfigTest, SERVER_CONFIG_VALID_TEST) { #else std::vector search_resources = {"cpu", "gpu0"}; #endif - std::vector res_vec; - std::string res_str; + std::vector search_res_vec; + std::string search_res_str; milvus::server::StringHelpFunctions::MergeStringWithDelimeter( - search_resources, milvus::server::CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER, res_str); - s = config.SetResourceConfigSearchResources(res_str); + search_resources, milvus::server::CONFIG_RESOURCE_RESOURCES_DELIMITER, search_res_str); + s = config.SetResourceConfigSearchResources(search_res_str); ASSERT_TRUE(s.ok()); - s = config.GetResourceConfigSearchResources(res_vec); + s = config.GetResourceConfigSearchResources(search_res_vec); ASSERT_TRUE(s.ok()); for (size_t i = 0; i < search_resources.size(); i++) { - ASSERT_TRUE(search_resources[i] == res_vec[i]); + ASSERT_TRUE(search_resources[i] == search_res_vec[i]); } #ifdef MILVUS_CPU_VERSION - int32_t resource_index_build_device = milvus::server::CPU_DEVICE_ID; - s = config.SetResourceConfigIndexBuildDevice("cpu"); + std::vector index_build_resources = {"cpu"}; #else - int32_t resource_index_build_device = 0; - s = config.SetResourceConfigIndexBuildDevice("gpu" + std::to_string(resource_index_build_device)); + std::vector index_build_resources = {"gpu0", "gpu1"}; #endif + std::vector index_build_res_vec; + std::string index_build_res_str; + milvus::server::StringHelpFunctions::MergeStringWithDelimeter( + index_build_resources, milvus::server::CONFIG_RESOURCE_RESOURCES_DELIMITER, index_build_res_str); + s = config.SetResourceConfigIndexBuildResources(index_build_res_str); ASSERT_TRUE(s.ok()); - s = config.GetResourceConfigIndexBuildDevice(int32_val); + s = config.GetResourceConfigIndexBuildResources(index_build_res_vec); ASSERT_TRUE(s.ok()); - ASSERT_TRUE(int32_val == resource_index_build_device); + for (size_t i = 0; i < index_build_resources.size(); i++) { + ASSERT_TRUE(index_build_resources[i] == index_build_res_vec[i]); + } } TEST_F(ConfigTest, SERVER_CONFIG_INVALID_TEST) { @@ -418,9 +423,9 @@ TEST_F(ConfigTest, SERVER_CONFIG_INVALID_TEST) { s = config.SetResourceConfigSearchResources("cpu"); ASSERT_TRUE(s.ok()); - s = config.SetResourceConfigIndexBuildDevice("gup2"); + s = config.SetResourceConfigIndexBuildResources("gup2"); ASSERT_FALSE(s.ok()); - s = config.SetResourceConfigIndexBuildDevice("gpu16"); + s = config.SetResourceConfigIndexBuildResources("gpu16"); ASSERT_FALSE(s.ok()); } -- GitLab