diff --git a/core/conf/server_cpu_config.template b/core/conf/server_cpu_config.template index bcfb5fa7cee272f9313bda2088e292078fffe9f7..bc8fc3bb3d4077e786fd0d9cee7581ec7aa97b6e 100644 --- a/core/conf/server_cpu_config.template +++ b/core/conf/server_cpu_config.template @@ -27,7 +27,6 @@ metric_config: port: 8080 # port prometheus uses to fetch metrics, must in range [1025, 65534] cache_config: - cpu_cache_capacity: 16 # GB, CPU memory used for cache, must be a positive integer cpu_cache_threshold: 0.85 # percentage of data that will be kept when cache cleanup is triggered, must be in range (0.0, 1.0] cache_insert_data: false # whether to load inserted data into cache, must be a boolean @@ -38,7 +37,7 @@ engine_config: gpu_search_threshold: 1000 # threshold beyond which the search computation is executed on GPUs only resource_config: - search_resources: # define the device used for search computation + search_resources: # define the devices used for search computation, must be in format: cpu or gpux - cpu - index_build_device: # CPU used for building index + index_build_resources: # define the devices used for index building, must be in format: cpu or gpux - cpu diff --git a/core/conf/server_gpu_config.template b/core/conf/server_gpu_config.template index a347a9e5bd4691278145fb2f6ca329befe26073a..0910e22af350cfe6a15b4f30004f586e98dc3349 100644 --- a/core/conf/server_gpu_config.template +++ b/core/conf/server_gpu_config.template @@ -42,5 +42,5 @@ resource_config: search_resources: # define the devices used for search computation, must be in format: cpu or gpux - cpu - gpu0 - index_build_device: # CPU / GPU used for building index, must be in format: cpu or gpux + index_build_resources: # define the devices used for index building, must be in format: cpu or gpux - gpu0 diff --git a/core/src/db/engine/ExecutionEngineImpl.cpp b/core/src/db/engine/ExecutionEngineImpl.cpp index f52ac6d4d2e5068e43836a54c59d16fd77bae7a9..7b2391369a1feda6d0c423532530e8f8cc9b05d8 100644 --- a/core/src/db/engine/ExecutionEngineImpl.cpp +++ b/core/src/db/engine/ExecutionEngineImpl.cpp @@ -355,6 +355,7 @@ ExecutionEngineImpl::CopyToGpu(uint64_t device_id, bool hybrid) { Status ExecutionEngineImpl::CopyToIndexFileToGpu(uint64_t device_id) { + gpu_num_ = device_id; auto to_index_data = std::make_shared(PhysicalSize()); cache::DataObjPtr obj = std::static_pointer_cast(to_index_data); milvus::cache::GpuCacheMgr::GetInstance(device_id)->InsertItem(location_, obj); @@ -577,12 +578,7 @@ ExecutionEngineImpl::GpuCache(uint64_t gpu_id) { // TODO(linxj): remove. Status ExecutionEngineImpl::Init() { - server::Config& config = server::Config::GetInstance(); - std::vector gpu_ids; - Status s = config.GetResourceConfigIndexBuildDevice(gpu_ids); - if (!s.ok()) { - return s; - } + auto gpu_ids = scheduler::get_build_resources(); for (auto id : gpu_ids) { if (gpu_num_ == id) { return Status::OK(); diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h index e064b6f08ce706da76603f5b5c510c2c93d5a384..4bb4092b75c123855e3449641cc668f6d823e91f 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h @@ -47,6 +47,7 @@ class IVF : public VectorIndex, public FaissBaseIndex { void set_index_model(IndexModelPtr model) override; + void Add(const DatasetPtr& dataset, const Config& config) override; diff --git a/core/src/scheduler/SchedInst.cpp b/core/src/scheduler/SchedInst.cpp index 407c3a44d18f05edccf5d4775dac4d7da7a1c2c7..3052e3f4c16753a21d3c9f39179836f3a99b8972 100644 --- a/core/src/scheduler/SchedInst.cpp +++ b/core/src/scheduler/SchedInst.cpp @@ -54,8 +54,7 @@ load_simple_config() { // get resources auto gpu_ids = get_gpu_pool(); - std::vector build_gpu_ids; - config.GetResourceConfigIndexBuildDevice(build_gpu_ids); + auto build_gpu_ids = get_build_resources(); // create and connect ResMgrInst::GetInstance()->Add(ResourceFactory::Create("disk", "DISK", 0, true, false)); diff --git a/core/src/scheduler/SchedInst.h b/core/src/scheduler/SchedInst.h index 4fd22f1fab85cb199b90ba92478256594d5525de..0e0dc0d9a171acea0c9fea0bc8bbf5b14359723c 100644 --- a/core/src/scheduler/SchedInst.h +++ b/core/src/scheduler/SchedInst.h @@ -28,6 +28,7 @@ #include "optimizer/OnlyGPUPass.h" #include "optimizer/Optimizer.h" #include "server/Config.h" +#include "Utils.h" #include #include @@ -108,8 +109,8 @@ class OptimizerInst { } } - std::vector build_resources; - config.GetResourceConfigIndexBuildDevice(build_resources); + auto build_resources = get_build_resources(); + std::vector pass_list; pass_list.push_back(std::make_shared()); diff --git a/core/src/scheduler/Utils.cpp b/core/src/scheduler/Utils.cpp index 2fd573e47a3af61d7ae2905689b5999e8cf86393..2b1f09cdb83ad91e3de8322e5ed51b86ef69a04f 100644 --- a/core/src/scheduler/Utils.cpp +++ b/core/src/scheduler/Utils.cpp @@ -83,5 +83,43 @@ get_gpu_pool() { return gpu_pool; } +std::vector +get_build_resources() { + std::vector gpu_pool; + + server::Config& config = server::Config::GetInstance(); + std::vector pool; + Status s = config.GetResourceConfigIndexBuildResources(pool); + if (!s.ok()) { + SERVER_LOG_ERROR << s.message(); + } + + std::set gpu_ids; + + for (auto& resource : pool) { + if (resource == "cpu") { + gpu_pool.push_back(server::CPU_DEVICE_ID); + continue; + } else { + if (resource.length() < 4 || resource.substr(0, 3) != "gpu") { + // error + exit(-1); + } + auto gpu_id = std::stoi(resource.substr(3)); + if (gpu_id >= scheduler::get_num_gpu()) { + // error + exit(-1); + } + gpu_ids.insert(gpu_id); + } + } + + for (auto& gpu_id : gpu_ids) { + gpu_pool.push_back(gpu_id); + } + + return gpu_pool; +} + } // namespace scheduler } // namespace milvus diff --git a/core/src/scheduler/Utils.h b/core/src/scheduler/Utils.h index 24876eeb965f8d9a05e7d9c5dc122206e048c928..32fda23f7f57c58aef17eaaa521de5fc16b1737d 100644 --- a/core/src/scheduler/Utils.h +++ b/core/src/scheduler/Utils.h @@ -30,5 +30,8 @@ get_num_gpu(); std::vector get_gpu_pool(); +std::vector +get_build_resources(); + } // namespace scheduler } // namespace milvus diff --git a/core/src/scheduler/optimizer/BuildIndexPass.cpp b/core/src/scheduler/optimizer/BuildIndexPass.cpp index db04c4ac3cdcf4873982b41acfb7d54481d19818..d5a91ff4146e1c835c76ef866cd2e5377b2a2165 100644 --- a/core/src/scheduler/optimizer/BuildIndexPass.cpp +++ b/core/src/scheduler/optimizer/BuildIndexPass.cpp @@ -35,7 +35,7 @@ BuildIndexPass::Run(const TaskPtr& task) { return false; ResourcePtr res_ptr; - if (build_gpu_ids_[0] == server::CPU_DEVICE_ID && build_gpu_ids_.size() == 1) { + if (build_gpu_ids_[0] == server::CPU_DEVICE_ID) { res_ptr = ResMgrInst::GetInstance()->GetResource("cpu"); auto label = std::make_shared(std::weak_ptr(res_ptr)); task->label() = label; diff --git a/core/src/server/Config.cpp b/core/src/server/Config.cpp index 23171cb35ad96bec83d9d3f7827bd8fef0352997..7aca1b08ed9ca1a1cf29d679d3d22afe0c68ad14 100644 --- a/core/src/server/Config.cpp +++ b/core/src/server/Config.cpp @@ -215,8 +215,8 @@ Config::ValidateConfig() { return s; } - std::vector index_build_devices; - s = GetResourceConfigIndexBuildDevice(index_build_devices); + std::vector index_build_resources; + s = GetResourceConfigIndexBuildResources(index_build_resources); if (!s.ok()) { return s; } @@ -351,7 +351,7 @@ Config::ResetDefaultConfig() { return s; } - s = SetResourceConfigIndexBuildDevice(CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT); + s = SetResourceConfigIndexBuildResources(CONFIG_RESOURCE_INDEX_BUILD_RESOURCES_DEFAULT); if (!s.ok()) { return s; } @@ -599,14 +599,18 @@ Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) { return Status(SERVER_INVALID_ARGUMENT, msg); } else { uint64_t gpu_cache_capacity = std::stoi(value) * GB; - std::vector device_ids; - Status s = GetResourceConfigIndexBuildDevice(device_ids); + std::vector resources; + Status s = GetResourceConfigIndexBuildResources(resources); if (!s.ok()) { return s; } size_t gpu_memory; - for (auto& device_id : device_ids) { + for (auto& resource : resources) { + if (resource == "cpu") { + continue; + } + int32_t device_id = std::stoi(resource.substr(3)); if (!ValidationUtil::GetGpuMemory(device_id, gpu_memory).ok()) { std::string msg = "Fail to get GPU memory for GPU device: " + std::to_string(device_id); return Status(SERVER_UNEXPECTED_ERROR, msg); @@ -747,13 +751,19 @@ Config::CheckResourceConfigSearchResources(const std::vector& value } Status -Config::CheckResourceConfigIndexBuildDevice(const std::vector& value) { +Config::CheckResourceConfigIndexBuildResources(const std::vector& value) { if (value.empty()) { std::string msg = - "Invalid index build resource. " - "Possible reason: resource_config.index_build_device is empty."; + "Invalid build index resource. " + "Possible reason: resource_config.build_index_resources is empty."; return Status(SERVER_INVALID_ARGUMENT, msg); } + for (auto& resource : value) { + auto status = CheckResource(resource); + if (!status.ok()) { + return Status(SERVER_INVALID_ARGUMENT, status.message()); + } + } for (auto& resource : value) { auto status = CheckResource(resource); @@ -1048,34 +1058,18 @@ Status Config::GetResourceConfigSearchResources(std::vector& value) { std::string str = GetConfigSequenceStr(CONFIG_RESOURCE, CONFIG_RESOURCE_SEARCH_RESOURCES, - CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER, CONFIG_RESOURCE_SEARCH_RESOURCES_DEFAULT); - server::StringHelpFunctions::SplitStringByDelimeter(str, CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER, value); + CONFIG_RESOURCE_RESOURCES_DELIMITER, CONFIG_RESOURCE_SEARCH_RESOURCES_DEFAULT); + server::StringHelpFunctions::SplitStringByDelimeter(str, CONFIG_RESOURCE_RESOURCES_DELIMITER, value); return CheckResourceConfigSearchResources(value); } Status -Config::GetResourceConfigIndexBuildDevice(std::vector& value) { +Config::GetResourceConfigIndexBuildResources(std::vector& value) { std::string str = - GetConfigSequenceStr(CONFIG_RESOURCE, CONFIG_RESOURCE_INDEX_BUILD_DEVICE, CONFIG_RESOURCE_INDEX_BUILD_DELIMITER, - CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT); - std::vector resources; - server::StringHelpFunctions::SplitStringByDelimeter(str, CONFIG_RESOURCE_INDEX_BUILD_DELIMITER, resources); - - Status s = CheckResourceConfigIndexBuildDevice(resources); - if (!s.ok()) { - return s; - } - - for (auto res : resources) { - if (res == "cpu") { - value.emplace_back(CPU_DEVICE_ID); - break; - } - int64_t device_id = std::stoi(str.substr(3)); - value.emplace_back(device_id); - } - - return Status::OK(); + GetConfigSequenceStr(CONFIG_RESOURCE, CONFIG_RESOURCE_INDEX_BUILD_RESOURCES, + CONFIG_RESOURCE_RESOURCES_DELIMITER, CONFIG_RESOURCE_INDEX_BUILD_RESOURCES_DEFAULT); + server::StringHelpFunctions::SplitStringByDelimeter(str, CONFIG_RESOURCE_RESOURCES_DELIMITER, value); + return CheckResourceConfigIndexBuildResources(value); } /////////////////////////////////////////////////////////////////////////////// @@ -1330,7 +1324,7 @@ Config::SetResourceConfigMode(const std::string& value) { Status Config::SetResourceConfigSearchResources(const std::string& value) { std::vector res_vec; - server::StringHelpFunctions::SplitStringByDelimeter(value, CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER, res_vec); + server::StringHelpFunctions::SplitStringByDelimeter(value, CONFIG_RESOURCE_RESOURCES_DELIMITER, res_vec); Status s = CheckResourceConfigSearchResources(res_vec); if (!s.ok()) { @@ -1342,16 +1336,16 @@ Config::SetResourceConfigSearchResources(const std::string& value) { } Status -Config::SetResourceConfigIndexBuildDevice(const std::string& value) { +Config::SetResourceConfigIndexBuildResources(const std::string &value) { std::vector res_vec; - server::StringHelpFunctions::SplitStringByDelimeter(value, CONFIG_RESOURCE_INDEX_BUILD_DELIMITER, res_vec); + server::StringHelpFunctions::SplitStringByDelimeter(value, CONFIG_RESOURCE_RESOURCES_DELIMITER, res_vec); - Status s = CheckResourceConfigIndexBuildDevice(res_vec); + Status s = CheckResourceConfigIndexBuildResources(res_vec); if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_RESOURCE, CONFIG_RESOURCE_INDEX_BUILD_DEVICE, value); + SetConfigValueInMem(CONFIG_RESOURCE, CONFIG_RESOURCE_INDEX_BUILD_RESOURCES, value); return Status::OK(); } diff --git a/core/src/server/Config.h b/core/src/server/Config.h index 08b3accdd6bdef5969fe6f306c2b7e4f64418511..0378a079fbd3bc90827eab0051a8cfd2de918a15 100644 --- a/core/src/server/Config.h +++ b/core/src/server/Config.h @@ -91,21 +91,18 @@ static const char* CONFIG_ENGINE_GPU_SEARCH_THRESHOLD_DEFAULT = "1000"; static const char* CONFIG_RESOURCE = "resource_config"; static const char* CONFIG_RESOURCE_MODE = "mode"; static const char* CONFIG_RESOURCE_MODE_DEFAULT = "simple"; +static const char* CONFIG_RESOURCE_RESOURCES_DELIMITER = ","; static const char* CONFIG_RESOURCE_SEARCH_RESOURCES = "search_resources"; -static const char* CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER = ","; - #ifdef MILVUS_CPU_VERSION static const char* CONFIG_RESOURCE_SEARCH_RESOURCES_DEFAULT = "cpu"; #else static const char* CONFIG_RESOURCE_SEARCH_RESOURCES_DEFAULT = "cpu,gpu0"; #endif - -static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE = "index_build_device"; -static const char* CONFIG_RESOURCE_INDEX_BUILD_DELIMITER = ","; +static const char* CONFIG_RESOURCE_INDEX_BUILD_RESOURCES = "index_build_resources"; #ifdef MILVUS_CPU_VERSION -static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT = "cpu"; +static const char* CONFIG_RESOURCE_INDEX_BUILD_RESOURCES_DEFAULT = "cpu"; #else -static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT = "cpu,gpu0"; +static const char* CONFIG_RESOURCE_INDEX_BUILD_RESOURCES_DEFAULT = "gpu0"; #endif const int32_t CPU_DEVICE_ID = -1; @@ -191,7 +188,7 @@ class Config { Status CheckResourceConfigSearchResources(const std::vector& value); Status - CheckResourceConfigIndexBuildDevice(const std::vector& value); + CheckResourceConfigIndexBuildResources(const std::vector& value); std::string GetConfigStr(const std::string& parent_key, const std::string& child_key, const std::string& default_value = ""); @@ -260,7 +257,7 @@ class Config { Status GetResourceConfigSearchResources(std::vector& value); Status - GetResourceConfigIndexBuildDevice(std::vector& value); + GetResourceConfigIndexBuildResources(std::vector& value); public: /* server config */ @@ -321,7 +318,7 @@ class Config { Status SetResourceConfigSearchResources(const std::string& value); Status - SetResourceConfigIndexBuildDevice(const std::string& value); + SetResourceConfigIndexBuildResources(const std::string& value); private: std::unordered_map> config_map_; diff --git a/core/src/utils/ValidationUtil.cpp b/core/src/utils/ValidationUtil.cpp index ec696ff3e0a1c27c736581ed3fad49137fc29b47..080de77e17d4aac6592c5fdfc0883a50abe5c9b0 100644 --- a/core/src/utils/ValidationUtil.cpp +++ b/core/src/utils/ValidationUtil.cpp @@ -182,7 +182,7 @@ ValidationUtil::ValidatePartitionTags(const std::vector& partition_ } Status -ValidationUtil::ValidateGpuIndex(uint32_t gpu_index) { +ValidationUtil::ValidateGpuIndex(int32_t gpu_index) { #ifdef MILVUS_GPU_VERSION int num_devices = 0; auto cuda_err = cudaGetDeviceCount(&num_devices); @@ -203,7 +203,7 @@ ValidationUtil::ValidateGpuIndex(uint32_t gpu_index) { } Status -ValidationUtil::GetGpuMemory(uint32_t gpu_index, size_t& memory) { +ValidationUtil::GetGpuMemory(int32_t gpu_index, size_t& memory) { #ifdef MILVUS_GPU_VERSION cudaDeviceProp deviceProp; diff --git a/core/src/utils/ValidationUtil.h b/core/src/utils/ValidationUtil.h index 01801e295a0714e5815393e9e0cc90d4d149ab84..201ccef3bda5c4920d45bd804909d8147f3a47d5 100644 --- a/core/src/utils/ValidationUtil.h +++ b/core/src/utils/ValidationUtil.h @@ -59,10 +59,10 @@ class ValidationUtil { ValidatePartitionTags(const std::vector& partition_tags); static Status - ValidateGpuIndex(uint32_t gpu_index); + ValidateGpuIndex(int32_t gpu_index); static Status - GetGpuMemory(uint32_t gpu_index, size_t& memory); + GetGpuMemory(int32_t gpu_index, size_t& memory); static Status ValidateIpAddress(const std::string& ip_address); diff --git a/core/src/wrapper/KnowhereResource.cpp b/core/src/wrapper/KnowhereResource.cpp index ccfbcbb6cf8a08ce85484c3debf65707e114d1c9..0891d5b32d4b9a33d013f2a186e745dd38f2bc95 100644 --- a/core/src/wrapper/KnowhereResource.cpp +++ b/core/src/wrapper/KnowhereResource.cpp @@ -19,6 +19,8 @@ #ifdef MILVUS_GPU_VERSION #include "knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h" #endif + +#include "scheduler/Utils.h" #include "server/Config.h" #include @@ -48,8 +50,7 @@ KnowhereResource::Initialize() { // get build index gpu resource server::Config& config = server::Config::GetInstance(); - std::vector build_index_gpus; - s = config.GetResourceConfigIndexBuildDevice(build_index_gpus); + auto build_index_gpus = scheduler::get_build_resources(); if (!s.ok()) return s; diff --git a/core/unittest/server/test_config.cpp b/core/unittest/server/test_config.cpp index 0c65e9322cc03513ee9d7632d9c73011b4972d58..37be36b7eb48fd8ebd59774587188b20bca80a5a 100644 --- a/core/unittest/server/test_config.cpp +++ b/core/unittest/server/test_config.cpp @@ -272,30 +272,34 @@ TEST_F(ConfigTest, SERVER_CONFIG_VALID_TEST) { #else std::vector search_resources = {"cpu", "gpu0"}; #endif - std::vector res_vec; - std::string res_str; + std::vector search_res_vec; + std::string search_res_str; milvus::server::StringHelpFunctions::MergeStringWithDelimeter( - search_resources, milvus::server::CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER, res_str); - s = config.SetResourceConfigSearchResources(res_str); + search_resources, milvus::server::CONFIG_RESOURCE_RESOURCES_DELIMITER, search_res_str); + s = config.SetResourceConfigSearchResources(search_res_str); ASSERT_TRUE(s.ok()); - s = config.GetResourceConfigSearchResources(res_vec); + s = config.GetResourceConfigSearchResources(search_res_vec); ASSERT_TRUE(s.ok()); for (size_t i = 0; i < search_resources.size(); i++) { - ASSERT_TRUE(search_resources[i] == res_vec[i]); + ASSERT_TRUE(search_resources[i] == search_res_vec[i]); } #ifdef MILVUS_CPU_VERSION - int32_t resource_index_build_device = milvus::server::CPU_DEVICE_ID; - s = config.SetResourceConfigIndexBuildDevice("cpu"); + std::vector index_build_resources = {"cpu"}; #else - int32_t resource_index_build_device = 0; - s = config.SetResourceConfigIndexBuildDevice("gpu" + std::to_string(resource_index_build_device)); + std::vector index_build_resources = {"gpu0", "gpu1"}; #endif + std::vector index_build_res_vec; + std::string index_build_res_str; + milvus::server::StringHelpFunctions::MergeStringWithDelimeter( + index_build_resources, milvus::server::CONFIG_RESOURCE_RESOURCES_DELIMITER, index_build_res_str); + s = config.SetResourceConfigIndexBuildResources(index_build_res_str); ASSERT_TRUE(s.ok()); - std::vector device_ids; - s = config.GetResourceConfigIndexBuildDevice(device_ids); + s = config.GetResourceConfigIndexBuildResources(index_build_res_vec); ASSERT_TRUE(s.ok()); - ASSERT_TRUE(device_ids[0] == resource_index_build_device); + for (size_t i = 0; i < index_build_resources.size(); i++) { + ASSERT_TRUE(index_build_resources[i] == index_build_res_vec[i]); + } } TEST_F(ConfigTest, SERVER_CONFIG_INVALID_TEST) { @@ -419,9 +423,9 @@ TEST_F(ConfigTest, SERVER_CONFIG_INVALID_TEST) { s = config.SetResourceConfigSearchResources("cpu"); ASSERT_TRUE(s.ok()); - s = config.SetResourceConfigIndexBuildDevice("gup2"); + s = config.SetResourceConfigIndexBuildResources("gup2"); ASSERT_FALSE(s.ok()); - s = config.SetResourceConfigIndexBuildDevice("gpu16"); + s = config.SetResourceConfigIndexBuildResources("gpu16"); ASSERT_FALSE(s.ok()); }