提交 caacf40e 编写于 作者: F fishpenguin

fix for config change

......@@ -27,7 +27,6 @@ metric_config:
port: 8080 # port prometheus uses to fetch metrics, must be in range [1025, 65534]
cache_config:
cpu_cache_capacity: 16 # GB, CPU memory used for cache, must be a positive integer
cpu_cache_threshold: 0.85 # percentage of data that will be kept when cache cleanup is triggered, must be in range (0.0, 1.0]
cache_insert_data: false # whether to load inserted data into cache, must be a boolean
......@@ -38,7 +37,7 @@ engine_config:
gpu_search_threshold: 1000 # threshold beyond which the search computation is executed on GPUs only
resource_config:
search_resources: # define the device used for search computation
search_resources: # define the devices used for search computation, must be in format: cpu or gpux
- cpu
index_build_device: # CPU used for building index
index_build_resources: # define the devices used for index building, must be in format: cpu or gpux
- cpu
......@@ -42,5 +42,5 @@ resource_config:
search_resources: # define the devices used for search computation, must be in format: cpu or gpux
- cpu
- gpu0
index_build_device: # CPU / GPU used for building index, must be in format: cpu or gpux
index_build_resources: # define the devices used for index building, must be in format: cpu or gpux
- gpu0
......@@ -355,6 +355,7 @@ ExecutionEngineImpl::CopyToGpu(uint64_t device_id, bool hybrid) {
Status
ExecutionEngineImpl::CopyToIndexFileToGpu(uint64_t device_id) {
gpu_num_ = device_id;
auto to_index_data = std::make_shared<ToIndexData>(PhysicalSize());
cache::DataObjPtr obj = std::static_pointer_cast<cache::DataObj>(to_index_data);
milvus::cache::GpuCacheMgr::GetInstance(device_id)->InsertItem(location_, obj);
......@@ -577,12 +578,7 @@ ExecutionEngineImpl::GpuCache(uint64_t gpu_id) {
// TODO(linxj): remove.
Status
ExecutionEngineImpl::Init() {
server::Config& config = server::Config::GetInstance();
std::vector<int64_t> gpu_ids;
Status s = config.GetResourceConfigIndexBuildDevice(gpu_ids);
if (!s.ok()) {
return s;
}
auto gpu_ids = scheduler::get_build_resources();
for (auto id : gpu_ids) {
if (gpu_num_ == id) {
return Status::OK();
......
......@@ -47,6 +47,7 @@ class IVF : public VectorIndex, public FaissBaseIndex {
void
set_index_model(IndexModelPtr model) override;
void
Add(const DatasetPtr& dataset, const Config& config) override;
......
......@@ -54,8 +54,7 @@ load_simple_config() {
// get resources
auto gpu_ids = get_gpu_pool();
std::vector<int64_t> build_gpu_ids;
config.GetResourceConfigIndexBuildDevice(build_gpu_ids);
auto build_gpu_ids = get_build_resources();
// create and connect
ResMgrInst::GetInstance()->Add(ResourceFactory::Create("disk", "DISK", 0, true, false));
......
......@@ -28,6 +28,7 @@
#include "optimizer/OnlyGPUPass.h"
#include "optimizer/Optimizer.h"
#include "server/Config.h"
#include "Utils.h"
#include <memory>
#include <mutex>
......@@ -108,8 +109,8 @@ class OptimizerInst {
}
}
std::vector<int64_t> build_resources;
config.GetResourceConfigIndexBuildDevice(build_resources);
auto build_resources = get_build_resources();
std::vector<PassPtr> pass_list;
pass_list.push_back(std::make_shared<LargeSQ8HPass>());
......
......@@ -83,5 +83,43 @@ get_gpu_pool() {
return gpu_pool;
}
// Translates the configured index-build resources ("cpu" / "gpuN") into device ids.
//
// Returns one server::CPU_DEVICE_ID entry for every "cpu" item (in configuration
// order), followed by the distinct GPU ids in ascending order.
//
// A malformed entry or a GPU id that is not below scheduler::get_num_gpu() is
// logged and terminates the process with exit(-1).
std::vector<int64_t>
get_build_resources() {
    std::vector<int64_t> gpu_pool;

    server::Config& config = server::Config::GetInstance();
    std::vector<std::string> pool;
    Status s = config.GetResourceConfigIndexBuildResources(pool);
    if (!s.ok()) {
        // Report the failure and keep going with whatever entries were returned.
        SERVER_LOG_ERROR << s.message();
    }

    // Nine decimal digits always fit in uint64_t, so the manual parse below cannot overflow.
    constexpr size_t kMaxIdDigits = 9;

    // std::set both removes duplicate GPU entries and keeps the ids sorted.
    std::set<uint64_t> gpu_ids;
    for (auto& resource : pool) {
        if (resource == "cpu") {
            gpu_pool.push_back(server::CPU_DEVICE_ID);
            continue;
        }

        if (resource.length() < 4 || resource.compare(0, 3, "gpu") != 0) {
            SERVER_LOG_ERROR << "Invalid index build resource: " << resource
                             << ", must be in format: cpu or gpux";
            exit(-1);
        }

        // Parse the id strictly as decimal digits. Unlike std::stoi this never throws
        // and does not silently accept trailing characters such as "gpu0abc".
        const std::string id_str = resource.substr(3);
        bool numeric = id_str.length() <= kMaxIdDigits;
        uint64_t gpu_id = 0;
        for (size_t i = 0; numeric && i < id_str.length(); ++i) {
            const char c = id_str[i];
            if (c < '0' || c > '9') {
                numeric = false;
            } else {
                gpu_id = gpu_id * 10 + static_cast<uint64_t>(c - '0');
            }
        }
        if (!numeric) {
            SERVER_LOG_ERROR << "Invalid index build resource: " << resource
                             << ", must be in format: cpu or gpux";
            exit(-1);
        }

        // Compare as unsigned on both sides to avoid implicit signed/unsigned conversion.
        if (gpu_id >= static_cast<uint64_t>(scheduler::get_num_gpu())) {
            SERVER_LOG_ERROR << "Invalid index build resource: " << resource
                             << ", GPU device does not exist";
            exit(-1);
        }

        gpu_ids.insert(gpu_id);
    }

    for (auto& gpu_id : gpu_ids) {
        gpu_pool.push_back(static_cast<int64_t>(gpu_id));
    }

    return gpu_pool;
}
} // namespace scheduler
} // namespace milvus
......@@ -30,5 +30,8 @@ get_num_gpu();
std::vector<uint64_t>
get_gpu_pool();
std::vector<int64_t>
get_build_resources();
} // namespace scheduler
} // namespace milvus
......@@ -35,7 +35,7 @@ BuildIndexPass::Run(const TaskPtr& task) {
return false;
ResourcePtr res_ptr;
if (build_gpu_ids_[0] == server::CPU_DEVICE_ID && build_gpu_ids_.size() == 1) {
if (build_gpu_ids_[0] == server::CPU_DEVICE_ID) {
res_ptr = ResMgrInst::GetInstance()->GetResource("cpu");
auto label = std::make_shared<SpecResLabel>(std::weak_ptr<Resource>(res_ptr));
task->label() = label;
......
......@@ -215,8 +215,8 @@ Config::ValidateConfig() {
return s;
}
std::vector<int64_t> index_build_devices;
s = GetResourceConfigIndexBuildDevice(index_build_devices);
std::vector<std::string> index_build_resources;
s = GetResourceConfigIndexBuildResources(index_build_resources);
if (!s.ok()) {
return s;
}
......@@ -351,7 +351,7 @@ Config::ResetDefaultConfig() {
return s;
}
s = SetResourceConfigIndexBuildDevice(CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT);
s = SetResourceConfigIndexBuildResources(CONFIG_RESOURCE_INDEX_BUILD_RESOURCES_DEFAULT);
if (!s.ok()) {
return s;
}
......@@ -599,14 +599,18 @@ Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) {
return Status(SERVER_INVALID_ARGUMENT, msg);
} else {
uint64_t gpu_cache_capacity = std::stoi(value) * GB;
std::vector<int64_t> device_ids;
Status s = GetResourceConfigIndexBuildDevice(device_ids);
std::vector<std::string> resources;
Status s = GetResourceConfigIndexBuildResources(resources);
if (!s.ok()) {
return s;
}
size_t gpu_memory;
for (auto& device_id : device_ids) {
for (auto& resource : resources) {
if (resource == "cpu") {
continue;
}
int32_t device_id = std::stoi(resource.substr(3));
if (!ValidationUtil::GetGpuMemory(device_id, gpu_memory).ok()) {
std::string msg = "Fail to get GPU memory for GPU device: " + std::to_string(device_id);
return Status(SERVER_UNEXPECTED_ERROR, msg);
......@@ -747,13 +751,19 @@ Config::CheckResourceConfigSearchResources(const std::vector<std::string>& value
}
Status
Config::CheckResourceConfigIndexBuildDevice(const std::vector<std::string>& value) {
Config::CheckResourceConfigIndexBuildResources(const std::vector<std::string>& value) {
if (value.empty()) {
std::string msg =
"Invalid index build resource. "
"Possible reason: resource_config.index_build_device is empty.";
"Invalid build index resource. "
"Possible reason: resource_config.build_index_resources is empty.";
return Status(SERVER_INVALID_ARGUMENT, msg);
}
for (auto& resource : value) {
auto status = CheckResource(resource);
if (!status.ok()) {
return Status(SERVER_INVALID_ARGUMENT, status.message());
}
}
for (auto& resource : value) {
auto status = CheckResource(resource);
......@@ -1048,34 +1058,18 @@ Status
Config::GetResourceConfigSearchResources(std::vector<std::string>& value) {
std::string str =
GetConfigSequenceStr(CONFIG_RESOURCE, CONFIG_RESOURCE_SEARCH_RESOURCES,
CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER, CONFIG_RESOURCE_SEARCH_RESOURCES_DEFAULT);
server::StringHelpFunctions::SplitStringByDelimeter(str, CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER, value);
CONFIG_RESOURCE_RESOURCES_DELIMITER, CONFIG_RESOURCE_SEARCH_RESOURCES_DEFAULT);
server::StringHelpFunctions::SplitStringByDelimeter(str, CONFIG_RESOURCE_RESOURCES_DELIMITER, value);
return CheckResourceConfigSearchResources(value);
}
Status
Config::GetResourceConfigIndexBuildDevice(std::vector<int64_t>& value) {
Config::GetResourceConfigIndexBuildResources(std::vector<std::string>& value) {
std::string str =
GetConfigSequenceStr(CONFIG_RESOURCE, CONFIG_RESOURCE_INDEX_BUILD_DEVICE, CONFIG_RESOURCE_INDEX_BUILD_DELIMITER,
CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT);
std::vector<std::string> resources;
server::StringHelpFunctions::SplitStringByDelimeter(str, CONFIG_RESOURCE_INDEX_BUILD_DELIMITER, resources);
Status s = CheckResourceConfigIndexBuildDevice(resources);
if (!s.ok()) {
return s;
}
for (auto res : resources) {
if (res == "cpu") {
value.emplace_back(CPU_DEVICE_ID);
break;
}
int64_t device_id = std::stoi(str.substr(3));
value.emplace_back(device_id);
}
return Status::OK();
GetConfigSequenceStr(CONFIG_RESOURCE, CONFIG_RESOURCE_INDEX_BUILD_RESOURCES,
CONFIG_RESOURCE_RESOURCES_DELIMITER, CONFIG_RESOURCE_INDEX_BUILD_RESOURCES_DEFAULT);
server::StringHelpFunctions::SplitStringByDelimeter(str, CONFIG_RESOURCE_RESOURCES_DELIMITER, value);
return CheckResourceConfigIndexBuildResources(value);
}
///////////////////////////////////////////////////////////////////////////////
......@@ -1330,7 +1324,7 @@ Config::SetResourceConfigMode(const std::string& value) {
Status
Config::SetResourceConfigSearchResources(const std::string& value) {
std::vector<std::string> res_vec;
server::StringHelpFunctions::SplitStringByDelimeter(value, CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER, res_vec);
server::StringHelpFunctions::SplitStringByDelimeter(value, CONFIG_RESOURCE_RESOURCES_DELIMITER, res_vec);
Status s = CheckResourceConfigSearchResources(res_vec);
if (!s.ok()) {
......@@ -1342,16 +1336,16 @@ Config::SetResourceConfigSearchResources(const std::string& value) {
}
Status
Config::SetResourceConfigIndexBuildDevice(const std::string& value) {
Config::SetResourceConfigIndexBuildResources(const std::string &value) {
std::vector<std::string> res_vec;
server::StringHelpFunctions::SplitStringByDelimeter(value, CONFIG_RESOURCE_INDEX_BUILD_DELIMITER, res_vec);
server::StringHelpFunctions::SplitStringByDelimeter(value, CONFIG_RESOURCE_RESOURCES_DELIMITER, res_vec);
Status s = CheckResourceConfigIndexBuildDevice(res_vec);
Status s = CheckResourceConfigIndexBuildResources(res_vec);
if (!s.ok()) {
return s;
}
SetConfigValueInMem(CONFIG_RESOURCE, CONFIG_RESOURCE_INDEX_BUILD_DEVICE, value);
SetConfigValueInMem(CONFIG_RESOURCE, CONFIG_RESOURCE_INDEX_BUILD_RESOURCES, value);
return Status::OK();
}
......
......@@ -91,21 +91,18 @@ static const char* CONFIG_ENGINE_GPU_SEARCH_THRESHOLD_DEFAULT = "1000";
static const char* CONFIG_RESOURCE = "resource_config";
static const char* CONFIG_RESOURCE_MODE = "mode";
static const char* CONFIG_RESOURCE_MODE_DEFAULT = "simple";
static const char* CONFIG_RESOURCE_RESOURCES_DELIMITER = ",";
static const char* CONFIG_RESOURCE_SEARCH_RESOURCES = "search_resources";
static const char* CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER = ",";
#ifdef MILVUS_CPU_VERSION
static const char* CONFIG_RESOURCE_SEARCH_RESOURCES_DEFAULT = "cpu";
#else
static const char* CONFIG_RESOURCE_SEARCH_RESOURCES_DEFAULT = "cpu,gpu0";
#endif
static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE = "index_build_device";
static const char* CONFIG_RESOURCE_INDEX_BUILD_DELIMITER = ",";
static const char* CONFIG_RESOURCE_INDEX_BUILD_RESOURCES = "index_build_resources";
#ifdef MILVUS_CPU_VERSION
static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT = "cpu";
static const char* CONFIG_RESOURCE_INDEX_BUILD_RESOURCES_DEFAULT = "cpu";
#else
static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT = "cpu,gpu0";
static const char* CONFIG_RESOURCE_INDEX_BUILD_RESOURCES_DEFAULT = "gpu0";
#endif
const int32_t CPU_DEVICE_ID = -1;
......@@ -191,7 +188,7 @@ class Config {
Status
CheckResourceConfigSearchResources(const std::vector<std::string>& value);
Status
CheckResourceConfigIndexBuildDevice(const std::vector<std::string>& value);
CheckResourceConfigIndexBuildResources(const std::vector<std::string>& value);
std::string
GetConfigStr(const std::string& parent_key, const std::string& child_key, const std::string& default_value = "");
......@@ -260,7 +257,7 @@ class Config {
Status
GetResourceConfigSearchResources(std::vector<std::string>& value);
Status
GetResourceConfigIndexBuildDevice(std::vector<int64_t>& value);
GetResourceConfigIndexBuildResources(std::vector<std::string>& value);
public:
/* server config */
......@@ -321,7 +318,7 @@ class Config {
Status
SetResourceConfigSearchResources(const std::string& value);
Status
SetResourceConfigIndexBuildDevice(const std::string& value);
SetResourceConfigIndexBuildResources(const std::string& value);
private:
std::unordered_map<std::string, std::unordered_map<std::string, std::string>> config_map_;
......
......@@ -182,7 +182,7 @@ ValidationUtil::ValidatePartitionTags(const std::vector<std::string>& partition_
}
Status
ValidationUtil::ValidateGpuIndex(uint32_t gpu_index) {
ValidationUtil::ValidateGpuIndex(int32_t gpu_index) {
#ifdef MILVUS_GPU_VERSION
int num_devices = 0;
auto cuda_err = cudaGetDeviceCount(&num_devices);
......@@ -203,7 +203,7 @@ ValidationUtil::ValidateGpuIndex(uint32_t gpu_index) {
}
Status
ValidationUtil::GetGpuMemory(uint32_t gpu_index, size_t& memory) {
ValidationUtil::GetGpuMemory(int32_t gpu_index, size_t& memory) {
#ifdef MILVUS_GPU_VERSION
cudaDeviceProp deviceProp;
......
......@@ -59,10 +59,10 @@ class ValidationUtil {
ValidatePartitionTags(const std::vector<std::string>& partition_tags);
static Status
ValidateGpuIndex(uint32_t gpu_index);
ValidateGpuIndex(int32_t gpu_index);
static Status
GetGpuMemory(uint32_t gpu_index, size_t& memory);
GetGpuMemory(int32_t gpu_index, size_t& memory);
static Status
ValidateIpAddress(const std::string& ip_address);
......
......@@ -19,6 +19,8 @@
#ifdef MILVUS_GPU_VERSION
#include "knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h"
#endif
#include "scheduler/Utils.h"
#include "server/Config.h"
#include <map>
......@@ -48,8 +50,7 @@ KnowhereResource::Initialize() {
// get build index gpu resource
server::Config& config = server::Config::GetInstance();
std::vector<int64_t> build_index_gpus;
s = config.GetResourceConfigIndexBuildDevice(build_index_gpus);
auto build_index_gpus = scheduler::get_build_resources();
if (!s.ok())
return s;
......
......@@ -272,30 +272,34 @@ TEST_F(ConfigTest, SERVER_CONFIG_VALID_TEST) {
#else
std::vector<std::string> search_resources = {"cpu", "gpu0"};
#endif
std::vector<std::string> res_vec;
std::string res_str;
std::vector<std::string> search_res_vec;
std::string search_res_str;
milvus::server::StringHelpFunctions::MergeStringWithDelimeter(
search_resources, milvus::server::CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER, res_str);
s = config.SetResourceConfigSearchResources(res_str);
search_resources, milvus::server::CONFIG_RESOURCE_RESOURCES_DELIMITER, search_res_str);
s = config.SetResourceConfigSearchResources(search_res_str);
ASSERT_TRUE(s.ok());
s = config.GetResourceConfigSearchResources(res_vec);
s = config.GetResourceConfigSearchResources(search_res_vec);
ASSERT_TRUE(s.ok());
for (size_t i = 0; i < search_resources.size(); i++) {
ASSERT_TRUE(search_resources[i] == res_vec[i]);
ASSERT_TRUE(search_resources[i] == search_res_vec[i]);
}
#ifdef MILVUS_CPU_VERSION
int32_t resource_index_build_device = milvus::server::CPU_DEVICE_ID;
s = config.SetResourceConfigIndexBuildDevice("cpu");
std::vector<std::string> index_build_resources = {"cpu"};
#else
int32_t resource_index_build_device = 0;
s = config.SetResourceConfigIndexBuildDevice("gpu" + std::to_string(resource_index_build_device));
std::vector<std::string> index_build_resources = {"gpu0", "gpu1"};
#endif
std::vector<std::string> index_build_res_vec;
std::string index_build_res_str;
milvus::server::StringHelpFunctions::MergeStringWithDelimeter(
index_build_resources, milvus::server::CONFIG_RESOURCE_RESOURCES_DELIMITER, index_build_res_str);
s = config.SetResourceConfigIndexBuildResources(index_build_res_str);
ASSERT_TRUE(s.ok());
std::vector<int64_t> device_ids;
s = config.GetResourceConfigIndexBuildDevice(device_ids);
s = config.GetResourceConfigIndexBuildResources(index_build_res_vec);
ASSERT_TRUE(s.ok());
ASSERT_TRUE(device_ids[0] == resource_index_build_device);
for (size_t i = 0; i < index_build_resources.size(); i++) {
ASSERT_TRUE(index_build_resources[i] == index_build_res_vec[i]);
}
}
TEST_F(ConfigTest, SERVER_CONFIG_INVALID_TEST) {
......@@ -419,9 +423,9 @@ TEST_F(ConfigTest, SERVER_CONFIG_INVALID_TEST) {
s = config.SetResourceConfigSearchResources("cpu");
ASSERT_TRUE(s.ok());
s = config.SetResourceConfigIndexBuildDevice("gup2");
s = config.SetResourceConfigIndexBuildResources("gup2");
ASSERT_FALSE(s.ok());
s = config.SetResourceConfigIndexBuildDevice("gpu16");
s = config.SetResourceConfigIndexBuildResources("gpu16");
ASSERT_FALSE(s.ok());
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册