From 9d8f88febd304789b1579f487364683d67b59cdb Mon Sep 17 00:00:00 2001 From: starlord Date: Wed, 11 Sep 2019 11:14:51 +0800 Subject: [PATCH] refine config Former-commit-id: cffe2a35f6ea8aa2bdcd409825b97980b91c217f --- cpp/conf/server_config.template | 5 +-- cpp/src/cache/GpuCacheMgr.cpp | 39 +++++++----------- cpp/src/cache/GpuCacheMgr.h | 16 +------- cpp/src/metrics/PrometheusMetrics.cpp | 23 ++++------- cpp/src/server/ServerConfig.cpp | 41 +++++++------------ cpp/src/server/ServerConfig.h | 3 +- cpp/unittest/db/engine_test.cpp | 3 -- cpp/unittest/db/utils.cpp | 3 -- cpp/unittest/scheduler/scheduler_test.cpp | 4 -- .../server/appendix/server_config.yaml | 5 +-- cpp/unittest/server/cache_test.cpp | 3 -- cpp/unittest/server/config_test.cpp | 4 -- 12 files changed, 41 insertions(+), 108 deletions(-) diff --git a/cpp/conf/server_config.template b/cpp/conf/server_config.template index 99fa79c2..f1615b36 100644 --- a/cpp/conf/server_config.template +++ b/cpp/conf/server_config.template @@ -1,7 +1,6 @@ server_config: address: 0.0.0.0 # milvus server ip address (IPv4) port: 19530 # the port milvus listen to, default: 19530, range: 1025 ~ 65534 - gpu_index: 0 # the gpu milvus use, default: 0, range: 0 ~ gpu number - 1 mode: single # milvus deployment type: single, cluster, read_only time_zone: UTC+8 # Use the UTC-x or UTC+x to specify a time zone. eg. UTC+8 for China Standard Time @@ -18,6 +17,7 @@ db_config: archive_days_threshold: 0 # files older than x days will be archived, 0 means no limit, unit: day insert_buffer_size: 4 # maximum insert buffer size allowed, default: 4, unit: GB, should be at least 1 GB. # the sum of insert_buffer_size and cpu_cache_capacity should be less than total memory, unit: GB + build_index_gpu: 0 # which gpu is used to build index, default: 0, range: 0 ~ gpu number - 1 metric_config: is_startup: off # if monitoring start: on, off @@ -33,9 +33,6 @@ cache_config: insert_cache_immediately: false # insert data will be load into cache immediately for hot query gpu_cache_capacity: 5 # how many memory are used as cache in gpu, unit: GB, RANGE: 0 ~ less than total memory gpu_cache_free_percent: 0.85 # old data will be erased from cache when cache is full, this value specify how much memory should be kept, range: greater than zero ~ 1.0 - gpu_ids: # gpu id - - 0 - - 1 engine_config: use_blas_threshold: 20 diff --git a/cpp/src/cache/GpuCacheMgr.cpp b/cpp/src/cache/GpuCacheMgr.cpp index ef2f307c..0b6a6132 100644 --- a/cpp/src/cache/GpuCacheMgr.cpp +++ b/cpp/src/cache/GpuCacheMgr.cpp @@ -17,36 +17,14 @@ std::mutex GpuCacheMgr::mutex_; std::unordered_map GpuCacheMgr::instance_; namespace { - constexpr int64_t unit = 1024 * 1024 * 1024; - - std::vector load() { - server::ConfigNode& config = server::ServerConfig::GetInstance().GetConfig(server::CONFIG_CACHE); - auto conf_gpu_ids = config.GetSequence(server::CONFIG_GPU_IDS); - - std::vector gpu_ids; - - for (auto gpu_id : conf_gpu_ids) { - gpu_ids.push_back(std::atoi(gpu_id.c_str())); - } - - return gpu_ids; - } -} - - -bool GpuCacheMgr::GpuIdInConfig(uint64_t gpu_id) { - static std::vector ids = load(); - for (auto id : ids) { - if (gpu_id == id) return true; - } - return false; + constexpr int64_t G_BYTE = 1024 * 1024 * 1024; } GpuCacheMgr::GpuCacheMgr() { server::ConfigNode& config = server::ServerConfig::GetInstance().GetConfig(server::CONFIG_CACHE); int64_t cap = config.GetInt64Value(server::CONFIG_GPU_CACHE_CAPACITY, 2); - cap *= unit; + cap *= G_BYTE; cache_ = std::make_shared(cap, 1UL<<32); double free_percent = config.GetDoubleValue(server::GPU_CACHE_FREE_PERCENT, 0.85); @@ -58,6 +36,19 @@ GpuCacheMgr::GpuCacheMgr() { } } +CacheMgr* GpuCacheMgr::GetInstance(uint64_t gpu_id) { + if (instance_.find(gpu_id) == instance_.end()) { + std::lock_guard lock(mutex_); + if (instance_.find(gpu_id) == instance_.end()) { + instance_.insert(std::pair(gpu_id, std::make_shared())); + } + return instance_[gpu_id].get(); + } else { + std::lock_guard lock(mutex_); + return instance_[gpu_id].get(); + } +} + void GpuCacheMgr::InsertItem(const std::string& key, const DataObjPtr& data) { //TODO: copy data to gpu if (cache_ == nullptr) { diff --git a/cpp/src/cache/GpuCacheMgr.h b/cpp/src/cache/GpuCacheMgr.h index f26dfaa1..35756733 100644 --- a/cpp/src/cache/GpuCacheMgr.h +++ b/cpp/src/cache/GpuCacheMgr.h @@ -19,21 +19,7 @@ class GpuCacheMgr : public CacheMgr { public: GpuCacheMgr(); - static bool GpuIdInConfig(uint64_t gpu_id); - - static CacheMgr* GetInstance(uint64_t gpu_id) { - if (instance_.find(gpu_id) == instance_.end()) { - std::lock_guard lock(mutex_); - if (instance_.find(gpu_id) == instance_.end()) { - if (GpuIdInConfig(gpu_id)) { - instance_.insert(std::pair(gpu_id, std::make_shared())); - } else { - return nullptr; - } - } - } - return instance_[gpu_id].get(); - } + static CacheMgr* GetInstance(uint64_t gpu_id); void InsertItem(const std::string& key, const DataObjPtr& data) override; diff --git a/cpp/src/metrics/PrometheusMetrics.cpp b/cpp/src/metrics/PrometheusMetrics.cpp index d12acca8..07ca9aa3 100644 --- a/cpp/src/metrics/PrometheusMetrics.cpp +++ b/cpp/src/metrics/PrometheusMetrics.cpp @@ -168,22 +168,13 @@ void PrometheusMetrics::CPUTemperature() { } void PrometheusMetrics::GpuCacheUsageGaugeSet() { - if(!startup_) return; - server::ConfigNode& config = server::ServerConfig::GetInstance().GetConfig(server::CONFIG_CACHE); - auto conf_gpu_ids = config.GetSequence(server::CONFIG_GPU_IDS); - - std::vector gpu_ids; - - for (auto gpu_id : conf_gpu_ids) { - gpu_ids.push_back(std::atoi(gpu_id.c_str())); - } - - for(auto i = 0; i < gpu_ids.size(); ++i) { - uint64_t cache_usage = cache::GpuCacheMgr::GetInstance(gpu_ids[i])->CacheUsage(); - uint64_t cache_capacity = cache::GpuCacheMgr::GetInstance(gpu_ids[i])->CacheCapacity(); - prometheus::Gauge &gpu_cache = gpu_cache_usage_.Add({{"GPU_Cache", std::to_string(i)}}); - gpu_cache.Set(cache_usage * 100 / cache_capacity); - } +// std::vector gpu_ids = {0}; +// for(auto i = 0; i < gpu_ids.size(); ++i) { +// uint64_t cache_usage = cache::GpuCacheMgr::GetInstance(gpu_ids[i])->CacheUsage(); +// uint64_t cache_capacity = cache::GpuCacheMgr::GetInstance(gpu_ids[i])->CacheCapacity(); +// prometheus::Gauge &gpu_cache = gpu_cache_usage_.Add({{"GPU_Cache", std::to_string(i)}}); +// gpu_cache.Set(cache_usage * 100 / cache_capacity); +// } } } diff --git a/cpp/src/server/ServerConfig.cpp b/cpp/src/server/ServerConfig.cpp index f476aa8e..da898d22 100644 --- a/cpp/src/server/ServerConfig.cpp +++ b/cpp/src/server/ServerConfig.cpp @@ -113,18 +113,6 @@ ServerConfig::CheckServerConfig() { } } - std::string gpu_index_str = server_config.GetValue(CONFIG_GPU_INDEX, "0"); - if (ValidationUtil::ValidateStringIsNumber(gpu_index_str) != SERVER_SUCCESS) { - std::cerr << "ERROR: gpu_index " << gpu_index_str << " is not a number" << std::endl; - okay = false; - } else { - int32_t gpu_index = std::stol(gpu_index_str); - if (ValidationUtil::ValidateGpuIndex(gpu_index) != SERVER_SUCCESS) { - std::cerr << "ERROR: invalid gpu_index " << gpu_index_str << std::endl; - okay = false; - } - } - std::string mode = server_config.GetValue(CONFIG_CLUSTER_MODE, "single"); if (mode != "single" && mode != "cluster" && mode != "read_only") { std::cerr << "ERROR: mode " << mode << " is not one of ['single', 'cluster', 'read_only']" << std::endl; @@ -214,6 +202,18 @@ ServerConfig::CheckDBConfig() { } } + std::string gpu_index_str = db_config.GetValue(CONFIG_DB_BUILD_INDEX_GPU, "0"); + if (ValidationUtil::ValidateStringIsNumber(gpu_index_str) != SERVER_SUCCESS) { + std::cerr << "ERROR: gpu_index " << gpu_index_str << " is not a number" << std::endl; + okay = false; + } else { + int32_t gpu_index = std::stol(gpu_index_str); + if (ValidationUtil::ValidateGpuIndex(gpu_index) != SERVER_SUCCESS) { + std::cerr << "ERROR: invalid gpu_index " << gpu_index_str << std::endl; + okay = false; + } + } + return (okay ? SERVER_SUCCESS : SERVER_INVALID_ARGUMENT); } @@ -313,7 +313,7 @@ ServerConfig::CheckCacheConfig() { else { uint64_t gpu_cache_capacity = (uint64_t) std::stol(gpu_cache_capacity_str); gpu_cache_capacity *= GB; - int gpu_index = GetConfig(CONFIG_SERVER).GetInt32Value(CONFIG_GPU_INDEX, 0); + int gpu_index = GetConfig(CONFIG_DB).GetInt32Value(CONFIG_DB_BUILD_INDEX_GPU, 0); size_t gpu_memory; if (ValidationUtil::GetGpuMemory(gpu_index, gpu_memory) != SERVER_SUCCESS) { std::cerr << "ERROR: could not get gpu memory for device " << gpu_index << std::endl; @@ -340,19 +340,6 @@ ServerConfig::CheckCacheConfig() { okay = false; } - auto conf_gpu_ids = cache_config.GetSequence(server::CONFIG_GPU_IDS); - - for (std::string &gpu_id : conf_gpu_ids) { - if (ValidationUtil::ValidateStringIsNumber(gpu_id) != SERVER_SUCCESS) { - std::cerr << "ERROR: gpu_id " << gpu_id << " is not a number" << std::endl; - okay = false; - } - else if (ValidationUtil::ValidateGpuIndex(std::stol(gpu_id)) != SERVER_SUCCESS) { - std::cerr << "ERROR: gpu_id " << gpu_id << " is invalid" << std::endl; - okay = false; - } - } - return (okay ? SERVER_SUCCESS : SERVER_INVALID_ARGUMENT); } @@ -483,7 +470,7 @@ ServerConfig::CheckResourceConfig() { } } else if (type == "GPU") { - int build_index_gpu_index = GetConfig(CONFIG_SERVER).GetInt32Value(CONFIG_GPU_INDEX, 0); + int build_index_gpu_index = GetConfig(CONFIG_DB).GetInt32Value(CONFIG_DB_BUILD_INDEX_GPU, 0); if (device_id == build_index_gpu_index) { resource_valid_flag = true; } diff --git a/cpp/src/server/ServerConfig.h b/cpp/src/server/ServerConfig.h index f9b43fdb..a8d906f7 100644 --- a/cpp/src/server/ServerConfig.h +++ b/cpp/src/server/ServerConfig.h @@ -18,7 +18,6 @@ static const char* CONFIG_SERVER = "server_config"; static const char* CONFIG_SERVER_ADDRESS = "address"; static const char* CONFIG_SERVER_PORT = "port"; static const char* CONFIG_CLUSTER_MODE = "mode"; -static const char* CONFIG_GPU_INDEX = "gpu_index"; static const char* CONFIG_TIME_ZONE = "time_zone"; static const char* CONFIG_DB = "db_config"; @@ -29,6 +28,7 @@ static const char* CONFIG_DB_ARCHIVE_DISK = "archive_disk_threshold"; static const char* CONFIG_DB_ARCHIVE_DAYS = "archive_days_threshold"; static const char* CONFIG_DB_INSERT_BUFFER_SIZE = "insert_buffer_size"; static const char* CONFIG_DB_PARALLEL_REDUCE = "parallel_reduce"; +static const char* CONFIG_DB_BUILD_INDEX_GPU = "build_index_gpu"; static const char* CONFIG_LOG = "log_config"; @@ -37,7 +37,6 @@ static const char* CONFIG_CPU_CACHE_CAPACITY = "cpu_cache_capacity"; static const char* CONFIG_GPU_CACHE_CAPACITY = "gpu_cache_capacity"; static const char* CACHE_FREE_PERCENT = "cpu_cache_free_percent"; static const char* CONFIG_INSERT_CACHE_IMMEDIATELY = "insert_cache_immediately"; -static const char* CONFIG_GPU_IDS = "gpu_ids"; static const char *GPU_CACHE_FREE_PERCENT = "gpu_cache_free_percent"; static const char* CONFIG_METRIC = "metric_config"; diff --git a/cpp/unittest/db/engine_test.cpp b/cpp/unittest/db/engine_test.cpp index 68ad4d76..5cdad9f1 100644 --- a/cpp/unittest/db/engine_test.cpp +++ b/cpp/unittest/db/engine_test.cpp @@ -105,9 +105,6 @@ TEST_F(EngineTest, ENGINE_IMPL_TEST) { ASSERT_EQ(engine_ptr->Dimension(), dimension); ASSERT_EQ(engine_ptr->Count(), ids.size()); -// server::ConfigNode& config = server::ServerConfig::GetInstance().GetConfig(server::CONFIG_CACHE); -// config.AddSequenceItem(server::CONFIG_GPU_IDS, "0"); -// // status = engine_ptr->CopyToGpu(0); // //ASSERT_TRUE(status.ok()); // diff --git a/cpp/unittest/db/utils.cpp b/cpp/unittest/db/utils.cpp index cc2f3cb3..873028b6 100644 --- a/cpp/unittest/db/utils.cpp +++ b/cpp/unittest/db/utils.cpp @@ -66,9 +66,6 @@ engine::Options BaseTest::GetOptions() { void DBTest::SetUp() { BaseTest::SetUp(); - server::ConfigNode& config = server::ServerConfig::GetInstance().GetConfig(server::CONFIG_CACHE); - config.AddSequenceItem(server::CONFIG_GPU_IDS, "0"); - auto res_mgr = engine::ResMgrInst::GetInstance(); res_mgr->Clear(); res_mgr->Add(engine::ResourceFactory::Create("disk", "DISK", 0, true, false)); diff --git a/cpp/unittest/scheduler/scheduler_test.cpp b/cpp/unittest/scheduler/scheduler_test.cpp index f176311e..01baa87e 100644 --- a/cpp/unittest/scheduler/scheduler_test.cpp +++ b/cpp/unittest/scheduler/scheduler_test.cpp @@ -94,10 +94,6 @@ class SchedulerTest : public testing::Test { protected: void SetUp() override { - server::ConfigNode& config = server::ServerConfig::GetInstance().GetConfig(server::CONFIG_CACHE); - config.AddSequenceItem(server::CONFIG_GPU_IDS, "0"); - config.AddSequenceItem(server::CONFIG_GPU_IDS, "1"); - ResourcePtr cpu = ResourceFactory::Create("cpu", "CPU", 0, true, false); ResourcePtr gpu_0 = ResourceFactory::Create("gpu0", "GPU", 0); ResourcePtr gpu_1 = ResourceFactory::Create("gpu1", "GPU", 1); diff --git a/cpp/unittest/server/appendix/server_config.yaml b/cpp/unittest/server/appendix/server_config.yaml index b0095b8d..1a3dca9d 100644 --- a/cpp/unittest/server/appendix/server_config.yaml +++ b/cpp/unittest/server/appendix/server_config.yaml @@ -1,8 +1,8 @@ server_config: address: 0.0.0.0 # milvus server ip address (IPv4) port: 19530 # the port milvus listen to, default: 19530, range: 1025 ~ 65534 - gpu_index: 0 # the gpu milvus use, default: 0, range: 0 ~ gpu number - 1 mode: single # milvus deployment type: single, cluster, read_only + time_zone: UTC+8 # Use the UTC-x or UTC+x to specify a time zone. eg. UTC+8 for China Standard Time db_config: db_path: /tmp/milvus # milvus data storage path @@ -17,6 +17,7 @@ db_config: archive_days_threshold: 0 # files older than x days will be archived, 0 means no limit, unit: day insert_buffer_size: 4 # maximum insert buffer size allowed, default: 4, unit: GB, should be at least 1 GB. # the sum of insert_buffer_size and cpu_cache_capacity should be less than total memory, unit: GB + build_index_gpu: 0 # which gpu is used to build index, default: 0, range: 0 ~ gpu number - 1 metric_config: is_startup: off # if monitoring start: on, off @@ -32,8 +33,6 @@ cache_config: insert_cache_immediately: false # insert data will be load into cache immediately for hot query gpu_cache_capacity: 5 # how many memory are used as cache in gpu, unit: GB, RANGE: 0 ~ less than total memory gpu_cache_free_percent: 0.85 # old data will be erased from cache when cache is full, this value specify how much memory should be kept, range: greater than zero ~ 1.0 - gpu_ids: # gpu id - - 0 engine_config: use_blas_threshold: 20 diff --git a/cpp/unittest/server/cache_test.cpp b/cpp/unittest/server/cache_test.cpp index d9a0dc41..501d3f48 100644 --- a/cpp/unittest/server/cache_test.cpp +++ b/cpp/unittest/server/cache_test.cpp @@ -166,9 +166,6 @@ TEST(CacheTest, CPU_CACHE_TEST) { } TEST(CacheTest, GPU_CACHE_TEST) { - server::ConfigNode& config = server::ServerConfig::GetInstance().GetConfig(server::CONFIG_CACHE); - config.AddSequenceItem(server::CONFIG_GPU_IDS, "0"); - cache::CacheMgr* gpu_mgr = cache::GpuCacheMgr::GetInstance(0); const int dim = 256; diff --git a/cpp/unittest/server/config_test.cpp b/cpp/unittest/server/config_test.cpp index d512cd82..ddfca182 100644 --- a/cpp/unittest/server/config_test.cpp +++ b/cpp/unittest/server/config_test.cpp @@ -144,8 +144,4 @@ TEST(ConfigTest, SERVER_CONFIG_TEST) { db_config.SetValue(server::CONFIG_DB_INSERT_BUFFER_SIZE, std::to_string(insert_buffer_size)); err = config.ValidateConfig(); ASSERT_NE(err, SERVER_SUCCESS); - - server_config.SetValue(server::CONFIG_GPU_INDEX, "9999"); - err = config.ValidateConfig(); - ASSERT_NE(err, SERVER_SUCCESS); } \ No newline at end of file -- GitLab