提交 1fdfefbf 编写于 作者: S starlord

Merge remote-tracking branch 'source/branch-0.5.0' into branch-0.5.0


Former-commit-id: d945fae9a0724530b50807380c7b857d6b383cf3
...@@ -21,6 +21,7 @@ Please mark all change in change log and use the ticket from JIRA. ...@@ -21,6 +21,7 @@ Please mark all change in change log and use the ticket from JIRA.
- MS-644 - Search crashed with index-type: flat - MS-644 - Search crashed with index-type: flat
- MS-624 - Search vectors failed if time ranges long enough - MS-624 - Search vectors failed if time ranges long enough
- MS-652 - IVFSQH quantization double free - MS-652 - IVFSQH quantization double free
- MS-605 - Server going down during searching vectors
- MS-654 - Describe index timeout when building index - MS-654 - Describe index timeout when building index
## Improvement ## Improvement
......
...@@ -509,7 +509,13 @@ DBImpl::StartMetricTask() { ...@@ -509,7 +509,13 @@ DBImpl::StartMetricTask() {
server::Metrics::GetInstance().KeepingAliveCounterIncrement(METRIC_ACTION_INTERVAL); server::Metrics::GetInstance().KeepingAliveCounterIncrement(METRIC_ACTION_INTERVAL);
int64_t cache_usage = cache::CpuCacheMgr::GetInstance()->CacheUsage(); int64_t cache_usage = cache::CpuCacheMgr::GetInstance()->CacheUsage();
int64_t cache_total = cache::CpuCacheMgr::GetInstance()->CacheCapacity(); int64_t cache_total = cache::CpuCacheMgr::GetInstance()->CacheCapacity();
server::Metrics::GetInstance().CpuCacheUsageGaugeSet(cache_usage * 100 / cache_total); if (cache_total > 0) {
double cache_usage_double = cache_usage;
server::Metrics::GetInstance().CpuCacheUsageGaugeSet(cache_usage_double * 100 / cache_total);
} else {
server::Metrics::GetInstance().CpuCacheUsageGaugeSet(0);
}
server::Metrics::GetInstance().GpuCacheUsageGaugeSet(); server::Metrics::GetInstance().GpuCacheUsageGaugeSet();
uint64_t size; uint64_t size;
Size(size); Size(size);
......
...@@ -80,8 +80,7 @@ class ExecutionEngine { ...@@ -80,8 +80,7 @@ class ExecutionEngine {
Merge(const std::string& location) = 0; Merge(const std::string& location) = 0;
virtual Status virtual Status
Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels, Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels, bool hybrid) = 0;
bool hybrid) const = 0;
virtual std::shared_ptr<ExecutionEngine> virtual std::shared_ptr<ExecutionEngine>
BuildIndex(const std::string& location, EngineType engine_type) = 0; BuildIndex(const std::string& location, EngineType engine_type) = 0;
......
...@@ -37,6 +37,7 @@ ...@@ -37,6 +37,7 @@
#include <utility> #include <utility>
#include <vector> #include <vector>
//#define ON_SEARCH
namespace milvus { namespace milvus {
namespace engine { namespace engine {
...@@ -248,26 +249,6 @@ ExecutionEngineImpl::Load(bool to_cache) { ...@@ -248,26 +249,6 @@ ExecutionEngineImpl::Load(bool to_cache) {
Status Status
ExecutionEngineImpl::CopyToGpu(uint64_t device_id, bool hybrid) { ExecutionEngineImpl::CopyToGpu(uint64_t device_id, bool hybrid) {
if (hybrid) { if (hybrid) {
auto key = location_ + ".quantizer";
auto quantizer =
std::static_pointer_cast<CachedQuantizer>(cache::GpuCacheMgr::GetInstance(device_id)->GetIndex(key));
auto conf = std::make_shared<knowhere::QuantizerCfg>();
conf->gpu_id = device_id;
if (quantizer) {
// cache hit
conf->mode = 2;
auto new_index = index_->LoadData(quantizer->Data(), conf);
index_ = new_index;
} else {
auto pair = index_->CopyToGpuWithQuantizer(device_id);
index_ = pair.first;
// cache
auto cached_quantizer = std::make_shared<CachedQuantizer>(pair.second);
cache::GpuCacheMgr::GetInstance(device_id)->InsertItem(key, cached_quantizer);
}
return Status::OK(); return Status::OK();
} }
...@@ -415,7 +396,60 @@ ExecutionEngineImpl::BuildIndex(const std::string& location, EngineType engine_t ...@@ -415,7 +396,60 @@ ExecutionEngineImpl::BuildIndex(const std::string& location, EngineType engine_t
Status Status
ExecutionEngineImpl::Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels, ExecutionEngineImpl::Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels,
bool hybrid) const { bool hybrid) {
if (index_type_ == EngineType::FAISS_IVFSQ8H) {
if (!hybrid) {
const std::string key = location_ + ".quantizer";
std::vector<uint64_t> gpus = scheduler::get_gpu_pool();
const int64_t NOT_FOUND = -1;
int64_t device_id = NOT_FOUND;
// cache hit
{
knowhere::QuantizerPtr quantizer = nullptr;
for (auto& gpu : gpus) {
auto cache = cache::GpuCacheMgr::GetInstance(gpu);
if (auto cached_quantizer = cache->GetIndex(key)) {
device_id = gpu;
quantizer = std::static_pointer_cast<CachedQuantizer>(cached_quantizer)->Data();
}
}
if (device_id != NOT_FOUND) {
// cache hit
auto config = std::make_shared<knowhere::QuantizerCfg>();
config->gpu_id = device_id;
config->mode = 2;
auto new_index = index_->LoadData(quantizer, config);
index_ = new_index;
}
}
if (device_id == NOT_FOUND) {
// cache miss
std::vector<int64_t> all_free_mem;
for (auto& gpu : gpus) {
auto cache = cache::GpuCacheMgr::GetInstance(gpu);
auto free_mem = cache->CacheCapacity() - cache->CacheUsage();
all_free_mem.push_back(free_mem);
}
auto max_e = std::max_element(all_free_mem.begin(), all_free_mem.end());
auto best_index = std::distance(all_free_mem.begin(), max_e);
device_id = gpus[best_index];
auto pair = index_->CopyToGpuWithQuantizer(device_id);
index_ = pair.first;
// cache
auto cached_quantizer = std::make_shared<CachedQuantizer>(pair.second);
cache::GpuCacheMgr::GetInstance(device_id)->InsertItem(key, cached_quantizer);
}
}
}
if (index_ == nullptr) { if (index_ == nullptr) {
ENGINE_LOG_ERROR << "ExecutionEngineImpl: index is null, failed to search"; ENGINE_LOG_ERROR << "ExecutionEngineImpl: index is null, failed to search";
return Status(DB_ERROR, "index is null"); return Status(DB_ERROR, "index is null");
......
...@@ -72,7 +72,7 @@ class ExecutionEngineImpl : public ExecutionEngine { ...@@ -72,7 +72,7 @@ class ExecutionEngineImpl : public ExecutionEngine {
Status Status
Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels, Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels,
bool hybrid = false) const override; bool hybrid = false) override;
ExecutionEnginePtr ExecutionEnginePtr
BuildIndex(const std::string& location, EngineType engine_type) override; BuildIndex(const std::string& location, EngineType engine_type) override;
......
...@@ -93,11 +93,10 @@ IVFSQHybrid::CopyCpuToGpu(const int64_t& device_id, const Config& config) { ...@@ -93,11 +93,10 @@ IVFSQHybrid::CopyCpuToGpu(const int64_t& device_id, const Config& config) {
auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), device_id, &index_composition, &option); auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), device_id, &index_composition, &option);
std::shared_ptr<faiss::Index> device_index = std::shared_ptr<faiss::Index>(gpu_index); std::shared_ptr<faiss::Index> device_index = std::shared_ptr<faiss::Index>(gpu_index);
auto new_idx = std::make_shared<IVFSQHybrid>(device_index, device_id, res); auto new_idx = std::make_shared<IVFSQHybrid>(device_index, device_id, res);
return new_idx; return new_idx;
} else { } else {
KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource"); KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id_) + "resource");
} }
} }
...@@ -153,7 +152,7 @@ IVFSQHybrid::LoadQuantizer(const Config& conf) { ...@@ -153,7 +152,7 @@ IVFSQHybrid::LoadQuantizer(const Config& conf) {
gpu_mode = 1; gpu_mode = 1;
return q; return q;
} else { } else {
KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource"); KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id_) + "resource");
} }
} }
...@@ -216,7 +215,7 @@ IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) { ...@@ -216,7 +215,7 @@ IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) {
auto sq_idx = std::make_shared<IVFSQHybrid>(new_idx, gpu_id_, res); auto sq_idx = std::make_shared<IVFSQHybrid>(new_idx, gpu_id_, res);
return sq_idx; return sq_idx;
} else { } else {
KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource"); KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id_) + "resource");
} }
} }
...@@ -243,7 +242,7 @@ IVFSQHybrid::CopyCpuToGpuWithQuantizer(const int64_t& device_id, const Config& c ...@@ -243,7 +242,7 @@ IVFSQHybrid::CopyCpuToGpuWithQuantizer(const int64_t& device_id, const Config& c
q->size = index_composition.quantizer->d * index_composition.quantizer->getNumVecs() * sizeof(float); q->size = index_composition.quantizer->d * index_composition.quantizer->getNumVecs() * sizeof(float);
return std::make_pair(new_idx, q); return std::make_pair(new_idx, q);
} else { } else {
KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource"); KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id_) + "resource");
} }
} }
......
...@@ -96,10 +96,10 @@ class ParamGenerator { ...@@ -96,10 +96,10 @@ class ParamGenerator {
auto tempconf = std::make_shared<knowhere::IVFPQCfg>(); auto tempconf = std::make_shared<knowhere::IVFPQCfg>();
tempconf->d = DIM; tempconf->d = DIM;
tempconf->gpu_id = device_id; tempconf->gpu_id = device_id;
tempconf->nlist = 100; tempconf->nlist = 25;
tempconf->nprobe = 16; tempconf->nprobe = 4;
tempconf->k = K; tempconf->k = K;
tempconf->m = 8; tempconf->m = 4;
tempconf->nbits = 8; tempconf->nbits = 8;
tempconf->metric_type = knowhere::METRICTYPE::L2; tempconf->metric_type = knowhere::METRICTYPE::L2;
return tempconf; return tempconf;
......
...@@ -94,7 +94,6 @@ class OptimizerInst { ...@@ -94,7 +94,6 @@ class OptimizerInst {
std::lock_guard<std::mutex> lock(mutex_); std::lock_guard<std::mutex> lock(mutex_);
if (instance == nullptr) { if (instance == nullptr) {
std::vector<PassPtr> pass_list; std::vector<PassPtr> pass_list;
pass_list.push_back(std::make_shared<LargeSQ8HPass>());
pass_list.push_back(std::make_shared<HybridPass>()); pass_list.push_back(std::make_shared<HybridPass>());
instance = std::make_shared<Optimizer>(pass_list); instance = std::make_shared<Optimizer>(pass_list);
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册