From 6d120c14d7b1d95aabd5d08bd7a48693e34bbf9f Mon Sep 17 00:00:00 2001
From: JinHai-CN <hai.jin@zilliz.com>
Date: Tue, 15 Oct 2019 12:07:47 +0800
Subject: [PATCH] Fix MS-605 Searching vectors crash

Former-commit-id: d8605bfbdf33df6481aee9d502df85f820f51170
---
 core/src/db/DBImpl.cpp                        |  8 +-
 core/src/db/engine/ExecutionEngine.h          |  3 +-
 core/src/db/engine/ExecutionEngineImpl.cpp    | 76 ++++++++++++++-----
 core/src/db/engine/ExecutionEngineImpl.h      |  2 +-
 .../index/vector_index/IndexIVFSQHybrid.cpp   | 10 +--
 core/src/scheduler/SchedInst.h                |  1 -
 6 files changed, 69 insertions(+), 31 deletions(-)
diff --git a/core/src/db/DBImpl.cpp b/core/src/db/DBImpl.cpp
index cf8f1824..6d239122 100644
--- a/core/src/db/DBImpl.cpp
+++ b/core/src/db/DBImpl.cpp
@@ -540,7 +540,13 @@ DBImpl::StartMetricTask() {
     server::Metrics::GetInstance().KeepingAliveCounterIncrement(METRIC_ACTION_INTERVAL);
     int64_t cache_usage = cache::CpuCacheMgr::GetInstance()->CacheUsage();
     int64_t cache_total = cache::CpuCacheMgr::GetInstance()->CacheCapacity();
-    server::Metrics::GetInstance().CpuCacheUsageGaugeSet(cache_usage * 100 / cache_total);
+    // Report CPU cache usage as a percentage; a non-positive capacity is reported as 0 instead of dividing by it.
+    double cache_usage_percent = 0;
+    if (cache_total > 0) {
+        cache_usage_percent = static_cast<double>(cache_usage) * 100 / cache_total;
+    }
+    server::Metrics::GetInstance().CpuCacheUsageGaugeSet(cache_usage_percent);
+
     server::Metrics::GetInstance().GpuCacheUsageGaugeSet();
     uint64_t size;
     Size(size);
diff --git a/core/src/db/engine/ExecutionEngine.h b/core/src/db/engine/ExecutionEngine.h
index 2c4960e6..51c77eb7 100644
--- a/core/src/db/engine/ExecutionEngine.h
+++ b/core/src/db/engine/ExecutionEngine.h
@@ -80,8 +80,7 @@ class ExecutionEngine {
     Merge(const std::string& location) = 0;
 
     virtual Status
-    Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels,
-           bool hybrid) const = 0;
+    Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels, bool hybrid) = 0;
 
     virtual std::shared_ptr<ExecutionEngine>
     BuildIndex(const std::string& location, EngineType engine_type) = 0;
diff --git a/core/src/db/engine/ExecutionEngineImpl.cpp b/core/src/db/engine/ExecutionEngineImpl.cpp
index 1d758f38..7d10ab5c 100644
--- a/core/src/db/engine/ExecutionEngineImpl.cpp
+++ b/core/src/db/engine/ExecutionEngineImpl.cpp
@@ -37,6 +37,7 @@
 #include <utility>
 #include <vector>
 
+//#define ON_SEARCH
 namespace milvus {
 namespace engine {
 
@@ -248,26 +249,6 @@ ExecutionEngineImpl::Load(bool to_cache) {
 Status
 ExecutionEngineImpl::CopyToGpu(uint64_t device_id, bool hybrid) {
     if (hybrid) {
-        auto key = location_ + ".quantizer";
-        auto quantizer =
-            std::static_pointer_cast<CachedQuantizer>(cache::GpuCacheMgr::GetInstance(device_id)->GetIndex(key));
-
-        auto conf = std::make_shared<knowhere::QuantizerCfg>();
-        conf->gpu_id = device_id;
-
-        if (quantizer) {
-            // cache hit
-            conf->mode = 2;
-            auto new_index = index_->LoadData(quantizer->Data(), conf);
-            index_ = new_index;
-        } else {
-            auto pair = index_->CopyToGpuWithQuantizer(device_id);
-            index_ = pair.first;
-
-            // cache
-            auto cached_quantizer = std::make_shared<CachedQuantizer>(pair.second);
-            cache::GpuCacheMgr::GetInstance(device_id)->InsertItem(key, cached_quantizer);
-        }
         return Status::OK();
     }
 
@@ -415,7 +396,60 @@ ExecutionEngineImpl::BuildIndex(const std::string& location, EngineType engine_t
 
 Status
 ExecutionEngineImpl::Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels,
-                            bool hybrid) const {
+                            bool hybrid) {
+    // For IVFSQ8H outside the hybrid path, index_ is first replaced by the result of LoadData (cached quantizer)
+    // or CopyToGpuWithQuantizer (cache miss). A null index_ skips this so the check below reports it.
+    if (index_type_ == EngineType::FAISS_IVFSQ8H && !hybrid && index_ != nullptr) {
+        const std::string key = location_ + ".quantizer";
+        const std::vector<uint64_t> gpus = scheduler::get_gpu_pool();
+        if (gpus.empty()) {
+            // The cache-miss path indexes into gpus, so an empty pool has to be rejected up front.
+            ENGINE_LOG_ERROR << "ExecutionEngineImpl: gpu pool is empty, failed to search";
+            return Status(DB_ERROR, "gpu pool is empty");
+        }
+
+        const int64_t NOT_FOUND = -1;
+        int64_t device_id = NOT_FOUND;
+
+        // Look for a quantizer stored under this key in the cache of any pooled GPU.
+        knowhere::QuantizerPtr quantizer = nullptr;
+        for (const auto& gpu : gpus) {
+            auto cache = cache::GpuCacheMgr::GetInstance(gpu);
+            if (auto cached_quantizer = cache->GetIndex(key)) {
+                device_id = gpu;
+                quantizer = std::static_pointer_cast<CachedQuantizer>(cached_quantizer)->Data();
+            }
+        }
+
+        if (device_id != NOT_FOUND) {
+            // cache hit
+            auto config = std::make_shared<knowhere::QuantizerCfg>();
+            config->gpu_id = device_id;
+            config->mode = 2;
+            index_ = index_->LoadData(quantizer, config);
+        } else {
+            // cache miss: choose the GPU whose cache has the largest capacity minus usage
+            std::vector<int64_t> all_free_mem;
+            all_free_mem.reserve(gpus.size());
+            for (const auto& gpu : gpus) {
+                auto cache = cache::GpuCacheMgr::GetInstance(gpu);
+                auto free_mem = cache->CacheCapacity() - cache->CacheUsage();
+                all_free_mem.push_back(free_mem);
+            }
+
+            auto max_e = std::max_element(all_free_mem.begin(), all_free_mem.end());
+            auto best_index = std::distance(all_free_mem.begin(), max_e);
+            device_id = gpus[best_index];
+
+            auto pair = index_->CopyToGpuWithQuantizer(device_id);
+            index_ = pair.first;
+
+            // cache the new quantizer under the same key on the chosen GPU
+            auto cached_quantizer = std::make_shared<CachedQuantizer>(pair.second);
+            cache::GpuCacheMgr::GetInstance(device_id)->InsertItem(key, cached_quantizer);
+        }
+    }
+
     if (index_ == nullptr) {
         ENGINE_LOG_ERROR << "ExecutionEngineImpl: index is null, failed to search";
         return Status(DB_ERROR, "index is null");
diff --git a/core/src/db/engine/ExecutionEngineImpl.h b/core/src/db/engine/ExecutionEngineImpl.h
index 10379d16..7eb30442 100644
--- a/core/src/db/engine/ExecutionEngineImpl.h
+++ b/core/src/db/engine/ExecutionEngineImpl.h
@@ -72,7 +72,7 @@ class ExecutionEngineImpl : public ExecutionEngine {
 
     Status
     Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels,
-           bool hybrid = false) const override;
+           bool hybrid = false) override;
 
     ExecutionEnginePtr
     BuildIndex(const std::string& location, EngineType engine_type) override;
diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp
index 9b6bebd7..3408e480 100644
--- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp
+++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp
@@ -91,11 +91,11 @@ IVFSQHybrid::CopyCpuToGpu(const int64_t& device_id, const Config& config) {
 
         auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), device_id, &index_composition, &option);
 
-        std::shared_ptr<faiss::Index> device_index = std::shared_ptr<faiss::Index>(gpu_index);;
+        std::shared_ptr<faiss::Index> device_index = std::shared_ptr<faiss::Index>(gpu_index);
         auto new_idx = std::make_shared<IVFSQHybrid>(device_index, device_id, res);
         return new_idx;
     } else {
-        KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource");
+        KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(device_id) + " resource");
     }
 }
 
@@ -151,7 +151,7 @@ IVFSQHybrid::LoadQuantizer(const Config& conf) {
         gpu_mode = 1;
         return q;
     } else {
-        KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource");
+        KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id_) + " resource");
     }
 }
 
@@ -214,7 +214,7 @@ IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) {
         auto sq_idx = std::make_shared<IVFSQHybrid>(new_idx, gpu_id_, res);
         return sq_idx;
     } else {
-        KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource");
+        KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id_) + " resource");
     }
 }
 
@@ -241,7 +241,7 @@ IVFSQHybrid::CopyCpuToGpuWithQuantizer(const int64_t& device_id, const Config& c
         q->size = index_composition.quantizer->d * index_composition.quantizer->getNumVecs() * sizeof(float);
         return std::make_pair(new_idx, q);
     } else {
-        KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource");
+        KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id_) + " resource");
     }
 }
 
diff --git a/core/src/scheduler/SchedInst.h b/core/src/scheduler/SchedInst.h
index b9153d3b..60033731 100644
--- a/core/src/scheduler/SchedInst.h
+++ b/core/src/scheduler/SchedInst.h
@@ -94,7 +94,6 @@ class OptimizerInst {
             std::lock_guard<std::mutex> lock(mutex_);
             if (instance == nullptr) {
                 std::vector<PassPtr> pass_list;
-                pass_list.push_back(std::make_shared<LargeSQ8HPass>());
                 pass_list.push_back(std::make_shared<HybridPass>());
                 instance = std::make_shared<Optimizer>(pass_list);
             }
-- 
GitLab