Unverified commit ce4dcf69, authored by Cai Yudong, committed by GitHub

add debug APIs and update GPU temp memory size (#2420)

* add debug APIs and update GPU temp memory size
Signed-off-by: yudong.cai <yudong.cai@zilliz.com>

* update
Signed-off-by: yudong.cai <yudong.cai@zilliz.com>

* update thread name
Signed-off-by: yudong.cai <yudong.cai@zilliz.com>

* update DESIGN.md
Signed-off-by: yudong.cai <yudong.cai@zilliz.com>

* retry ci
Signed-off-by: yudong.cai <yudong.cai@zilliz.com>
Parent 2f94eb65

@@ -21,6 +21,7 @@ Please mark all change in change log and use the issue from GitHub
 ## Task
 # Milvus 0.9.0 (2020-05-15)
 ## Bug
@@ -8,3 +8,6 @@ The following list contains existing design documents for Milvus.
 - [Support delete/search by ID, attribute filtering, ID de-duplication](https://docs.google.com/document/d/1CDKdTj_DnE90YaZrPgsMaphqOTkMdbKETNrsFKj_Bco/edit?usp=sharing)
 - [Support write-ahead logging](https://docs.google.com/document/d/12N8RC_wJb2dvEKY9jrlh8hU_eH8jxQVBewoPuHNqcXE/edit?usp=sharing)
 - [Support in-service config modification](https://docs.google.com/document/d/1pK1joWJgAHM5nVp3q005iLbLqU5bn9InWeBy0mRAoSg/edit?usp=sharing)
+- [Support Multi-Storage](https://docs.google.com/document/d/1iwwLH4Jtm3OXIVb7jFYsfmcbOyX6AWZKaNJAaXC7-cw/edit?usp=sharing)
+- [Support AVX-512](https://docs.google.com/document/d/1do6_JgRCYdcV95sTPE6rLoiBK8wAcZki5Ypp7jbgqK0/edit?usp=sharing)
+- [Refactor Knowhere](https://docs.google.com/document/d/1HY27EXV4UjJhDEmJ9t4Rjh7I1sB8iJHvqvliM6HHLS8/edit?usp=sharing)
@@ -110,10 +110,10 @@ class ExecutionEngine {
     // Merge(const std::string& location) = 0;

     virtual Status
-    GetVectorByID(const int64_t& id, float* vector, bool hybrid) = 0;
+    GetVectorByID(const int64_t id, float* vector, bool hybrid) = 0;

     virtual Status
-    GetVectorByID(const int64_t& id, uint8_t* vector, bool hybrid) = 0;
+    GetVectorByID(const int64_t id, uint8_t* vector, bool hybrid) = 0;

     virtual Status
     ExecBinaryQuery(query::GeneralQueryPtr general_query, faiss::ConcurrentBitsetPtr bitset,
@@ -1213,7 +1213,7 @@ ExecutionEngineImpl::Search(int64_t n, const uint8_t* data, int64_t k, const mil
 }

 Status
-ExecutionEngineImpl::GetVectorByID(const int64_t& id, float* vector, bool hybrid) {
+ExecutionEngineImpl::GetVectorByID(const int64_t id, float* vector, bool hybrid) {
     if (index_ == nullptr) {
         LOG_ENGINE_ERROR_ << "ExecutionEngineImpl: index is null, failed to search";
         return Status(DB_ERROR, "index is null");
@@ -1238,7 +1238,7 @@ ExecutionEngineImpl::GetVectorByID(const int64_t& id, float* vector, bool hybrid
 }

 Status
-ExecutionEngineImpl::GetVectorByID(const int64_t& id, uint8_t* vector, bool hybrid) {
+ExecutionEngineImpl::GetVectorByID(const int64_t id, uint8_t* vector, bool hybrid) {
     if (index_ == nullptr) {
         LOG_ENGINE_ERROR_ << "ExecutionEngineImpl: index is null, failed to search";
         return Status(DB_ERROR, "index is null");
@@ -64,10 +64,10 @@ class ExecutionEngineImpl : public ExecutionEngine {
     CopyToCpu() override;

     Status
-    GetVectorByID(const int64_t& id, float* vector, bool hybrid) override;
+    GetVectorByID(const int64_t id, float* vector, bool hybrid) override;

     Status
-    GetVectorByID(const int64_t& id, uint8_t* vector, bool hybrid) override;
+    GetVectorByID(const int64_t id, uint8_t* vector, bool hybrid) override;

     Status
     ExecBinaryQuery(query::GeneralQueryPtr general_query, faiss::ConcurrentBitsetPtr bitset,
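The GetVectorByID hunks above (and the constructor hunks further down) replace `const int64_t&` parameters with plain `int64_t`. A minimal sketch, using hypothetical functions rather than the Milvus sources, of why pass-by-value is the usual choice for register-sized scalars:

```cpp
#include <cstdint>

// Hypothetical functions, only to contrast the two signatures.
// A const reference to an int64_t is passed as a pointer, so the callee
// reads the value through one extra indirection.
int64_t LookupByConstRef(const int64_t& id) {
    return id + 1;
}

// Passing the 8-byte scalar by value lets it travel in a register and
// avoids aliasing concerns; it is never more expensive than the reference.
int64_t LookupByValue(int64_t id) {
    return id + 1;
}
```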
@@ -70,8 +70,8 @@ KnowhereResource::Initialize() {
         return Status::OK();

     struct GpuResourceSetting {
-        int64_t pinned_memory = 300 * M_BYTE;
-        int64_t temp_memory = 300 * M_BYTE;
+        int64_t pinned_memory = 256 * M_BYTE;
+        int64_t temp_memory = 256 * M_BYTE;
         int64_t resource_num = 2;
     };
     using GpuResourcesArray = std::map<int64_t, GpuResourceSetting>;
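This hunk lowers the default pinned and temporary GPU memory per device from 300 MB to 256 MB. A minimal sketch of the arithmetic, assuming M_BYTE is one mebibyte (1 << 20 bytes), which matches the MB constant introduced in the FaissGpuResourceMgr.cpp hunk below:

```cpp
#include <cstdint>
#include <iostream>

// Assumption: M_BYTE is one mebibyte, mirroring the MB constant added below.
constexpr int64_t M_BYTE = 1LL << 20;

struct GpuResourceSetting {
    int64_t pinned_memory = 256 * M_BYTE;  // 268,435,456 bytes
    int64_t temp_memory = 256 * M_BYTE;    // 268,435,456 bytes
    int64_t resource_num = 2;
};

int main() {
    GpuResourceSetting setting;
    // prints "temp memory per device: 256 MB"
    std::cout << "temp memory per device: " << setting.temp_memory / M_BYTE << " MB" << std::endl;
    return 0;
}
```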
@@ -27,7 +27,7 @@ class NsgIndex;
 class NSG : public VecIndex {
  public:
-    explicit NSG(const int64_t& gpu_num = -1) : gpu_(gpu_num) {
+    explicit NSG(const int64_t gpu_num = -1) : gpu_(gpu_num) {
         if (gpu_ >= 0) {
             index_mode_ = IndexMode::MODE_GPU;
         }
@@ -25,7 +25,7 @@ using Graph = std::vector<std::vector<int64_t>>;
 class GPUIDMAP : public IDMAP, public GPUIndex {
  public:
-    explicit GPUIDMAP(std::shared_ptr<faiss::Index> index, const int64_t& device_id, ResPtr& res)
+    explicit GPUIDMAP(std::shared_ptr<faiss::Index> index, const int64_t device_id, ResPtr& res)
         : IDMAP(std::move(index)), GPUIndex(device_id, res) {
         index_mode_ = IndexMode::MODE_GPU;
     }
@@ -46,7 +46,7 @@ class IVFSQHybrid : public GPUIVFSQ {
         gpu_mode_ = 0;
     }

-    explicit IVFSQHybrid(std::shared_ptr<faiss::Index> index, const int64_t& device_id, ResPtr& resource)
+    explicit IVFSQHybrid(std::shared_ptr<faiss::Index> index, const int64_t device_id, ResPtr& resource)
         : GPUIVFSQ(index, device_id, resource) {
         index_type_ = IndexEnum::INDEX_FAISS_IVFSQ8H;
         gpu_mode_ = 2;
@@ -18,6 +18,8 @@
 namespace milvus {
 namespace knowhere {

+constexpr int64_t MB = 1LL << 20;
+
 FaissGpuResourceMgr&
 FaissGpuResourceMgr::GetInstance() {
     static FaissGpuResourceMgr instance;
@@ -45,8 +47,8 @@ FaissGpuResourceMgr::InitDevice(int64_t device_id, int64_t pin_mem_size, int64_t
     params.resource_num = res_num;
     devices_params_.emplace(device_id, params);
-    LOG_KNOWHERE_DEBUG_ << "DEVICEID " << device_id << ", pin_mem_size " << pin_mem_size << ", temp_mem_size "
-                        << temp_mem_size << ", resource count " << res_num;
+    LOG_KNOWHERE_DEBUG_ << "DEVICEID " << device_id << ", pin_mem_size " << pin_mem_size / MB << "MB, temp_mem_size "
+                        << temp_mem_size / MB << "MB, resource count " << res_num;
 }

 void
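The debug log now divides the byte counts by the new MB constant before printing. A small stand-alone illustration of the effect (std::cout stands in for LOG_KNOWHERE_DEBUG_, which this sketch does not reproduce):

```cpp
#include <cstdint>
#include <iostream>

constexpr int64_t MB = 1LL << 20;

int main() {
    int64_t temp_mem_size = 256 * MB;
    // Before the change the log printed the raw byte count:
    std::cout << "temp_mem_size " << temp_mem_size << std::endl;               // 268435456
    // After the change it prints a human-readable mebibyte value:
    std::cout << "temp_mem_size " << temp_mem_size / MB << "MB" << std::endl;  // 256MB
    return 0;
}
```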
@@ -87,19 +87,19 @@ class FaissGpuResourceMgr {
 class ResScope {
  public:
-    ResScope(ResPtr& res, const int64_t& device_id, const bool& isown)
+    ResScope(ResPtr& res, const int64_t device_id, const bool isown)
         : resource(res), device_id(device_id), move(true), own(isown) {
         Lock();
     }

-    ResScope(ResWPtr& res, const int64_t& device_id, const bool& isown)
+    ResScope(ResWPtr& res, const int64_t device_id, const bool isown)
         : resource(res), device_id(device_id), move(true), own(isown) {
         Lock();
     }

     // specif for search
     // get the ownership of gpuresource and gpu
-    ResScope(ResWPtr& res, const int64_t& device_id) : device_id(device_id), move(false), own(true) {
+    ResScope(ResWPtr& res, const int64_t device_id) : device_id(device_id), move(false), own(true) {
         resource = res.lock();
         Lock();
     }
@@ -184,7 +184,7 @@ Resource::loader_function() {
 void
 Resource::executor_function() {
-    SetThreadName("taskexector_th");
+    SetThreadName("taskexecutor_th");
     if (subscriber_) {
         auto event = std::make_shared<StartUpEvent>(shared_from_this());
         subscriber_(std::static_pointer_cast<Event>(event));
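The executor thread is renamed from the misspelled "taskexector_th" to "taskexecutor_th". A sketch of a SetThreadName-style helper, assuming an implementation on top of the Linux pthread_setname_np call (the real Milvus helper may differ); note that Linux limits thread names to 15 characters plus the terminator, and "taskexecutor_th" is exactly 15 characters long:

```cpp
#include <pthread.h>
#include <string>

// Hypothetical helper, assuming a pthread_setname_np-based implementation.
// Linux rejects names longer than 15 characters (ERANGE), so the name is
// truncated defensively; "taskexecutor_th" already fits exactly.
void SetThreadNameSketch(const std::string& name) {
    pthread_setname_np(pthread_self(), name.substr(0, 15).c_str());
}
```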
@@ -62,6 +62,15 @@ class Path {
         }
     }

+    std::string
+    ToString() {
+        std::string str = path_[index_];
+        for (int64_t i = index_; i > 0; i--) {
+            str += "->" + path_[i - 1];
+        }
+        return str;
+    }
+
  public:
     std::string& operator[](uint64_t index) {
         return path_[index];
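The new Path::ToString() debug API joins the hops of a scheduler path with "->", walking path_ from index_ down to 0. A stand-alone copy of that loop with made-up hop names shows the resulting string:

```cpp
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

int main() {
    // Hypothetical hop names; the real entries are scheduler resource names.
    std::vector<std::string> path_ = {"gpu0", "cpu", "disk"};
    uint64_t index_ = 2;

    // Same loop as Path::ToString(): start at index_ and append earlier hops.
    std::string str = path_[index_];
    for (int64_t i = index_; i > 0; i--) {
        str += "->" + path_[i - 1];
    }
    std::cout << str << std::endl;  // prints "disk->cpu->gpu0"
    return 0;
}
```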
@@ -35,14 +35,13 @@ class SpecResLabel : public TaskLabel {
         return resource_;
     }

-    inline std::string&
-    resource_name() {
-        return resource_name_;
+    inline std::string
+    name() const override {
+        return resource_.lock()->name();
     }

  private:
     ResourceWPtr resource_;
-    std::string resource_name_;
 };

 using SpecResLabelPtr = std::shared_ptr<SpecResLabel>();
@@ -12,6 +12,7 @@
 #pragma once

 #include <memory>
+#include <string>

 namespace milvus {
 namespace scheduler {
@@ -28,6 +29,11 @@ class TaskLabel {
         return type_;
     }

+    virtual inline std::string
+    name() const {
+        return "";
+    }
+
  protected:
     explicit TaskLabel(TaskLabelType type) : type_(type) {
     }
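Together with the SpecResLabel change above, this gives every task label a virtual name(): the base class answers with an empty string, and the resource-specific label reports the resource it is bound to. A simplified, self-contained sketch of the pattern (stand-in classes, not the real scheduler types):

```cpp
#include <iostream>
#include <memory>
#include <string>

class TaskLabelSketch {
 public:
    virtual ~TaskLabelSketch() = default;

    // Base labels report an empty name by default.
    virtual std::string
    name() const {
        return "";
    }
};

class SpecResLabelSketch : public TaskLabelSketch {
 public:
    explicit SpecResLabelSketch(std::string resource_name) : resource_name_(std::move(resource_name)) {
    }

    // A resource-specific label reports the resource it is pinned to.
    std::string
    name() const override {
        return resource_name_;
    }

 private:
    std::string resource_name_;
};

int main() {
    std::shared_ptr<TaskLabelSketch> label = std::make_shared<SpecResLabelSketch>("gpu0");
    std::cout << label->name() << std::endl;  // prints "gpu0"
    return 0;
}
```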