Unverified commit ce4dcf69, authored by Cai Yudong, committed by GitHub

add debug APIs and update GPU temp memory size (#2420)

* add debug APIs and update GPU temp memory size
Signed-off-by: yudong.cai <yudong.cai@zilliz.com>

* update
Signed-off-by: yudong.cai <yudong.cai@zilliz.com>

* update thread name
Signed-off-by: yudong.cai <yudong.cai@zilliz.com>

* update DESIGN.md
Signed-off-by: yudong.cai <yudong.cai@zilliz.com>

* retry ci
Signed-off-by: yudong.cai <yudong.cai@zilliz.com>
Parent 2f94eb65

@@ -21,6 +21,7 @@ Please mark all change in change log and use the issue from GitHub
 ## Task
 # Milvus 0.9.0 (2020-05-15)
 ## Bug
@@ -8,3 +8,6 @@ The following list contains existing design documents for Milvus.
 - [Support delete/search by ID, attribute filtering, ID de-duplication](https://docs.google.com/document/d/1CDKdTj_DnE90YaZrPgsMaphqOTkMdbKETNrsFKj_Bco/edit?usp=sharing)
 - [Support write-ahead logging](https://docs.google.com/document/d/12N8RC_wJb2dvEKY9jrlh8hU_eH8jxQVBewoPuHNqcXE/edit?usp=sharing)
 - [Support in-service config modification](https://docs.google.com/document/d/1pK1joWJgAHM5nVp3q005iLbLqU5bn9InWeBy0mRAoSg/edit?usp=sharing)
+- [Support Multi-Storage](https://docs.google.com/document/d/1iwwLH4Jtm3OXIVb7jFYsfmcbOyX6AWZKaNJAaXC7-cw/edit?usp=sharing)
+- [Support AVX-512](https://docs.google.com/document/d/1do6_JgRCYdcV95sTPE6rLoiBK8wAcZki5Ypp7jbgqK0/edit?usp=sharing)
+- [Refactor Knowhere](https://docs.google.com/document/d/1HY27EXV4UjJhDEmJ9t4Rjh7I1sB8iJHvqvliM6HHLS8/edit?usp=sharing)
@@ -110,10 +110,10 @@ class ExecutionEngine {
     // Merge(const std::string& location) = 0;

     virtual Status
-    GetVectorByID(const int64_t& id, float* vector, bool hybrid) = 0;
+    GetVectorByID(const int64_t id, float* vector, bool hybrid) = 0;

     virtual Status
-    GetVectorByID(const int64_t& id, uint8_t* vector, bool hybrid) = 0;
+    GetVectorByID(const int64_t id, uint8_t* vector, bool hybrid) = 0;

     virtual Status
     ExecBinaryQuery(query::GeneralQueryPtr general_query, faiss::ConcurrentBitsetPtr bitset,
@@ -1213,7 +1213,7 @@ ExecutionEngineImpl::Search(int64_t n, const uint8_t* data, int64_t k, const mil
 }

 Status
-ExecutionEngineImpl::GetVectorByID(const int64_t& id, float* vector, bool hybrid) {
+ExecutionEngineImpl::GetVectorByID(const int64_t id, float* vector, bool hybrid) {
     if (index_ == nullptr) {
         LOG_ENGINE_ERROR_ << "ExecutionEngineImpl: index is null, failed to search";
         return Status(DB_ERROR, "index is null");
@@ -1238,7 +1238,7 @@ ExecutionEngineImpl::GetVectorByID(const int64_t& id, float* vector, bool hybrid
 }

 Status
-ExecutionEngineImpl::GetVectorByID(const int64_t& id, uint8_t* vector, bool hybrid) {
+ExecutionEngineImpl::GetVectorByID(const int64_t id, uint8_t* vector, bool hybrid) {
     if (index_ == nullptr) {
         LOG_ENGINE_ERROR_ << "ExecutionEngineImpl: index is null, failed to search";
         return Status(DB_ERROR, "index is null");
@@ -64,10 +64,10 @@ class ExecutionEngineImpl : public ExecutionEngine {
     CopyToCpu() override;

     Status
-    GetVectorByID(const int64_t& id, float* vector, bool hybrid) override;
+    GetVectorByID(const int64_t id, float* vector, bool hybrid) override;

     Status
-    GetVectorByID(const int64_t& id, uint8_t* vector, bool hybrid) override;
+    GetVectorByID(const int64_t id, uint8_t* vector, bool hybrid) override;

     Status
     ExecBinaryQuery(query::GeneralQueryPtr general_query, faiss::ConcurrentBitsetPtr bitset,
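The GetVectorByID hunks above (and the constructor hunks further down) replace `const int64_t&` parameters with plain `int64_t`. A minimal sketch, using hypothetical functions rather than the Milvus sources, of why pass-by-value is the usual choice for register-sized scalars:

```cpp
#include <cstdint>

// Hypothetical functions, only to contrast the two signatures.
// A const reference to an int64_t is passed as a pointer, so the callee
// reads the value through one extra indirection.
int64_t LookupByConstRef(const int64_t& id) {
    return id + 1;
}

// Passing the 8-byte scalar by value lets it travel in a register and
// avoids aliasing concerns; it is never more expensive than the reference.
int64_t LookupByValue(int64_t id) {
    return id + 1;
}
```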
@@ -70,8 +70,8 @@ KnowhereResource::Initialize() {
         return Status::OK();

     struct GpuResourceSetting {
-        int64_t pinned_memory = 300 * M_BYTE;
-        int64_t temp_memory = 300 * M_BYTE;
+        int64_t pinned_memory = 256 * M_BYTE;
+        int64_t temp_memory = 256 * M_BYTE;
         int64_t resource_num = 2;
     };
     using GpuResourcesArray = std::map<int64_t, GpuResourceSetting>;
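This hunk lowers the default pinned and temporary GPU memory per device from 300 MB to 256 MB. A minimal sketch of the arithmetic, assuming M_BYTE is one mebibyte (1 << 20 bytes), which matches the MB constant introduced in the FaissGpuResourceMgr.cpp hunk below:

```cpp
#include <cstdint>
#include <iostream>

// Assumption: M_BYTE is one mebibyte, mirroring the MB constant added below.
constexpr int64_t M_BYTE = 1LL << 20;

struct GpuResourceSetting {
    int64_t pinned_memory = 256 * M_BYTE;  // 268,435,456 bytes
    int64_t temp_memory = 256 * M_BYTE;    // 268,435,456 bytes
    int64_t resource_num = 2;
};

int main() {
    GpuResourceSetting setting;
    // prints "temp memory per device: 256 MB"
    std::cout << "temp memory per device: " << setting.temp_memory / M_BYTE << " MB" << std::endl;
    return 0;
}
```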
@@ -27,7 +27,7 @@ class NsgIndex;
 class NSG : public VecIndex {
  public:
-    explicit NSG(const int64_t& gpu_num = -1) : gpu_(gpu_num) {
+    explicit NSG(const int64_t gpu_num = -1) : gpu_(gpu_num) {
         if (gpu_ >= 0) {
             index_mode_ = IndexMode::MODE_GPU;
         }
@@ -25,7 +25,7 @@ using Graph = std::vector<std::vector<int64_t>>;
 class GPUIDMAP : public IDMAP, public GPUIndex {
  public:
-    explicit GPUIDMAP(std::shared_ptr<faiss::Index> index, const int64_t& device_id, ResPtr& res)
+    explicit GPUIDMAP(std::shared_ptr<faiss::Index> index, const int64_t device_id, ResPtr& res)
         : IDMAP(std::move(index)), GPUIndex(device_id, res) {
         index_mode_ = IndexMode::MODE_GPU;
     }
@@ -46,7 +46,7 @@ class IVFSQHybrid : public GPUIVFSQ {
         gpu_mode_ = 0;
     }

-    explicit IVFSQHybrid(std::shared_ptr<faiss::Index> index, const int64_t& device_id, ResPtr& resource)
+    explicit IVFSQHybrid(std::shared_ptr<faiss::Index> index, const int64_t device_id, ResPtr& resource)
         : GPUIVFSQ(index, device_id, resource) {
         index_type_ = IndexEnum::INDEX_FAISS_IVFSQ8H;
         gpu_mode_ = 2;
@@ -18,6 +18,8 @@
 namespace milvus {
 namespace knowhere {

+constexpr int64_t MB = 1LL << 20;
+
 FaissGpuResourceMgr&
 FaissGpuResourceMgr::GetInstance() {
     static FaissGpuResourceMgr instance;
@@ -45,8 +47,8 @@ FaissGpuResourceMgr::InitDevice(int64_t device_id, int64_t pin_mem_size, int64_t
     params.resource_num = res_num;
     devices_params_.emplace(device_id, params);
-    LOG_KNOWHERE_DEBUG_ << "DEVICEID " << device_id << ", pin_mem_size " << pin_mem_size << ", temp_mem_size "
-                        << temp_mem_size << ", resource count " << res_num;
+    LOG_KNOWHERE_DEBUG_ << "DEVICEID " << device_id << ", pin_mem_size " << pin_mem_size / MB << "MB, temp_mem_size "
+                        << temp_mem_size / MB << "MB, resource count " << res_num;
 }

 void
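The debug log now divides the byte counts by the new MB constant before printing. A small stand-alone illustration of the effect (std::cout stands in for LOG_KNOWHERE_DEBUG_, which this sketch does not reproduce):

```cpp
#include <cstdint>
#include <iostream>

constexpr int64_t MB = 1LL << 20;

int main() {
    int64_t temp_mem_size = 256 * MB;
    // Before the change the log printed the raw byte count:
    std::cout << "temp_mem_size " << temp_mem_size << std::endl;               // 268435456
    // After the change it prints a human-readable mebibyte value:
    std::cout << "temp_mem_size " << temp_mem_size / MB << "MB" << std::endl;  // 256MB
    return 0;
}
```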
@@ -87,19 +87,19 @@ class FaissGpuResourceMgr {
 class ResScope {
  public:
-    ResScope(ResPtr& res, const int64_t& device_id, const bool& isown)
+    ResScope(ResPtr& res, const int64_t device_id, const bool isown)
         : resource(res), device_id(device_id), move(true), own(isown) {
         Lock();
     }

-    ResScope(ResWPtr& res, const int64_t& device_id, const bool& isown)
+    ResScope(ResWPtr& res, const int64_t device_id, const bool isown)
         : resource(res), device_id(device_id), move(true), own(isown) {
         Lock();
     }

     // specif for search
     // get the ownership of gpuresource and gpu
-    ResScope(ResWPtr& res, const int64_t& device_id) : device_id(device_id), move(false), own(true) {
+    ResScope(ResWPtr& res, const int64_t device_id) : device_id(device_id), move(false), own(true) {
         resource = res.lock();
         Lock();
     }
@@ -184,7 +184,7 @@ Resource::loader_function() {
 void
 Resource::executor_function() {
-    SetThreadName("taskexector_th");
+    SetThreadName("taskexecutor_th");
     if (subscriber_) {
         auto event = std::make_shared<StartUpEvent>(shared_from_this());
         subscriber_(std::static_pointer_cast<Event>(event));
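The executor thread is renamed from the misspelled "taskexector_th" to "taskexecutor_th". A sketch of a SetThreadName-style helper, assuming an implementation on top of the Linux pthread_setname_np call (the real Milvus helper may differ); note that Linux limits thread names to 15 characters plus the terminator, and "taskexecutor_th" is exactly 15 characters long:

```cpp
#include <pthread.h>
#include <string>

// Hypothetical helper, assuming a pthread_setname_np-based implementation.
// Linux rejects names longer than 15 characters (ERANGE), so the name is
// truncated defensively; "taskexecutor_th" already fits exactly.
void SetThreadNameSketch(const std::string& name) {
    pthread_setname_np(pthread_self(), name.substr(0, 15).c_str());
}
```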
@@ -62,6 +62,15 @@ class Path {
         }
     }

+    std::string
+    ToString() {
+        std::string str = path_[index_];
+        for (int64_t i = index_; i > 0; i--) {
+            str += "->" + path_[i - 1];
+        }
+        return str;
+    }
+
  public:
     std::string& operator[](uint64_t index) {
         return path_[index];
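The new Path::ToString() debug API joins the hops of a scheduler path with "->", walking path_ from index_ down to 0. A stand-alone copy of that loop with made-up hop names shows the resulting string:

```cpp
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

int main() {
    // Hypothetical hop names; the real entries are scheduler resource names.
    std::vector<std::string> path_ = {"gpu0", "cpu", "disk"};
    uint64_t index_ = 2;

    // Same loop as Path::ToString(): start at index_ and append earlier hops.
    std::string str = path_[index_];
    for (int64_t i = index_; i > 0; i--) {
        str += "->" + path_[i - 1];
    }
    std::cout << str << std::endl;  // prints "disk->cpu->gpu0"
    return 0;
}
```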
@@ -35,14 +35,13 @@ class SpecResLabel : public TaskLabel {
         return resource_;
     }

-    inline std::string&
-    resource_name() {
-        return resource_name_;
+    inline std::string
+    name() const override {
+        return resource_.lock()->name();
     }

  private:
     ResourceWPtr resource_;
-    std::string resource_name_;
 };

 using SpecResLabelPtr = std::shared_ptr<SpecResLabel>();
@@ -12,6 +12,7 @@
 #pragma once

 #include <memory>
+#include <string>

 namespace milvus {
 namespace scheduler {
@@ -28,6 +29,11 @@ class TaskLabel {
         return type_;
     }

+    virtual inline std::string
+    name() const {
+        return "";
+    }
+
  protected:
     explicit TaskLabel(TaskLabelType type) : type_(type) {
     }
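Together with the SpecResLabel change above, this gives every task label a virtual name(): the base class answers with an empty string, and the resource-specific label reports the resource it is bound to. A simplified, self-contained sketch of the pattern (stand-in classes, not the real scheduler types):

```cpp
#include <iostream>
#include <memory>
#include <string>

class TaskLabelSketch {
 public:
    virtual ~TaskLabelSketch() = default;

    // Base labels report an empty name by default.
    virtual std::string
    name() const {
        return "";
    }
};

class SpecResLabelSketch : public TaskLabelSketch {
 public:
    explicit SpecResLabelSketch(std::string resource_name) : resource_name_(std::move(resource_name)) {
    }

    // A resource-specific label reports the resource it is pinned to.
    std::string
    name() const override {
        return resource_name_;
    }

 private:
    std::string resource_name_;
};

int main() {
    std::shared_ptr<TaskLabelSketch> label = std::make_shared<SpecResLabelSketch>("gpu0");
    std::cout << label->name() << std::endl;  // prints "gpu0"
    return 0;
}
```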