diff --git a/cpp/src/db/DB.cpp b/cpp/src/db/DB.cpp index 610e353358078496c685d1be27843fcf9d3b622e..bfb83a446c061ebc3dd6dbb97f2249a953a4f071 100644 --- a/cpp/src/db/DB.cpp +++ b/cpp/src/db/DB.cpp @@ -16,15 +16,7 @@ namespace engine { DB::~DB() {} void DB::Open(const Options& options, DB** dbptr) { - *dbptr = nullptr; - -#ifdef GPU_VERSION - std::string default_index_type{"Faiss,IVF"}; -#else - std::string default_index_type{"Faiss,IDMap"}; -#endif - - *dbptr = DBFactory::Build(options, default_index_type); + *dbptr = DBFactory::Build(options); return; } diff --git a/cpp/src/db/DBImpl.inl b/cpp/src/db/DBImpl.cpp similarity index 75% rename from cpp/src/db/DBImpl.inl rename to cpp/src/db/DBImpl.cpp index 10f16ee9eb77dd6a441c27ee810b2df0807c6792..ea380adff8419b7ce28e24f90a9022d008dedfe0 100644 --- a/cpp/src/db/DBImpl.inl +++ b/cpp/src/db/DBImpl.cpp @@ -3,11 +3,10 @@ * Unauthorized copying of this file, via any medium is strictly prohibited. * Proprietary and confidential. ******************************************************************************/ -#pragma once - #include "DBImpl.h" #include "DBMetaImpl.h" #include "Env.h" +#include "EngineFactory.h" #include "metrics/Metrics.h" #include "scheduler/SearchScheduler.h" @@ -23,80 +22,107 @@ namespace zilliz { namespace vecwise { namespace engine { +namespace { + +void CollectInsertMetrics(double total_time, size_t n, bool succeed) { + double avg_time = total_time / n; + for (int i = 0; i < n; ++i) { + server::Metrics::GetInstance().AddVectorsDurationHistogramOberve(avg_time); + } + +// server::Metrics::GetInstance().add_vector_duration_seconds_quantiles().Observe((average_time)); + if (succeed) { + server::Metrics::GetInstance().AddVectorsSuccessTotalIncrement(n); + server::Metrics::GetInstance().AddVectorsSuccessGaugeSet(n); + } + else { + server::Metrics::GetInstance().AddVectorsFailTotalIncrement(n); + server::Metrics::GetInstance().AddVectorsFailGaugeSet(n); + } +} + +void CollectQueryMetrics(double total_time, size_t nq) { + for (int i = 0; i < nq; ++i) { + server::Metrics::GetInstance().QueryResponseSummaryObserve(total_time); + } + auto average_time = total_time / nq; + server::Metrics::GetInstance().QueryVectorResponseSummaryObserve(average_time, nq); + server::Metrics::GetInstance().QueryVectorResponsePerSecondGaugeSet(double (nq) / total_time); +} + +void CollectFileMetrics(meta::TableFileSchema::FILE_TYPE file_type, size_t file_size, double total_time) { + switch(file_type) { + case meta::TableFileSchema::RAW: + case meta::TableFileSchema::TO_INDEX: { + server::Metrics::GetInstance().SearchRawDataDurationSecondsHistogramObserve(total_time); + server::Metrics::GetInstance().RawFileSizeHistogramObserve(file_size); + server::Metrics::GetInstance().RawFileSizeTotalIncrement(file_size); + server::Metrics::GetInstance().RawFileSizeGaugeSet(file_size); + break; + } + default: { + server::Metrics::GetInstance().SearchIndexDataDurationSecondsHistogramObserve(total_time); + server::Metrics::GetInstance().IndexFileSizeHistogramObserve(file_size); + server::Metrics::GetInstance().IndexFileSizeTotalIncrement(file_size); + server::Metrics::GetInstance().IndexFileSizeGaugeSet(file_size); + break; + } + } +} + +} -template -DBImpl::DBImpl(const Options& options) + +DBImpl::DBImpl(const Options& options) : env_(options.env), options_(options), bg_compaction_scheduled_(false), shutting_down_(false), bg_build_index_started_(false), pMeta_(new meta::DBMetaImpl(options_.meta)), - pMemMgr_(new MemManager(pMeta_, options_)) { + pMemMgr_(new MemManager(pMeta_, options_)) { StartTimerTasks(options_.memory_sync_interval); } -template -Status DBImpl::CreateTable(meta::TableSchema& table_schema) { +Status DBImpl::CreateTable(meta::TableSchema& table_schema) { return pMeta_->CreateTable(table_schema); } -template -Status DBImpl::DescribeTable(meta::TableSchema& table_schema) { +Status DBImpl::DescribeTable(meta::TableSchema& table_schema) { return pMeta_->DescribeTable(table_schema); } -template -Status DBImpl::HasTable(const std::string& table_id, bool& has_or_not) { +Status DBImpl::HasTable(const std::string& table_id, bool& has_or_not) { return pMeta_->HasTable(table_id, has_or_not); } -template -Status DBImpl::InsertVectors(const std::string& table_id_, +Status DBImpl::InsertVectors(const std::string& table_id_, size_t n, const float* vectors, IDNumbers& vector_ids_) { auto start_time = METRICS_NOW_TIME; Status status = pMemMgr_->InsertVectors(table_id_, n, vectors, vector_ids_); auto end_time = METRICS_NOW_TIME; - + double total_time = METRICS_MICROSECONDS(start_time,end_time); // std::chrono::microseconds time_span = std::chrono::duration_cast(end_time - start_time); // double average_time = double(time_span.count()) / n; - double total_time = METRICS_MICROSECONDS(start_time,end_time); - double avg_time = total_time / n; - for (int i = 0; i < n; ++i) { - server::Metrics::GetInstance().AddVectorsDurationHistogramOberve(avg_time); - } - -// server::Metrics::GetInstance().add_vector_duration_seconds_quantiles().Observe((average_time)); - if (!status.ok()) { - server::Metrics::GetInstance().AddVectorsFailTotalIncrement(n); - server::Metrics::GetInstance().AddVectorsFailGaugeSet(n); - return status; - } - server::Metrics::GetInstance().AddVectorsSuccessTotalIncrement(n); - server::Metrics::GetInstance().AddVectorsSuccessGaugeSet(n); + CollectInsertMetrics(total_time, n, status.ok()); + return status; } -template -Status DBImpl::Query(const std::string &table_id, size_t k, size_t nq, +Status DBImpl::Query(const std::string &table_id, size_t k, size_t nq, const float *vectors, QueryResults &results) { auto start_time = METRICS_NOW_TIME; meta::DatesT dates = {meta::Meta::GetDate()}; Status result = Query(table_id, k, nq, vectors, dates, results); auto end_time = METRICS_NOW_TIME; auto total_time = METRICS_MICROSECONDS(start_time,end_time); - auto average_time = total_time / nq; - for (int i = 0; i < nq; ++i) { - server::Metrics::GetInstance().QueryResponseSummaryObserve(total_time); - } - server::Metrics::GetInstance().QueryVectorResponseSummaryObserve(average_time, nq); - server::Metrics::GetInstance().QueryVectorResponsePerSecondGaugeSet(double (nq) / total_time); + + CollectQueryMetrics(total_time, nq); return result; } -template -Status DBImpl::Query(const std::string& table_id, size_t k, size_t nq, +Status DBImpl::Query(const std::string& table_id, size_t k, size_t nq, const float* vectors, const meta::DatesT& dates, QueryResults& results) { #if 0 return QuerySync(table_id, k, nq, vectors, dates, results); @@ -105,8 +131,7 @@ Status DBImpl::Query(const std::string& table_id, size_t k, size_t nq, #endif } -template -Status DBImpl::QuerySync(const std::string& table_id, size_t k, size_t nq, +Status DBImpl::QuerySync(const std::string& table_id, size_t k, size_t nq, const float* vectors, const meta::DatesT& dates, QueryResults& results) { meta::DatePartionedTableFilesSchema files; auto status = pMeta_->FilesToSearch(table_id, dates, files); @@ -160,38 +185,20 @@ Status DBImpl::QuerySync(const std::string& table_id, size_t k, size_t auto search_in_index = [&](meta::TableFilesSchema& file_vec) -> void { for (auto &file : file_vec) { - EngineT index(file.dimension, file.location); - index.Load(); - auto file_size = index.PhysicalSize()/(1024*1024); + ExecutionEnginePtr index = EngineFactory::Build(file.dimension, file.location, (EngineType)file.engine_type_); + index->Load(); + auto file_size = index->PhysicalSize(); search_set_size += file_size; LOG(DEBUG) << "Search file_type " << file.file_type << " Of Size: " - << file_size << " M"; + << file_size/(1024*1024) << " M"; - int inner_k = index.Count() < k ? index.Count() : k; + int inner_k = index->Count() < k ? index->Count() : k; auto start_time = METRICS_NOW_TIME; - index.Search(nq, vectors, inner_k, output_distence, output_ids); + index->Search(nq, vectors, inner_k, output_distence, output_ids); auto end_time = METRICS_NOW_TIME; auto total_time = METRICS_MICROSECONDS(start_time, end_time); - if(file.file_type == meta::TableFileSchema::RAW) { - server::Metrics::GetInstance().SearchRawDataDurationSecondsHistogramObserve(total_time); - server::Metrics::GetInstance().RawFileSizeHistogramObserve(file_size*1024*1024); - server::Metrics::GetInstance().RawFileSizeTotalIncrement(file_size*1024*1024); - server::Metrics::GetInstance().RawFileSizeGaugeSet(file_size*1024*1024); - - } else if(file.file_type == meta::TableFileSchema::TO_INDEX) { - - server::Metrics::GetInstance().SearchRawDataDurationSecondsHistogramObserve(total_time); - server::Metrics::GetInstance().RawFileSizeHistogramObserve(file_size*1024*1024); - server::Metrics::GetInstance().RawFileSizeTotalIncrement(file_size*1024*1024); - server::Metrics::GetInstance().RawFileSizeGaugeSet(file_size*1024*1024); - - } else { - server::Metrics::GetInstance().SearchIndexDataDurationSecondsHistogramObserve(total_time); - server::Metrics::GetInstance().IndexFileSizeHistogramObserve(file_size*1024*1024); - server::Metrics::GetInstance().IndexFileSizeTotalIncrement(file_size*1024*1024); - server::Metrics::GetInstance().IndexFileSizeGaugeSet(file_size*1024*1024); - } + CollectFileMetrics((meta::TableFileSchema::FILE_TYPE)file.file_type, file_size, total_time); cluster(output_ids, output_distence, inner_k); // cluster to each query memset(output_distence, 0, k * nq * sizeof(float)); memset(output_ids, 0, k * nq * sizeof(long)); @@ -258,8 +265,7 @@ Status DBImpl::QuerySync(const std::string& table_id, size_t k, size_t return Status::OK(); } -template -Status DBImpl::QueryAsync(const std::string& table_id, size_t k, size_t nq, +Status DBImpl::QueryAsync(const std::string& table_id, size_t k, size_t nq, const float* vectors, const meta::DatesT& dates, QueryResults& results) { meta::DatePartionedTableFilesSchema files; auto status = pMeta_->FilesToSearch(table_id, dates, files); @@ -292,13 +298,12 @@ Status DBImpl::QueryAsync(const std::string& table_id, size_t k, size_t return Status::OK(); } -template -void DBImpl::StartTimerTasks(int interval) { - bg_timer_thread_ = std::thread(&DBImpl::BackgroundTimerTask, this, interval); +void DBImpl::StartTimerTasks(int interval) { + bg_timer_thread_ = std::thread(&DBImpl::BackgroundTimerTask, this, interval); } -template -void DBImpl::BackgroundTimerTask(int interval) { + +void DBImpl::BackgroundTimerTask(int interval) { Status status; while (true) { if (!bg_error_.ok()) break; @@ -315,22 +320,19 @@ void DBImpl::BackgroundTimerTask(int interval) { } } -template -void DBImpl::TrySchedule() { +void DBImpl::TrySchedule() { if (bg_compaction_scheduled_) return; if (!bg_error_.ok()) return; bg_compaction_scheduled_ = true; - env_->Schedule(&DBImpl::BGWork, this); + env_->Schedule(&DBImpl::BGWork, this); } -template -void DBImpl::BGWork(void* db_) { +void DBImpl::BGWork(void* db_) { reinterpret_cast(db_)->BackgroundCall(); } -template -void DBImpl::BackgroundCall() { +void DBImpl::BackgroundCall() { std::lock_guard lock(mutex_); assert(bg_compaction_scheduled_); @@ -343,9 +345,7 @@ void DBImpl::BackgroundCall() { bg_work_finish_signal_.notify_all(); } - -template -Status DBImpl::MergeFiles(const std::string& table_id, const meta::DateT& date, +Status DBImpl::MergeFiles(const std::string& table_id, const meta::DateT& date, const meta::TableFilesSchema& files) { meta::TableFileSchema table_file; table_file.table_id = table_id; @@ -357,7 +357,7 @@ Status DBImpl::MergeFiles(const std::string& table_id, const meta::Date return status; } - EngineT index(table_file.dimension, table_file.location); + ExecutionEnginePtr index = EngineFactory::Build(table_file.dimension, table_file.location, (EngineType)table_file.engine_type_); meta::TableFilesSchema updated; long index_size = 0; @@ -365,7 +365,7 @@ Status DBImpl::MergeFiles(const std::string& table_id, const meta::Date for (auto& file : files) { auto start_time = METRICS_NOW_TIME; - index.Merge(file.location); + index->Merge(file.location); auto file_schema = file; auto end_time = METRICS_NOW_TIME; auto total_time = METRICS_MICROSECONDS(start_time,end_time); @@ -374,13 +374,13 @@ Status DBImpl::MergeFiles(const std::string& table_id, const meta::Date file_schema.file_type = meta::TableFileSchema::TO_DELETE; updated.push_back(file_schema); LOG(DEBUG) << "Merging file " << file_schema.file_id; - index_size = index.Size(); + index_size = index->Size(); if (index_size >= options_.index_trigger_size) break; } - index.Serialize(); + index->Serialize(); if (index_size >= options_.index_trigger_size) { table_file.file_type = meta::TableFileSchema::TO_INDEX; @@ -391,15 +391,14 @@ Status DBImpl::MergeFiles(const std::string& table_id, const meta::Date updated.push_back(table_file); status = pMeta_->UpdateTableFiles(updated); LOG(DEBUG) << "New merged file " << table_file.file_id << - " of size=" << index.PhysicalSize()/(1024*1024) << " M"; + " of size=" << index->PhysicalSize()/(1024*1024) << " M"; - index.Cache(); + index->Cache(); return status; } -template -Status DBImpl::BackgroundMergeFiles(const std::string& table_id) { +Status DBImpl::BackgroundMergeFiles(const std::string& table_id) { meta::DatePartionedTableFilesSchema raw_files; auto status = pMeta_->FilesToMerge(table_id, raw_files); if (!status.ok()) { @@ -426,8 +425,7 @@ Status DBImpl::BackgroundMergeFiles(const std::string& table_id) { return Status::OK(); } -template -Status DBImpl::BuildIndex(const meta::TableFileSchema& file) { +Status DBImpl::BuildIndex(const meta::TableFileSchema& file) { meta::TableFileSchema table_file; table_file.table_id = file.table_id; table_file.date = file.date; @@ -436,11 +434,11 @@ Status DBImpl::BuildIndex(const meta::TableFileSchema& file) { return status; } - EngineT to_index(file.dimension, file.location); + ExecutionEnginePtr to_index = EngineFactory::Build(file.dimension, file.location, (EngineType)file.engine_type_); - to_index.Load(); + to_index->Load(); auto start_time = METRICS_NOW_TIME; - auto index = to_index.BuildIndex(table_file.location); + auto index = to_index->BuildIndex(table_file.location); auto end_time = METRICS_NOW_TIME; auto total_time = METRICS_MICROSECONDS(start_time, end_time); server::Metrics::GetInstance().BuildIndexDurationSecondsHistogramObserve(total_time); @@ -464,8 +462,7 @@ Status DBImpl::BuildIndex(const meta::TableFileSchema& file) { return Status::OK(); } -template -void DBImpl::BackgroundBuildIndex() { +void DBImpl::BackgroundBuildIndex() { std::lock_guard lock(build_index_mutex_); assert(bg_build_index_started_); meta::TableFilesSchema to_index_files; @@ -485,18 +482,16 @@ void DBImpl::BackgroundBuildIndex() { bg_build_index_finish_signal_.notify_all(); } -template -Status DBImpl::TryBuildIndex() { +Status DBImpl::TryBuildIndex() { if (bg_build_index_started_) return Status::OK(); if (shutting_down_.load(std::memory_order_acquire)) return Status::OK(); bg_build_index_started_ = true; - std::thread build_index_task(&DBImpl::BackgroundBuildIndex, this); + std::thread build_index_task(&DBImpl::BackgroundBuildIndex, this); build_index_task.detach(); return Status::OK(); } -template -void DBImpl::BackgroundCompaction() { +void DBImpl::BackgroundCompaction() { std::vector table_ids; pMemMgr_->Serialize(table_ids); @@ -510,18 +505,15 @@ void DBImpl::BackgroundCompaction() { } } -template -Status DBImpl::DropAll() { +Status DBImpl::DropAll() { return pMeta_->DropAll(); } -template -Status DBImpl::Size(long& result) { +Status DBImpl::Size(long& result) { return pMeta_->Size(result); } -template -DBImpl::~DBImpl() { +DBImpl::~DBImpl() { { std::unique_lock lock(mutex_); shutting_down_.store(true, std::memory_order_release); diff --git a/cpp/src/db/DBImpl.h b/cpp/src/db/DBImpl.h index b67aa8a0a9ac54176d85150783e509d8202648de..4f9ab54cfbebd4e64e72a262e8fb3ba9bb9d0173 100644 --- a/cpp/src/db/DBImpl.h +++ b/cpp/src/db/DBImpl.h @@ -8,12 +8,12 @@ #include "DB.h" #include "MemManager.h" #include "Types.h" -#include "Traits.h" #include #include #include #include +#include namespace zilliz { namespace vecwise { @@ -25,11 +25,10 @@ namespace meta { class Meta; } -template class DBImpl : public DB { public: using MetaPtr = meta::Meta::Ptr; - using MemManagerPtr = typename MemManager::Ptr; + using MemManagerPtr = typename MemManager::Ptr; DBImpl(const Options& options); @@ -100,5 +99,3 @@ private: } // namespace engine } // namespace vecwise } // namespace zilliz - -#include "DBImpl.inl" diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index ea325c0c54c6227a2591135d70163944387c4a36..9eab7b07aa2c8d3df7fd7e4c19bcf89beef1d589 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -7,6 +7,7 @@ #include "IDGenerator.h" #include "Utils.h" #include "MetaConsts.h" +#include "Factories.h" #include "metrics/Metrics.h" #include @@ -33,10 +34,12 @@ inline auto StoragePrototype(const std::string &path) { make_column("table_id", &TableSchema::table_id, unique()), make_column("dimension", &TableSchema::dimension), make_column("created_on", &TableSchema::created_on), - make_column("files_cnt", &TableSchema::files_cnt, default_value(0))), + make_column("files_cnt", &TableSchema::files_cnt, default_value(0)), + make_column("engine_type", &TableSchema::engine_type_)), make_table("TableFile", make_column("id", &TableFileSchema::id, primary_key()), make_column("table_id", &TableFileSchema::table_id), + make_column("engine_type", &TableFileSchema::engine_type_), make_column("file_id", &TableFileSchema::file_id), make_column("file_type", &TableFileSchema::file_type), make_column("size", &TableFileSchema::size, default_value(0)), diff --git a/cpp/src/db/EngineFactory.cpp b/cpp/src/db/EngineFactory.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a9b0bc744d4d21558195e0a1c6b56fea3baa20b5 --- /dev/null +++ b/cpp/src/db/EngineFactory.cpp @@ -0,0 +1,31 @@ +/******************************************************************************* + * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved + * Unauthorized copying of this file, via any medium is strictly prohibited. + * Proprietary and confidential. + ******************************************************************************/ +#include "EngineFactory.h" +#include "FaissExecutionEngine.h" +#include "Log.h" + +namespace zilliz { +namespace vecwise { +namespace engine { + +ExecutionEnginePtr +EngineFactory::Build(uint16_t dimension, + const std::string& location, + EngineType type) { + switch(type) { + case EngineType::FAISS_IDMAP: + return ExecutionEnginePtr(new FaissExecutionEngine(dimension, location, "IDMap", "IDMap,Flat")); + case EngineType::FAISS_IVFFLAT: + return ExecutionEnginePtr(new FaissExecutionEngine(dimension, location, "IVF", "IDMap,Flat")); + default: + ENGINE_LOG_ERROR << "Unsupportted engine type"; + return nullptr; + } +} + +} +} +} \ No newline at end of file diff --git a/cpp/src/db/Traits.h b/cpp/src/db/EngineFactory.h similarity index 61% rename from cpp/src/db/Traits.h rename to cpp/src/db/EngineFactory.h index 1740bc87ed6e821f36b0add1a282ba3a62b5b591..fe984d70cd52b80c36ecfeecb15669888c132f15 100644 --- a/cpp/src/db/Traits.h +++ b/cpp/src/db/EngineFactory.h @@ -5,22 +5,20 @@ ******************************************************************************/ #pragma once +#include "Status.h" +#include "ExecutionEngine.h" namespace zilliz { namespace vecwise { namespace engine { -struct IVFIndexTrait { - static const char* BuildIndexType; - static const char* RawIndexType; +class EngineFactory { +public: + static ExecutionEnginePtr Build(uint16_t dimension, + const std::string& location, + EngineType type); }; -struct IDMapIndexTrait { - static const char* BuildIndexType; - static const char* RawIndexType; -}; - - -} // namespace engine -} // namespace vecwise -} // namespace zilliz +} +} +} diff --git a/cpp/src/db/ExecutionEngine.cpp b/cpp/src/db/ExecutionEngine.cpp index 6eb04bb788e17dc403c49975c760a369efb9b8d9..54349b7b69ff436b1677560f50a378f25eff76d5 100644 --- a/cpp/src/db/ExecutionEngine.cpp +++ b/cpp/src/db/ExecutionEngine.cpp @@ -11,8 +11,7 @@ namespace zilliz { namespace vecwise { namespace engine { -template -Status ExecutionEngine::AddWithIds(const std::vector& vectors, const std::vector& vector_ids) { +Status ExecutionEngine::AddWithIds(const std::vector& vectors, const std::vector& vector_ids) { long n1 = (long)vectors.size(); long n2 = (long)vector_ids.size(); if (n1 != n2) { @@ -22,60 +21,6 @@ Status ExecutionEngine::AddWithIds(const std::vector& vectors, c return AddWithIds(n1, vectors.data(), vector_ids.data()); } -template -Status ExecutionEngine::AddWithIds(long n, const float *xdata, const long *xids) { - return static_cast(this)->AddWithIds(n, xdata, xids); -} - -template -size_t ExecutionEngine::Count() const { - return static_cast(this)->Count(); -} - -template -size_t ExecutionEngine::Size() const { - return static_cast(this)->Size(); -} - -template -size_t ExecutionEngine::PhysicalSize() const { - return static_cast(this)->PhysicalSize(); -} - -template -Status ExecutionEngine::Serialize() { - return static_cast(this)->Serialize(); -} - -template -Status ExecutionEngine::Load() { - return static_cast(this)->Load(); -} - -template -Status ExecutionEngine::Merge(const std::string& location) { - return static_cast(this)->Merge(location); -} - -template -Status ExecutionEngine::Search(long n, - const float *data, - long k, - float *distances, - long *labels) const { - return static_cast(this)->Search(n, data, k, distances, labels); -} - -template -Status ExecutionEngine::Cache() { - return static_cast(this)->Cache(); -} - -template -std::shared_ptr ExecutionEngine::BuildIndex(const std::string& location) { - return static_cast(this)->BuildIndex(location); -} - } // namespace engine } // namespace vecwise diff --git a/cpp/src/db/ExecutionEngine.h b/cpp/src/db/ExecutionEngine.h index 6ad91959e8fe8c95a5f8e0d32ef2b098525b993b..0773e4a837db8ef335d307b9b27f98a8f7bf80eb 100644 --- a/cpp/src/db/ExecutionEngine.h +++ b/cpp/src/db/ExecutionEngine.h @@ -14,38 +14,44 @@ namespace zilliz { namespace vecwise { namespace engine { -template +enum class EngineType { + FAISS_IDMAP = 1, + FAISS_IVFFLAT, +}; + class ExecutionEngine { public: - Status AddWithIds(const std::vector& vectors, - const std::vector& vector_ids); + virtual Status AddWithIds(const std::vector& vectors, + const std::vector& vector_ids); - Status AddWithIds(long n, const float *xdata, const long *xids); + virtual Status AddWithIds(long n, const float *xdata, const long *xids) = 0; - size_t Count() const; + virtual size_t Count() const = 0; - size_t Size() const; + virtual size_t Size() const = 0; - size_t PhysicalSize() const; + virtual size_t PhysicalSize() const = 0; - Status Serialize(); + virtual Status Serialize() = 0; - Status Load(); + virtual Status Load() = 0; - Status Merge(const std::string& location); + virtual Status Merge(const std::string& location) = 0; - Status Search(long n, + virtual Status Search(long n, const float *data, long k, float *distances, - long *labels) const; + long *labels) const = 0; - std::shared_ptr BuildIndex(const std::string&); + virtual std::shared_ptr BuildIndex(const std::string&) = 0; - Status Cache(); + virtual Status Cache() = 0; }; +using ExecutionEnginePtr = std::shared_ptr; + } // namespace engine } // namespace vecwise diff --git a/cpp/src/db/Factories.cpp b/cpp/src/db/Factories.cpp index 191d3dbae5f04ca722143a71e5b274b2534389c9..bbaa17343b8738cfd306c19dd1d86f619671676c 100644 --- a/cpp/src/db/Factories.cpp +++ b/cpp/src/db/Factories.cpp @@ -5,8 +5,6 @@ //////////////////////////////////////////////////////////////////////////////// #include "Factories.h" #include "DBImpl.h" -#include "FaissExecutionEngine.h" -#include "Traits.h" #include #include @@ -45,28 +43,14 @@ std::shared_ptr DBMetaImplFactory::Build() { return std::shared_ptr(new meta::DBMetaImpl(options)); } -std::shared_ptr DBFactory::Build(const std::string& db_type) { +std::shared_ptr DBFactory::Build() { auto options = OptionsFactory::Build(); - auto db = DBFactory::Build(options, db_type); + auto db = DBFactory::Build(options); return std::shared_ptr(db); } -DB* DBFactory::Build(const Options& options, const std::string& db_type) { - std::stringstream ss(db_type); - std::string token; - std::vector tokens; - while (std::getline(ss, token, ',')) { - tokens.push_back(token); - } - - assert(tokens.size()==2); - assert(tokens[0]=="Faiss"); - if (tokens[1] == "IVF") { - return new DBImpl>(options); - } else if (tokens[1] == "IDMap") { - return new DBImpl>(options); - } - return nullptr; +DB* DBFactory::Build(const Options& options) { + return new DBImpl(options); } } // namespace engine diff --git a/cpp/src/db/Factories.h b/cpp/src/db/Factories.h index 0e9ab71bb12edb5719288875be52a23ab43a3e02..9bcd3ffb6776f28b97b8727aa9ffe9ad1c83a153 100644 --- a/cpp/src/db/Factories.h +++ b/cpp/src/db/Factories.h @@ -8,6 +8,7 @@ #include "DB.h" #include "DBMetaImpl.h" #include "Options.h" +#include "ExecutionEngine.h" #include #include @@ -29,8 +30,8 @@ struct DBMetaImplFactory { }; struct DBFactory { - static std::shared_ptr Build(const std::string& db_type = "Faiss,IVF"); - static DB* Build(const Options&, const std::string& db_type = "Faiss,IVF"); + static std::shared_ptr Build(); + static DB* Build(const Options&); }; } // namespace engine diff --git a/cpp/src/db/FaissExecutionEngine.inl b/cpp/src/db/FaissExecutionEngine.cpp similarity index 69% rename from cpp/src/db/FaissExecutionEngine.inl rename to cpp/src/db/FaissExecutionEngine.cpp index 8dd701d54df474333197b17da3201e1b5736e143..a338ddf5cbb910f0f43febe737f59fc130cec582 100644 --- a/cpp/src/db/FaissExecutionEngine.inl +++ b/cpp/src/db/FaissExecutionEngine.cpp @@ -3,8 +3,6 @@ * Unauthorized copying of this file, via any medium is strictly prohibited. * Proprietary and confidential. ******************************************************************************/ -#pragma once - #include "FaissExecutionEngine.h" #include @@ -23,47 +21,49 @@ namespace vecwise { namespace engine { -template -FaissExecutionEngine::FaissExecutionEngine(uint16_t dimension, const std::string& location) - : pIndex_(faiss::index_factory(dimension, IndexTrait::RawIndexType)), - location_(location) { +FaissExecutionEngine::FaissExecutionEngine(uint16_t dimension, + const std::string& location, + const std::string& build_index_type, + const std::string& raw_index_type) + : pIndex_(faiss::index_factory(dimension, raw_index_type.c_str())), + location_(location), + build_index_type_(build_index_type), + raw_index_type_(raw_index_type) { } -template -FaissExecutionEngine::FaissExecutionEngine(std::shared_ptr index, const std::string& location) +FaissExecutionEngine::FaissExecutionEngine(std::shared_ptr index, + const std::string& location, + const std::string& build_index_type, + const std::string& raw_index_type) : pIndex_(index), - location_(location) { + location_(location), + build_index_type_(build_index_type), + raw_index_type_(raw_index_type) { } -template -Status FaissExecutionEngine::AddWithIds(long n, const float *xdata, const long *xids) { +Status FaissExecutionEngine::AddWithIds(long n, const float *xdata, const long *xids) { pIndex_->add_with_ids(n, xdata, xids); return Status::OK(); } -template -size_t FaissExecutionEngine::Count() const { +size_t FaissExecutionEngine::Count() const { return (size_t)(pIndex_->ntotal); } -template -size_t FaissExecutionEngine::Size() const { +size_t FaissExecutionEngine::Size() const { return (size_t)(Count() * pIndex_->d)*sizeof(float); } -template -size_t FaissExecutionEngine::PhysicalSize() const { +size_t FaissExecutionEngine::PhysicalSize() const { return (size_t)(Count() * pIndex_->d)*sizeof(float); } -template -Status FaissExecutionEngine::Serialize() { +Status FaissExecutionEngine::Serialize() { write_index(pIndex_.get(), location_.c_str()); return Status::OK(); } -template -Status FaissExecutionEngine::Load() { +Status FaissExecutionEngine::Load() { auto index = zilliz::vecwise::cache::CpuCacheMgr::GetInstance()->GetIndex(location_); bool to_cache = false; auto start_time = METRICS_NOW_TIME; @@ -90,8 +90,7 @@ Status FaissExecutionEngine::Load() { return Status::OK(); } -template -Status FaissExecutionEngine::Merge(const std::string& location) { +Status FaissExecutionEngine::Merge(const std::string& location) { if (location == location_) { return Status::Error("Cannot Merge Self"); } @@ -105,12 +104,11 @@ Status FaissExecutionEngine::Merge(const std::string& location) { return Status::OK(); } -template -typename FaissExecutionEngine::Ptr -FaissExecutionEngine::BuildIndex(const std::string& location) { +ExecutionEnginePtr +FaissExecutionEngine::BuildIndex(const std::string& location) { auto opd = std::make_shared(); opd->d = pIndex_->d; - opd->index_type = IndexTrait::BuildIndexType; + opd->index_type = build_index_type_; IndexBuilderPtr pBuilder = GetIndexBuilder(opd); auto from_index = dynamic_cast(pIndex_.get()); @@ -119,13 +117,12 @@ FaissExecutionEngine::BuildIndex(const std::string& location) { dynamic_cast(from_index->index)->xb.data(), from_index->id_map.data()); - Ptr new_ee(new FaissExecutionEngine(index->data(), location)); + ExecutionEnginePtr new_ee(new FaissExecutionEngine(index->data(), location, build_index_type_, raw_index_type_)); new_ee->Serialize(); return new_ee; } -template -Status FaissExecutionEngine::Search(long n, +Status FaissExecutionEngine::Search(long n, const float *data, long k, float *distances, @@ -135,8 +132,7 @@ Status FaissExecutionEngine::Search(long n, return Status::OK(); } -template -Status FaissExecutionEngine::Cache() { +Status FaissExecutionEngine::Cache() { zilliz::vecwise::cache::CpuCacheMgr::GetInstance( )->InsertItem(location_, std::make_shared(pIndex_)); diff --git a/cpp/src/db/FaissExecutionEngine.h b/cpp/src/db/FaissExecutionEngine.h index 2915acc0ebc7325dc790f03441bc6b3e30c3be2f..d1981502b9e25a30ed6849b4f6c1f637e918fd9e 100644 --- a/cpp/src/db/FaissExecutionEngine.h +++ b/cpp/src/db/FaissExecutionEngine.h @@ -19,50 +19,52 @@ namespace vecwise { namespace engine { -template -class FaissExecutionEngine : public ExecutionEngine> { +class FaissExecutionEngine : public ExecutionEngine { public: - using Ptr = std::shared_ptr>; - FaissExecutionEngine(uint16_t dimension, const std::string& location); - FaissExecutionEngine(std::shared_ptr index, const std::string& location); + FaissExecutionEngine(uint16_t dimension, + const std::string& location, + const std::string& build_index_type, + const std::string& raw_index_type); - Status AddWithIds(const std::vector& vectors, - const std::vector& vector_ids); + FaissExecutionEngine(std::shared_ptr index, + const std::string& location, + const std::string& build_index_type, + const std::string& raw_index_type); - Status AddWithIds(long n, const float *xdata, const long *xids); + Status AddWithIds(long n, const float *xdata, const long *xids) override; - size_t Count() const; + size_t Count() const override; - size_t Size() const; + size_t Size() const override; - size_t PhysicalSize() const; + size_t PhysicalSize() const override; - Status Serialize(); + Status Serialize() override; - Status Load(); + Status Load() override; - Status Merge(const std::string& location); + Status Merge(const std::string& location) override; Status Search(long n, const float *data, long k, float *distances, - long *labels) const; + long *labels) const override; - Ptr BuildIndex(const std::string&); + ExecutionEnginePtr BuildIndex(const std::string&) override; - Status Cache(); + Status Cache() override; protected: - std::shared_ptr pIndex_; std::string location_; + + std::string build_index_type_; + std::string raw_index_type_; }; } // namespace engine } // namespace vecwise } // namespace zilliz - -#include "FaissExecutionEngine.inl" diff --git a/cpp/src/db/Log.h b/cpp/src/db/Log.h new file mode 100644 index 0000000000000000000000000000000000000000..1e938591a0697105f3a257bbc095159295744512 --- /dev/null +++ b/cpp/src/db/Log.h @@ -0,0 +1,26 @@ +/******************************************************************************* + * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved + * Unauthorized copying of this file, via any medium is strictly prohibited. + * Proprietary and confidential. + ******************************************************************************/ +#pragma once + +#include + +namespace zilliz { +namespace vecwise { +namespace engine { + +#define ENGINE_DOMAIN_NAME "[ENGINE] " +#define ENGINE_ERROR_TEXT "ENGINE Error:" + +#define ENGINE_LOG_TRACE LOG(TRACE) << ENGINE_DOMAIN_NAME +#define ENGINE_LOG_DEBUG LOG(DEBUG) << ENGINE_DOMAIN_NAME +#define ENGINE_LOG_INFO LOG(INFO) << ENGINE_DOMAIN_NAME +#define ENGINE_LOG_WARNING LOG(WARNING) << ENGINE_DOMAIN_NAME +#define ENGINE_LOG_ERROR LOG(ERROR) << ENGINE_DOMAIN_NAME +#define ENGINE_LOG_FATAL LOG(FATAL) << ENGINE_DOMAIN_NAME + +} // namespace sql +} // namespace zilliz +} // namespace server diff --git a/cpp/src/db/MemManager.inl b/cpp/src/db/MemManager.cpp similarity index 73% rename from cpp/src/db/MemManager.inl rename to cpp/src/db/MemManager.cpp index 528622795de8f8c29227bdeded2232bf07ad2b4e..c8b9e0fd5ae2a9d8d8e85917a8c54c7f096900a3 100644 --- a/cpp/src/db/MemManager.inl +++ b/cpp/src/db/MemManager.cpp @@ -3,11 +3,10 @@ * Unauthorized copying of this file, via any medium is strictly prohibited. * Proprietary and confidential. ******************************************************************************/ -#pragma once - #include "MemManager.h" #include "Meta.h" #include "MetaConsts.h" +#include "EngineFactory.h" #include "metrics/Metrics.h" #include @@ -19,34 +18,29 @@ namespace zilliz { namespace vecwise { namespace engine { -template -MemVectors::MemVectors(const std::shared_ptr& meta_ptr, +MemVectors::MemVectors(const std::shared_ptr& meta_ptr, const meta::TableFileSchema& schema, const Options& options) : pMeta_(meta_ptr), options_(options), schema_(schema), pIdGenerator_(new SimpleIDGenerator()), - pEE_(new EngineT(schema_.dimension, schema_.location)) { + pEE_(EngineFactory::Build(schema_.dimension, schema_.location, (EngineType)schema_.engine_type_)) { } -template -void MemVectors::Add(size_t n_, const float* vectors_, IDNumbers& vector_ids_) { +void MemVectors::Add(size_t n_, const float* vectors_, IDNumbers& vector_ids_) { pIdGenerator_->GetNextIDNumbers(n_, vector_ids_); pEE_->AddWithIds(n_, vectors_, vector_ids_.data()); } -template -size_t MemVectors::Total() const { +size_t MemVectors::Total() const { return pEE_->Count(); } -template -size_t MemVectors::ApproximateSize() const { +size_t MemVectors::ApproximateSize() const { return pEE_->Size(); } -template -Status MemVectors::Serialize(std::string& table_id) { +Status MemVectors::Serialize(std::string& table_id) { table_id = schema_.table_id; auto size = ApproximateSize(); auto start_time = METRICS_NOW_TIME; @@ -70,8 +64,7 @@ Status MemVectors::Serialize(std::string& table_id) { return status; } -template -MemVectors::~MemVectors() { +MemVectors::~MemVectors() { if (pIdGenerator_ != nullptr) { delete pIdGenerator_; pIdGenerator_ = nullptr; @@ -81,9 +74,7 @@ MemVectors::~MemVectors() { /* * MemManager */ - -template -typename MemManager::MemVectorsPtr MemManager::GetMemByTable( +MemManager::MemVectorsPtr MemManager::GetMemByTable( const std::string& table_id) { auto memIt = memMap_.find(table_id); if (memIt != memMap_.end()) { @@ -97,12 +88,11 @@ typename MemManager::MemVectorsPtr MemManager::GetMemByTable( return nullptr; } - memMap_[table_id] = MemVectorsPtr(new MemVectors(pMeta_, table_file, options_)); + memMap_[table_id] = MemVectorsPtr(new MemVectors(pMeta_, table_file, options_)); return memMap_[table_id]; } -template -Status MemManager::InsertVectors(const std::string& table_id_, +Status MemManager::InsertVectors(const std::string& table_id_, size_t n_, const float* vectors_, IDNumbers& vector_ids_) { @@ -110,8 +100,7 @@ Status MemManager::InsertVectors(const std::string& table_id_, return InsertVectorsNoLock(table_id_, n_, vectors_, vector_ids_); } -template -Status MemManager::InsertVectorsNoLock(const std::string& table_id, +Status MemManager::InsertVectorsNoLock(const std::string& table_id, size_t n, const float* vectors, IDNumbers& vector_ids) { @@ -124,8 +113,7 @@ Status MemManager::InsertVectorsNoLock(const std::string& table_id, return Status::OK(); } -template -Status MemManager::ToImmutable() { +Status MemManager::ToImmutable() { std::unique_lock lock(mutex_); for (auto& kv: memMap_) { immMems_.push_back(kv.second); @@ -135,8 +123,7 @@ Status MemManager::ToImmutable() { return Status::OK(); } -template -Status MemManager::Serialize(std::vector& table_ids) { +Status MemManager::Serialize(std::vector& table_ids) { ToImmutable(); std::unique_lock lock(serialization_mtx_); std::string table_id; diff --git a/cpp/src/db/MemManager.h b/cpp/src/db/MemManager.h index 35272f4211d32e4340ad8252e9a0f963e21c1432..2503b6fb9856a17703a70c2b7093b99206cc6ba7 100644 --- a/cpp/src/db/MemManager.h +++ b/cpp/src/db/MemManager.h @@ -5,6 +5,7 @@ ******************************************************************************/ #pragma once +#include "ExecutionEngine.h" #include "IDGenerator.h" #include "Status.h" #include "Meta.h" @@ -23,12 +24,10 @@ namespace meta { class Meta; } -template class MemVectors { public: - using EnginePtr = typename EngineT::Ptr; using MetaPtr = meta::Meta::Ptr; - using Ptr = std::shared_ptr>; + using Ptr = std::shared_ptr; explicit MemVectors(const std::shared_ptr&, const meta::TableFileSchema&, const Options&); @@ -54,18 +53,17 @@ private: Options options_; meta::TableFileSchema schema_; IDGenerator* pIdGenerator_; - EnginePtr pEE_; + ExecutionEnginePtr pEE_; }; // MemVectors -template class MemManager { public: using MetaPtr = meta::Meta::Ptr; - using MemVectorsPtr = typename MemVectors::Ptr; - using Ptr = std::shared_ptr>; + using MemVectorsPtr = typename MemVectors::Ptr; + using Ptr = std::shared_ptr; MemManager(const std::shared_ptr& meta, const Options& options) : pMeta_(meta), options_(options) {} @@ -96,4 +94,3 @@ private: } // namespace engine } // namespace vecwise } // namespace zilliz -#include "MemManager.inl" diff --git a/cpp/src/db/MetaTypes.h b/cpp/src/db/MetaTypes.h index b2fe7833236df885e28d16e4c7d1ac39f7298738..ca110be1a5d142c7518a02e4563055cc8c1ede8b 100644 --- a/cpp/src/db/MetaTypes.h +++ b/cpp/src/db/MetaTypes.h @@ -5,6 +5,8 @@ ******************************************************************************/ #pragma once +#include "ExecutionEngine.h" + #include #include #include @@ -25,6 +27,7 @@ struct TableSchema { uint16_t dimension; std::string location; long created_on; + int engine_type_ = (int)EngineType::FAISS_IDMAP; }; // TableSchema struct TableFileSchema { @@ -38,6 +41,7 @@ struct TableFileSchema { size_t id; std::string table_id; + int engine_type_ = (int)EngineType::FAISS_IDMAP; std::string file_id; int file_type = NEW; size_t size; diff --git a/cpp/src/db/Traits.cpp b/cpp/src/db/Traits.cpp deleted file mode 100644 index a30b9fc036cfb4c7e02ee6e9b79799577d07ec5d..0000000000000000000000000000000000000000 --- a/cpp/src/db/Traits.cpp +++ /dev/null @@ -1,20 +0,0 @@ -/******************************************************************************* - * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved - * Unauthorized copying of this file, via any medium is strictly prohibited. - * Proprietary and confidential. - ******************************************************************************/ -#include "Traits.h" - -namespace zilliz { -namespace vecwise { -namespace engine { - -const char* IVFIndexTrait::BuildIndexType = "IVF"; -const char* IVFIndexTrait::RawIndexType = "IDMap,Flat"; - -const char* IDMapIndexTrait::BuildIndexType = "IDMap"; -const char* IDMapIndexTrait::RawIndexType = "IDMap,Flat"; - -} // namespace engine -} // namespace vecwise -} // namespace zilliz diff --git a/cpp/src/db/scheduler/SearchScheduler.cpp b/cpp/src/db/scheduler/SearchScheduler.cpp index 4d3a12fdf81b670de21c4fa0640194b721991668..9309b131e62baa3ccd6e725acff84806fe329fa2 100644 --- a/cpp/src/db/scheduler/SearchScheduler.cpp +++ b/cpp/src/db/scheduler/SearchScheduler.cpp @@ -10,6 +10,7 @@ #include "utils/Log.h" #include "utils/TimeRecorder.h" #include "metrics/Metrics.h" +#include "db/EngineFactory.h" namespace zilliz { namespace vecwise { @@ -79,7 +80,9 @@ SearchScheduler::IndexLoadWorker() { server::TimeRecorder rc("Load index"); //load index - IndexEnginePtr index_ptr = std::make_shared(context->file_->dimension, context->file_->location); + ExecutionEnginePtr index_ptr = EngineFactory::Build(context->file_->dimension, + context->file_->location, + (EngineType)context->file_->engine_type_); index_ptr->Load(); rc.Record("load index file to memory"); @@ -111,7 +114,7 @@ SearchScheduler::IndexLoadWorker() { } //put search task to another queue - SearchTaskPtr task_ptr = std::make_shared(); + SearchTaskPtr task_ptr = std::make_shared(); task_ptr->index_id_ = context->file_->id; task_ptr->index_type_ = context->file_->file_type; task_ptr->index_engine_ = index_ptr; diff --git a/cpp/src/db/scheduler/SearchTaskQueue.inl b/cpp/src/db/scheduler/SearchTaskQueue.cpp similarity index 97% rename from cpp/src/db/scheduler/SearchTaskQueue.inl rename to cpp/src/db/scheduler/SearchTaskQueue.cpp index 7816ba8878ce05dbd85efa63582acac07088d4a1..86478477d1f2008c0bb04ac2934f1f55cbe57a34 100644 --- a/cpp/src/db/scheduler/SearchTaskQueue.inl +++ b/cpp/src/db/scheduler/SearchTaskQueue.cpp @@ -3,8 +3,6 @@ * Unauthorized copying of this file, via any medium is strictly prohibited. * Proprietary and confidential. ******************************************************************************/ -#pragma once - #include "SearchTaskQueue.h" #include "utils/Log.h" #include "utils/TimeRecorder.h" @@ -70,8 +68,7 @@ SearchTaskQueue::GetInstance() { return instance; } -template -bool SearchTask::DoSearch() { +bool SearchTask::DoSearch() { if(index_engine_ == nullptr) { return false; } diff --git a/cpp/src/db/scheduler/SearchTaskQueue.h b/cpp/src/db/scheduler/SearchTaskQueue.h index 3b58294811e1de0dc8bd5e9247872373940a9cb1..bd8e9d7f245859d16d9720ce92ed29b772f48349 100644 --- a/cpp/src/db/scheduler/SearchTaskQueue.h +++ b/cpp/src/db/scheduler/SearchTaskQueue.h @@ -7,8 +7,7 @@ #include "SearchContext.h" #include "utils/BlockingQueue.h" -#include "../FaissExecutionEngine.h" -#include "../Traits.h" +#include "db/ExecutionEngine.h" #include @@ -16,16 +15,6 @@ namespace zilliz { namespace vecwise { namespace engine { -#ifdef GPU_VERSION -using IndexTraitClass = IVFIndexTrait; -#else -using IndexTraitClass = IDMapIndexTrait; -#endif - -using IndexClass = FaissExecutionEngine; -using IndexEnginePtr = std::shared_ptr; - -template class SearchTask { public: bool DoSearch(); @@ -33,12 +22,11 @@ public: public: size_t index_id_ = 0; int index_type_ = 0; //for metrics - IndexEnginePtr index_engine_; + ExecutionEnginePtr index_engine_; std::vector search_contexts_; }; -using SearchTaskClass = SearchTask; -using SearchTaskPtr = std::shared_ptr; +using SearchTaskPtr = std::shared_ptr; class SearchTaskQueue : public server::BlockingQueue { private: @@ -58,6 +46,4 @@ private: } } -} - -#include "SearchTaskQueue.inl" \ No newline at end of file +} \ No newline at end of file diff --git a/cpp/unittest/db/utils.cpp b/cpp/unittest/db/utils.cpp index aa3ea560f3d2228aca0ec3535724c743f6bfb808..3e7c588ef6dcebf7338841d1fced9f7539b129bc 100644 --- a/cpp/unittest/db/utils.cpp +++ b/cpp/unittest/db/utils.cpp @@ -38,7 +38,7 @@ engine::Options DBTest::GetOptions() { void DBTest::SetUp() { InitLog(); auto options = GetOptions(); - db_ = engine::DBFactory::Build(options, "Faiss,IDMap"); + db_ = engine::DBFactory::Build(options); } void DBTest::TearDown() {