提交 1aa55949 编写于 作者: J jinhai

Merge branch 'NewEngine' into 'branch-0.3.2'

MS-137 New engine

See merge request megasearch/milvus!169

Former-commit-id: adf409fedb96628ab9e0df344c34109570f5f56e
......@@ -3,17 +3,6 @@
Please mark all change in change log and use the ticket from JIRA.
# Milvus 0.3.2 (2019-07-10)
## Bug
## Improvement
## New Feature
- MS-154 - Integrate knowhere
## Task
# Milvus 0.3.1 (2019-07-10)
## Bug
......@@ -21,6 +10,7 @@ Please mark all change in change log and use the ticket from JIRA.
## Improvement
## New Feature
- MS-137 - Integrate knowhere
## Task
......
......@@ -10,7 +10,7 @@ aux_source_directory(config config_files)
aux_source_directory(server server_files)
aux_source_directory(utils utils_files)
aux_source_directory(db db_files)
aux_source_directory(wrapper wrapper_files)
#aux_source_directory(wrapper wrapper_files)
aux_source_directory(metrics metrics_files)
aux_source_directory(wrapper/knowhere knowhere_files)
......@@ -161,6 +161,17 @@ set(server_libs
metrics
)
set(knowhere_libs
knowhere
SPTAGLibStatic
arrow
jemalloc_pic
faiss
openblas
lapack
tbb
)
add_executable(milvus_server
${config_files}
${server_files}
......@@ -170,9 +181,17 @@ add_executable(milvus_server
)
if (ENABLE_LICENSE STREQUAL "ON")
target_link_libraries(milvus_server ${server_libs} license_check ${third_party_libs})
target_link_libraries(milvus_server
${server_libs}
license_check
${third_party_libs}
${knowhere_libs})
else ()
target_link_libraries(milvus_server ${server_libs} ${third_party_libs})
target_link_libraries(milvus_server
${server_libs}
${third_party_libs}
${knowhere_libs}
)
endif()
if (ENABLE_LICENSE STREQUAL "ON")
......
......@@ -28,7 +28,7 @@ public:
return 0;
}
return index_->ntotal*(index_->dim*4);
return index_->Count() * index_->Dimension() * sizeof(float);
}
private:
......
......@@ -4,14 +4,15 @@
* Proprietary and confidential.
******************************************************************************/
#include "EngineFactory.h"
#include "FaissExecutionEngine.h"
//#include "FaissExecutionEngine.h"
#include "ExecutionEngineImpl.h"
#include "Log.h"
namespace zilliz {
namespace milvus {
namespace engine {
#if 0
ExecutionEnginePtr
EngineFactory::Build(uint16_t dimension,
const std::string &location,
......@@ -26,7 +27,7 @@ EngineFactory::Build(uint16_t dimension,
break;
}
case EngineType::FAISS_IVFFLAT: {
case EngineType::FAISS_IVFFLAT_GPU: {
execution_engine_ptr =
ExecutionEnginePtr(new FaissExecutionEngine(dimension, location, "IVF", "IDMap,Flat"));
break;
......@@ -41,6 +42,24 @@ EngineFactory::Build(uint16_t dimension,
execution_engine_ptr->Init();
return execution_engine_ptr;
}
#else
ExecutionEnginePtr
EngineFactory::Build(uint16_t dimension,
const std::string &location,
EngineType type) {
if(type == EngineType::INVALID) {
ENGINE_LOG_ERROR << "Unsupported engine type";
return nullptr;
}
ExecutionEnginePtr execution_engine_ptr =
std::make_shared<ExecutionEngineImpl>(dimension, location, type);
execution_engine_ptr->Init();
return execution_engine_ptr;
}
#endif
}
}
......
......@@ -17,7 +17,9 @@ namespace engine {
enum class EngineType {
INVALID = 0,
FAISS_IDMAP = 1,
FAISS_IVFFLAT,
FAISS_IVFFLAT_GPU,
FAISS_IVFFLAT_CPU,
SPTAG_KDT_RNT_CPU,
};
class ExecutionEngine {
......
/*******************************************************************************
* Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#include <src/server/ServerConfig.h>
#include "Log.h"
#include "src/cache/CpuCacheMgr.h"
#include "ExecutionEngineImpl.h"
#include "wrapper/knowhere/vec_index.h"
#include "wrapper/knowhere/vec_impl.h"
namespace zilliz {
namespace milvus {
namespace engine {
struct FileIOWriter {
std::fstream fs;
std::string name;
FileIOWriter(const std::string &fname);
~FileIOWriter();
size_t operator()(void *ptr, size_t size);
};
struct FileIOReader {
std::fstream fs;
std::string name;
FileIOReader(const std::string &fname);
~FileIOReader();
size_t operator()(void *ptr, size_t size);
size_t operator()(void *ptr, size_t size, size_t pos);
};
FileIOReader::FileIOReader(const std::string &fname) {
name = fname;
fs = std::fstream(name, std::ios::in | std::ios::binary);
}
FileIOReader::~FileIOReader() {
fs.close();
}
size_t FileIOReader::operator()(void *ptr, size_t size) {
fs.read(reinterpret_cast<char *>(ptr), size);
}
size_t FileIOReader::operator()(void *ptr, size_t size, size_t pos) {
return 0;
}
FileIOWriter::FileIOWriter(const std::string &fname) {
name = fname;
fs = std::fstream(name, std::ios::out | std::ios::binary);
}
FileIOWriter::~FileIOWriter() {
fs.close();
}
size_t FileIOWriter::operator()(void *ptr, size_t size) {
fs.write(reinterpret_cast<char *>(ptr), size);
}
ExecutionEngineImpl::ExecutionEngineImpl(uint16_t dimension,
const std::string &location,
EngineType type)
: location_(location), dim(dimension), build_type(type) {
index_ = CreatetVecIndex(EngineType::FAISS_IDMAP);
current_type = EngineType::FAISS_IDMAP;
std::static_pointer_cast<BFIndex>(index_)->Build(dimension);
}
ExecutionEngineImpl::ExecutionEngineImpl(VecIndexPtr index,
const std::string &location,
EngineType type)
: index_(std::move(index)), location_(location), build_type(type) {
current_type = type;
}
VecIndexPtr ExecutionEngineImpl::CreatetVecIndex(EngineType type) {
std::shared_ptr<VecIndex> index;
switch (type) {
case EngineType::FAISS_IDMAP: {
index = GetVecIndexFactory(IndexType::FAISS_IDMAP);
break;
}
case EngineType::FAISS_IVFFLAT_GPU: {
index = GetVecIndexFactory(IndexType::FAISS_IVFFLAT_GPU);
break;
}
case EngineType::FAISS_IVFFLAT_CPU: {
index = GetVecIndexFactory(IndexType::FAISS_IVFFLAT_CPU);
break;
}
case EngineType::SPTAG_KDT_RNT_CPU: {
index = GetVecIndexFactory(IndexType::SPTAG_KDT_RNT_CPU);
break;
}
default: {
ENGINE_LOG_ERROR << "Invalid engine type";
return nullptr;
}
}
return index;
}
Status ExecutionEngineImpl::AddWithIds(long n, const float *xdata, const long *xids) {
index_->Add(n, xdata, xids, Config::object{{"dim", dim}});
return Status::OK();
}
size_t ExecutionEngineImpl::Count() const {
return index_->Count();
}
size_t ExecutionEngineImpl::Size() const {
return (size_t) (Count() * Dimension()) * sizeof(float);
}
size_t ExecutionEngineImpl::Dimension() const {
return index_->Dimension();
}
size_t ExecutionEngineImpl::PhysicalSize() const {
return (size_t) (Count() * Dimension()) * sizeof(float);
}
Status ExecutionEngineImpl::Serialize() {
auto binaryset = index_->Serialize();
FileIOWriter writer(location_);
writer(&current_type, sizeof(current_type));
for (auto &iter: binaryset.binary_map_) {
auto meta = iter.first.c_str();
size_t meta_length = iter.first.length();
writer(&meta_length, sizeof(meta_length));
writer((void *) meta, meta_length);
auto binary = iter.second;
size_t binary_length = binary->size;
writer(&binary_length, sizeof(binary_length));
writer((void *) binary->data.get(), binary_length);
}
return Status::OK();
}
Status ExecutionEngineImpl::Load() {
index_ = Load(location_);
return Status::OK();
}
VecIndexPtr ExecutionEngineImpl::Load(const std::string &location) {
knowhere::BinarySet load_data_list;
FileIOReader reader(location);
reader.fs.seekg(0, reader.fs.end);
size_t length = reader.fs.tellg();
reader.fs.seekg(0);
size_t rp = 0;
reader(&current_type, sizeof(current_type));
rp += sizeof(current_type);
while (rp < length) {
size_t meta_length;
reader(&meta_length, sizeof(meta_length));
rp += sizeof(meta_length);
reader.fs.seekg(rp);
auto meta = new char[meta_length];
reader(meta, meta_length);
rp += meta_length;
reader.fs.seekg(rp);
size_t bin_length;
reader(&bin_length, sizeof(bin_length));
rp += sizeof(bin_length);
reader.fs.seekg(rp);
auto bin = new uint8_t[bin_length];
reader(bin, bin_length);
rp += bin_length;
auto binptr = std::make_shared<uint8_t>();
binptr.reset(bin);
load_data_list.Append(std::string(meta, meta_length), binptr, bin_length);
}
auto index_type = IndexType::INVALID;
switch (current_type) {
case EngineType::FAISS_IDMAP: {
index_type = IndexType::FAISS_IDMAP;
break;
}
case EngineType::FAISS_IVFFLAT_CPU: {
index_type = IndexType::FAISS_IVFFLAT_CPU;
break;
}
case EngineType::FAISS_IVFFLAT_GPU: {
index_type = IndexType::FAISS_IVFFLAT_GPU;
break;
}
case EngineType::SPTAG_KDT_RNT_CPU: {
index_type = IndexType::SPTAG_KDT_RNT_CPU;
break;
}
default: {
ENGINE_LOG_ERROR << "wrong index_type";
return nullptr;
}
}
return LoadVecIndex(index_type, load_data_list);
}
Status ExecutionEngineImpl::Merge(const std::string &location) {
if (location == location_) {
return Status::Error("Cannot Merge Self");
}
ENGINE_LOG_DEBUG << "Merge index file: " << location << " to: " << location_;
auto to_merge = zilliz::milvus::cache::CpuCacheMgr::GetInstance()->GetIndex(location);
if (!to_merge) {
to_merge = Load(location);
}
auto file_index = std::dynamic_pointer_cast<BFIndex>(to_merge);
index_->Add(file_index->Count(), file_index->GetRawVectors(), file_index->GetRawIds());
return Status::OK();
}
// TODO(linxj): add config
ExecutionEnginePtr
ExecutionEngineImpl::BuildIndex(const std::string &location) {
ENGINE_LOG_DEBUG << "Build index file: " << location << " from: " << location_;
auto from_index = std::dynamic_pointer_cast<BFIndex>(index_);
auto to_index = CreatetVecIndex(build_type);
to_index->BuildAll(Count(),
from_index->GetRawVectors(),
from_index->GetRawIds(),
Config::object{{"dim", Dimension()}, {"gpu_id", gpu_num}});
return std::make_shared<ExecutionEngineImpl>(to_index, location, build_type);
}
Status ExecutionEngineImpl::Search(long n,
const float *data,
long k,
float *distances,
long *labels) const {
index_->Search(n, data, distances, labels, Config::object{{"k", k}, {"nprobe", nprobe_}});
return Status::OK();
}
Status ExecutionEngineImpl::Cache() {
zilliz::milvus::cache::CpuCacheMgr::GetInstance()->InsertItem(location_, index_);
return Status::OK();
}
Status ExecutionEngineImpl::Init() {
using namespace zilliz::milvus::server;
ServerConfig &config = ServerConfig::GetInstance();
ConfigNode server_config = config.GetConfig(CONFIG_SERVER);
gpu_num = server_config.GetInt32Value("gpu_index", 0);
switch (build_type) {
case EngineType::FAISS_IVFFLAT_GPU: {
}
case EngineType::FAISS_IVFFLAT_CPU: {
ConfigNode engine_config = config.GetConfig(CONFIG_ENGINE);
nprobe_ = engine_config.GetInt32Value(CONFIG_NPROBE, 1000);
break;
}
}
return Status::OK();
}
} // namespace engine
} // namespace milvus
} // namespace zilliz
/*******************************************************************************
* Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#pragma once
#include "ExecutionEngine.h"
#include "wrapper/knowhere/vec_index.h"
#include <memory>
#include <string>
namespace zilliz {
namespace milvus {
namespace engine {
class ExecutionEngineImpl : public ExecutionEngine {
public:
ExecutionEngineImpl(uint16_t dimension,
const std::string &location,
EngineType type);
ExecutionEngineImpl(VecIndexPtr index,
const std::string &location,
EngineType type);
Status AddWithIds(long n, const float *xdata, const long *xids) override;
size_t Count() const override;
size_t Size() const override;
size_t Dimension() const override;
size_t PhysicalSize() const override;
Status Serialize() override;
Status Load() override;
Status Merge(const std::string &location) override;
Status Search(long n,
const float *data,
long k,
float *distances,
long *labels) const override;
ExecutionEnginePtr BuildIndex(const std::string &) override;
Status Cache() override;
Status Init() override;
private:
VecIndexPtr CreatetVecIndex(EngineType type);
VecIndexPtr Load(const std::string &location);
protected:
VecIndexPtr index_ = nullptr;
EngineType build_type;
EngineType current_type;
int64_t dim;
std::string location_;
size_t nprobe_ = 0;
int64_t gpu_num = 0;
};
} // namespace engine
} // namespace milvus
} // namespace zilliz
......@@ -3,6 +3,7 @@
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#if 0
#include "FaissExecutionEngine.h"
#include "Log.h"
......@@ -181,3 +182,4 @@ Status FaissExecutionEngine::Init() {
} // namespace engine
} // namespace milvus
} // namespace zilliz
#endif
......@@ -5,6 +5,7 @@
******************************************************************************/
#pragma once
#if 0
#include "ExecutionEngine.h"
#include "faiss/Index.h"
......@@ -71,3 +72,4 @@ protected:
} // namespace engine
} // namespace milvus
} // namespace zilliz
#endif
......@@ -151,7 +151,7 @@ std::shared_ptr<IScheduleTask> SearchTask::Execute() {
std::vector<float> output_distence;
for(auto& context : search_contexts_) {
//step 1: allocate memory
auto inner_k = index_engine_->Count() < context->topk() ? index_engine_->Count() : context->topk();
auto inner_k = context->topk();
output_ids.resize(inner_k*context->nq());
output_distence.resize(inner_k*context->nq());
......@@ -164,7 +164,8 @@ std::shared_ptr<IScheduleTask> SearchTask::Execute() {
//step 3: cluster result
SearchContext::ResultSet result_set;
ClusterResult(output_ids, output_distence, context->nq(), inner_k, result_set);
auto spec_k = index_engine_->Count() < context->topk() ? index_engine_->Count() : context->topk();
ClusterResult(output_ids, output_distence, context->nq(), spec_k, result_set);
rc.Record("cluster result");
//step 4: pick up topk result
......
......@@ -29,7 +29,7 @@ namespace {
static std::map<int, engine::EngineType> map_type = {
{0, engine::EngineType::INVALID},
{1, engine::EngineType::FAISS_IDMAP},
{2, engine::EngineType::FAISS_IVFFLAT},
{2, engine::EngineType::FAISS_IVFFLAT_GPU},
};
if(map_type.find(type) == map_type.end()) {
......@@ -43,7 +43,7 @@ namespace {
static std::map<engine::EngineType, int> map_type = {
{engine::EngineType::INVALID, 0},
{engine::EngineType::FAISS_IDMAP, 1},
{engine::EngineType::FAISS_IVFFLAT, 2},
{engine::EngineType::FAISS_IVFFLAT_GPU, 2},
};
if(map_type.find(type) == map_type.end()) {
......
......@@ -4,6 +4,7 @@
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#if 0
// TODO: maybe support static search
#ifdef GPU_VERSION
#include "faiss/gpu/GpuAutoTune.h"
......@@ -80,3 +81,4 @@ Index_ptr read_index(const std::string &file_name) {
}
}
}
#endif
......@@ -6,23 +6,29 @@
#pragma once
#include <vector>
#include <string>
#include <unordered_map>
#include <memory>
#include <fstream>
//#include <vector>
//#include <string>
//#include <unordered_map>
//#include <memory>
//#include <fstream>
//
#include "faiss/AutoTune.h"
#include "faiss/index_io.h"
//
//#include "Operand.h"
#include "knowhere/vec_index.h"
#include "Operand.h"
namespace zilliz {
namespace milvus {
namespace engine {
class Index;
using Index_ptr = std::shared_ptr<Index>;
using Index_ptr = VecIndexPtr;
#if 0
//class Index;
//using Index_ptr = std::shared_ptr<Index>;
class Index {
typedef long idx_t;
......@@ -75,6 +81,7 @@ private:
void write_index(const Index_ptr &index, const std::string &file_name);
extern Index_ptr read_index(const std::string &file_name);
#endif
}
......
......@@ -4,6 +4,7 @@
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#if 0
#include "mutex"
......@@ -128,10 +129,8 @@ Index_ptr BgCpuBuilder::build_all(const long &nb, const float *xb, const long *i
return std::make_shared<Index>(index);
}
// TODO: Be Factory pattern later
IndexBuilderPtr GetIndexBuilder(const Operand_ptr &opd) {
if (opd->index_type == "IDMap") {
// TODO: fix hardcode
IndexBuilderPtr index = nullptr;
return std::make_shared<BgCpuBuilder>(opd);
}
......@@ -142,3 +141,4 @@ IndexBuilderPtr GetIndexBuilder(const Operand_ptr &opd) {
}
}
}
#endif
......@@ -4,6 +4,7 @@
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#if 0
#pragma once
#include "faiss/Index.h"
......@@ -64,3 +65,4 @@ extern IndexBuilderPtr GetIndexBuilder(const Operand_ptr &opd);
}
}
}
#endif
......@@ -4,6 +4,7 @@
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#if 0
#include "Operand.h"
......@@ -90,3 +91,4 @@ Operand_ptr str_to_operand(const std::string &input) {
}
}
}
#endif
......@@ -4,6 +4,7 @@
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#if 0
#pragma once
#include <string>
......@@ -42,3 +43,4 @@ extern Operand_ptr str_to_operand(const std::string &input);
}
}
}
#endif
......@@ -8,7 +8,7 @@
namespace zilliz {
namespace vecwise {
namespace milvus {
namespace engine {
using namespace zilliz::knowhere;
......
......@@ -10,7 +10,7 @@
namespace zilliz {
namespace vecwise {
namespace milvus {
namespace engine {
extern zilliz::knowhere::DatasetPtr
......
......@@ -4,18 +4,15 @@
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#include "knowhere/index/index.h"
#include "knowhere/index/index_model.h"
#include "knowhere/index/index_type.h"
#include "knowhere/adapter/sptag.h"
#include "knowhere/common/tensor.h"
#include <src/utils/Log.h>
#include "knowhere/index/vector_index/idmap.h"
#include "vec_impl.h"
#include "data_transfer.h"
namespace zilliz {
namespace vecwise {
namespace milvus {
namespace engine {
using namespace zilliz::knowhere;
......@@ -26,12 +23,14 @@ void VecIndexImpl::BuildAll(const long &nb,
const Config &cfg,
const long &nt,
const float *xt) {
auto d = cfg["dim"].as<int>();
auto dataset = GenDatasetWithIds(nb, d, xb, ids);
dim = cfg["dim"].as<int>();
auto dataset = GenDatasetWithIds(nb, dim, xb, ids);
auto preprocessor = index_->BuildPreprocessor(dataset, cfg);
index_->set_preprocessor(preprocessor);
auto model = index_->Train(dataset, cfg);
auto nlist = int(nb / 1000000.0 * 16384);
auto cfg_t = Config::object{{"nlist", nlist}, {"dim", dim}};
auto model = index_->Train(dataset, cfg_t);
index_->set_index_model(model);
index_->Add(dataset, cfg);
}
......@@ -39,7 +38,7 @@ void VecIndexImpl::BuildAll(const long &nb,
void VecIndexImpl::Add(const long &nb, const float *xb, const long *ids, const Config &cfg) {
// TODO(linxj): Assert index is trained;
auto d = cfg["dim"].as<int>();
auto d = cfg.get_with_default("dim", dim);
auto dataset = GenDatasetWithIds(nb, d, xb, ids);
index_->Add(dataset, cfg);
......@@ -48,8 +47,8 @@ void VecIndexImpl::Add(const long &nb, const float *xb, const long *ids, const C
void VecIndexImpl::Search(const long &nq, const float *xq, float *dist, long *ids, const Config &cfg) {
// TODO: Assert index is trained;
auto d = cfg["dim"].as<int>();
auto k = cfg["k"].as<int>();
auto d = cfg.get_with_default("dim", dim);
auto dataset = GenDataset(nq, d, xq);
Config search_cfg;
......@@ -75,7 +74,7 @@ void VecIndexImpl::Search(const long &nq, const float *xq, float *dist, long *id
//}
auto p_ids = ids_array->data()->GetValues<int64_t>(1, 0);
auto p_dist = ids_array->data()->GetValues<float>(1, 0);
auto p_dist = dis_array->data()->GetValues<float>(1, 0);
// TODO(linxj): avoid copy here.
memcpy(ids, p_ids, sizeof(int64_t) * nq * k);
......@@ -88,6 +87,43 @@ zilliz::knowhere::BinarySet VecIndexImpl::Serialize() {
void VecIndexImpl::Load(const zilliz::knowhere::BinarySet &index_binary) {
index_->Load(index_binary);
dim = Dimension();
}
int64_t VecIndexImpl::Dimension() {
return index_->Dimension();
}
int64_t VecIndexImpl::Count() {
return index_->Count();
}
float *BFIndex::GetRawVectors() {
auto raw_index = std::dynamic_pointer_cast<IDMAP>(index_);
if (raw_index) { return raw_index->GetRawVectors(); }
return nullptr;
}
int64_t *BFIndex::GetRawIds() {
return std::static_pointer_cast<IDMAP>(index_)->GetRawIds();
}
void BFIndex::Build(const int64_t &d) {
dim = d;
std::static_pointer_cast<IDMAP>(index_)->Train(dim);
}
void BFIndex::BuildAll(const long &nb,
const float *xb,
const long *ids,
const Config &cfg,
const long &nt,
const float *xt) {
dim = cfg["dim"].as<int>();
auto dataset = GenDatasetWithIds(nb, dim, xb, ids);
std::static_pointer_cast<IDMAP>(index_)->Train(dim);
index_->Add(dataset, cfg);
}
}
......
......@@ -12,7 +12,7 @@
namespace zilliz {
namespace vecwise {
namespace milvus {
namespace engine {
class VecIndexImpl : public VecIndex {
......@@ -24,15 +24,32 @@ class VecIndexImpl : public VecIndex {
const Config &cfg,
const long &nt,
const float *xt) override;
int64_t Dimension() override;
int64_t Count() override;
void Add(const long &nb, const float *xb, const long *ids, const Config &cfg) override;
zilliz::knowhere::BinarySet Serialize() override;
void Load(const zilliz::knowhere::BinarySet &index_binary) override;
void Search(const long &nq, const float *xq, float *dist, long *ids, const Config &cfg) override;
private:
protected:
int64_t dim = 0;
std::shared_ptr<zilliz::knowhere::VectorIndex> index_ = nullptr;
};
class BFIndex : public VecIndexImpl {
public:
explicit BFIndex(std::shared_ptr<zilliz::knowhere::VectorIndex> index) : VecIndexImpl(std::move(index)) {};
void Build(const int64_t& d);
float* GetRawVectors();
void BuildAll(const long &nb,
const float *xb,
const long *ids,
const Config &cfg,
const long &nt,
const float *xt) override;
int64_t* GetRawIds();
};
}
}
}
......@@ -4,6 +4,7 @@
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#include "knowhere/index/vector_index/ivf.h"
#include "knowhere/index/vector_index/idmap.h"
#include "knowhere/index/vector_index/gpu_ivf.h"
#include "knowhere/index/vector_index/cpu_kdt_rng.h"
......@@ -12,27 +13,49 @@
namespace zilliz {
namespace vecwise {
namespace milvus {
namespace engine {
// TODO(linxj): index_type => enum struct
VecIndexPtr GetVecIndexFactory(const std::string &index_type) {
VecIndexPtr GetVecIndexFactory(const IndexType &type) {
std::shared_ptr<zilliz::knowhere::VectorIndex> index;
if (index_type == "IVF") {
index = std::make_shared<zilliz::knowhere::IVF>();
} else if (index_type == "GPUIVF") {
index = std::make_shared<zilliz::knowhere::GPUIVF>(0);
} else if (index_type == "SPTAG") {
index = std::make_shared<zilliz::knowhere::CPUKDTRNG>();
switch (type) {
case IndexType::FAISS_IDMAP: {
index = std::make_shared<zilliz::knowhere::IDMAP>();
return std::make_shared<BFIndex>(index);
}
case IndexType::FAISS_IVFFLAT_CPU: {
index = std::make_shared<zilliz::knowhere::IVF>();
break;
}
case IndexType::FAISS_IVFFLAT_GPU: {
index = std::make_shared<zilliz::knowhere::GPUIVF>(0);
break;
}
case IndexType::FAISS_IVFPQ_CPU: {
index = std::make_shared<zilliz::knowhere::IVFPQ>();
break;
}
case IndexType::FAISS_IVFPQ_GPU: {
index = std::make_shared<zilliz::knowhere::GPUIVFPQ>(0);
break;
}
case IndexType::SPTAG_KDT_RNT_CPU: {
index = std::make_shared<zilliz::knowhere::CPUKDTRNG>();
break;
}
//case IndexType::NSG: { // TODO(linxj): bug.
// index = std::make_shared<zilliz::knowhere::NSG>();
// break;
//}
default: {
return nullptr;
}
}
// TODO(linxj): Support NSG
//else if (index_type == "NSG") {
// index = std::make_shared<zilliz::knowhere::NSG>();
//}
return std::make_shared<VecIndexImpl>(index);
}
VecIndexPtr LoadVecIndex(const std::string &index_type, const zilliz::knowhere::BinarySet &index_binary) {
VecIndexPtr LoadVecIndex(const IndexType &index_type, const zilliz::knowhere::BinarySet &index_binary) {
auto index = GetVecIndexFactory(index_type);
index->Load(index_binary);
return index;
......
......@@ -14,7 +14,7 @@
namespace zilliz {
namespace vecwise {
namespace milvus {
namespace engine {
// TODO(linxj): jsoncons => rapidjson or other.
......@@ -40,6 +40,10 @@ class VecIndex {
long *ids,
const Config &cfg = Config()) = 0;
virtual int64_t Dimension() = 0;
virtual int64_t Count() = 0;
virtual zilliz::knowhere::BinarySet Serialize() = 0;
virtual void Load(const zilliz::knowhere::BinarySet &index_binary) = 0;
......@@ -47,9 +51,20 @@ class VecIndex {
using VecIndexPtr = std::shared_ptr<VecIndex>;
extern VecIndexPtr GetVecIndexFactory(const std::string &index_type);
enum class IndexType {
INVALID = 0,
FAISS_IDMAP = 1,
FAISS_IVFFLAT_CPU,
FAISS_IVFFLAT_GPU,
FAISS_IVFPQ_CPU,
FAISS_IVFPQ_GPU,
SPTAG_KDT_RNT_CPU,
//NSG,
};
extern VecIndexPtr GetVecIndexFactory(const IndexType &type);
extern VecIndexPtr LoadVecIndex(const std::string &index_type, const zilliz::knowhere::BinarySet &index_binary);
extern VecIndexPtr LoadVecIndex(const IndexType &index_type, const zilliz::knowhere::BinarySet &index_binary);
}
}
......
knowhere @ 3a30677b
Subproject commit 844e600834df1eeafc6c7e5936338ae964bd1d41
Subproject commit 3a30677b8ab105955534922d1677e8fa99ef0406
......@@ -38,7 +38,7 @@ set(unittest_libs
${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so
)
add_subdirectory(server)
#add_subdirectory(server)
add_subdirectory(db)
add_subdirectory(index_wrapper)
#add_subdirectory(faiss_wrapper)
......
......@@ -6,7 +6,8 @@
aux_source_directory(${MILVUS_ENGINE_SRC}/db db_srcs)
aux_source_directory(${MILVUS_ENGINE_SRC}/config config_files)
aux_source_directory(${MILVUS_ENGINE_SRC}/cache cache_srcs)
aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper wrapper_src)
#aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper wrapper_src)
aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper/knowhere knowhere_src)
aux_source_directory(./ test_srcs)
aux_source_directory(${MILVUS_ENGINE_SRC}/db/scheduler scheduler_files)
......@@ -30,25 +31,34 @@ set(db_test_src
${db_srcs}
${db_scheduler_srcs}
${wrapper_src}
${knowhere_src}
${require_files}
${test_srcs})
${test_srcs}
)
cuda_add_executable(db_test ${db_test_src})
set(db_libs
sqlite3
boost_system
boost_filesystem
lz4
mysqlpp
)
set(knowhere_libs
knowhere
SPTAGLibStatic
arrow
jemalloc_pic
faiss
openblas
lapack
tbb
cudart
cublas
sqlite3
boost_system
boost_filesystem
lz4
mysqlpp
)
target_link_libraries(db_test ${db_libs} ${unittest_libs})
target_link_libraries(db_test ${db_libs} ${unittest_libs} ${knowhere_libs})
install(TARGETS db_test DESTINATION bin)
......@@ -26,32 +26,32 @@ namespace {
}
TEST(DBMiscTest, ENGINE_API_TEST) {
//engine api AddWithIdArray
const uint16_t dim = 512;
const long n = 10;
engine::FaissExecutionEngine engine(512, "/tmp/1", "IDMap", "IDMap,Flat");
std::vector<float> vectors;
std::vector<long> ids;
for (long i = 0; i < n; i++) {
for (uint16_t k = 0; k < dim; k++) {
vectors.push_back((float) k);
}
ids.push_back(i);
}
auto status = engine.AddWithIdArray(vectors, ids);
ASSERT_TRUE(status.ok());
auto engine_ptr = engine::EngineFactory::Build(128, "/tmp", engine::EngineType::INVALID);
ASSERT_EQ(engine_ptr, nullptr);
engine_ptr = engine::EngineFactory::Build(128, "/tmp", engine::EngineType::FAISS_IVFFLAT);
ASSERT_NE(engine_ptr, nullptr);
engine_ptr = engine::EngineFactory::Build(128, "/tmp", engine::EngineType::FAISS_IDMAP);
ASSERT_NE(engine_ptr, nullptr);
}
//TEST(DBMiscTest, ENGINE_API_TEST) {
// //engine api AddWithIdArray
// const uint16_t dim = 512;
// const long n = 10;
// engine::FaissExecutionEngine engine(512, "/tmp/1", "IDMap", "IDMap,Flat");
// std::vector<float> vectors;
// std::vector<long> ids;
// for (long i = 0; i < n; i++) {
// for (uint16_t k = 0; k < dim; k++) {
// vectors.push_back((float) k);
// }
// ids.push_back(i);
// }
//
// auto status = engine.AddWithIdArray(vectors, ids);
// ASSERT_TRUE(status.ok());
//
// auto engine_ptr = engine::EngineFactory::Build(128, "/tmp", engine::EngineType::INVALID);
// ASSERT_EQ(engine_ptr, nullptr);
//
// engine_ptr = engine::EngineFactory::Build(128, "/tmp", engine::EngineType::FAISS_IVFFLAT_GPU);
// ASSERT_NE(engine_ptr, nullptr);
//
// engine_ptr = engine::EngineFactory::Build(128, "/tmp", engine::EngineType::FAISS_IDMAP);
// ASSERT_NE(engine_ptr, nullptr);
//}
TEST(DBMiscTest, EXCEPTION_TEST) {
engine::Exception ex1("");
......
......@@ -11,7 +11,7 @@
#include "utils.h"
using namespace zilliz::vecwise::engine;
using namespace zilliz::milvus::engine;
using namespace zilliz::knowhere;
using ::testing::TestWithParam;
......@@ -20,7 +20,7 @@ using ::testing::Combine;
class KnowhereWrapperTest
: public TestWithParam<::std::tuple<std::string, std::string, int, int, int, int, Config, Config>> {
: public TestWithParam<::std::tuple<IndexType, std::string, int, int, int, int, Config, Config>> {
protected:
void SetUp() override {
std::string generator_type;
......@@ -34,7 +34,7 @@ class KnowhereWrapperTest
}
protected:
std::string index_type;
IndexType index_type;
Config train_cfg;
Config search_cfg;
......@@ -55,12 +55,12 @@ class KnowhereWrapperTest
INSTANTIATE_TEST_CASE_P(WrapperParam, KnowhereWrapperTest,
Values(
// ["Index type", "Generator type", "dim", "nb", "nq", "k", "build config", "search config"]
std::make_tuple("IVF", "Default",
std::make_tuple(IndexType::FAISS_IVFFLAT_CPU, "Default",
64, 10000, 10, 10,
Config::object{{"nlist", 100}, {"dim", 64}},
Config::object{{"dim", 64}, {"k", 10}, {"nprobe", 20}}
),
std::make_tuple("SPTAG", "Default",
std::make_tuple(IndexType::SPTAG_KDT_RNT_CPU, "Default",
64, 10000, 10, 10,
Config::object{{"TPTNumber", 1}, {"dim", 64}},
Config::object{{"dim", 64}, {"k", 10}}
......@@ -113,16 +113,39 @@ TEST_P(KnowhereWrapperTest, serialize_test) {
{
auto binaryset = index_->Serialize();
//int fileno = 0;
//const std::string &base_name = "/tmp/wrapper_serialize_test_bin_";
//std::vector<std::string> filename_list;
//std::vector<std::pair<std::string, size_t >> meta_list;
//for (auto &iter: binaryset.binary_map_) {
// const std::string &filename = base_name + std::to_string(fileno);
// FileIOWriter writer(filename);
// writer(iter.second->data.get(), iter.second->size);
//
// meta_list.push_back(std::make_pair(iter.first, iter.second.size));
// filename_list.push_back(filename);
// ++fileno;
//}
//
//BinarySet load_data_list;
//for (int i = 0; i < filename_list.size() && i < meta_list.size(); ++i) {
// auto bin_size = meta_list[i].second;
// FileIOReader reader(filename_list[i]);
// std::vector<uint8_t> load_data(bin_size);
// reader(load_data.data(), bin_size);
// load_data_list.Append(meta_list[i].first, load_data);
//}
int fileno = 0;
const std::string &base_name = "/tmp/wrapper_serialize_test_bin_";
std::vector<std::string> filename_list;
const std::string &base_name = "/tmp/wrapper_serialize_test_bin_";
std::vector<std::pair<std::string, size_t >> meta_list;
for (auto &iter: binaryset.binary_map_) {
const std::string &filename = base_name + std::to_string(fileno);
FileIOWriter writer(filename);
writer(iter.second.data, iter.second.size);
writer(iter.second->data.get(), iter.second->size);
meta_list.push_back(std::make_pair(iter.first, iter.second.size));
meta_list.emplace_back(std::make_pair(iter.first, iter.second->size));
filename_list.push_back(filename);
++fileno;
}
......@@ -131,9 +154,12 @@ TEST_P(KnowhereWrapperTest, serialize_test) {
for (int i = 0; i < filename_list.size() && i < meta_list.size(); ++i) {
auto bin_size = meta_list[i].second;
FileIOReader reader(filename_list[i]);
std::vector<uint8_t> load_data(bin_size);
reader(load_data.data(), bin_size);
load_data_list.Append(meta_list[i].first, load_data);
auto load_data = new uint8_t[bin_size];
reader(load_data, bin_size);
auto data = std::make_shared<uint8_t>();
data.reset(load_data);
load_data_list.Append(meta_list[i].first, data, bin_size);
}
......
......@@ -12,11 +12,12 @@ include_directories(../../src)
aux_source_directory(../../src/db db_srcs)
aux_source_directory(../../src/config config_files)
aux_source_directory(../../src/cache cache_srcs)
aux_source_directory(../../src/wrapper wrapper_src)
aux_source_directory(../../src/metrics metrics_src)
aux_source_directory(${MILVUS_ENGINE_SRC}/db db_srcs)
aux_source_directory(${MILVUS_ENGINE_SRC}/config config_files)
aux_source_directory(${MILVUS_ENGINE_SRC}/cache cache_srcs)
aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper wrapper_src)
aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper/knowhere knowhere_src)
aux_source_directory(${MILVUS_ENGINE_SRC}/src/metrics metrics_src)
aux_source_directory(./ test_srcs)
aux_source_directory(${MILVUS_ENGINE_SRC}/db/scheduler scheduler_files)
......@@ -54,18 +55,26 @@ set(count_test_src
${db_srcs}
${db_scheduler_srcs}
${wrapper_src}
${knowhere_src}
${metrics_src}
${test_srcs}
)
add_executable(metrics_test ${count_test_src} ${require_files} )
target_link_libraries(metrics_test
set(knowhere_libs
knowhere
SPTAGLibStatic
arrow
jemalloc_pic
faiss
openblas
lapack
tbb
)
target_link_libraries(metrics_test
${knowhere_libs}
cudart
cublas
sqlite3
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册