提交 ecedf749 编写于 作者: J jinhai

Merge branch 'branch-0.3.1-yuncong' into 'branch-0.3.1'

Merge: Add new memory mgr and validation

See merge request megasearch/milvus!200

Former-commit-id: aacab994009e7337f4429a5cf9efe4c690ac0240
......@@ -16,3 +16,4 @@ Please mark all change in change log and use the ticket from JIRA.
- MS-1 - Add CHANGELOG.md
- MS-161 - Add CI / CD Module to Milvus Project
- MS-202 - Add Milvus Jenkins project email notification
......@@ -11,6 +11,7 @@ Please mark all change in change log and use the ticket from JIRA.
- MS-149 - Fixed searching only one index file issue in distributed mode
- MS-153 - fix c_str error when connecting to MySQL
- MS-157 - fix changelog
- MS-190 - use env variable to switch mem manager and fix cmake
## Improvement
- MS-156 - Add unittest for merge result functions
......@@ -23,6 +24,9 @@ Please mark all change in change log and use the ticket from JIRA.
## New Feature
- MS-195 - Add nlist and use_blas_threshold conf
## New Feature
- MS-180 - Add new mem manager
## Task
- MS-125 - Create 0.3.1 release branch
......@@ -76,6 +80,7 @@ Please mark all change in change log and use the ticket from JIRA.
- MS-144 - Add nprobe config
- MS-147 - Enable IVF
- MS-130 - Add prometheus_test
## Task
- MS-74 - Change README.md in cpp
- MS-88 - Add support for arm architecture
......
......@@ -525,7 +525,7 @@ if(MILVUS_BOOST_VENDORED)
""
${EP_LOG_OPTIONS})
set(Boost_INCLUDE_DIR "${BOOST_PREFIX}")
set(Boost_INCLUDE_DIRS "${BOOST_INCLUDE_DIR}")
set(Boost_INCLUDE_DIRS "${Boost_INCLUDE_DIR}")
add_dependencies(boost_system_static boost_ep)
add_dependencies(boost_filesystem_static boost_ep)
add_dependencies(boost_serialization_static boost_ep)
......
......@@ -16,6 +16,8 @@ db_config:
index_building_threshold: 1024 # index building trigger threshold, default: 1024, unit: MB
archive_disk_threshold: 512 # trigger archive action if storage size exceeds this value, unit: GB
archive_days_threshold: 30 # files older than x days will be archived, unit: day
maximum_memory: 4 # maximum memory allowed, default: 4, unit: GB, should be at least 1 GB.
# the sum of maximum_memory and cpu_cache_capacity should be less than total memory
metric_config:
is_startup: off # if monitoring start: on, off
......@@ -37,4 +39,4 @@ engine_config:
nprobe: 10
nlist: 16384
use_blas_threshold: 20
metric_type: L2 #L2 or Inner Product
\ No newline at end of file
metric_type: L2 #L2 or Inner Product
......@@ -13,6 +13,27 @@ DIR_LCOV_OUTPUT="lcov_out"
DIR_GCNO="cmake_build"
DIR_UNITTEST="milvus/bin"
MYSQL_USER_NAME=root
MYSQL_PASSWORD=Fantast1c
MYSQL_HOST='192.168.1.194'
MYSQL_PORT='3306'
MYSQL_DB_NAME=milvus_`date +%s%N`
# Run a single SQL statement against the configured MySQL server.
# Arguments:
#   $1 - SQL statement handed to `mysql -e`
# Returns:
#   the exit status of the mysql client (0 on success), so callers can react
#   to a failed statement instead of always seeing success.
function mysql_exc()
{
    local cmd="$1"
    # Quote every expansion so hosts, passwords or statements with spaces or
    # shell metacharacters are passed through unchanged; MYSQL_PORT is honoured
    # instead of being silently ignored.
    mysql -h"${MYSQL_HOST}" -P"${MYSQL_PORT}" -u"${MYSQL_USER_NAME}" -p"${MYSQL_PASSWORD}" -e "${cmd}"
    local rc=$?
    if [ ${rc} -ne 0 ]; then
        echo "mysql $cmd run failed"
    fi
    return ${rc}
}
mysql_exc "CREATE DATABASE IF NOT EXISTS ${MYSQL_DB_NAME};"
mysql_exc "GRANT ALL PRIVILEGES ON ${MYSQL_DB_NAME}.* TO '${MYSQL_USER_NAME}'@'%';"
mysql_exc "FLUSH PRIVILEGES;"
mysql_exc "USE ${MYSQL_DB_NAME};"
MYSQL_USER_NAME=root
MYSQL_PASSWORD=Fantast1c
......
......@@ -189,4 +189,4 @@ install(FILES
${CMAKE_BINARY_DIR}/mysqlpp_ep-prefix/src/mysqlpp_ep/lib/${CMAKE_SHARED_LIBRARY_PREFIX}mysqlpp${CMAKE_SHARED_LIBRARY_SUFFIX}.3.2.4
DESTINATION lib) #need to copy libmysqlpp.so
add_subdirectory(sdk)
#add_subdirectory(sdk)
/*******************************************************************************
* Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#pragma once
namespace zilliz {
namespace milvus {
namespace engine {
// Binary size multipliers, in bytes.
constexpr size_t K = 1024UL;
constexpr size_t M = 1024UL * K;
constexpr size_t G = 1024UL * M;
constexpr size_t T = 1024UL * G;

// Upper bound on the memory a single in-memory table file may hold.
constexpr size_t MAX_TABLE_FILE_MEM = 128UL * M;

// Size in bytes of one vector component (vectors are stored as float).
constexpr int VECTOR_TYPE_SIZE = sizeof(float);
} // namespace engine
} // namespace milvus
} // namespace zilliz
......@@ -87,8 +87,7 @@ DBImpl::DBImpl(const Options& options)
compact_thread_pool_(1, 1),
index_thread_pool_(1, 1) {
meta_ptr_ = DBMetaImplFactory::Build(options.meta, options.mode);
mem_mgr_ = std::make_shared<MemManager>(meta_ptr_, options_);
// mem_mgr_ = (MemManagerPtr)(new MemManager(meta_ptr_, options_));
mem_mgr_ = MemManagerFactory::Build(meta_ptr_, options_);
if (options.mode != Options::MODE::READ_ONLY) {
StartTimerTasks();
}
......
......@@ -9,6 +9,7 @@
#include "MemManager.h"
#include "Types.h"
#include "utils/ThreadPool.h"
#include "MemManagerAbstract.h"
#include <mutex>
#include <condition_variable>
......@@ -33,7 +34,6 @@ class Meta;
class DBImpl : public DB {
public:
using MetaPtr = meta::Meta::Ptr;
using MemManagerPtr = typename MemManager::Ptr;
explicit DBImpl(const Options &options);
......@@ -127,7 +127,7 @@ class DBImpl : public DB {
std::thread bg_timer_thread_;
MetaPtr meta_ptr_;
MemManagerPtr mem_mgr_;
MemManagerAbstractPtr mem_mgr_;
server::ThreadPool compact_thread_pool_;
std::list<std::future<void>> compact_thread_results_;
......
......@@ -3,10 +3,14 @@
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#include <stdlib.h>
#include "Factories.h"
#include "DBImpl.h"
#include "MemManager.h"
#include "NewMemManager.h"
#include "Exception.h"
#include <stdlib.h>
#include <time.h>
#include <sstream>
#include <iostream>
......@@ -14,7 +18,9 @@
#include <assert.h>
#include <easylogging++.h>
#include <regex>
#include "Exception.h"
#include <cstdlib>
#include <string>
#include <algorithm>
namespace zilliz {
namespace milvus {
......@@ -72,17 +78,14 @@ std::shared_ptr<meta::Meta> DBMetaImplFactory::Build(const DBMetaOptions& metaOp
if (dialect.find("mysql") != std::string::npos) {
ENGINE_LOG_INFO << "Using MySQL";
return std::make_shared<meta::MySQLMetaImpl>(meta::MySQLMetaImpl(metaOptions, mode));
}
else if (dialect.find("sqlite") != std::string::npos) {
} else if (dialect.find("sqlite") != std::string::npos) {
ENGINE_LOG_INFO << "Using SQLite";
return std::make_shared<meta::DBMetaImpl>(meta::DBMetaImpl(metaOptions));
}
else {
} else {
ENGINE_LOG_ERROR << "Invalid dialect in URI: dialect = " << dialect;
throw InvalidArgumentException("URI dialect is not mysql / sqlite");
}
}
else {
} else {
ENGINE_LOG_ERROR << "Wrong URI format: URI = " << uri;
throw InvalidArgumentException("Wrong URI format ");
}
......@@ -98,6 +101,21 @@ DB* DBFactory::Build(const Options& options) {
return new DBImpl(options);
}
/**
 * Build the memory manager used by the DB.
 *
 * The legacy MemManager is selected only when the environment variable
 * MILVUS_USE_OLD_MEM_MANAGER is set to "ON" (case-insensitive); in every
 * other case a NewMemManager is returned.
 *
 * @param meta    meta store handed to the manager
 * @param options DB options handed to the manager
 * @return the selected memory manager
 */
MemManagerAbstractPtr MemManagerFactory::Build(const std::shared_ptr<meta::Meta>& meta,
                                               const Options& options) {
    if (const char* env = getenv("MILVUS_USE_OLD_MEM_MANAGER")) {
        std::string env_str = env;
        // toupper() is undefined for negative char values, so route every
        // character through unsigned char before converting.
        std::transform(env_str.begin(), env_str.end(), env_str.begin(),
                       [](unsigned char c) { return static_cast<char>(::toupper(c)); });
        if (env_str == "ON") {
            return std::make_shared<MemManager>(meta, options);
        }
    }
    return std::make_shared<NewMemManager>(meta, options);
}
} // namespace engine
} // namespace milvus
} // namespace zilliz
......@@ -10,16 +10,18 @@
#include "MySQLMetaImpl.h"
#include "Options.h"
#include "ExecutionEngine.h"
#include "MemManagerAbstract.h"
#include <string>
#include <memory>
namespace zilliz {
namespace milvus {
namespace engine {
struct DBMetaOptionsFactory {
static DBMetaOptions Build(const std::string& path = "");
static DBMetaOptions Build(const std::string &path = "");
};
struct OptionsFactory {
......@@ -28,12 +30,16 @@ struct OptionsFactory {
struct DBMetaImplFactory {
static std::shared_ptr<meta::DBMetaImpl> Build();
static std::shared_ptr<meta::Meta> Build(const DBMetaOptions& metaOptions, const int& mode);
static std::shared_ptr<meta::Meta> Build(const DBMetaOptions &metaOptions, const int &mode);
};
struct DBFactory {
static std::shared_ptr<DB> Build();
static DB* Build(const Options&);
static DB *Build(const Options &);
};
// Factory producing the memory manager used by the DB implementation.
struct MemManagerFactory {
    // Builds a memory manager from the given meta store and options.
    static MemManagerAbstractPtr Build(const std::shared_ptr<meta::Meta> &meta,
                                       const Options &options);
};
} // namespace engine
......
......@@ -8,28 +8,30 @@
#include "MetaConsts.h"
#include "EngineFactory.h"
#include "metrics/Metrics.h"
#include "Log.h"
#include <iostream>
#include <sstream>
#include <thread>
#include <easylogging++.h>
namespace zilliz {
namespace milvus {
namespace engine {
MemVectors::MemVectors(const std::shared_ptr<meta::Meta>& meta_ptr,
const meta::TableFileSchema& schema, const Options& options)
: meta_(meta_ptr),
options_(options),
schema_(schema),
id_generator_(new SimpleIDGenerator()),
active_engine_(EngineFactory::Build(schema_.dimension_, schema_.location_, (EngineType)schema_.engine_type_)) {
MemVectors::MemVectors(const std::shared_ptr<meta::Meta> &meta_ptr,
const meta::TableFileSchema &schema, const Options &options)
: meta_(meta_ptr),
options_(options),
schema_(schema),
id_generator_(new SimpleIDGenerator()),
active_engine_(EngineFactory::Build(schema_.dimension_, schema_.location_, (EngineType) schema_.engine_type_)) {
}
Status MemVectors::Add(size_t n_, const float* vectors_, IDNumbers& vector_ids_) {
if(active_engine_ == nullptr) {
Status MemVectors::Add(size_t n_, const float *vectors_, IDNumbers &vector_ids_) {
if (active_engine_ == nullptr) {
return Status::Error("index engine is null");
}
......@@ -38,13 +40,15 @@ Status MemVectors::Add(size_t n_, const float* vectors_, IDNumbers& vector_ids_)
Status status = active_engine_->AddWithIds(n_, vectors_, vector_ids_.data());
auto end_time = METRICS_NOW_TIME;
auto total_time = METRICS_MICROSECONDS(start_time, end_time);
server::Metrics::GetInstance().AddVectorsPerSecondGaugeSet(static_cast<int>(n_), static_cast<int>(schema_.dimension_), total_time);
server::Metrics::GetInstance().AddVectorsPerSecondGaugeSet(static_cast<int>(n_),
static_cast<int>(schema_.dimension_),
total_time);
return status;
}
size_t MemVectors::RowCount() const {
if(active_engine_ == nullptr) {
if (active_engine_ == nullptr) {
return 0;
}
......@@ -52,15 +56,15 @@ size_t MemVectors::RowCount() const {
}
size_t MemVectors::Size() const {
if(active_engine_ == nullptr) {
if (active_engine_ == nullptr) {
return 0;
}
return active_engine_->Size();
}
Status MemVectors::Serialize(std::string& table_id) {
if(active_engine_ == nullptr) {
Status MemVectors::Serialize(std::string &table_id) {
if (active_engine_ == nullptr) {
return Status::Error("index engine is null");
}
......@@ -72,15 +76,16 @@ Status MemVectors::Serialize(std::string& table_id) {
auto total_time = METRICS_MICROSECONDS(start_time, end_time);
schema_.size_ = size;
server::Metrics::GetInstance().DiskStoreIOSpeedGaugeSet(size/total_time);
server::Metrics::GetInstance().DiskStoreIOSpeedGaugeSet(size / total_time);
schema_.file_type_ = (size >= options_.index_trigger_size) ?
meta::TableFileSchema::TO_INDEX : meta::TableFileSchema::RAW;
meta::TableFileSchema::TO_INDEX : meta::TableFileSchema::RAW;
auto status = meta_->UpdateTableFile(schema_);
LOG(DEBUG) << "New " << ((schema_.file_type_ == meta::TableFileSchema::RAW) ? "raw" : "to_index")
<< " file " << schema_.file_id_ << " of size " << (double)(active_engine_->Size()) / (double)meta::M << " M";
<< " file " << schema_.file_id_ << " of size " << (double) (active_engine_->Size()) / (double) meta::M
<< " M";
active_engine_->Cache();
......@@ -98,7 +103,7 @@ MemVectors::~MemVectors() {
* MemManager
*/
MemManager::MemVectorsPtr MemManager::GetMemByTable(
const std::string& table_id) {
const std::string &table_id) {
auto memIt = mem_id_map_.find(table_id);
if (memIt != mem_id_map_.end()) {
return memIt->second;
......@@ -115,26 +120,31 @@ MemManager::MemVectorsPtr MemManager::GetMemByTable(
return mem_id_map_[table_id];
}
Status MemManager::InsertVectors(const std::string& table_id_,
size_t n_,
const float* vectors_,
IDNumbers& vector_ids_) {
Status MemManager::InsertVectors(const std::string &table_id_,
size_t n_,
const float *vectors_,
IDNumbers &vector_ids_) {
LOG(DEBUG) << "MemManager::InsertVectors: mutable mem = " << GetCurrentMutableMem() <<
", immutable mem = " << GetCurrentImmutableMem() << ", total mem = " << GetCurrentMem();
std::unique_lock<std::mutex> lock(mutex_);
return InsertVectorsNoLock(table_id_, n_, vectors_, vector_ids_);
}
Status MemManager::InsertVectorsNoLock(const std::string& table_id,
size_t n,
const float* vectors,
IDNumbers& vector_ids) {
Status MemManager::InsertVectorsNoLock(const std::string &table_id,
size_t n,
const float *vectors,
IDNumbers &vector_ids) {
MemVectorsPtr mem = GetMemByTable(table_id);
if (mem == nullptr) {
return Status::NotFound("Group " + table_id + " not found!");
}
//makesure each file size less than index_trigger_size
if(mem->Size() > options_.index_trigger_size) {
if (mem->Size() > options_.index_trigger_size) {
std::unique_lock<std::mutex> lock(serialization_mtx_);
immu_mem_list_.push_back(mem);
mem_id_map_.erase(table_id);
......@@ -147,8 +157,8 @@ Status MemManager::InsertVectorsNoLock(const std::string& table_id,
Status MemManager::ToImmutable() {
std::unique_lock<std::mutex> lock(mutex_);
MemIdMap temp_map;
for (auto& kv: mem_id_map_) {
if(kv.second->RowCount() == 0) {
for (auto &kv: mem_id_map_) {
if (kv.second->RowCount() == 0) {
temp_map.insert(kv);
continue;//empty vector, no need to serialize
}
......@@ -159,12 +169,12 @@ Status MemManager::ToImmutable() {
return Status::OK();
}
Status MemManager::Serialize(std::set<std::string>& table_ids) {
Status MemManager::Serialize(std::set<std::string> &table_ids) {
ToImmutable();
std::unique_lock<std::mutex> lock(serialization_mtx_);
std::string table_id;
table_ids.clear();
for (auto& mem : immu_mem_list_) {
for (auto &mem : immu_mem_list_) {
mem->Serialize(table_id);
table_ids.insert(table_id);
}
......@@ -172,7 +182,7 @@ Status MemManager::Serialize(std::set<std::string>& table_ids) {
return Status::OK();
}
Status MemManager::EraseMemVector(const std::string& table_id) {
Status MemManager::EraseMemVector(const std::string &table_id) {
{//erase MemVector from rapid-insert cache
std::unique_lock<std::mutex> lock(mutex_);
mem_id_map_.erase(table_id);
......@@ -181,8 +191,8 @@ Status MemManager::EraseMemVector(const std::string& table_id) {
{//erase MemVector from serialize cache
std::unique_lock<std::mutex> lock(serialization_mtx_);
MemList temp_list;
for (auto& mem : immu_mem_list_) {
if(mem->TableId() != table_id) {
for (auto &mem : immu_mem_list_) {
if (mem->TableId() != table_id) {
temp_list.push_back(mem);
}
}
......@@ -192,6 +202,26 @@ Status MemManager::EraseMemVector(const std::string& table_id) {
return Status::OK();
}
// Sums Size() over every entry of the mutable (per-table) map.
// NOTE(review): the map is read here without taking mutex_ — confirm callers tolerate that.
size_t MemManager::GetCurrentMutableMem() {
    size_t mutable_bytes = 0;
    for (auto it = mem_id_map_.begin(); it != mem_id_map_.end(); ++it) {
        mutable_bytes += it->second->Size();
    }
    return mutable_bytes;
}
// Sums Size() over every entry queued in the immutable list.
size_t MemManager::GetCurrentImmutableMem() {
    size_t immutable_bytes = 0;
    for (auto it = immu_mem_list_.begin(); it != immu_mem_list_.end(); ++it) {
        immutable_bytes += (*it)->Size();
    }
    return immutable_bytes;
}
// Total memory tracked by this manager: mutable plus immutable.
size_t MemManager::GetCurrentMem() {
    const size_t mutable_bytes = GetCurrentMutableMem();
    const size_t immutable_bytes = GetCurrentImmutableMem();
    return mutable_bytes + immutable_bytes;
}
} // namespace engine
} // namespace milvus
......
......@@ -9,6 +9,7 @@
#include "IDGenerator.h"
#include "Status.h"
#include "Meta.h"
#include "MemManagerAbstract.h"
#include <map>
#include <string>
......@@ -17,72 +18,79 @@
#include <mutex>
#include <set>
namespace zilliz {
namespace milvus {
namespace engine {
namespace meta {
class Meta;
class Meta;
}
class MemVectors {
public:
public:
using MetaPtr = meta::Meta::Ptr;
using Ptr = std::shared_ptr<MemVectors>;
explicit MemVectors(const std::shared_ptr<meta::Meta>&,
const meta::TableFileSchema&, const Options&);
explicit MemVectors(const std::shared_ptr<meta::Meta> &,
const meta::TableFileSchema &, const Options &);
Status Add(size_t n_, const float* vectors_, IDNumbers& vector_ids_);
Status Add(size_t n_, const float *vectors_, IDNumbers &vector_ids_);
size_t RowCount() const;
size_t Size() const;
Status Serialize(std::string& table_id);
Status Serialize(std::string &table_id);
~MemVectors();
const std::string& Location() const { return schema_.location_; }
const std::string &Location() const { return schema_.location_; }
std::string TableId() const { return schema_.table_id_; }
private:
private:
MemVectors() = delete;
MemVectors(const MemVectors&) = delete;
MemVectors& operator=(const MemVectors&) = delete;
MemVectors(const MemVectors &) = delete;
MemVectors &operator=(const MemVectors &) = delete;
MetaPtr meta_;
Options options_;
meta::TableFileSchema schema_;
IDGenerator* id_generator_;
IDGenerator *id_generator_;
ExecutionEnginePtr active_engine_;
}; // MemVectors
class MemManager {
public:
class MemManager : public MemManagerAbstract {
public:
using MetaPtr = meta::Meta::Ptr;
using MemVectorsPtr = typename MemVectors::Ptr;
using Ptr = std::shared_ptr<MemManager>;
MemManager(const std::shared_ptr<meta::Meta>& meta, const Options& options)
MemManager(const std::shared_ptr<meta::Meta> &meta, const Options &options)
: meta_(meta), options_(options) {}
MemVectorsPtr GetMemByTable(const std::string& table_id);
Status InsertVectors(const std::string &table_id,
size_t n, const float *vectors, IDNumbers &vector_ids) override;
Status Serialize(std::set<std::string> &table_ids) override;
Status EraseMemVector(const std::string &table_id) override;
size_t GetCurrentMutableMem() override;
Status InsertVectors(const std::string& table_id,
size_t n, const float* vectors, IDNumbers& vector_ids);
size_t GetCurrentImmutableMem() override;
Status Serialize(std::set<std::string>& table_ids);
size_t GetCurrentMem() override;
Status EraseMemVector(const std::string& table_id);
private:
MemVectorsPtr GetMemByTable(const std::string &table_id);
private:
Status InsertVectorsNoLock(const std::string& table_id,
size_t n, const float* vectors, IDNumbers& vector_ids);
Status InsertVectorsNoLock(const std::string &table_id,
size_t n, const float *vectors, IDNumbers &vector_ids);
Status ToImmutable();
using MemIdMap = std::map<std::string, MemVectorsPtr>;
......
#pragma once
#include <set>
namespace zilliz {
namespace milvus {
namespace engine {
// Interface shared by the memory managers; instances are handled through
// MemManagerAbstractPtr.
class MemManagerAbstract {
 public:
    // Virtual destructor so an implementation can always be destroyed safely
    // through a pointer to this interface.
    virtual ~MemManagerAbstract() = default;

    // Inserts n vectors for table_id; vector_ids is an in/out parameter owned by the caller.
    virtual Status InsertVectors(const std::string &table_id,
                                 size_t n, const float *vectors, IDNumbers &vector_ids) = 0;

    // Serializes buffered data; table_ids is filled by the implementation.
    virtual Status Serialize(std::set<std::string> &table_ids) = 0;

    // Drops whatever is buffered for table_id.
    virtual Status EraseMemVector(const std::string &table_id) = 0;

    // Memory held in the mutable buffers.
    virtual size_t GetCurrentMutableMem() = 0;

    // Memory held in the immutable buffers.
    virtual size_t GetCurrentImmutableMem() = 0;

    // Total memory held by the manager.
    virtual size_t GetCurrentMem() = 0;
}; // MemManagerAbstract
using MemManagerAbstractPtr = std::shared_ptr<MemManagerAbstract>;
} // namespace engine
} // namespace milvus
} // namespace zilliz
\ No newline at end of file
#include "MemTable.h"
#include "Log.h"
namespace zilliz {
namespace milvus {
namespace engine {
// Creates an empty in-memory table for table_id; meta and options are kept
// for the table files created later by Add().
MemTable::MemTable(const std::string &table_id,
                   const std::shared_ptr<meta::Meta> &meta,
                   const Options &options)
    : table_id_(table_id), meta_(meta), options_(options) {}
/**
 * Feed every vector of `source` into this table's in-memory files.
 *
 * Vectors go into the last file of the list; when the list is empty or the
 * last file is full, a new file is created and appended once it has accepted
 * data.
 *
 * @param source vectors to add; consumed incrementally until AllAdded()
 * @return OK when every vector was added, otherwise an error status
 */
Status MemTable::Add(VectorSource::Ptr &source) {
    while (!source->AllAdded()) {
        const size_t added_before = source->GetNumVectorsAdded();

        Status status;
        if (mem_table_file_list_.empty() || mem_table_file_list_.back()->IsFull()) {
            MemTableFile::Ptr new_mem_table_file = std::make_shared<MemTableFile>(table_id_, meta_, options_);
            status = new_mem_table_file->Add(source);
            if (status.ok()) {
                if (source->GetNumVectorsAdded() == added_before) {
                    // A brand-new file accepted nothing, so no later file would either:
                    // bail out instead of creating table files forever.
                    status = Status::Error("a new table file could not accept any vector");
                } else {
                    // GetCurrentMem() iterates this list under mutex_, so growing it
                    // must happen under the same lock.
                    std::lock_guard<std::mutex> lock(mutex_);
                    mem_table_file_list_.emplace_back(new_mem_table_file);
                }
            }
        } else {
            status = mem_table_file_list_.back()->Add(source);
        }

        if (!status.ok()) {
            std::string err_msg = "MemTable::Add failed: " + status.ToString();
            ENGINE_LOG_ERROR << err_msg;
            return Status::Error(err_msg);
        }
    }
    return Status::OK();
}
/**
 * Return the most recently appended table file.
 *
 * @param mem_table_file receives the last file of the list, or nullptr when
 *                       the table holds no file (calling back() on an empty
 *                       vector would be undefined behavior)
 */
void MemTable::GetCurrentMemTableFile(MemTableFile::Ptr &mem_table_file) {
    if (mem_table_file_list_.empty()) {
        mem_table_file = nullptr;
        return;
    }
    mem_table_file = mem_table_file_list_.back();
}
// Number of table files currently held by this table.
size_t MemTable::GetTableFileCount() {
    const size_t file_count = mem_table_file_list_.size();
    return file_count;
}
// Serializes the table files front to back, removing each one from the list
// once it has been serialized. Stops at the first failure and returns an
// error; files serialized before the failure have already been removed.
Status MemTable::Serialize() {
    while (!mem_table_file_list_.empty()) {
        auto status = mem_table_file_list_.front()->Serialize();
        if (!status.ok()) {
            std::string err_msg = "MemTable::Serialize failed: " + status.ToString();
            ENGINE_LOG_ERROR << err_msg;
            return Status::Error(err_msg);
        }
        // Only the removal itself is done under mutex_.
        std::lock_guard<std::mutex> lock(mutex_);
        mem_table_file_list_.erase(mem_table_file_list_.begin());
    }
    return Status::OK();
}
// True when the table holds no table file.
bool MemTable::Empty() {
    const bool has_no_files = mem_table_file_list_.empty();
    return has_no_files;
}
// Identifier of the table this object buffers data for.
const std::string &MemTable::GetTableId() const {
    const std::string &id = table_id_;
    return id;
}
size_t MemTable::GetCurrentMem() {
std::lock_guard<std::mutex> lock(mutex_);
size_t total_mem = 0;
for (auto &mem_table_file : mem_table_file_list_) {
total_mem += mem_table_file->GetCurrentMem();
}
return total_mem;
}
} // namespace engine
} // namespace milvus
} // namespace zilliz
\ No newline at end of file
#pragma once
#include "Status.h"
#include "MemTableFile.h"
#include "VectorSource.h"
#include <mutex>
namespace zilliz {
namespace milvus {
namespace engine {
// In-memory buffer for one table: an ordered list of MemTableFile objects
// that vectors are appended to and that is drained by Serialize().
class MemTable {
 public:
    using Ptr = std::shared_ptr<MemTable>;
    using MemTableFileList = std::vector<MemTableFile::Ptr>;
    using MetaPtr = meta::Meta::Ptr;

    MemTable(const std::string &table_id, const std::shared_ptr<meta::Meta> &meta, const Options &options);

    // Adds all vectors of source, creating table files as needed.
    Status Add(VectorSource::Ptr &source);

    // Hands back the last table file of the list.
    void GetCurrentMemTableFile(MemTableFile::Ptr &mem_table_file);

    // Number of table files currently held.
    size_t GetTableFileCount();

    // Serializes and removes the held table files.
    Status Serialize();

    // True when no table file is held.
    bool Empty();

    const std::string &GetTableId() const;

    // Sum of the memory reported by every held table file.
    size_t GetCurrentMem();

 private:
    const std::string table_id_;

    MemTableFileList mem_table_file_list_;

    MetaPtr meta_;

    Options options_;

    // Taken by GetCurrentMem() and around removals in Serialize().
    std::mutex mutex_;
}; //MemTable
} // namespace engine
} // namespace milvus
} // namespace zilliz
\ No newline at end of file
#include "MemTableFile.h"
#include "Constants.h"
#include "Log.h"
#include "EngineFactory.h"
#include "metrics/Metrics.h"
#include <cmath>
namespace zilliz {
namespace milvus {
namespace engine {
// Registers a new table file in the meta store and, if that succeeds, builds
// the execution engine for it. When registration fails the engine stays unset.
MemTableFile::MemTableFile(const std::string &table_id,
                           const std::shared_ptr<meta::Meta> &meta,
                           const Options &options)
    : table_id_(table_id),
      meta_(meta),
      options_(options),
      current_mem_(0) {
    auto create_status = CreateTableFile();
    if (!create_status.ok()) {
        return;
    }
    execution_engine_ = EngineFactory::Build(table_file_schema_.dimension_,
                                             table_file_schema_.location_,
                                             (EngineType) table_file_schema_.engine_type_);
}
// Asks the meta store to create a table file for table_id_. The resulting
// schema is stored in table_file_schema_ on success; on failure the error is
// logged and table_file_schema_ is left untouched.
Status MemTableFile::CreateTableFile() {
    meta::TableFileSchema new_schema;
    new_schema.table_id_ = table_id_;

    auto status = meta_->CreateTableFile(new_schema);
    if (!status.ok()) {
        std::string err_msg = "MemTableFile::CreateTableFile failed: " + status.ToString();
        ENGINE_LOG_ERROR << err_msg;
        return status;
    }

    table_file_schema_ = new_schema;
    return status;
}
/**
 * Add as many vectors from `source` as still fit into this file.
 *
 * @param source vectors to pull from
 * @return the status reported by the source when vectors were added; OK
 *         without adding anything when not even one more vector fits; an
 *         error when the schema dimension is not positive or no execution
 *         engine was built for this file
 */
Status MemTableFile::Add(const VectorSource::Ptr &source) {
    if (table_file_schema_.dimension_ <= 0) {
        std::string err_msg = "MemTableFile::Add: table_file_schema dimension = " +
            std::to_string(table_file_schema_.dimension_) + ", table_id = " + table_file_schema_.table_id_;
        ENGINE_LOG_ERROR << err_msg;
        return Status::Error(err_msg);
    }

    // The engine is only built when CreateTableFile() succeeded in the constructor.
    if (execution_engine_ == nullptr) {
        std::string err_msg = "MemTableFile::Add: execution engine is null, table_id = " + table_id_;
        ENGINE_LOG_ERROR << err_msg;
        return Status::Error(err_msg);
    }

    size_t single_vector_mem_size = table_file_schema_.dimension_ * VECTOR_TYPE_SIZE;
    size_t mem_left = GetMemLeft();
    if (mem_left >= single_vector_mem_size) {
        // Integer division already rounds down to the number of whole vectors that fit.
        size_t num_vectors_to_add = mem_left / single_vector_mem_size;
        size_t num_vectors_added = 0;
        auto status = source->Add(execution_engine_, table_file_schema_, num_vectors_to_add, num_vectors_added);
        if (status.ok()) {
            current_mem_ += (num_vectors_added * single_vector_mem_size);
        }
        return status;
    }
    return Status::OK();
}
// Bytes accounted to this file so far.
size_t MemTableFile::GetCurrentMem() {
    const size_t bytes_in_use = current_mem_;
    return bytes_in_use;
}
// Bytes still available before the file reaches MAX_TABLE_FILE_MEM.
size_t MemTableFile::GetMemLeft() {
    const size_t remaining = MAX_TABLE_FILE_MEM - current_mem_;
    return remaining;
}
// A file is full when the remaining space cannot hold one more vector.
bool MemTableFile::IsFull() {
    const size_t bytes_per_vector = table_file_schema_.dimension_ * VECTOR_TYPE_SIZE;
    const size_t remaining = GetMemLeft();
    return remaining < bytes_per_vector;
}
// Serializes the engine, records size and file type in the meta store,
// reports the disk I/O speed metric and finally caches the engine.
// Returns the status of the meta update.
Status MemTableFile::Serialize() {
    auto begin_ts = METRICS_NOW_TIME;

    auto bytes = GetCurrentMem();

    execution_engine_->Serialize();
    auto finish_ts = METRICS_NOW_TIME;
    auto elapsed = METRICS_MICROSECONDS(begin_ts, finish_ts);
    table_file_schema_.size_ = bytes;

    server::Metrics::GetInstance().DiskStoreIOSpeedGaugeSet((double) bytes / elapsed);

    // Files that reached the index trigger size are queued for indexing.
    if (bytes >= options_.index_trigger_size) {
        table_file_schema_.file_type_ = meta::TableFileSchema::TO_INDEX;
    } else {
        table_file_schema_.file_type_ = meta::TableFileSchema::RAW;
    }

    auto update_status = meta_->UpdateTableFile(table_file_schema_);

    LOG(DEBUG) << "New " << ((table_file_schema_.file_type_ == meta::TableFileSchema::RAW) ? "raw" : "to_index")
               << " file " << table_file_schema_.file_id_ << " of size " << (double) bytes / (double) M << " M";

    execution_engine_->Cache();

    return update_status;
}
} // namespace engine
} // namespace milvus
} // namespace zilliz
\ No newline at end of file
#pragma once
#include "Status.h"
#include "Meta.h"
#include "VectorSource.h"
#include "ExecutionEngine.h"
namespace zilliz {
namespace milvus {
namespace engine {
// One in-memory table file: an execution engine plus the meta schema of the
// file it will be serialized to, with a running count of the bytes added.
class MemTableFile {
 public:
    using Ptr = std::shared_ptr<MemTableFile>;
    using MetaPtr = meta::Meta::Ptr;

    MemTableFile(const std::string &table_id, const std::shared_ptr<meta::Meta> &meta, const Options &options);

    // Adds as many vectors from source as still fit.
    Status Add(const VectorSource::Ptr &source);

    // Bytes accounted to this file so far.
    size_t GetCurrentMem();

    // Bytes left before the per-file limit is reached.
    size_t GetMemLeft();

    // True when not even one more vector fits.
    bool IsFull();

    // Serializes the engine and updates the meta store.
    Status Serialize();

 private:
    // Registers the file in the meta store and fills table_file_schema_.
    Status CreateTableFile();

    const std::string table_id_;

    meta::TableFileSchema table_file_schema_;

    MetaPtr meta_;

    Options options_;

    size_t current_mem_;

    ExecutionEnginePtr execution_engine_;
}; //MemTableFile
} // namespace engine
} // namespace milvus
} // namespace zilliz
\ No newline at end of file
#include "NewMemManager.h"
#include "VectorSource.h"
#include "Log.h"
#include "Constants.h"
#include <thread>
namespace zilliz {
namespace milvus {
namespace engine {
// Returns the mutable MemTable for table_id, creating and registering a new
// one when the table has none yet.
NewMemManager::MemTablePtr NewMemManager::GetMemByTable(const std::string &table_id) {
    auto found = mem_id_map_.find(table_id);
    if (found == mem_id_map_.end()) {
        MemTablePtr created = std::make_shared<MemTable>(table_id, meta_, options_);
        mem_id_map_[table_id] = created;
        return created;
    }
    return found->second;
}
// Waits until the tracked memory is no longer above options_.maximum_memory,
// then inserts the vectors while holding mutex_.
// NOTE(review): the memory getters used for the wait and the log line run
// before mutex_ is taken — confirm that unsynchronized read is intended.
Status NewMemManager::InsertVectors(const std::string &table_id_,
                                    size_t n_,
                                    const float *vectors_,
                                    IDNumbers &vector_ids_) {
    for (;;) {
        if (!(GetCurrentMem() > options_.maximum_memory)) {
            break;
        }
        std::this_thread::sleep_for(std::chrono::milliseconds(1));
    }

    LOG(DEBUG) << "NewMemManager::InsertVectors: mutable mem = " << GetCurrentMutableMem() <<
               ", immutable mem = " << GetCurrentImmutableMem() << ", total mem = " << GetCurrentMem();

    std::unique_lock<std::mutex> guard(mutex_);
    return InsertVectorsNoLock(table_id_, n_, vectors_, vector_ids_);
}
// Wraps the raw vectors in a VectorSource and adds them to the table's
// MemTable. vector_ids is overwritten with the generated ids only on success.
Status NewMemManager::InsertVectorsNoLock(const std::string &table_id,
                                          size_t n,
                                          const float *vectors,
                                          IDNumbers &vector_ids) {
    MemTablePtr table = GetMemByTable(table_id);
    VectorSource::Ptr pending = std::make_shared<VectorSource>(n, vectors);

    auto add_status = table->Add(pending);
    if (!add_status.ok()) {
        return add_status;
    }
    vector_ids = pending->GetVectorIds();
    return add_status;
}
// Moves every non-empty table from the mutable map to the immutable list;
// empty tables stay in the map because there is nothing to serialize.
Status NewMemManager::ToImmutable() {
    std::unique_lock<std::mutex> guard(mutex_);
    MemIdMap still_mutable;
    for (auto it = mem_id_map_.begin(); it != mem_id_map_.end(); ++it) {
        if (!it->second->Empty()) {
            immu_mem_list_.push_back(it->second);
            continue;
        }
        still_mutable.insert(*it);
    }

    mem_id_map_.swap(still_mutable);
    return Status::OK();
}
// Freezes the current mutable tables, serializes every immutable table and
// reports the ids of the tables that were processed in table_ids.
// NOTE(review): the status returned by each table's Serialize() is not
// inspected and the list is cleared regardless — confirm this is intended.
Status NewMemManager::Serialize(std::set<std::string> &table_ids) {
    ToImmutable();
    std::unique_lock<std::mutex> guard(serialization_mtx_);
    table_ids.clear();
    for (auto it = immu_mem_list_.begin(); it != immu_mem_list_.end(); ++it) {
        (*it)->Serialize();
        table_ids.insert((*it)->GetTableId());
    }
    immu_mem_list_.clear();
    return Status::OK();
}
// Drops everything buffered for table_id, from both the mutable map and the
// immutable list. Each container is updated under its own mutex.
Status NewMemManager::EraseMemVector(const std::string &table_id) {
    {
        // mutable side
        std::unique_lock<std::mutex> guard(mutex_);
        mem_id_map_.erase(table_id);
    }

    {
        // immutable side: keep only the tables that belong to other ids
        std::unique_lock<std::mutex> guard(serialization_mtx_);
        MemList kept;
        for (auto it = immu_mem_list_.begin(); it != immu_mem_list_.end(); ++it) {
            if ((*it)->GetTableId() == table_id) {
                continue;
            }
            kept.push_back(*it);
        }
        immu_mem_list_.swap(kept);
    }

    return Status::OK();
}
// Sums GetCurrentMem() over every table in the mutable map.
// NOTE(review): the map is read without taking mutex_ — confirm callers tolerate that.
size_t NewMemManager::GetCurrentMutableMem() {
    size_t mutable_bytes = 0;
    for (auto it = mem_id_map_.begin(); it != mem_id_map_.end(); ++it) {
        mutable_bytes += it->second->GetCurrentMem();
    }
    return mutable_bytes;
}
// Sums GetCurrentMem() over every table queued in the immutable list.
size_t NewMemManager::GetCurrentImmutableMem() {
    size_t immutable_bytes = 0;
    for (auto it = immu_mem_list_.begin(); it != immu_mem_list_.end(); ++it) {
        immutable_bytes += (*it)->GetCurrentMem();
    }
    return immutable_bytes;
}
// Total memory tracked by this manager: mutable plus immutable.
size_t NewMemManager::GetCurrentMem() {
    const size_t mutable_bytes = GetCurrentMutableMem();
    const size_t immutable_bytes = GetCurrentImmutableMem();
    return mutable_bytes + immutable_bytes;
}
} // namespace engine
} // namespace milvus
} // namespace zilliz
\ No newline at end of file
#pragma once
#include "Meta.h"
#include "MemTable.h"
#include "Status.h"
#include "MemManagerAbstract.h"
#include <map>
#include <string>
#include <ctime>
#include <memory>
#include <mutex>
namespace zilliz {
namespace milvus {
namespace engine {
class NewMemManager : public MemManagerAbstract {
public:
using MetaPtr = meta::Meta::Ptr;
using Ptr = std::shared_ptr<NewMemManager>;
using MemTablePtr = typename MemTable::Ptr;
NewMemManager(const std::shared_ptr<meta::Meta> &meta, const Options &options)
: meta_(meta), options_(options) {}
Status InsertVectors(const std::string &table_id,
size_t n, const float *vectors, IDNumbers &vector_ids) override;
Status Serialize(std::set<std::string> &table_ids) override;
Status EraseMemVector(const std::string &table_id) override;
size_t GetCurrentMutableMem() override;
size_t GetCurrentImmutableMem() override;
size_t GetCurrentMem() override;
private:
MemTablePtr GetMemByTable(const std::string &table_id);
Status InsertVectorsNoLock(const std::string &table_id,
size_t n, const float *vectors, IDNumbers &vector_ids);
Status ToImmutable();
using MemIdMap = std::map<std::string, MemTablePtr>;
using MemList = std::vector<MemTablePtr>;
MemIdMap mem_id_map_;
MemList immu_mem_list_;
MetaPtr meta_;
Options options_;
std::mutex mutex_;
std::mutex serialization_mtx_;
}; // NewMemManager
} // namespace engine
} // namespace milvus
} // namespace zilliz
\ No newline at end of file
......@@ -63,6 +63,7 @@ struct Options {
size_t index_trigger_size = ONE_GB; //unit: byte
DBMetaOptions meta;
int mode = MODE::SINGLE;
float maximum_memory = 4 * ONE_GB;
}; // Options
......
#include "VectorSource.h"
#include "ExecutionEngine.h"
#include "EngineFactory.h"
#include "Log.h"
#include "metrics/Metrics.h"
namespace zilliz {
namespace milvus {
namespace engine {
// Wraps a caller-owned buffer of n vectors; nothing has been added yet and a
// fresh SimpleIDGenerator is created for the ids.
VectorSource::VectorSource(const size_t &n, const float *vectors)
    : n_(n),
      vectors_(vectors),
      current_num_vectors_added(0),
      id_generator_(new SimpleIDGenerator()) {}
/**
 * Hand the next batch of not-yet-added vectors to an execution engine.
 *
 * @param execution_engine   engine receiving the vectors
 * @param table_file_schema  schema of the target file; its dimension_ is used
 *                           to locate the batch inside the vector buffer
 * @param num_vectors_to_add maximum number of vectors the caller can accept
 * @param num_vectors_added  set to the size of the batch handed to the engine
 *                           (the smaller of num_vectors_to_add and the number
 *                           of vectors still pending); 0 when the engine is null
 * @return status of the engine's AddWithIds call, or an error when the engine is null
 */
Status VectorSource::Add(const ExecutionEnginePtr &execution_engine,
                         const meta::TableFileSchema &table_file_schema,
                         const size_t &num_vectors_to_add,
                         size_t &num_vectors_added) {
    if (execution_engine == nullptr) {
        num_vectors_added = 0;
        std::string err_msg = "VectorSource::Add failed: execution engine is null";
        ENGINE_LOG_ERROR << err_msg;
        return Status::Error(err_msg);
    }

    auto start_time = METRICS_NOW_TIME;

    // Compare against the remaining count so the sum cannot overflow.
    const size_t num_vectors_pending = n_ - current_num_vectors_added;
    num_vectors_added = num_vectors_to_add <= num_vectors_pending ?
                        num_vectors_to_add : num_vectors_pending;

    IDNumbers vector_ids_to_add;
    id_generator_->GetNextIDNumbers(num_vectors_added, vector_ids_to_add);
    Status status = execution_engine->AddWithIds(num_vectors_added,
                                                 vectors_ + current_num_vectors_added * table_file_schema.dimension_,
                                                 vector_ids_to_add.data());
    if (status.ok()) {
        current_num_vectors_added += num_vectors_added;
        vector_ids_.insert(vector_ids_.end(),
                           std::make_move_iterator(vector_ids_to_add.begin()),
                           std::make_move_iterator(vector_ids_to_add.end()));
    } else {
        ENGINE_LOG_ERROR << "VectorSource::Add failed: " + status.ToString();
    }

    auto end_time = METRICS_NOW_TIME;
    auto total_time = METRICS_MICROSECONDS(start_time, end_time);

    // Report the batch handled in this call, not the total size of the source:
    // total_time only covers this batch.
    server::Metrics::GetInstance().AddVectorsPerSecondGaugeSet(static_cast<int>(num_vectors_added),
                                                               static_cast<int>(table_file_schema.dimension_),
                                                               total_time);

    return status;
}
// Number of vectors handed to an engine so far.
size_t VectorSource::GetNumVectorsAdded() {
    const size_t added_so_far = current_num_vectors_added;
    return added_so_far;
}
// True once the added count equals the total number of vectors.
bool VectorSource::AllAdded() {
    const bool done = (n_ == current_num_vectors_added);
    return done;
}
// Returns a copy of the ids generated for the vectors added so far.
IDNumbers VectorSource::GetVectorIds() {
    IDNumbers ids_copy = vector_ids_;
    return ids_copy;
}
} // namespace engine
} // namespace milvus
} // namespace zilliz
\ No newline at end of file
#pragma once
#include "Status.h"
#include "Meta.h"
#include "IDGenerator.h"
#include "ExecutionEngine.h"
namespace zilliz {
namespace milvus {
namespace engine {
class VectorSource {
public:
using Ptr = std::shared_ptr<VectorSource>;
VectorSource(const size_t &n, const float *vectors);
Status Add(const ExecutionEnginePtr &execution_engine,
const meta::TableFileSchema &table_file_schema,
const size_t &num_vectors_to_add,
size_t &num_vectors_added);
size_t GetNumVectorsAdded();
bool AllAdded();
IDNumbers GetVectorIds();
private:
const size_t n_;
const float *vectors_;
IDNumbers vector_ids_;
size_t current_num_vectors_added;
IDGenerator *id_generator_;
}; //VectorSource
} // namespace engine
} // namespace milvus
} // namespace zilliz
\ No newline at end of file
......@@ -28,6 +28,14 @@ DBWrapper::DBWrapper() {
if(index_size > 0) {//ensure larger than zero, unit is MB
opt.index_trigger_size = (size_t)index_size * engine::ONE_MB;
}
float maximum_memory = config.GetFloatValue(CONFIG_MAXMIMUM_MEMORY);
if (maximum_memory > 1.0) {
opt.maximum_memory = maximum_memory * engine::ONE_GB;
}
else {
std::cout << "ERROR: maximum_memory should be at least 1 GB" << std::endl;
kill(0, SIGUSR1);
}
ConfigNode& serverConfig = ServerConfig::GetInstance().GetConfig(CONFIG_SERVER);
std::string mode = serverConfig.GetValue(CONFIG_CLUSTER_MODE, "single");
......
......@@ -8,6 +8,7 @@
#include "ServerConfig.h"
#include "ThreadPoolServer.h"
#include "DBWrapper.h"
#include "utils/Log.h"
#include "milvus_types.h"
#include "milvus_constants.h"
......
......@@ -8,6 +8,7 @@
#include "utils/CommonUtil.h"
#include "utils/Log.h"
#include "utils/TimeRecorder.h"
#include "utils/ValidationUtil.h"
#include "DBWrapper.h"
#include "version.h"
......@@ -110,7 +111,13 @@ namespace {
}
long days = (tt_end > tt_start) ? (tt_end - tt_start)/DAY_SECONDS : (tt_start - tt_end)/DAY_SECONDS;
for(long i = 0; i <= days; i++) {
if(days == 0) {
error_code = SERVER_INVALID_TIME_RANGE;
error_msg = "Invalid time range: " + range.start_value + " to " + range.end_value;
return ;
}
for(long i = 0; i < days; i++) {
time_t tt_day = tt_start + DAY_SECONDS*i;
tm tm_day;
CommonUtil::ConvertTime(tt_day, tm_day);
......@@ -138,16 +145,20 @@ ServerError CreateTableTask::OnExecute() {
try {
//step 1: check arguments
if(schema_.table_name.empty()) {
return SetError(SERVER_INVALID_TABLE_NAME, "Empty table name");
ServerError res = SERVER_SUCCESS;
res = ValidateTableName(schema_.table_name);
if(res != SERVER_SUCCESS) {
return res;
}
if(schema_.dimension <= 0) {
return SetError(SERVER_INVALID_TABLE_DIMENSION, "Invalid table dimension: " + std::to_string(schema_.dimension));
res = ValidateTableDimension(schema_.dimension);
if(res != SERVER_SUCCESS) {
return res;
}
engine::EngineType engine_type = EngineType(schema_.index_type);
if(engine_type == engine::EngineType::INVALID) {
return SetError(SERVER_INVALID_INDEX_TYPE, "Invalid index type: " + std::to_string(schema_.index_type));
res = ValidateTableIndexType(schema_.index_type);
if(res != SERVER_SUCCESS) {
return res;
}
//step 2: construct table schema
......@@ -189,8 +200,10 @@ ServerError DescribeTableTask::OnExecute() {
try {
//step 1: check arguments
if(table_name_.empty()) {
return SetError(SERVER_INVALID_TABLE_NAME, "Empty table name");
ServerError res = SERVER_SUCCESS;
res = ValidateTableName(table_name_);
if(res != SERVER_SUCCESS) {
return res;
}
//step 2: get table info
......@@ -265,10 +278,11 @@ ServerError HasTableTask::OnExecute() {
TimeRecorder rc("HasTableTask");
//step 1: check arguments
if(table_name_.empty()) {
return SetError(SERVER_INVALID_TABLE_NAME, "Empty table name");
ServerError res = SERVER_SUCCESS;
res = ValidateTableName(table_name_);
if(res != SERVER_SUCCESS) {
return res;
}
//step 2: check table existence
engine::Status stat = DBWrapper::DB()->HasTable(table_name_, has_table_);
if(!stat.ok()) {
......@@ -299,8 +313,10 @@ ServerError DeleteTableTask::OnExecute() {
TimeRecorder rc("DeleteTableTask");
//step 1: check arguments
if (table_name_.empty()) {
return SetError(SERVER_INVALID_TABLE_NAME, "Empty table name");
ServerError res = SERVER_SUCCESS;
res = ValidateTableName(table_name_);
if(res != SERVER_SUCCESS) {
return res;
}
//step 2: check table existence
......@@ -325,7 +341,7 @@ ServerError DeleteTableTask::OnExecute() {
}
rc.Record("deleta table");
rc.Elapse("totally cost");
rc.Elapse("total cost");
} catch (std::exception& ex) {
return SetError(SERVER_UNEXPECTED_ERROR, ex.what());
}
......@@ -381,8 +397,10 @@ ServerError AddVectorTask::OnExecute() {
TimeRecorder rc("AddVectorTask");
//step 1: check arguments
if (table_name_.empty()) {
return SetError(SERVER_INVALID_TABLE_NAME, "Empty table name");
ServerError res = SERVER_SUCCESS;
res = ValidateTableName(table_name_);
if(res != SERVER_SUCCESS) {
return res;
}
if(record_array_.empty()) {
......@@ -429,7 +447,7 @@ ServerError AddVectorTask::OnExecute() {
}
rc.Record("do insert");
rc.Elapse("totally cost");
rc.Elapse("total cost");
} catch (std::exception& ex) {
return SetError(SERVER_UNEXPECTED_ERROR, ex.what());
......@@ -470,8 +488,10 @@ ServerError SearchVectorTask::OnExecute() {
TimeRecorder rc("SearchVectorTask");
//step 1: check arguments
if (table_name_.empty()) {
return SetError(SERVER_INVALID_TABLE_NAME, "Empty table name");
ServerError res = SERVER_SUCCESS;
res = ValidateTableName(table_name_);
if(res != SERVER_SUCCESS) {
return res;
}
if(top_k_ <= 0) {
......@@ -557,7 +577,7 @@ ServerError SearchVectorTask::OnExecute() {
result_array_.emplace_back(thrift_topk_result);
}
rc.Record("construct result");
rc.Elapse("totally cost");
rc.Elapse("total cost");
} catch (std::exception& ex) {
return SetError(SERVER_UNEXPECTED_ERROR, ex.what());
......@@ -583,8 +603,10 @@ ServerError GetTableRowCountTask::OnExecute() {
TimeRecorder rc("GetTableRowCountTask");
//step 1: check arguments
if (table_name_.empty()) {
return SetError(SERVER_INVALID_TABLE_NAME, "Empty table name");
ServerError res = SERVER_SUCCESS;
res = ValidateTableName(table_name_);
if(res != SERVER_SUCCESS) {
return res;
}
//step 2: get row count
......@@ -596,7 +618,7 @@ ServerError GetTableRowCountTask::OnExecute() {
row_count_ = (int64_t) row_count;
rc.Elapse("totally cost");
rc.Elapse("total cost");
} catch (std::exception& ex) {
return SetError(SERVER_UNEXPECTED_ERROR, ex.what());
......
......@@ -27,6 +27,7 @@ static const std::string CONFIG_DB_SLAVE_PATH = "db_slave_path";
static const std::string CONFIG_DB_INDEX_TRIGGER_SIZE = "index_building_threshold";
static const std::string CONFIG_DB_ARCHIVE_DISK = "archive_disk_threshold";
static const std::string CONFIG_DB_ARCHIVE_DAYS = "archive_days_threshold";
static const std::string CONFIG_MAXMIMUM_MEMORY = "maximum_memory";
static const std::string CONFIG_LOG = "log_config";
......
......@@ -55,7 +55,6 @@ constexpr ServerError SERVER_LICENSE_VALIDATION_FAIL = ToGlobalServerErrorCode(5
constexpr ServerError DB_META_TRANSACTION_FAILED = ToGlobalServerErrorCode(1000);
class ServerException : public std::exception {
public:
ServerException(ServerError error_code,
......
#include <src/db/ExecutionEngine.h>
#include "ValidationUtil.h"
#include "Log.h"
namespace zilliz {
namespace milvus {
namespace server {
// Longest table name accepted by ValidateTableName, measured with std::string::size().
constexpr size_t table_name_size_limit = 255;
// Largest dimension accepted by ValidateTableDimension (inclusive).
constexpr int64_t table_dimension_limit = 16384;
/**
 * Checks that `table_name` is usable as a table identifier.
 *
 * A valid name is non-empty, no longer than table_name_size_limit, starts
 * with an underscore or a character accepted by std::isalpha, and continues
 * with underscores or characters accepted by std::isalnum.
 *
 * @param table_name  name to check
 * @return SERVER_SUCCESS when the name is valid, SERVER_INVALID_TABLE_NAME
 *         otherwise (the reason is written to the server error log)
 */
ServerError
ValidateTableName(const std::string &table_name) {
    // Table name shouldn't be empty.
    if (table_name.empty()) {
        SERVER_LOG_ERROR << "Empty table name";
        return SERVER_INVALID_TABLE_NAME;
    }

    // Table name size shouldn't exceed table_name_size_limit.
    if (table_name.size() > table_name_size_limit) {
        SERVER_LOG_ERROR << "Table name size exceed the limitation";
        return SERVER_INVALID_TABLE_NAME;
    }

    // The <cctype> classifiers have undefined behaviour for negative values,
    // which is what bytes >= 0x80 (e.g. UTF-8 text) become when `char` is
    // signed. Always classify through unsigned char.

    // Table name first character should be underscore or character.
    const char first_char = table_name[0];
    if (first_char != '_' && std::isalpha(static_cast<unsigned char>(first_char)) == 0) {
        SERVER_LOG_ERROR << "Table name first character isn't underscore or character: " << first_char;
        return SERVER_INVALID_TABLE_NAME;
    }

    // Remaining characters should be underscore, letter or digit.
    for (std::string::size_type i = 1; i < table_name.size(); ++i) {
        const char name_char = table_name[i];
        if (name_char != '_' && std::isalnum(static_cast<unsigned char>(name_char)) == 0) {
            SERVER_LOG_ERROR << "Table name character isn't underscore or alphanumber: " << name_char;
            return SERVER_INVALID_TABLE_NAME;
        }
    }

    return SERVER_SUCCESS;
}
/**
 * Checks that `dimension` lies in the range [1, table_dimension_limit].
 *
 * @param dimension  requested vector dimension
 * @return SERVER_SUCCESS when in range, SERVER_INVALID_VECTOR_DIMENSION
 *         otherwise (the offending value is written to the server error log)
 */
ServerError
ValidateTableDimension(int64_t dimension) {
    if (dimension <= 0 || dimension > table_dimension_limit) {
        // Log the rejected value and the whole valid range: the check also
        // fires for zero/negative input, not only when the limit is exceeded.
        SERVER_LOG_ERROR << "Invalid table dimension: " << dimension
                         << ", valid range is [1, " << table_dimension_limit << "]";
        return SERVER_INVALID_VECTOR_DIMENSION;
    }
    return SERVER_SUCCESS;
}
/**
 * Checks that `index_type` maps to an engine type this server accepts.
 *
 * Only FAISS_IDMAP and FAISS_IVFFLAT are accepted; the accepted value is
 * written to the debug log.
 *
 * @param index_type  numeric engine type as received from the client
 * @return SERVER_SUCCESS for an accepted type, SERVER_INVALID_INDEX_TYPE otherwise
 */
ServerError
ValidateTableIndexType(int32_t index_type) {
    const auto requested = engine::EngineType(index_type);
    const bool accepted = requested == engine::EngineType::FAISS_IDMAP ||
                          requested == engine::EngineType::FAISS_IVFFLAT;
    if (!accepted) {
        return SERVER_INVALID_INDEX_TYPE;
    }

    SERVER_LOG_DEBUG << "Index type: " << index_type;
    return SERVER_SUCCESS;
}
}
}
}
\ No newline at end of file
#pragma once
#include "Error.h"
namespace zilliz {
namespace milvus {
namespace server {
// Validates a table name. Returns SERVER_SUCCESS or SERVER_INVALID_TABLE_NAME.
ServerError
ValidateTableName(const std::string& table_name);
// Validates a vector dimension. Returns SERVER_SUCCESS or SERVER_INVALID_VECTOR_DIMENSION.
ServerError
ValidateTableDimension(int64_t dimension);
// Validates a numeric index/engine type. Returns SERVER_SUCCESS or SERVER_INVALID_INDEX_TYPE.
ServerError
ValidateTableIndexType(int32_t index_type);
}
}
}
\ No newline at end of file
......@@ -12,7 +12,6 @@ aux_source_directory(${MILVUS_ENGINE_SRC}/config config_files)
set(unittest_srcs
${CMAKE_CURRENT_SOURCE_DIR}/main.cpp)
#${EASYLOGGINGPP_INCLUDE_DIR}/easylogging++.cc)
set(require_files
${MILVUS_ENGINE_SRC}/server/ServerConfig.cpp
......@@ -44,4 +43,5 @@ add_subdirectory(db)
add_subdirectory(faiss_wrapper)
#add_subdirectory(license)
add_subdirectory(metrics)
add_subdirectory(storage)
\ No newline at end of file
add_subdirectory(storage)
add_subdirectory(utils)
\ No newline at end of file
......@@ -23,6 +23,8 @@ link_directories("/usr/local/cuda/lib64")
include_directories(/usr/include/mysql)
#add_definitions(-DBOOST_ERROR_CODE_HEADER_ONLY)
set(db_test_src
#${unittest_srcs}
${config_files}
......@@ -40,9 +42,9 @@ set(db_libs
faiss
cudart
cublas
sqlite3
boost_system
boost_filesystem
sqlite
boost_system_static
boost_filesystem_static
lz4
mysqlpp
)
......
......@@ -3,17 +3,20 @@
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#include <gtest/gtest.h>
#include <thread>
#include <easylogging++.h>
#include <boost/filesystem.hpp>
#include "utils.h"
#include "db/DB.h"
#include "db/DBImpl.h"
#include "db/MetaConsts.h"
#include "db/Factories.h"
#include <gtest/gtest.h>
#include <easylogging++.h>
#include <boost/filesystem.hpp>
#include <thread>
#include <random>
using namespace zilliz::milvus;
namespace {
......
#include "gtest/gtest.h"
#include "db/VectorSource.h"
#include "db/MemTableFile.h"
#include "db/MemTable.h"
#include "utils.h"
#include "db/Factories.h"
#include "db/Constants.h"
#include "db/EngineFactory.h"
#include "metrics/Metrics.h"
#include "db/MetaConsts.h"
#include "boost/filesystem.hpp"
#include <thread>
#include <fstream>
#include <iostream>
#include <cmath>
#include <random>
using namespace zilliz::milvus;
namespace {
// Table id used by every test in this file.
static const std::string TABLE_NAME = "test_group";
// Dimension of the test table and of every generated vector.
static constexpr int64_t TABLE_DIM = 256;
// NOTE(review): VECTOR_COUNT and INSERT_LOOP are not referenced by the tests visible here — confirm they are still needed.
static constexpr int64_t VECTOR_COUNT = 250000;
static constexpr int64_t INSERT_LOOP = 10000;
// Returns the schema shared by the tests: table id TABLE_NAME, dimension
// TABLE_DIM, engine type FAISS_IDMAP.
engine::meta::TableSchema BuildTableSchema() {
    engine::meta::TableSchema schema;
    schema.table_id_ = TABLE_NAME;
    schema.dimension_ = TABLE_DIM;
    schema.engine_type_ = static_cast<int>(engine::EngineType::FAISS_IDMAP);
    return schema;
}
/**
 * Fills `vectors` with `n` pseudo-random vectors of TABLE_DIM floats each.
 *
 * Every component is drawn with drand48(); the first component of vector i
 * is then shifted by i / 2000. Any previous content of `vectors` is
 * discarded. A non-positive `n` leaves `vectors` empty.
 *
 * @param n        number of vectors to generate
 * @param vectors  [out] receives n * TABLE_DIM floats, vector after vector
 */
void BuildVectors(int64_t n, std::vector<float> &vectors) {
    vectors.clear();
    if (n <= 0) {
        // A negative count would otherwise be converted to a huge size_t by resize().
        return;
    }

    vectors.resize(n * TABLE_DIM);
    float *data = vectors.data();
    // 64-bit indices: `n` is int64_t, an int counter could overflow before reaching it.
    for (int64_t i = 0; i < n; i++) {
        float *row = data + TABLE_DIM * i;
        for (int64_t j = 0; j < TABLE_DIM; j++) row[j] = drand48();
        row[0] += i / 2000.;
    }
}
}
// Exercises VectorSource::Add directly: a 100-vector source is drained in two
// calls and the number of vectors / IDs reported after each call is checked.
TEST_F(NewMemManagerTest, VECTOR_SOURCE_TEST) {
// Create the table and one table file in the meta store.
std::shared_ptr<engine::meta::DBMetaImpl> impl_ = engine::DBMetaImplFactory::Build();
engine::meta::TableSchema table_schema = BuildTableSchema();
auto status = impl_->CreateTable(table_schema);
ASSERT_TRUE(status.ok());
engine::meta::TableFileSchema table_file_schema;
table_file_schema.table_id_ = TABLE_NAME;
status = impl_->CreateTableFile(table_file_schema);
ASSERT_TRUE(status.ok());
// Build 100 random vectors and wrap them in a source.
int64_t n = 100;
std::vector<float> vectors;
BuildVectors(n, vectors);
engine::VectorSource source(n, vectors.data());
size_t num_vectors_added;
// Engine built from the fields of table_file_schema as they stand after CreateTableFile.
engine::ExecutionEnginePtr execution_engine_ = engine::EngineFactory::Build(table_file_schema.dimension_,
table_file_schema.location_,
(engine::EngineType) table_file_schema.engine_type_);
// First call: ask for 50, expect exactly 50 added.
status = source.Add(execution_engine_, table_file_schema, 50, num_vectors_added);
ASSERT_TRUE(status.ok());
ASSERT_EQ(num_vectors_added, 50);
engine::IDNumbers vector_ids = source.GetVectorIds();
ASSERT_EQ(vector_ids.size(), 50);
// Second call: ask for 60 but only 50 remain, so the request is clamped to 50.
status = source.Add(execution_engine_, table_file_schema, 60, num_vectors_added);
ASSERT_TRUE(status.ok());
ASSERT_EQ(num_vectors_added, 50);
vector_ids = source.GetVectorIds();
ASSERT_EQ(vector_ids.size(), 100);
// Clean up the meta store.
status = impl_->DropAll();
ASSERT_TRUE(status.ok());
}
// Exercises MemTableFile::Add: a small source fits entirely, then a source
// sized to a whole table file only partially fits and the file reports full.
TEST_F(NewMemManagerTest, MEM_TABLE_FILE_TEST) {
std::shared_ptr<engine::meta::DBMetaImpl> impl_ = engine::DBMetaImplFactory::Build();
auto options = engine::OptionsFactory::Build();
engine::meta::TableSchema table_schema = BuildTableSchema();
auto status = impl_->CreateTable(table_schema);
ASSERT_TRUE(status.ok());
engine::MemTableFile mem_table_file(TABLE_NAME, impl_, options);
// Step 1: add 100 vectors; all of them must be accepted.
int64_t n_100 = 100;
std::vector<float> vectors_100;
BuildVectors(n_100, vectors_100);
engine::VectorSource::Ptr source = std::make_shared<engine::VectorSource>(n_100, vectors_100.data());
status = mem_table_file.Add(source);
ASSERT_TRUE(status.ok());
// std::cout << mem_table_file.GetCurrentMem() << " " << mem_table_file.GetMemLeft() << std::endl;
engine::IDNumbers vector_ids = source->GetVectorIds();
ASSERT_EQ(vector_ids.size(), 100);
// Memory accounted by the file must equal 100 vectors of TABLE_DIM floats.
size_t singleVectorMem = sizeof(float) * TABLE_DIM;
ASSERT_EQ(mem_table_file.GetCurrentMem(), n_100 * singleVectorMem);
// Step 2: offer as many vectors as a whole table file can hold; since 100
// are already stored, only n_max - 100 are expected to be taken.
int64_t n_max = engine::MAX_TABLE_FILE_MEM / singleVectorMem;
std::vector<float> vectors_128M;
BuildVectors(n_max, vectors_128M);
engine::VectorSource::Ptr source_128M = std::make_shared<engine::VectorSource>(n_max, vectors_128M.data());
// NOTE(review): the status of this Add is overwritten below without being asserted.
status = mem_table_file.Add(source_128M);
vector_ids = source_128M->GetVectorIds();
ASSERT_EQ(vector_ids.size(), n_max - n_100);
ASSERT_TRUE(mem_table_file.IsFull());
status = impl_->DropAll();
ASSERT_TRUE(status.ok());
}
// Exercises MemTable::Add across table-file boundaries: checks the memory of
// the current file and the number of table files after three inserts of
// increasing size, then serializes the table.
TEST_F(NewMemManagerTest, MEM_TABLE_TEST) {
std::shared_ptr<engine::meta::DBMetaImpl> impl_ = engine::DBMetaImplFactory::Build();
auto options = engine::OptionsFactory::Build();
engine::meta::TableSchema table_schema = BuildTableSchema();
auto status = impl_->CreateTable(table_schema);
ASSERT_TRUE(status.ok());
// Step 1: 100 vectors land in the first table file.
int64_t n_100 = 100;
std::vector<float> vectors_100;
BuildVectors(n_100, vectors_100);
engine::VectorSource::Ptr source_100 = std::make_shared<engine::VectorSource>(n_100, vectors_100.data());
engine::MemTable mem_table(TABLE_NAME, impl_, options);
status = mem_table.Add(source_100);
ASSERT_TRUE(status.ok());
engine::IDNumbers vector_ids = source_100->GetVectorIds();
ASSERT_EQ(vector_ids.size(), 100);
engine::MemTableFile::Ptr mem_table_file;
mem_table.GetCurrentMemTableFile(mem_table_file);
size_t singleVectorMem = sizeof(float) * TABLE_DIM;
ASSERT_EQ(mem_table_file->GetCurrentMem(), n_100 * singleVectorMem);
// Step 2: one file's worth of vectors; all are accepted, and the current
// (second) file is expected to hold the 100-vector overflow.
int64_t n_max = engine::MAX_TABLE_FILE_MEM / singleVectorMem;
std::vector<float> vectors_128M;
BuildVectors(n_max, vectors_128M);
engine::VectorSource::Ptr source_128M = std::make_shared<engine::VectorSource>(n_max, vectors_128M.data());
status = mem_table.Add(source_128M);
ASSERT_TRUE(status.ok());
vector_ids = source_128M->GetVectorIds();
ASSERT_EQ(vector_ids.size(), n_max);
mem_table.GetCurrentMemTableFile(mem_table_file);
ASSERT_EQ(mem_table_file->GetCurrentMem(), n_100 * singleVectorMem);
ASSERT_EQ(mem_table.GetTableFileCount(), 2);
// Step 3: 1,024,000 more vectors spread over several additional files.
int64_t n_1G = 1024000;
std::vector<float> vectors_1G;
BuildVectors(n_1G, vectors_1G);
engine::VectorSource::Ptr source_1G = std::make_shared<engine::VectorSource>(n_1G, vectors_1G.data());
status = mem_table.Add(source_1G);
ASSERT_TRUE(status.ok());
vector_ids = source_1G->GetVectorIds();
ASSERT_EQ(vector_ids.size(), n_1G);
// NOTE(review): if engine::MAX_TABLE_FILE_MEM is an integer type, the division
// below truncates before std::ceil sees it — confirm the expected count is intended.
int expectedTableFileCount = 2 + std::ceil((n_1G - n_100) * singleVectorMem / engine::MAX_TABLE_FILE_MEM);
ASSERT_EQ(mem_table.GetTableFileCount(), expectedTableFileCount);
status = mem_table.Serialize();
ASSERT_TRUE(status.ok());
status = impl_->DropAll();
ASSERT_TRUE(status.ok());
}
// Inserts 1,024,000 vectors through the DB interface, then queries a random
// sample of them and expects each query's top hit to be the vector itself
// (matching ID, distance below 1e-5).
TEST_F(NewMemManagerTest, SERIAL_INSERT_SEARCH_TEST) {
auto options = engine::OptionsFactory::Build();
options.meta.path = "/tmp/milvus_test";
options.meta.backend_uri = "sqlite://:@:/";
auto db_ = engine::DBFactory::Build(options);
engine::meta::TableSchema table_info = BuildTableSchema();
// NOTE(review): the CreateTable status is overwritten by DescribeTable without being asserted.
engine::Status stat = db_->CreateTable(table_info);
engine::meta::TableSchema table_info_get;
table_info_get.table_id_ = TABLE_NAME;
stat = db_->DescribeTable(table_info_get);
ASSERT_STATS(stat);
ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);
// Maps the ID of each sampled vector to its data, to be used as a query.
std::map<int64_t, std::vector<float>> search_vectors;
{
engine::IDNumbers vector_ids;
int64_t nb = 1024000;
std::vector<float> xb;
BuildVectors(nb, xb);
engine::Status status = db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
ASSERT_TRUE(status.ok());
// Presumably gives the DB time to make the inserted data searchable — TODO confirm.
std::this_thread::sleep_for(std::chrono::seconds(3));
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_int_distribution<int64_t> dis(0, nb - 1);
// Pick 20 random inserted vectors (duplicates collapse in the map).
int64_t num_query = 20;
for (int64_t i = 0; i < num_query; ++i) {
int64_t index = dis(gen);
std::vector<float> search;
for (int64_t j = 0; j < TABLE_DIM; j++) {
search.push_back(xb[index * TABLE_DIM + j]);
}
search_vectors.insert(std::make_pair(vector_ids[index], search));
}
}
int k = 10;
for (auto &pair : search_vectors) {
auto &search = pair.second;
engine::QueryResults results;
// NOTE(review): the Query status is stored but not asserted before results[0][0] is read.
stat = db_->Query(TABLE_NAME, k, 1, search.data(), results);
ASSERT_EQ(results[0][0].first, pair.first);
ASSERT_LT(results[0][0].second, 0.00001);
}
// NOTE(review): a failing ASSERT above returns early and skips this cleanup.
delete db_;
boost::filesystem::remove_all(options.meta.path);
}
// Inserts 20 batches of 409,600 vectors through the DB interface, asserting
// every insert succeeds, and logs the total elapsed time.
TEST_F(NewMemManagerTest, INSERT_TEST) {
    auto options = engine::OptionsFactory::Build();
    options.meta.path = "/tmp/milvus_test";
    options.meta.backend_uri = "sqlite://:@:/";
    auto db_ = engine::DBFactory::Build(options);

    engine::meta::TableSchema table_info = BuildTableSchema();
    engine::Status stat = db_->CreateTable(table_info);

    // Read the table back to make sure it exists with the expected dimension.
    engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);

    auto start_time = METRICS_NOW_TIME;

    int insert_loop = 20;
    for (int i = 0; i < insert_loop; ++i) {
        int64_t nb = 409600;
        std::vector<float> xb;
        BuildVectors(nb, xb);
        engine::IDNumbers vector_ids;
        engine::Status status = db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
        ASSERT_TRUE(status.ok());
    }

    auto end_time = METRICS_NOW_TIME;
    auto total_time = METRICS_MICROSECONDS(start_time, end_time);
    // The value comes from METRICS_MICROSECONDS, so label it as microseconds.
    LOG(DEBUG) << "total_time spent in INSERT_TEST (us) : " << total_time;

    delete db_;
    boost::filesystem::remove_all(options.meta.path);
}
// Runs inserts on the test thread while a second thread issues queries:
// the first insert stores the 5 query vectors themselves (their IDs go to
// target_ids), later inserts add bulk data, and every search expects each
// query's top hit to be the matching target ID.
TEST_F(NewMemManagerTest, CONCURRENT_INSERT_SEARCH_TEST) {
auto options = engine::OptionsFactory::Build();
options.meta.path = "/tmp/milvus_test";
options.meta.backend_uri = "sqlite://:@:/";
auto db_ = engine::DBFactory::Build(options);
engine::meta::TableSchema table_info = BuildTableSchema();
engine::Status stat = db_->CreateTable(table_info);
engine::meta::TableSchema table_info_get;
table_info_get.table_id_ = TABLE_NAME;
stat = db_->DescribeTable(table_info_get);
ASSERT_STATS(stat);
ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);
engine::IDNumbers vector_ids;
engine::IDNumbers target_ids;
// Bulk data inserted repeatedly by the main thread.
int64_t nb = 409600;
std::vector<float> xb;
BuildVectors(nb, xb);
// Query vectors; they are also inserted once so they can be found.
int64_t qb = 5;
std::vector<float> qxb;
BuildVectors(qb, qxb);
// Search thread. It captures everything by reference, including `stat` and
// `target_ids`, which the main thread also writes.
// NOTE(review): there is no synchronization besides the sleeps — confirm this is acceptable.
std::thread search([&]() {
engine::QueryResults results;
int k = 10;
// Delay the first query by 2 seconds; presumably so the target vectors are inserted first — TODO confirm.
std::this_thread::sleep_for(std::chrono::seconds(2));
INIT_TIMER;
std::stringstream ss;
uint64_t count = 0;
uint64_t prev_count = 0;
for (auto j = 0; j < 10; ++j) {
ss.str("");
db_->Size(count);
// NOTE(review): prev_count is set to the value just read, so the
// `count >= prev_count` assertion below can never fail.
prev_count = count;
START_TIMER;
stat = db_->Query(TABLE_NAME, k, qb, qxb.data(), results);
ss << "Search " << j << " With Size " << count / engine::meta::M << " M";
STOP_TIMER(ss.str());
ASSERT_STATS(stat);
// This inner `k` shadows the top-k variable declared above.
for (auto k = 0; k < qb; ++k) {
ASSERT_EQ(results[k][0].first, target_ids[k]);
ss.str("");
ss << "Result [" << k << "]:";
for (auto result : results[k]) {
ss << result.first << " ";
}
/* LOG(DEBUG) << ss.str(); */
}
ASSERT_TRUE(count >= prev_count);
std::this_thread::sleep_for(std::chrono::seconds(1));
}
});
// Insert loop: iteration 0 stores the query vectors, the rest store bulk data.
int loop = 20;
for (auto i = 0; i < loop; ++i) {
if (i == 0) {
db_->InsertVectors(TABLE_NAME, qb, qxb.data(), target_ids);
ASSERT_EQ(target_ids.size(), qb);
} else {
db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
}
std::this_thread::sleep_for(std::chrono::microseconds(1));
}
search.join();
delete db_;
boost::filesystem::remove_all(options.meta.path);
};
......@@ -3,17 +3,19 @@
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#include <gtest/gtest.h>
#include <thread>
#include <easylogging++.h>
#include <boost/filesystem.hpp>
#include "utils.h"
#include "db/DB.h"
#include "db/DBImpl.h"
#include "db/MetaConsts.h"
#include "db/Factories.h"
#include <gtest/gtest.h>
#include <easylogging++.h>
#include <boost/filesystem.hpp>
#include <thread>
#include <random>
using namespace zilliz::milvus;
namespace {
......
......@@ -6,7 +6,7 @@
#include <gtest/gtest.h>
#include "db/scheduler/task/SearchTask.h"
#include <cmath>
#include <vector>
using namespace zilliz::milvus;
......@@ -159,4 +159,4 @@ TEST(DBSearchTest, MERGE_TEST) {
ASSERT_EQ(target.size(), src_count + target_count);
CheckResult(src_result[0], target_result[0], target);
}
}
\ No newline at end of file
}
......@@ -106,6 +106,18 @@ zilliz::milvus::engine::Options MySQLDBTest::GetOptions() {
return options;
}
void NewMemManagerTest::InitLog() {
el::Configurations defaultConf;
defaultConf.setToDefault();
defaultConf.set(el::Level::Debug,
el::ConfigurationType::Format, "[%thread-%datetime-%level]: %msg (%fbase:%line)");
el::Loggers::reconfigureLogger("default", defaultConf);
}
// Fixture set-up hook: configures logging via InitLog().
void NewMemManagerTest::SetUp() {
InitLog();
}
int main(int argc, char **argv) {
::testing::InitGoogleTest(&argc, argv);
if (argc > 1) {
......
......@@ -30,7 +30,7 @@
#define STOP_TIMER(name)
#endif
void ASSERT_STATS(zilliz::milvus::engine::Status& stat);
void ASSERT_STATS(zilliz::milvus::engine::Status &stat);
//class TestEnv : public ::testing::Environment {
//public:
......@@ -54,8 +54,8 @@ void ASSERT_STATS(zilliz::milvus::engine::Status& stat);
// ::testing::AddGlobalTestEnvironment(new TestEnv);
class DBTest : public ::testing::Test {
protected:
zilliz::milvus::engine::DB* db_;
protected:
zilliz::milvus::engine::DB *db_;
void InitLog();
virtual void SetUp() override;
......@@ -64,13 +64,13 @@ protected:
};
class DBTest2 : public DBTest {
protected:
protected:
virtual zilliz::milvus::engine::Options GetOptions() override;
};
class MetaTest : public DBTest {
protected:
protected:
std::shared_ptr<zilliz::milvus::engine::meta::DBMetaImpl> impl_;
virtual void SetUp() override;
......@@ -78,12 +78,17 @@ protected:
};
class MySQLTest : public ::testing::Test {
protected:
protected:
// std::shared_ptr<zilliz::milvus::engine::meta::MySQLMetaImpl> impl_;
zilliz::milvus::engine::DBMetaOptions getDBMetaOptions();
};
class MySQLDBTest : public ::testing::Test {
protected:
class MySQLDBTest : public ::testing::Test {
protected:
zilliz::milvus::engine::Options GetOptions();
};
class NewMemManagerTest : public ::testing::Test {
void InitLog();
void SetUp() override;
};
......@@ -22,13 +22,13 @@ add_executable(wrapper_test ${wrapper_test_src})
set(wrapper_libs
stdc++
boost_system
boost_filesystem
boost_system_static
boost_filesystem_static
libgpufaiss.a
faiss
cudart
cublas
sqlite3
sqlite
snappy
bz2
z
......
......@@ -4,12 +4,15 @@
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#include <gtest/gtest.h>
#include "wrapper/Operand.h"
#include "wrapper/Index.h"
#include "wrapper/IndexBuilder.h"
#include <gtest/gtest.h>
#include <random>
using namespace zilliz::milvus::engine;
......
......@@ -33,11 +33,11 @@ set(db_libs
nvidia-ml
cudart
cublas
boost_system
boost_filesystem
boost_system_static
boost_filesystem_static
lz4
crypto
boost_serialization
boost_serialization_static
)
target_link_libraries(license_test ${db_libs} ${unittest_libs})
......@@ -66,9 +66,9 @@ target_link_libraries(metrics_test
faiss
cudart
cublas
sqlite3
boost_system
boost_filesystem
sqlite
boost_system_static
boost_filesystem_static
lz4
metrics
gtest
......
......@@ -37,9 +37,9 @@ set(require_libs
faiss
cudart
cublas
sqlite3
boost_system
boost_filesystem
sqlite
boost_system_static
boost_filesystem_static
snappy
z
bz2
......
......@@ -25,7 +25,7 @@ set(s3_client_libs
stdc++
aws-cpp-sdk-s3
aws-cpp-sdk-core
boost_filesystem
boost_filesystem_static
)
target_link_libraries(s3_test
${s3_client_libs}
......
#-------------------------------------------------------------------------------
# Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
# Unauthorized copying of this file, via any medium is strictly prohibited.
# Proprietary and confidential.
#-------------------------------------------------------------------------------
# Builds and installs the unit-test executable for utils/ValidationUtil.
# Make sure that your call to link_directories takes place before your call to the relevant add_executable.
include_directories("${CUDA_TOOLKIT_ROOT_DIR}/include")
link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64")
# Code under test.
set(validation_util_src
${MILVUS_ENGINE_SRC}/utils/ValidationUtil.cpp
${MILVUS_ENGINE_SRC}/utils/ValidationUtil.h)
# Test sources: the unittest_srcs and require_files lists (defined outside this script), the code under test, and the test file.
set(validation_util_test_src
${unittest_srcs}
${validation_util_src}
${require_files}
ValidationUtilTest.cpp
)
# NOTE(review): the target name is spelled "valication_util_test" (likely meant
# "validation_util_test"); it is used consistently below, so renaming must be
# done in all three places and in anything that invokes the binary.
add_executable(valication_util_test
${validation_util_test_src}
${config_files})
target_link_libraries(valication_util_test
${unittest_libs}
boost_filesystem_static)
install(TARGETS valication_util_test DESTINATION bin)
\ No newline at end of file
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#include <gtest/gtest.h>
#include "utils/ValidationUtil.h"
#include "utils/Error.h"
#include <string>
using namespace zilliz::milvus::server;
// Checks ValidateTableName against valid names, names with an illegal first
// or later character, the empty name, non-ASCII text and over-long names.
TEST(ValidationUtilTest, TableNameTest) {
    // Letters, digits and underscore, starting with a letter: valid.
    std::string table_name = "Normal123_";
    ServerError res = ValidateTableName(table_name);
    ASSERT_EQ(res, SERVER_SUCCESS);

    // Must not start with a digit.
    table_name = "12sds";
    res = ValidateTableName(table_name);
    ASSERT_EQ(res, SERVER_INVALID_TABLE_NAME);

    // Must not be empty.
    table_name = "";
    res = ValidateTableName(table_name);
    ASSERT_EQ(res, SERVER_INVALID_TABLE_NAME);

    // A leading underscore is allowed.
    table_name = "_asdasd";
    res = ValidateTableName(table_name);
    ASSERT_EQ(res, SERVER_SUCCESS);

    // Punctuation is rejected.
    table_name = "!@#!@";
    res = ValidateTableName(table_name);
    ASSERT_EQ(res, SERVER_INVALID_TABLE_NAME);

    // Non-ASCII text is rejected.
    table_name = "中文";
    res = ValidateTableName(table_name);
    ASSERT_EQ(res, SERVER_INVALID_TABLE_NAME);

    // Over-long name. The (count, char) constructor takes the count FIRST;
    // std::string('a', 32768) would build 97 NUL characters instead and be
    // rejected for the wrong reason.
    table_name = std::string(32768, 'a');
    res = ValidateTableName(table_name);
    ASSERT_EQ(res, SERVER_INVALID_TABLE_NAME);

    // Boundary of the 255-character limit.
    table_name = std::string(255, 'a');
    res = ValidateTableName(table_name);
    ASSERT_EQ(res, SERVER_SUCCESS);

    table_name = std::string(256, 'a');
    res = ValidateTableName(table_name);
    ASSERT_EQ(res, SERVER_INVALID_TABLE_NAME);
}
// Checks ValidateTableDimension on both sides of its valid range.
TEST(ValidationUtilTest, TableDimensionTest) {
    // Negative, zero and one-past-the-limit dimensions are rejected.
    const int64_t rejected[] = {-1, 0, 16385};
    for (const int64_t dimension : rejected) {
        ASSERT_EQ(ValidateTableDimension(dimension), SERVER_INVALID_VECTOR_DIMENSION);
    }

    // The upper limit itself and the smallest positive dimension are accepted.
    const int64_t accepted[] = {16384, 1};
    for (const int64_t dimension : accepted) {
        ASSERT_EQ(ValidateTableDimension(dimension), SERVER_SUCCESS);
    }
}
// Checks ValidateTableIndexType for index types 0 through 4: only 1 and 2
// are expected to be accepted.
TEST(ValidationUtilTest, TableIndexTypeTest) {
    for (int32_t index_type = 0; index_type <= 4; ++index_type) {
        if (index_type == 1 || index_type == 2) {
            ASSERT_EQ(ValidateTableIndexType(index_type), SERVER_SUCCESS);
        } else {
            ASSERT_EQ(ValidateTableIndexType(index_type), SERVER_INVALID_INDEX_TYPE);
        }
    }
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册