提交 5967ffcb 编写于 作者: Q quicksilver

Merge remote-tracking branch 'upstream/branch-0.3.1' into branch-0.3.1


Former-commit-id: 7df6fe07393d1db796ab0c1bdc2559f2712447d1
......@@ -17,3 +17,4 @@ Please mark all change in change log and use the ticket from JIRA.
- MS-1 - Add CHANGELOG.md
- MS-161 - Add CI / CD Module to Milvus Project
- MS-202 - Add Milvus Jenkins project email notification
- MS-215 - Add Milvus cluster CI/CD groovy file
container('milvus-build-env') {
timeout(time: 20, unit: 'MINUTES') {
timeout(time: 30, unit: 'MINUTES') {
gitlabCommitStatus(name: 'Build Engine') {
dir ("milvus_engine") {
try {
......@@ -17,4 +17,3 @@ container('milvus-build-env') {
}
}
}
container('milvus-build-env') {
timeout(time: 20, unit: 'MINUTES') {
timeout(time: 30, unit: 'MINUTES') {
gitlabCommitStatus(name: 'Build Engine') {
dir ("milvus_engine") {
try {
......@@ -17,4 +17,3 @@ container('milvus-build-env') {
}
}
}
......@@ -35,7 +35,7 @@ pipeline {
defaultContainer 'jnlp'
containerTemplate {
name 'milvus-build-env'
image 'registry.zilliz.com/milvus/milvus-build-env:v0.10'
image 'registry.zilliz.com/milvus/milvus-build-env:v0.11'
ttyEnabled true
command 'cat'
}
......
......@@ -35,7 +35,7 @@ pipeline {
defaultContainer 'jnlp'
containerTemplate {
name 'milvus-build-env'
image 'registry.zilliz.com/milvus/milvus-build-env:v0.10'
image 'registry.zilliz.com/milvus/milvus-build-env:v0.11'
ttyEnabled true
command 'cat'
}
......
......@@ -35,7 +35,7 @@ pipeline {
defaultContainer 'jnlp'
containerTemplate {
name 'milvus-build-env'
image 'registry.zilliz.com/milvus/milvus-build-env:v0.10'
image 'registry.zilliz.com/milvus/milvus-build-env:v0.11'
ttyEnabled true
command 'cat'
}
......
......@@ -9,9 +9,17 @@ Please mark all change in change log and use the ticket from JIRA.
- MS-148 - Disable cleanup if mode is read only
- MS-149 - Fixed searching only one index file issue in distributed mode
- MS-153 - fix c_str error when connecting to MySQL
- MS-157 - fix changelog
- MS-190 - use env variable to switch mem manager and fix cmake
- MS-153 - Fix c_str error when connecting to MySQL
- MS-157 - Fix changelog
- MS-190 - Use env variable to switch mem manager and fix cmake
- MS-217 - Fix SQ8 row count bug
- MS-224 - Return AlreadyExist status in MySQLMetaImpl::CreateTable if table already exists
- MS-232 - Add MySQLMetaImpl::UpdateTableFilesToIndex and set maximum_memory to default if config value = 0
- MS-233 - Remove mem manager log
- MS-230 - Change parameter name: Maximum_memory to insert_buffer_size
- MS-234 - Some case cause background merge thread stop
- MS-235 - Some test cases random fail
- MS-236 - Add MySQLMetaImpl::HasNonIndexFiles
## Improvement
- MS-156 - Add unittest for merge result functions
......@@ -20,12 +28,13 @@ Please mark all change in change log and use the ticket from JIRA.
- MS-206 - Support SQ8 index type
- MS-208 - Add buildinde interface for C++ SDK
- MS-212 - Support Inner product metric type
## New Feature
- MS-195 - Add nlist and use_blas_threshold conf
- MS-241 - Build Faiss with MKL if using Intel CPU; else build with OpenBlas
- MS-242 - Clean up cmake and change MAKE_BUILD_ARGS to be user defined variable
- MS-245 - Improve search result transfer performance
## New Feature
- MS-180 - Add new mem manager
- MS-195 - Add nlist and use_blas_threshold conf
## Task
......
......@@ -116,6 +116,11 @@ set(MILVUS_ENGINE_SRC ${PROJECT_SOURCE_DIR}/src)
add_compile_definitions(PROFILER=${PROFILER})
message("MILVUS_ENABLE_PROFILING = ${MILVUS_ENABLE_PROFILING}")
if (MILVUS_ENABLE_PROFILING STREQUAL "ON")
ADD_DEFINITIONS(-DMILVUS_ENABLE_PROFILING)
endif()
include_directories(${MILVUS_ENGINE_INCLUDE})
include_directories(${MILVUS_ENGINE_SRC})
......
### Compilation
#### Step 1: install necessery tools
Install MySQL
centos7 :
yum install gfortran qt4 flex bison mysql-devel
yum install gfortran qt4 flex bison mysql-devel mysql
ubuntu16.04 :
sudo apt-get install gfortran qt4-qmake flex bison libmysqlclient-dev
sudo apt-get install gfortran qt4-qmake flex bison libmysqlclient-dev mysql-client
cd scripts && sudo ./requirements.sh
If `libmysqlclient_r.so` does not exist after installing MySQL Development Files, you need to create a symbolic link:
......
......@@ -7,8 +7,9 @@ INSTALL_PREFIX=$(pwd)/milvus
MAKE_CLEAN="OFF"
BUILD_COVERAGE="OFF"
DB_PATH="/opt/milvus"
PROFILING="OFF"
while getopts "p:d:t:uhlrc" arg
while getopts "p:d:t:uhlrcg" arg
do
case $arg in
t)
......@@ -36,6 +37,9 @@ do
c)
BUILD_COVERAGE="ON"
;;
g)
PROFILING="ON"
;;
h) # help
echo "
......@@ -47,6 +51,7 @@ parameter:
-l: build license version(default: OFF)
-r: remove previous build directory(default: OFF)
-c: code coverage(default: OFF)
-g: profiling(default: OFF)
usage:
./build.sh -t \${BUILD_TYPE} [-u] [-h] [-g] [-r] [-c]
......@@ -77,6 +82,7 @@ if [[ ${MAKE_CLEAN} == "ON" ]]; then
-DCMAKE_LICENSE_CHECK=${LICENSE_CHECK} \
-DBUILD_COVERAGE=${BUILD_COVERAGE} \
-DMILVUS_DB_PATH=${DB_PATH} \
-DMILVUS_ENABLE_PROFILING=${PROFILING} \
$@ ../"
echo ${CMAKE_CMD}
......
......@@ -57,8 +57,6 @@ define_option(MILVUS_VERBOSE_THIRDPARTY_BUILD
define_option(MILVUS_WITH_ARROW "Build with ARROW" OFF)
define_option(MILVUS_BOOST_USE_SHARED "Rely on boost shared libraries where relevant" OFF)
define_option(MILVUS_BOOST_VENDORED "Use vendored Boost instead of existing Boost. \
Note that this requires linking Boost statically" ON)
......@@ -111,6 +109,11 @@ define_option(MILVUS_WITH_ZSTD "Build with zstd compression" ${MILVUS_WITH_ZSTD_
define_option(MILVUS_WITH_AWS "Build with AWS SDK" ON)
if (MILVUS_ENABLE_PROFILING STREQUAL "ON")
define_option(MILVUS_WITH_LIBUNWIND "Build with libunwind" ON)
define_option(MILVUS_WITH_GPERFTOOLS "Build with gperftools" ON)
endif()
#----------------------------------------------------------------------
if(MSVC)
set_option_category("MSVC")
......
此差异已折叠。
......@@ -14,10 +14,10 @@ db_config:
db_backend_url: sqlite://:@:/
index_building_threshold: 1024 # index building trigger threshold, default: 1024, unit: MB
archive_disk_threshold: 512 # triger archive action if storage size exceed this value, unit: GB
archive_days_threshold: 30 # files older than x days will be archived, unit: day
maximum_memory: 4 # maximum memory allowed, default: 4, unit: GB, should be at least 1 GB.
# the sum of maximum_memory and cpu_cache_capacity should be less than total memory
archive_disk_threshold: 0 # triger archive action if storage size exceed this value, 0 means no limit, unit: GB
archive_days_threshold: 0 # files older than x days will be archived, 0 means no limit, unit: day
insert_buffer_size: 4 # maximum insert buffer size allowed, default: 4, unit: GB, should be at least 1 GB.
# the sum of insert_buffer_size and cpu_cache_capacity should be less than total memory
metric_config:
is_startup: off # if monitoring start: on, off
......@@ -39,4 +39,4 @@ engine_config:
nprobe: 10
nlist: 16384
use_blas_threshold: 20
metric_type: L2 #L2 or Inner Product
metric_type: L2 # compare vectors by euclidean distance(L2) or inner product(IP), optional: L2 or IP
#!/usr/bin/env bash
wget -P /tmp https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB
apt-key add /tmp/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB
sh -c 'echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list'
apt -y update && apt-get -y install intel-mkl-gnu-2019.4-243 intel-mkl-core-2019.4-243
#sh -c 'echo export LD_LIBRARY_PATH=/opt/intel/compilers_and_libraries_2019.4.243/linux/mkl/lib/intel64:\$LD_LIBRARY_PATH > /etc/profile.d/mkl.sh'
#source /etc/profile
......@@ -48,7 +48,6 @@ set(engine_files
${db_files}
${db_scheduler_files}
${wrapper_files}
# metrics/Metrics.cpp
${metrics_files}
)
......@@ -64,6 +63,10 @@ include_directories("${CUDA_TOOLKIT_ROOT_DIR}/include")
include_directories(thrift/gen-cpp)
include_directories(/usr/include/mysql)
if (MILVUS_ENABLE_PROFILING STREQUAL "ON")
SET(PROFILER_LIB profiler)
endif()
set(third_party_libs
easyloggingpp
sqlite
......@@ -71,8 +74,6 @@ set(third_party_libs
yaml-cpp
libgpufaiss.a
faiss
lapack
openblas
prometheus-cpp-push
prometheus-cpp-pull
prometheus-cpp-core
......@@ -85,11 +86,22 @@ set(third_party_libs
zlib
zstd
mysqlpp
${PROFILER_LIB}
${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so
)
if (MEGASEARCH_WITH_ARROW STREQUAL "ON")
set(third_party_libs ${third_party_libs} arrow)
endif()
endif()
if(${BUILD_FAISS_WITH_MKL} STREQUAL "true")
set(third_party_libs ${third_party_libs}
${MKL_LIBS}
${MKL_LIBS})
else()
set(third_party_libs ${third_party_libs}
lapack
openblas)
endif()
if (GPU_VERSION STREQUAL "ON")
link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64")
......@@ -187,6 +199,6 @@ install(FILES
${CMAKE_BINARY_DIR}/mysqlpp_ep-prefix/src/mysqlpp_ep/lib/${CMAKE_SHARED_LIBRARY_PREFIX}mysqlpp${CMAKE_SHARED_LIBRARY_SUFFIX}
${CMAKE_BINARY_DIR}/mysqlpp_ep-prefix/src/mysqlpp_ep/lib/${CMAKE_SHARED_LIBRARY_PREFIX}mysqlpp${CMAKE_SHARED_LIBRARY_SUFFIX}.3
${CMAKE_BINARY_DIR}/mysqlpp_ep-prefix/src/mysqlpp_ep/lib/${CMAKE_SHARED_LIBRARY_PREFIX}mysqlpp${CMAKE_SHARED_LIBRARY_SUFFIX}.3.2.4
DESTINATION lib) #need to copy libmysqlpp.so
DESTINATION lib)
#add_subdirectory(sdk)
add_subdirectory(sdk)
......@@ -20,6 +20,11 @@ public:
: index_(index)
{}
DataObj(const engine::Index_ptr& index, int64_t size)
: index_(index),
size_(size)
{}
engine::Index_ptr data() { return index_; }
const engine::Index_ptr& data() const { return index_; }
......@@ -28,11 +33,16 @@ public:
return 0;
}
if(size_ > 0) {
return size_;
}
return index_->ntotal*(index_->dim*4);
}
private:
engine::Index_ptr index_ = nullptr;
int64_t size_ = 0;
};
using DataObjPtr = std::shared_ptr<DataObj>;
......
......@@ -89,6 +89,7 @@ DBImpl::DBImpl(const Options& options)
meta_ptr_ = DBMetaImplFactory::Build(options.meta, options.mode);
mem_mgr_ = MemManagerFactory::Build(meta_ptr_, options_);
if (options.mode != Options::MODE::READ_ONLY) {
ENGINE_LOG_INFO << "StartTimerTasks";
StartTimerTasks();
}
}
......@@ -169,7 +170,10 @@ Status DBImpl::Query(const std::string& table_id, uint64_t k, uint64_t nq,
}
}
return QueryAsync(table_id, file_id_array, k, nq, vectors, dates, results);
cache::CpuCacheMgr::GetInstance()->PrintInfo(); //print cache info before query
status = QueryAsync(table_id, file_id_array, k, nq, vectors, dates, results);
cache::CpuCacheMgr::GetInstance()->PrintInfo(); //print cache info after query
return status;
}
Status DBImpl::Query(const std::string& table_id, const std::vector<std::string>& file_ids,
......@@ -194,7 +198,10 @@ Status DBImpl::Query(const std::string& table_id, const std::vector<std::string>
return Status::Error("Invalid file id");
}
return QueryAsync(table_id, files_array, k, nq, vectors, dates, results);
cache::CpuCacheMgr::GetInstance()->PrintInfo(); //print cache info before query
status = QueryAsync(table_id, files_array, k, nq, vectors, dates, results);
cache::CpuCacheMgr::GetInstance()->PrintInfo(); //print cache info after query
return status;
}
Status DBImpl::QueryAsync(const std::string& table_id, const meta::TableFilesSchema& files,
......@@ -229,7 +236,6 @@ void DBImpl::BackgroundTimerTask() {
Status status;
server::SystemInfo::GetInstance().Init();
while (true) {
if (!bg_error_.ok()) break;
if (shutting_down_.load(std::memory_order_acquire)){
for(auto& iter : compact_thread_results_) {
iter.wait();
......@@ -384,15 +390,11 @@ Status DBImpl::BackgroundMergeFiles(const std::string& table_id) {
}
void DBImpl::BackgroundCompaction(std::set<std::string> table_ids) {
// static int b_count = 0;
// b_count++;
// std::cout << "BackgroundCompaction: " << b_count << std::endl;
Status status;
for (auto& table_id : table_ids) {
status = BackgroundMergeFiles(table_id);
if (!status.ok()) {
bg_error_ = status;
ENGINE_LOG_ERROR << "Merge files for table " << table_id << " failed: " << status.ToString();
return;
}
}
......@@ -402,7 +404,6 @@ void DBImpl::BackgroundCompaction(std::set<std::string> table_ids) {
int ttl = 1;
if (options_.mode == Options::MODE::CLUSTER) {
ttl = meta::D_SEC;
// ENGINE_LOG_DEBUG << "Server mode is cluster. Clean up files with ttl = " << std::to_string(ttl) << "seconds.";
}
meta_ptr_->CleanUpFilesWithTTL(ttl);
}
......@@ -486,7 +487,7 @@ Status DBImpl::BuildIndex(const meta::TableFileSchema& file) {
//step 6: update meta
table_file.file_type_ = meta::TableFileSchema::INDEX;
table_file.size_ = index->PhysicalSize();
table_file.size_ = index->Size();
auto to_remove = file;
to_remove.file_type_ = meta::TableFileSchema::TO_DELETE;
......@@ -535,10 +536,9 @@ void DBImpl::BackgroundBuildIndex() {
meta_ptr_->FilesToIndex(to_index_files);
Status status;
for (auto& file : to_index_files) {
/* ENGINE_LOG_DEBUG << "Buiding index for " << file.location; */
status = BuildIndex(file);
if (!status.ok()) {
bg_error_ = status;
ENGINE_LOG_ERROR << "Building index for " << file.id_ << " failed: " << status.ToString();
return;
}
......@@ -546,7 +546,6 @@ void DBImpl::BackgroundBuildIndex() {
break;
}
}
/* ENGINE_LOG_DEBUG << "All Buiding index Done"; */
}
Status DBImpl::DropAll() {
......
......@@ -118,10 +118,8 @@ class DBImpl : public DB {
BuildIndex(const meta::TableFileSchema &);
private:
const Options options_;
Status bg_error_;
std::atomic<bool> shutting_down_;
std::thread bg_timer_thread_;
......
......@@ -291,6 +291,8 @@ Status DBMetaImpl::HasNonIndexFiles(const std::string& table_id, bool& has) {
try {
auto selected = ConnectorPtr->select(columns(&TableFileSchema::id_),
where((c(&TableFileSchema::file_type_) == (int) TableFileSchema::RAW
or
c(&TableFileSchema::file_type_) == (int) TableFileSchema::NEW
or
c(&TableFileSchema::file_type_) == (int) TableFileSchema::TO_INDEX)
and c(&TableFileSchema::table_id_) == table_id
......@@ -674,16 +676,22 @@ Status DBMetaImpl::Archive() {
Status DBMetaImpl::Size(uint64_t &result) {
result = 0;
try {
auto selected = ConnectorPtr->select(columns(sum(&TableFileSchema::size_)),
where(
c(&TableFileSchema::file_type_) != (int) TableFileSchema::TO_DELETE
));
auto files = ConnectorPtr->select(columns(&TableFileSchema::size_,
&TableFileSchema::file_type_,
&TableFileSchema::engine_type_),
where(
c(&TableFileSchema::file_type_) != (int) TableFileSchema::TO_DELETE
));
for (auto &sub_query : selected) {
if (!std::get<0>(sub_query)) {
continue;
for (auto &file : files) {
auto file_size = std::get<0>(file);
auto file_type = std::get<1>(file);
auto engine_type = std::get<2>(file);
if(file_type == (int)TableFileSchema::INDEX && engine_type == (int)EngineType::FAISS_IVFSQ8) {
result += (uint64_t)file_size/4;//hardcode for sq8
} else {
result += (uint64_t)file_size;
}
result += (uint64_t) (*std::get<0>(sub_query));
}
} catch (std::exception &e) {
return HandleException("Encounter exception when calculte db size", e);
......
......@@ -19,6 +19,7 @@ enum class EngineType {
FAISS_IDMAP = 1,
FAISS_IVFFLAT,
FAISS_IVFSQ8,
MAX_VALUE = FAISS_IVFSQ8,
};
class ExecutionEngine {
......
......@@ -110,7 +110,7 @@ Status FaissExecutionEngine::Merge(const std::string& location) {
if (location == location_) {
return Status::Error("Cannot Merge Self");
}
ENGINE_LOG_DEBUG << "Merge index file: " << location << " to: " << location_;
ENGINE_LOG_DEBUG << "Merge raw file: " << location << " to: " << location_;
auto to_merge = zilliz::milvus::cache::CpuCacheMgr::GetInstance()->GetIndex(location);
if (!to_merge) {
......@@ -165,8 +165,9 @@ Status FaissExecutionEngine::Search(long n,
}
Status FaissExecutionEngine::Cache() {
zilliz::milvus::cache::CpuCacheMgr::GetInstance(
)->InsertItem(location_, std::make_shared<Index>(pIndex_));
auto index = std::make_shared<Index>(pIndex_);
cache::DataObjPtr data_obj = std::make_shared<cache::DataObj>(index, PhysicalSize());
zilliz::milvus::cache::CpuCacheMgr::GetInstance()->InsertItem(location_, data_obj);
return Status::OK();
}
......
......@@ -125,9 +125,6 @@ Status MemManager::InsertVectors(const std::string &table_id_,
const float *vectors_,
IDNumbers &vector_ids_) {
LOG(DEBUG) << "MemManager::InsertVectors: mutable mem = " << GetCurrentMutableMem() <<
", immutable mem = " << GetCurrentImmutableMem() << ", total mem = " << GetCurrentMem();
std::unique_lock<std::mutex> lock(mutex_);
return InsertVectorsNoLock(table_id_, n_, vectors_, vector_ids_);
......
此差异已折叠。
......@@ -25,13 +25,10 @@ Status NewMemManager::InsertVectors(const std::string &table_id_,
const float *vectors_,
IDNumbers &vector_ids_) {
while (GetCurrentMem() > options_.maximum_memory) {
while (GetCurrentMem() > options_.insert_buffer_size) {
std::this_thread::sleep_for(std::chrono::milliseconds(1));
}
LOG(DEBUG) << "NewMemManager::InsertVectors: mutable mem = " << GetCurrentMutableMem() <<
", immutable mem = " << GetCurrentImmutableMem() << ", total mem = " << GetCurrentMem();
std::unique_lock<std::mutex> lock(mutex_);
return InsertVectorsNoLock(table_id_, n_, vectors_, vector_ids_);
......
......@@ -41,6 +41,10 @@ void ArchiveConf::ParseCritirias(const std::string& criterias) {
}
for (auto& token : tokens) {
if(token.empty()) {
continue;
}
std::vector<std::string> kv;
boost::algorithm::split(kv, token, boost::is_any_of(":"));
if (kv.size() != 2) {
......
......@@ -22,7 +22,7 @@ static constexpr uint64_t ONE_GB = ONE_KB*ONE_MB;
static const std::string ARCHIVE_CONF_DISK = "disk";
static const std::string ARCHIVE_CONF_DAYS = "days";
static const std::string ARCHIVE_CONF_DEFAULT = ARCHIVE_CONF_DISK + ":512";
static const std::string ARCHIVE_CONF_DEFAULT = "";
struct ArchiveConf {
using CriteriaT = std::map<std::string, int>;
......@@ -63,7 +63,7 @@ struct Options {
size_t index_trigger_size = ONE_GB; //unit: byte
DBMetaOptions meta;
int mode = MODE::SINGLE;
float maximum_memory = 4 * ONE_GB;
size_t insert_buffer_size = 4 * ONE_GB;
}; // Options
......
......@@ -107,7 +107,7 @@ Status SearchTask::ClusterResult(const std::vector<long> &output_ids,
uint64_t nq,
uint64_t topk,
SearchContext::ResultSet &result_set) {
if(output_ids.size() != nq*topk || output_distence.size() != nq*topk) {
if(output_ids.size() < nq*topk || output_distence.size() < nq*topk) {
std::string msg = "Invalid id array size: " + std::to_string(output_ids.size()) +
" distance array size: " + std::to_string(output_distence.size());
SERVER_LOG_ERROR << msg;
......
......@@ -23,6 +23,7 @@ namespace {
static constexpr int64_t TOP_K = 10;
static constexpr int64_t SEARCH_TARGET = 5000; //change this value, result is different
static constexpr int64_t ADD_VECTOR_LOOP = 10;
static constexpr int64_t SECONDS_EACH_HOUR = 3600;
#define BLOCK_SPLITER std::cout << "===========================================" << std::endl;
......@@ -59,7 +60,7 @@ namespace {
std::string CurrentTime() {
time_t tt;
time( &tt );
tt = tt + 8*3600;
tt = tt + 8*SECONDS_EACH_HOUR;
tm* t= gmtime( &tt );
std::string str = std::to_string(t->tm_year + 1900) + "_" + std::to_string(t->tm_mon + 1)
......@@ -69,10 +70,11 @@ namespace {
return str;
}
std::string CurrentTmDate() {
std::string CurrentTmDate(int64_t offset_day = 0) {
time_t tt;
time( &tt );
tt = tt + 8*3600;
tt = tt + 8*SECONDS_EACH_HOUR;
tt = tt + 24*SECONDS_EACH_HOUR*offset_day;
tm* t= gmtime( &tt );
std::string str = std::to_string(t->tm_year + 1900) + "-" + std::to_string(t->tm_mon + 1)
......@@ -148,7 +150,7 @@ namespace {
std::cout << "The top 1 result is wrong: " << result_id
<< " vs. " << search_id << std::endl;
} else {
std::cout << "Check result sucessfully" << std::endl;
std::cout << "No." << index-1 << " Check result successfully" << std::endl;
}
}
BLOCK_SPLITER
......@@ -160,7 +162,7 @@ namespace {
std::vector<Range> query_range_array;
Range rg;
rg.start_value = CurrentTmDate();
rg.end_value = CurrentTmDate();
rg.end_value = CurrentTmDate(1);
query_range_array.emplace_back(rg);
std::vector<RowRecord> record_array;
......@@ -234,6 +236,7 @@ ClientTest::Test(const std::string& address, const std::string& port) {
std::vector<std::pair<int64_t, RowRecord>> search_record_array;
{//add vectors
for (int i = 0; i < ADD_VECTOR_LOOP; i++) {//add vectors
TimeRecorder recorder("Add vector No." + std::to_string(i));
std::vector<RowRecord> record_array;
int64_t begin_index = i * BATCH_ROW_COUNT;
BuildVectors(begin_index, begin_index + BATCH_ROW_COUNT, record_array);
......@@ -255,6 +258,7 @@ ClientTest::Test(const std::string& address, const std::string& port) {
}
{//wait unit build index finish
TimeRecorder recorder("Build index");
std::cout << "Wait until build all index done" << std::endl;
Status stat = conn->BuildIndex(TABLE_NAME);
std::cout << "BuildIndex function call status: " << stat.ToString() << std::endl;
......
......@@ -209,17 +209,25 @@ ClientProxy::SearchVector(const std::string &table_name,
}
//step 3: search vectors
std::vector<thrift::TopKQueryResult> result_array;
ClientPtr()->interface()->SearchVector(result_array, table_name, thrift_records, thrift_ranges, topk);
std::vector<thrift::TopKQueryBinResult> result_array;
ClientPtr()->interface()->SearchVector2(result_array, table_name, thrift_records, thrift_ranges, topk);
//step 4: convert result array
for(auto& thrift_topk_result : result_array) {
TopKQueryResult result;
for(auto& thrift_query_result : thrift_topk_result.query_result_arrays) {
size_t id_count = thrift_topk_result.id_array.size()/sizeof(int64_t);
size_t dist_count = thrift_topk_result.distance_array.size()/ sizeof(double);
if(id_count != dist_count) {
return Status(StatusCode::UnknownError, "illegal result");
}
int64_t* id_ptr = (int64_t*)thrift_topk_result.id_array.data();
double* dist_ptr = (double*)thrift_topk_result.distance_array.data();
for(size_t i = 0; i < id_count; i++) {
QueryResult query_result;
query_result.id = thrift_query_result.id;
query_result.distance = thrift_query_result.distance;
query_result.id = id_ptr[i];
query_result.distance = dist_ptr[i];
result.query_result_arrays.emplace_back(query_result);
}
......
......@@ -28,12 +28,12 @@ DBWrapper::DBWrapper() {
if(index_size > 0) {//ensure larger than zero, unit is MB
opt.index_trigger_size = (size_t)index_size * engine::ONE_MB;
}
float maximum_memory = config.GetFloatValue(CONFIG_MAXMIMUM_MEMORY);
if (maximum_memory > 1.0) {
opt.maximum_memory = maximum_memory * engine::ONE_GB;
int64_t insert_buffer_size = config.GetInt64Value(CONFIG_DB_INSERT_BUFFER_SIZE, 4);
if (insert_buffer_size >= 1) {
opt.insert_buffer_size = insert_buffer_size * engine::ONE_GB;
}
else {
std::cout << "ERROR: maximum_memory should be at least 1 GB" << std::endl;
std::cout << "ERROR: insert_buffer_size should be at least 1 GB" << std::endl;
kill(0, SIGUSR1);
}
......
......@@ -76,7 +76,7 @@ MilvusServer::StartService() {
return;
}
stdcxx::shared_ptr<ThreadManager> threadManager(ThreadManager::newSimpleThreadManager());
stdcxx::shared_ptr<ThreadManager> threadManager(ThreadManager::newSimpleThreadManager(16));
stdcxx::shared_ptr<PosixThreadFactory> threadFactory(new PosixThreadFactory());
threadManager->threadFactory(threadFactory);
threadManager->start();
......
......@@ -60,11 +60,22 @@ RequestHandler::SearchVector(std::vector<thrift::TopKQueryResult> &_return,
const std::vector<thrift::Range> &query_range_array,
const int64_t topk) {
// SERVER_LOG_DEBUG << "Entering RequestHandler::SearchVector";
BaseTaskPtr task_ptr = SearchVectorTask::Create(table_name, std::vector<std::string>(), query_record_array,
BaseTaskPtr task_ptr = SearchVectorTask1::Create(table_name, std::vector<std::string>(), query_record_array,
query_range_array, topk, _return);
RequestScheduler::ExecTask(task_ptr);
}
void
RequestHandler::SearchVector2(std::vector<thrift::TopKQueryBinResult> & _return,
const std::string& table_name,
const std::vector<thrift::RowRecord> & query_record_array,
const std::vector<thrift::Range> & query_range_array,
const int64_t topk) {
BaseTaskPtr task_ptr = SearchVectorTask2::Create(table_name, std::vector<std::string>(), query_record_array,
query_range_array, topk, _return);
RequestScheduler::ExecTask(task_ptr);
}
void
RequestHandler::SearchVectorInFiles(std::vector<::milvus::thrift::TopKQueryResult> &_return,
const std::string& table_name,
......@@ -73,7 +84,7 @@ RequestHandler::SearchVectorInFiles(std::vector<::milvus::thrift::TopKQueryResul
const std::vector<::milvus::thrift::Range> &query_range_array,
const int64_t topk) {
// SERVER_LOG_DEBUG << "Entering RequestHandler::SearchVectorInFiles. file_id_array size = " << std::to_string(file_id_array.size());
BaseTaskPtr task_ptr = SearchVectorTask::Create(table_name, file_id_array, query_record_array,
BaseTaskPtr task_ptr = SearchVectorTask1::Create(table_name, file_id_array, query_record_array,
query_range_array, topk, _return);
RequestScheduler::ExecTask(task_ptr);
}
......
......@@ -106,6 +106,29 @@ public:
const std::vector<::milvus::thrift::Range> & query_range_array,
const int64_t topk);
/**
* @brief Query vector
*
* This method is used to query vector in table.
*
* @param table_name, table_name is queried.
* @param query_record_array, all vector are going to be queried.
* @param query_range_array, optional ranges for conditional search. If not specified, search whole table
* @param topk, how many similarity vectors will be searched.
*
* @return query binary result array.
*
* @param table_name
* @param query_record_array
* @param query_range_array
* @param topk
*/
void SearchVector2(std::vector<::milvus::thrift::TopKQueryBinResult> & _return,
const std::string& table_name,
const std::vector<::milvus::thrift::RowRecord> & query_record_array,
const std::vector<::milvus::thrift::Range> & query_range_array,
const int64_t topk);
/**
* @brief Internal use query interface
*
......
此差异已折叠。
......@@ -129,7 +129,28 @@ private:
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class SearchVectorTask : public BaseTask {
class SearchVectorTaskBase : public BaseTask {
protected:
SearchVectorTaskBase(const std::string& table_name,
const std::vector<std::string>& file_id_array,
const std::vector<::milvus::thrift::RowRecord> & query_record_array,
const std::vector<::milvus::thrift::Range> & query_range_array,
const int64_t top_k);
ServerError OnExecute() override;
virtual ServerError ConstructResult(engine::QueryResults& results) = 0;
protected:
std::string table_name_;
std::vector<std::string> file_id_array_;
int64_t top_k_;
const std::vector<::milvus::thrift::RowRecord>& record_array_;
const std::vector<::milvus::thrift::Range>& range_array_;
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class SearchVectorTask1 : public SearchVectorTaskBase {
public:
static BaseTaskPtr Create(const std::string& table_name,
const std::vector<std::string>& file_id_array,
......@@ -139,24 +160,43 @@ public:
std::vector<::milvus::thrift::TopKQueryResult>& result_array);
protected:
SearchVectorTask(const std::string& table_name,
const std::vector<std::string>& file_id_array,
const std::vector<::milvus::thrift::RowRecord> & query_record_array,
const std::vector<::milvus::thrift::Range> & query_range_array,
const int64_t top_k,
SearchVectorTask1(const std::string& table_name,
const std::vector<std::string>& file_id_array,
const std::vector<::milvus::thrift::RowRecord> & query_record_array,
const std::vector<::milvus::thrift::Range> & query_range_array,
const int64_t top_k,
std::vector<::milvus::thrift::TopKQueryResult>& result_array);
ServerError OnExecute() override;
ServerError ConstructResult(engine::QueryResults& results) override;
private:
std::string table_name_;
std::vector<std::string> file_id_array_;
int64_t top_k_;
const std::vector<::milvus::thrift::RowRecord>& record_array_;
const std::vector<::milvus::thrift::Range>& range_array_;
std::vector<::milvus::thrift::TopKQueryResult>& result_array_;
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class SearchVectorTask2 : public SearchVectorTaskBase {
public:
static BaseTaskPtr Create(const std::string& table_name,
const std::vector<std::string>& file_id_array,
const std::vector<::milvus::thrift::RowRecord> & query_record_array,
const std::vector<::milvus::thrift::Range> & query_range_array,
const int64_t top_k,
std::vector<::milvus::thrift::TopKQueryBinResult>& result_array);
protected:
SearchVectorTask2(const std::string& table_name,
const std::vector<std::string>& file_id_array,
const std::vector<::milvus::thrift::RowRecord> & query_record_array,
const std::vector<::milvus::thrift::Range> & query_range_array,
const int64_t top_k,
std::vector<::milvus::thrift::TopKQueryBinResult>& result_array);
ServerError ConstructResult(engine::QueryResults& results) override;
private:
std::vector<::milvus::thrift::TopKQueryBinResult>& result_array_;
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class GetTableRowCountTask : public BaseTask {
public:
......
......@@ -27,7 +27,7 @@ static const std::string CONFIG_DB_SLAVE_PATH = "db_slave_path";
static const std::string CONFIG_DB_INDEX_TRIGGER_SIZE = "index_building_threshold";
static const std::string CONFIG_DB_ARCHIVE_DISK = "archive_disk_threshold";
static const std::string CONFIG_DB_ARCHIVE_DAYS = "archive_days_threshold";
static const std::string CONFIG_MAXMIMUM_MEMORY = "maximum_memory";
static const std::string CONFIG_DB_INSERT_BUFFER_SIZE = "insert_buffer_size";
static const std::string CONFIG_LOG = "log_config";
......
......@@ -120,6 +120,28 @@ class MilvusServiceHandler : virtual public MilvusServiceIf {
printf("SearchVector\n");
}
/**
* @brief Query vector
*
* This method is used to query vector in table.
*
* @param table_name, table_name is queried.
* @param query_record_array, all vector are going to be queried.
* @param query_range_array, optional ranges for conditional search. If not specified, search whole table
* @param topk, how many similarity vectors will be searched.
*
* @return query binary result array.
*
* @param table_name
* @param query_record_array
* @param query_range_array
* @param topk
*/
void SearchVector2(std::vector<TopKQueryBinResult> & _return, const std::string& table_name, const std::vector<RowRecord> & query_record_array, const std::vector<Range> & query_range_array, const int64_t topk) {
// Your implementation goes here
printf("SearchVector2\n");
}
/**
* @brief Internal use query interface
*
......
......@@ -63,6 +63,8 @@ class QueryResult;
class TopKQueryResult;
class TopKQueryBinResult;
typedef struct _Exception__isset {
_Exception__isset() : code(false), reason(false) {}
bool code :1;
......@@ -346,6 +348,47 @@ void swap(TopKQueryResult &a, TopKQueryResult &b);
std::ostream& operator<<(std::ostream& out, const TopKQueryResult& obj);
class TopKQueryBinResult : public virtual ::apache::thrift::TBase {
public:
TopKQueryBinResult(const TopKQueryBinResult&);
TopKQueryBinResult& operator=(const TopKQueryBinResult&);
TopKQueryBinResult() : id_array(), distance_array() {
}
virtual ~TopKQueryBinResult() throw();
std::string id_array;
std::string distance_array;
void __set_id_array(const std::string& val);
void __set_distance_array(const std::string& val);
bool operator == (const TopKQueryBinResult & rhs) const
{
if (!(id_array == rhs.id_array))
return false;
if (!(distance_array == rhs.distance_array))
return false;
return true;
}
bool operator != (const TopKQueryBinResult &rhs) const {
return !(*this == rhs);
}
bool operator < (const TopKQueryBinResult & ) const;
uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
virtual void printTo(std::ostream& out) const;
};
void swap(TopKQueryBinResult &a, TopKQueryBinResult &b);
std::ostream& operator<<(std::ostream& out, const TopKQueryBinResult& obj);
}} // namespace
#endif
此差异已折叠。
此差异已折叠。
此差异已折叠。
......@@ -3,6 +3,7 @@ BOOST_VERSION=1.70.0
BZIP2_VERSION=1.0.6
EASYLOGGINGPP_VERSION=v9.96.7
FAISS_VERSION=7b07685
MKL_VERSION=2019.4.243
GTEST_VERSION=1.8.1
JSONCONS_VERSION=0.126.0
LAPACK_VERSION=v3.8.0
......@@ -19,5 +20,7 @@ YAMLCPP_VERSION=0.6.2
ZLIB_VERSION=v1.2.11
ZSTD_VERSION=v1.4.0
AWS_VERSION=1.7.125
LIBUNWIND_VERSION=1.3.1
GPERFTOOLS_VERSION=2.7
# vim: set filetype=sh:
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册