// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #include "wrapper/ConfAdapter.h" #include "WrapperException.h" #include "knowhere/index/vector_index/helpers/IndexParameter.h" #include "server/Config.h" #include "utils/Log.h" #include #include #include // TODO(lxj): add conf checker namespace milvus { namespace engine { #if CUDA_VERSION > 9000 #define GPU_MAX_NRPOBE 2048 #else #define GPU_MAX_NRPOBE 1024 #endif void ConfAdapter::MatchBase(knowhere::Config conf, knowhere::METRICTYPE default_metric) { if (conf->metric_type == knowhere::DEFAULT_TYPE) conf->metric_type = default_metric; } knowhere::Config ConfAdapter::Match(const TempMetaConf& metaconf) { auto conf = std::make_shared(); conf->d = metaconf.dim; conf->metric_type = metaconf.metric_type; conf->gpu_id = metaconf.gpu_id; conf->k = metaconf.k; MatchBase(conf); return conf; } knowhere::Config ConfAdapter::MatchSearch(const TempMetaConf& metaconf, const IndexType& type) { auto conf = std::make_shared(); conf->k = metaconf.k; return conf; } knowhere::Config IVFConfAdapter::Match(const TempMetaConf& metaconf) { auto conf = std::make_shared(); conf->nlist = MatchNlist(metaconf.size, metaconf.nlist, 16384); conf->d = metaconf.dim; conf->metric_type = metaconf.metric_type; conf->gpu_id = metaconf.gpu_id; MatchBase(conf); return conf; } static constexpr float TYPICAL_COUNT = 1000000.0; int64_t IVFConfAdapter::MatchNlist(const int64_t& size, const int64_t& nlist, const int64_t& per_nlist) { if (size <= TYPICAL_COUNT / per_nlist + 1) { // handle less row count, avoid nlist set to 0 return 1; } else if (int(size / TYPICAL_COUNT) * nlist <= 0) { // calculate a proper nlist if nlist not specified or size less than TYPICAL_COUNT return int(size / TYPICAL_COUNT * per_nlist); } return nlist; } knowhere::Config IVFConfAdapter::MatchSearch(const TempMetaConf& metaconf, const IndexType& type) { auto conf = std::make_shared(); conf->k = metaconf.k; if (metaconf.nprobe <= 0) conf->nprobe = 16; // hardcode here else conf->nprobe = metaconf.nprobe; switch (type) { case IndexType::FAISS_IVFFLAT_GPU: case IndexType::FAISS_IVFSQ8_GPU: case IndexType::FAISS_IVFPQ_GPU: if (conf->nprobe > GPU_MAX_NRPOBE) { WRAPPER_LOG_WARNING << "When search with GPU, nprobe shoud be no more than " << GPU_MAX_NRPOBE << ", but you passed " << conf->nprobe << ". Search with " << GPU_MAX_NRPOBE << " instead"; conf->nprobe = GPU_MAX_NRPOBE; } } return conf; } knowhere::Config IVFSQConfAdapter::Match(const TempMetaConf& metaconf) { auto conf = std::make_shared(); conf->nlist = MatchNlist(metaconf.size, metaconf.nlist, 16384); conf->d = metaconf.dim; conf->metric_type = metaconf.metric_type; conf->gpu_id = metaconf.gpu_id; conf->nbits = 8; MatchBase(conf); return conf; } knowhere::Config IVFPQConfAdapter::Match(const TempMetaConf& metaconf) { auto conf = std::make_shared(); conf->nlist = MatchNlist(metaconf.size, metaconf.nlist); conf->d = metaconf.dim; conf->metric_type = metaconf.metric_type; conf->gpu_id = metaconf.gpu_id; conf->nbits = 8; MatchBase(conf); #ifdef MILVUS_GPU_VERSION Status s; bool enable_gpu = false; server::Config& config = server::Config::GetInstance(); s = config.GetGpuResourceConfigEnable(enable_gpu); if (s.ok() && conf->metric_type == knowhere::METRICTYPE::IP) { WRAPPER_LOG_ERROR << "PQ not support IP in GPU version!"; throw WrapperException("PQ not support IP in GPU version!"); } #endif /* * Faiss 1.6 * Only 1, 2, 3, 4, 6, 8, 10, 12, 16, 20, 24, 28, 32 dims per sub-quantizer are currently supported with * no precomputed codes. Precomputed codes supports any number of dimensions, but will involve memory overheads. */ static std::vector support_dim_per_subquantizer{32, 28, 24, 20, 16, 12, 10, 8, 6, 4, 3, 2, 1}; static std::vector support_subquantizer{96, 64, 56, 48, 40, 32, 28, 24, 20, 16, 12, 8, 4, 3, 2, 1}; std::vector resset; for (const auto& dimperquantizer : support_dim_per_subquantizer) { if (!(conf->d % dimperquantizer)) { auto subquantzier_num = conf->d / dimperquantizer; auto finder = std::find(support_subquantizer.begin(), support_subquantizer.end(), subquantzier_num); if (finder != support_subquantizer.end()) { resset.push_back(subquantzier_num); } } } if (resset.empty()) { // todo(linxj): throw exception here. WRAPPER_LOG_ERROR << "The dims of PQ is wrong : only 1, 2, 3, 4, 6, 8, 10, 12, 16, 20, 24, 28, 32 dims per sub-" "quantizer are currently supported with no precomputed codes."; throw WrapperException( "The dims of PQ is wrong : only 1, 2, 3, 4, 6, 8, 10, 12, 16, 20, 24, 28, 32 dims " "per sub-quantizer are currently supported with no precomputed codes."); // return nullptr; } static int64_t compression_level = 1; // 1:low, 2:high if (compression_level == 1) { conf->m = resset[int(resset.size() / 2)]; WRAPPER_LOG_DEBUG << "PQ m = " << conf->m << ", compression radio = " << conf->d / conf->m * 4; } return conf; } knowhere::Config IVFPQConfAdapter::MatchSearch(const TempMetaConf& metaconf, const IndexType& type) { auto conf = std::make_shared(); conf->k = metaconf.k; if (metaconf.nprobe <= 0) { WRAPPER_LOG_ERROR << "The nprobe of PQ is wrong!"; throw WrapperException("The nprobe of PQ is wrong!"); } else { conf->nprobe = metaconf.nprobe; } return conf; } int64_t IVFPQConfAdapter::MatchNlist(const int64_t& size, const int64_t& nlist) { if (size <= TYPICAL_COUNT / 16384 + 1) { // handle less row count, avoid nlist set to 0 return 1; } else if (int(size / TYPICAL_COUNT) * nlist <= 0) { // calculate a proper nlist if nlist not specified or size less than TYPICAL_COUNT return int(size / TYPICAL_COUNT * 16384); } return nlist; } knowhere::Config NSGConfAdapter::Match(const TempMetaConf& metaconf) { auto conf = std::make_shared(); conf->nlist = MatchNlist(metaconf.size, metaconf.nlist, 16384); conf->d = metaconf.dim; conf->metric_type = metaconf.metric_type; conf->gpu_id = metaconf.gpu_id; conf->k = metaconf.k; auto scale_factor = round(metaconf.dim / 128.0); scale_factor = scale_factor >= 4 ? 4 : scale_factor; conf->nprobe = int64_t(conf->nlist * 0.01); // conf->knng = 40 + 10 * scale_factor; // the size of knng conf->knng = 50; conf->search_length = 50 + 5 * scale_factor; conf->out_degree = 50 + 5 * scale_factor; conf->candidate_pool_size = 300; MatchBase(conf); return conf; } knowhere::Config NSGConfAdapter::MatchSearch(const TempMetaConf& metaconf, const IndexType& type) { auto conf = std::make_shared(); conf->k = metaconf.k; conf->search_length = metaconf.search_length; if (metaconf.search_length == TEMPMETA_DEFAULT_VALUE) { conf->search_length = 30; // TODO(linxj): hardcode here. } return conf; } knowhere::Config SPTAGKDTConfAdapter::Match(const TempMetaConf& metaconf) { auto conf = std::make_shared(); conf->d = metaconf.dim; conf->metric_type = metaconf.metric_type; return conf; } knowhere::Config SPTAGKDTConfAdapter::MatchSearch(const TempMetaConf& metaconf, const IndexType& type) { auto conf = std::make_shared(); conf->k = metaconf.k; return conf; } knowhere::Config SPTAGBKTConfAdapter::Match(const TempMetaConf& metaconf) { auto conf = std::make_shared(); conf->d = metaconf.dim; conf->metric_type = metaconf.metric_type; return conf; } knowhere::Config SPTAGBKTConfAdapter::MatchSearch(const TempMetaConf& metaconf, const IndexType& type) { auto conf = std::make_shared(); conf->k = metaconf.k; return conf; } knowhere::Config BinIDMAPConfAdapter::Match(const TempMetaConf& metaconf) { auto conf = std::make_shared(); conf->d = metaconf.dim; conf->metric_type = metaconf.metric_type; conf->gpu_id = metaconf.gpu_id; conf->k = metaconf.k; MatchBase(conf, knowhere::METRICTYPE::HAMMING); return conf; } knowhere::Config BinIVFConfAdapter::Match(const TempMetaConf& metaconf) { auto conf = std::make_shared(); conf->nlist = MatchNlist(metaconf.size, metaconf.nlist, 2048); conf->d = metaconf.dim; conf->metric_type = metaconf.metric_type; conf->gpu_id = metaconf.gpu_id; MatchBase(conf, knowhere::METRICTYPE::HAMMING); return conf; } } // namespace engine } // namespace milvus