提交 9db19a9a 编写于 作者: J JinHai-CN

Merge remote-tracking branch 'main/master'

......@@ -47,6 +47,7 @@ jobs:
shell: bash
run: |
docker-compose build ubuntu-core
docker rmi $(docker images | grep '<none>' | awk '{print $3}') || exit 0
- name: Docker Run
run: |
docker-compose run --use-aliases -d db
......@@ -94,6 +95,7 @@ jobs:
shell: bash
run: |
docker-compose build centos-core
docker rmi $(docker images | grep '<none>' | awk '{print $3}') || exit 0
- name: Docker Run
run: |
docker-compose run --use-aliases -d db
......
......@@ -7,9 +7,16 @@ Please mark all change in change log and use the issue from GitHub
## Bug
- \#1705 Limit the insert data batch size
- \#1929 Skip MySQL meta schema field width check
- \#2073 Fix CheckDBConfigBackendUrl error message
- \#2076 Fix CheckMetricConfigAddress error message
## Feature
- \#1751 Add api SearchByID
- \#1752 Add api GetVectorsByID
- \#1962 Add api HasPartition
- \#1965 FAISS/NSG/HNSW/ANNOY use unified distance calculation algorithm
- \#2064 Warn when using SQLite as metadata management
## Improvement
- \#221 Refactor LOG macro
......
......@@ -30,6 +30,7 @@ pipeline {
LOWER_BUILD_TYPE = params.BUILD_TYPE.toLowerCase()
SEMVER = "${BRANCH_NAME.contains('/') ? BRANCH_NAME.substring(BRANCH_NAME.lastIndexOf('/') + 1) : BRANCH_NAME}"
PIPELINE_NAME = "milvus-ci"
HELM_BRANCH = "master"
}
stages {
......
......@@ -3,7 +3,7 @@ timeout(time: 180, unit: 'MINUTES') {
sh 'helm version'
sh 'helm repo add stable https://kubernetes.oss-cn-hangzhou.aliyuncs.com/charts'
sh 'helm repo update'
checkout([$class: 'GitSCM', branches: [[name: "master"]], userRemoteConfigs: [[url: "https://github.com/milvus-io/milvus-helm.git", name: 'origin', refspec: "+refs/heads/master:refs/remotes/origin/master"]]])
checkout([$class: 'GitSCM', branches: [[name: "${env.HELM_BRANCH}"]], userRemoteConfigs: [[url: "https://github.com/milvus-io/milvus-helm.git", name: 'origin', refspec: "+refs/heads/${env.HELM_BRANCH}:refs/remotes/origin/${env.HELM_BRANCH}"]]])
retry(3) {
sh "helm install --wait --timeout 300s --set image.repository=registry.zilliz.com/milvus/engine --set image.tag=${DOCKER_VERSION} --set image.pullPolicy=Always --set service.type=ClusterIP -f ci/db_backend/mysql_${BINARY_VERSION}_values.yaml -f ci/filebeat/values.yaml --namespace milvus ${env.HELM_RELEASE_NAME} ."
}
......@@ -19,7 +19,7 @@ timeout(time: 180, unit: 'MINUTES') {
if (!fileExists('milvus-helm')) {
dir ("milvus-helm") {
checkout([$class: 'GitSCM', branches: [[name: "master"]], userRemoteConfigs: [[url: "https://github.com/milvus-io/milvus-helm.git", name: 'origin', refspec: "+refs/heads/master:refs/remotes/origin/master"]]])
checkout([$class: 'GitSCM', branches: [[name:"${env.HELM_BRANCH}"]], userRemoteConfigs: [[url: "https://github.com/milvus-io/milvus-helm.git", name: 'origin', refspec: "+refs/heads/${env.HELM_BRANCH}:refs/remotes/origin/${env.HELM_BRANCH}"]]])
}
}
dir ("milvus-helm") {
......
......@@ -3,7 +3,7 @@ timeout(time: 120, unit: 'MINUTES') {
sh 'helm version'
sh 'helm repo add stable https://kubernetes.oss-cn-hangzhou.aliyuncs.com/charts'
sh 'helm repo update'
checkout([$class: 'GitSCM', branches: [[name: "master"]], userRemoteConfigs: [[url: "https://github.com/milvus-io/milvus-helm.git", name: 'origin', refspec: "+refs/heads/master:refs/remotes/origin/master"]]])
checkout([$class: 'GitSCM', branches: [[name: "${env.HELM_BRANCH}"]], userRemoteConfigs: [[url: "https://github.com/milvus-io/milvus-helm.git", name: 'origin', refspec: "+refs/heads/${env.HELM_BRANCH}:refs/remotes/origin/${env.HELM_BRANCH}"]]])
retry(3) {
sh "helm install --wait --timeout 600s --set image.repository=registry.zilliz.com/milvus/engine --set image.tag=${DOCKER_VERSION} --set image.pullPolicy=Always --set service.type=ClusterIP -f ci/db_backend/mysql_${BINARY_VERSION}_values.yaml -f ci/filebeat/values.yaml --namespace milvus ${env.HELM_RELEASE_NAME} ."
}
......
......@@ -54,7 +54,7 @@ aux_source_directory(${MILVUS_ENGINE_SRC}/scheduler scheduler_main_files)
aux_source_directory(${MILVUS_ENGINE_SRC}/scheduler/action scheduler_action_files)
aux_source_directory(${MILVUS_ENGINE_SRC}/scheduler/event scheduler_event_files)
aux_source_directory(${MILVUS_ENGINE_SRC}/scheduler/job scheduler_job_files)
aux_source_directory(${MILVUS_ENGINE_SRC}/scheduler/optimizer scheduler_optimizer_files)
aux_source_directory(${MILVUS_ENGINE_SRC}/scheduler/selector scheduler_selector_files)
aux_source_directory(${MILVUS_ENGINE_SRC}/scheduler/resource scheduler_resource_files)
aux_source_directory(${MILVUS_ENGINE_SRC}/scheduler/task scheduler_task_files)
set(scheduler_files
......@@ -62,7 +62,7 @@ set(scheduler_files
${scheduler_action_files}
${scheduler_event_files}
${scheduler_job_files}
${scheduler_optimizer_files}
${scheduler_selector_files}
${scheduler_resource_files}
${scheduler_task_files}
)
......
......@@ -32,76 +32,43 @@ namespace milvus {
namespace codec {
void
DefaultAttrsFormat::read_attrs_internal(const std::string& file_path, off_t offset, size_t num,
std::vector<uint8_t>& raw_attrs, size_t& nbytes) {
int ra_fd = open(file_path.c_str(), O_RDONLY, 00664);
if (ra_fd == -1) {
DefaultAttrsFormat::read_attrs_internal(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path, off_t offset,
size_t num, std::vector<uint8_t>& raw_attrs, size_t& nbytes) {
if (!fs_ptr->reader_ptr_->open(file_path.c_str())) {
std::string err_msg = "Failed to open file: " + file_path + ", error: " + std::strerror(errno);
LOG_ENGINE_ERROR_ << err_msg;
throw Exception(SERVER_CANNOT_CREATE_FILE, err_msg);
}
size_t num_bytes;
if (::read(ra_fd, &num_bytes, sizeof(size_t)) == -1) {
std::string err_msg = "Failed to read from file: " + file_path + ", error: " + std::strerror(errno);
LOG_ENGINE_ERROR_ << err_msg;
throw Exception(SERVER_WRITE_ERROR, err_msg);
}
fs_ptr->reader_ptr_->read(&nbytes, sizeof(size_t));
num = std::min(num, num_bytes - offset);
num = std::min(num, nbytes - offset);
offset += sizeof(size_t);
int off = lseek(ra_fd, offset, SEEK_SET);
if (off == -1) {
std::string err_msg = "Failed to seek file: " + file_path + ", error: " + std::strerror(errno);
LOG_ENGINE_ERROR_ << err_msg;
throw Exception(SERVER_WRITE_ERROR, err_msg);
}
fs_ptr->reader_ptr_->seekg(offset);
raw_attrs.resize(num / sizeof(uint8_t));
if (::read(ra_fd, raw_attrs.data(), num) == -1) {
std::string err_msg = "Failed to read from file: " + file_path + ", error: " + std::strerror(errno);
LOG_ENGINE_ERROR_ << err_msg;
throw Exception(SERVER_WRITE_ERROR, err_msg);
}
fs_ptr->reader_ptr_->read(raw_attrs.data(), num);
nbytes = num;
if (::close(ra_fd) == -1) {
std::string err_msg = "Failed to close file: " + file_path + ", error: " + std::strerror(errno);
LOG_ENGINE_ERROR_ << err_msg;
throw Exception(SERVER_WRITE_ERROR, err_msg);
}
fs_ptr->reader_ptr_->close();
}
void
DefaultAttrsFormat::read_uids_internal(const std::string& file_path, std::vector<int64_t>& uids) {
int uid_fd = open(file_path.c_str(), O_RDONLY, 00664);
if (uid_fd == -1) {
DefaultAttrsFormat::read_uids_internal(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
std::vector<int64_t>& uids) {
if (!fs_ptr->reader_ptr_->open(file_path.c_str())) {
std::string err_msg = "Failed to open file: " + file_path + ", error: " + std::strerror(errno);
LOG_ENGINE_ERROR_ << err_msg;
throw Exception(SERVER_CANNOT_CREATE_FILE, err_msg);
}
size_t num_bytes;
if (::read(uid_fd, &num_bytes, sizeof(size_t)) == -1) {
std::string err_msg = "Failed to read from file: " + file_path + ", error: " + std::strerror(errno);
LOG_ENGINE_ERROR_ << err_msg;
throw Exception(SERVER_WRITE_ERROR, err_msg);
}
fs_ptr->reader_ptr_->read(&num_bytes, sizeof(size_t));
uids.resize(num_bytes / sizeof(int64_t));
if (::read(uid_fd, uids.data(), num_bytes) == -1) {
std::string err_msg = "Failed to read from file: " + file_path + ", error: " + std::strerror(errno);
LOG_ENGINE_ERROR_ << err_msg;
throw Exception(SERVER_WRITE_ERROR, err_msg);
}
fs_ptr->reader_ptr_->read(uids.data(), num_bytes);
if (::close(uid_fd) == -1) {
std::string err_msg = "Failed to close file: " + file_path + ", error: " + std::strerror(errno);
LOG_ENGINE_ERROR_ << err_msg;
throw Exception(SERVER_WRITE_ERROR, err_msg);
}
fs_ptr->reader_ptr_->read(uids.data(), num_bytes);
}
void
......@@ -123,7 +90,7 @@ DefaultAttrsFormat::read(const milvus::storage::FSHandlerPtr& fs_ptr, milvus::se
for (; uid_it != it_end; ++uid_it) {
const auto& path = uid_it->path();
if (path.extension().string() == user_id_extension_) {
read_uids_internal(path.string(), uids);
read_uids_internal(fs_ptr, path.string(), uids);
break;
}
}
......@@ -134,10 +101,9 @@ DefaultAttrsFormat::read(const milvus::storage::FSHandlerPtr& fs_ptr, milvus::se
if (path.extension().string() == raw_attr_extension_) {
auto file_name = path.filename().string();
auto field_name = file_name.substr(0, file_name.size() - 3);
// void* attr_list;
std::vector<uint8_t> attr_list;
size_t nbytes;
read_attrs_internal(path.string(), 0, INT64_MAX, attr_list, nbytes);
read_attrs_internal(fs_ptr, path.string(), 0, INT64_MAX, attr_list, nbytes);
milvus::segment::AttrPtr attr =
std::make_shared<milvus::segment::Attr>(attr_list, nbytes, uids, field_name);
attrs_read->attrs.insert(std::pair(field_name, attr));
......@@ -238,7 +204,7 @@ DefaultAttrsFormat::read_uids(const milvus::storage::FSHandlerPtr& fs_ptr, std::
for (; it != it_end; ++it) {
const auto& path = it->path();
if (path.extension().string() == user_id_extension_) {
read_uids_internal(path.string(), uids);
read_uids_internal(fs_ptr, path.string(), uids);
}
}
}
......
......@@ -51,10 +51,11 @@ class DefaultAttrsFormat : public AttrsFormat {
private:
void
read_attrs_internal(const std::string&, off_t, size_t, std::vector<uint8_t>&, size_t&);
read_attrs_internal(const storage::FSHandlerPtr& fs_ptr, const std::string&, off_t, size_t, std::vector<uint8_t>&,
size_t&);
void
read_uids_internal(const std::string&, std::vector<int64_t>&);
read_uids_internal(const storage::FSHandlerPtr& fs_ptr, const std::string&, std::vector<int64_t>&);
private:
std::mutex mutex_;
......
......@@ -891,7 +891,7 @@ Config::CheckDBConfigBackendUrl(const std::string& value) {
std::string msg =
"Invalid backend url: " + value + ". Possible reason: db_config.db_backend_url is invalid. " +
"The correct format should be like sqlite://:@:/ or mysql://root:123456@127.0.0.1:3306/milvus.";
return Status(SERVER_INVALID_ARGUMENT, "invalid db_backend_url: " + value);
return Status(SERVER_INVALID_ARGUMENT, msg);
}
return Status::OK();
}
......@@ -1093,7 +1093,7 @@ Status
Config::CheckMetricConfigAddress(const std::string& value) {
if (!ValidationUtil::ValidateIpAddress(value).ok()) {
std::string msg = "Invalid metric ip: " + value + ". Possible reason: metric_config.ip is invalid.";
return Status(SERVER_INVALID_ARGUMENT, "Invalid metric config ip: " + value);
return Status(SERVER_INVALID_ARGUMENT, msg);
}
return Status::OK();
}
......
......@@ -65,7 +65,7 @@ class DB {
AllCollections(std::vector<meta::CollectionSchema>& table_schema_array) = 0;
virtual Status
GetCollectionInfo(const std::string& collection_id, CollectionInfo& collection_info) = 0;
GetCollectionInfo(const std::string& collection_id, std::string& collection_info) = 0;
virtual Status
GetCollectionRowCount(const std::string& collection_id, uint64_t& row_count) = 0;
......@@ -108,7 +108,8 @@ class DB {
Compact(const std::string& collection_id) = 0;
virtual Status
GetVectorByID(const std::string& collection_id, const IDNumber& vector_id, VectorsData& vector) = 0;
GetVectorsByID(const std::string& collection_id, const IDNumbers& id_array,
std::vector<engine::VectorsData>& vectors) = 0;
virtual Status
GetVectorIDs(const std::string& collection_id, const std::string& segment_id, IDNumbers& vector_ids) = 0;
......@@ -117,9 +118,9 @@ class DB {
// Merge(const std::set<std::string>& table_ids) = 0;
virtual Status
QueryByID(const std::shared_ptr<server::Context>& context, const std::string& collection_id,
const std::vector<std::string>& partition_tags, uint64_t k, const milvus::json& extra_params,
IDNumber vector_id, ResultIds& result_ids, ResultDistances& result_distances) = 0;
QueryByIDs(const std::shared_ptr<server::Context>& context, const std::string& collection_id,
const std::vector<std::string>& partition_tags, uint64_t k, const milvus::json& extra_params,
const IDNumbers& id_array, ResultIds& result_ids, ResultDistances& result_distances) = 0;
virtual Status
Query(const std::shared_ptr<server::Context>& context, const std::string& collection_id,
......@@ -153,7 +154,8 @@ class DB {
DescribeHybridCollection(meta::CollectionSchema& collection_schema, meta::hybrid::FieldsSchema& fields_schema) = 0;
virtual Status
InsertEntities(const std::string& collection_id, const std::string& partition_tag, Entity& entity,
InsertEntities(const std::string& collection_id, const std::string& partition_tag,
const std::vector<std::string>& field_names, Entity& entity,
std::unordered_map<std::string, meta::hybrid::DataType>& field_types) = 0;
virtual Status
......
此差异已折叠。
......@@ -74,7 +74,7 @@ class DBImpl : public DB, public server::CacheConfigHandler, public server::Engi
AllCollections(std::vector<meta::CollectionSchema>& collection_schema_array) override;
Status
GetCollectionInfo(const std::string& collection_id, CollectionInfo& collection_info) override;
GetCollectionInfo(const std::string& collection_id, std::string& collection_info) override;
Status
PreloadCollection(const std::string& collection_id) override;
......@@ -118,7 +118,8 @@ class DBImpl : public DB, public server::CacheConfigHandler, public server::Engi
Compact(const std::string& collection_id) override;
Status
GetVectorByID(const std::string& collection_id, const IDNumber& vector_id, VectorsData& vector) override;
GetVectorsByID(const std::string& collection_id, const IDNumbers& id_array,
std::vector<engine::VectorsData>& vectors) override;
Status
GetVectorIDs(const std::string& collection_id, const std::string& segment_id, IDNumbers& vector_ids) override;
......@@ -144,7 +145,8 @@ class DBImpl : public DB, public server::CacheConfigHandler, public server::Engi
meta::hybrid::FieldsSchema& fields_schema) override;
Status
InsertEntities(const std::string& collection_name, const std::string& partition_tag, engine::Entity& entity,
InsertEntities(const std::string& collection_name, const std::string& partition_tag,
const std::vector<std::string>& field_names, engine::Entity& entity,
std::unordered_map<std::string, meta::hybrid::DataType>& field_types) override;
Status
......@@ -155,9 +157,9 @@ class DBImpl : public DB, public server::CacheConfigHandler, public server::Engi
ResultIds& result_ids, ResultDistances& result_distances) override;
Status
QueryByID(const std::shared_ptr<server::Context>& context, const std::string& collection_id,
const std::vector<std::string>& partition_tags, uint64_t k, const milvus::json& extra_params,
IDNumber vector_id, ResultIds& result_ids, ResultDistances& result_distances) override;
QueryByIDs(const std::shared_ptr<server::Context>& context, const std::string& collection_id,
const std::vector<std::string>& partition_tags, uint64_t k, const milvus::json& extra_params,
const IDNumbers& id_array, ResultIds& result_ids, ResultDistances& result_distances) override;
Status
Query(const std::shared_ptr<server::Context>& context, const std::string& collection_id,
......@@ -193,8 +195,8 @@ class DBImpl : public DB, public server::CacheConfigHandler, public server::Engi
ResultIds& result_ids, ResultDistances& result_distances);
Status
GetVectorByIdHelper(const std::string& collection_id, IDNumber vector_id, VectorsData& vector,
const meta::SegmentsSchema& files);
GetVectorsByIdHelper(const std::string& collection_id, const IDNumbers& id_array,
std::vector<engine::VectorsData>& vectors, const meta::SegmentsSchema& files);
void
InternalFlush(const std::string& collection_id = "");
......
......@@ -50,6 +50,7 @@ struct VectorsData {
struct Entity {
uint64_t entity_count_ = 0;
std::vector<uint8_t> attr_value_;
std::unordered_map<std::string, std::vector<std::string>> attr_data_;
std::unordered_map<std::string, VectorsData> vector_data_;
IDNumbers id_array_;
......@@ -60,22 +61,6 @@ using Table2FileErr = std::map<std::string, File2ErrArray>;
using File2RefCount = std::map<std::string, int64_t>;
using Table2FileRef = std::map<std::string, File2RefCount>;
struct SegmentStat {
std::string name_;
int64_t row_count_ = 0;
std::string index_name_;
int64_t data_size_ = 0;
};
struct PartitionStat {
std::string tag_;
std::vector<SegmentStat> segments_stat_;
};
struct CollectionInfo {
std::vector<PartitionStat> partitions_stat_;
};
static const char* DEFAULT_PARTITON_TAG = "_default";
} // namespace engine
......
......@@ -24,6 +24,8 @@
#include "utils/CommonUtil.h"
#include "utils/Log.h"
#include <map>
namespace milvus {
namespace engine {
namespace utils {
......@@ -221,12 +223,6 @@ IsRawIndexType(int32_t type) {
return (type == (int32_t)EngineType::FAISS_IDMAP) || (type == (int32_t)EngineType::FAISS_BIN_IDMAP);
}
// Whether the given engine type (as int32_t) denotes a binary-vector
// FAISS index, i.e. BIN_IDMAP or BIN_IVFFLAT.
bool
IsBinaryIndexType(int32_t index_type) {
    switch (index_type) {
        case (int32_t)engine::EngineType::FAISS_BIN_IDMAP:
        case (int32_t)engine::EngineType::FAISS_BIN_IVFFLAT:
            return true;
        default:
            return false;
    }
}
bool
IsBinaryMetricType(int32_t metric_type) {
return (metric_type == (int32_t)engine::MetricType::HAMMING) ||
......@@ -299,6 +295,29 @@ ParseMetaUri(const std::string& uri, MetaUriInfo& info) {
return Status::OK();
}
std::string
GetIndexName(int32_t index_type) {
static std::map<int32_t, std::string> index_type_name = {
{(int32_t)engine::EngineType::FAISS_IDMAP, "IDMAP"},
{(int32_t)engine::EngineType::FAISS_IVFFLAT, "IVFFLAT"},
{(int32_t)engine::EngineType::FAISS_IVFSQ8, "IVFSQ8"},
{(int32_t)engine::EngineType::NSG_MIX, "NSG"},
{(int32_t)engine::EngineType::ANNOY, "ANNOY"},
{(int32_t)engine::EngineType::FAISS_IVFSQ8H, "IVFSQ8H"},
{(int32_t)engine::EngineType::FAISS_PQ, "PQ"},
{(int32_t)engine::EngineType::SPTAG_KDT, "KDT"},
{(int32_t)engine::EngineType::SPTAG_BKT, "BKT"},
{(int32_t)engine::EngineType::FAISS_BIN_IDMAP, "IDMAP"},
{(int32_t)engine::EngineType::FAISS_BIN_IVFFLAT, "IVFFLAT"},
};
if (index_type_name.find(index_type) == index_type_name.end()) {
return "Unknow";
}
return index_type_name[index_type];
}
} // namespace utils
} // namespace engine
} // namespace milvus
......@@ -48,9 +48,6 @@ IsSameIndex(const CollectionIndex& index1, const CollectionIndex& index2);
bool
IsRawIndexType(int32_t type);
static bool
IsBinaryIndexType(int32_t index_type);
bool
IsBinaryMetricType(int32_t metric_type);
......@@ -73,6 +70,9 @@ struct MetaUriInfo {
Status
ParseMetaUri(const std::string& uri, MetaUriInfo& info);
std::string
GetIndexName(int32_t index_type);
} // namespace utils
} // namespace engine
} // namespace milvus
......@@ -128,10 +128,6 @@ class ExecutionEngine {
Search(int64_t n, const uint8_t* data, int64_t k, const milvus::json& extra_params, float* distances,
int64_t* labels, bool hybrid) = 0;
virtual Status
Search(int64_t n, const std::vector<int64_t>& ids, int64_t k, const milvus::json& extra_params, float* distances,
int64_t* labels, bool hybrid) = 0;
virtual std::shared_ptr<ExecutionEngine>
BuildIndex(const std::string& location, EngineType engine_type) = 0;
......
......@@ -810,10 +810,17 @@ ExecutionEngineImpl::ExecBinaryQuery(milvus::query::GeneralQueryPtr general_quer
std::vector<int8_t> data;
data.resize(size / sizeof(int8_t));
memcpy(data.data(), attr_data_.at(field_name).data(), size);
std::vector<int8_t> term_value;
auto term_size =
general_query->leaf->term_query->field_value.size() * (sizeof(int8_t)) / sizeof(int8_t);
term_value.resize(term_size);
memcpy(term_value.data(), general_query->leaf->term_query->field_value.data(),
term_size * sizeof(int8_t));
for (uint64_t i = 0; i < data.size(); ++i) {
bool value_in_term = false;
for (auto term_value : general_query->leaf->term_query->field_value) {
int8_t query_value = atoi(term_value.c_str());
for (auto query_value : term_value) {
if (data[i] == query_value) {
value_in_term = true;
break;
......@@ -831,10 +838,16 @@ ExecutionEngineImpl::ExecBinaryQuery(milvus::query::GeneralQueryPtr general_quer
std::vector<int16_t> data;
data.resize(size / sizeof(int16_t));
memcpy(data.data(), attr_data_.at(field_name).data(), size);
std::vector<int16_t> term_value;
auto term_size =
general_query->leaf->term_query->field_value.size() * (sizeof(int8_t)) / sizeof(int16_t);
term_value.resize(term_size);
memcpy(term_value.data(), general_query->leaf->term_query->field_value.data(),
term_size * sizeof(int16_t));
for (uint64_t i = 0; i < data.size(); ++i) {
bool value_in_term = false;
for (auto term_value : general_query->leaf->term_query->field_value) {
int16_t query_value = atoi(term_value.c_str());
for (auto query_value : term_value) {
if (data[i] == query_value) {
value_in_term = true;
break;
......@@ -852,10 +865,17 @@ ExecutionEngineImpl::ExecBinaryQuery(milvus::query::GeneralQueryPtr general_quer
std::vector<int32_t> data;
data.resize(size / sizeof(int32_t));
memcpy(data.data(), attr_data_.at(field_name).data(), size);
std::vector<int32_t> term_value;
auto term_size =
general_query->leaf->term_query->field_value.size() * (sizeof(int8_t)) / sizeof(int32_t);
term_value.resize(term_size);
memcpy(term_value.data(), general_query->leaf->term_query->field_value.data(),
term_size * sizeof(int32_t));
for (uint64_t i = 0; i < data.size(); ++i) {
bool value_in_term = false;
for (auto term_value : general_query->leaf->term_query->field_value) {
int32_t query_value = atoi(term_value.c_str());
for (auto query_value : term_value) {
if (data[i] == query_value) {
value_in_term = true;
break;
......@@ -873,10 +893,17 @@ ExecutionEngineImpl::ExecBinaryQuery(milvus::query::GeneralQueryPtr general_quer
std::vector<int64_t> data;
data.resize(size / sizeof(int64_t));
memcpy(data.data(), attr_data_.at(field_name).data(), size);
std::vector<int64_t> term_value;
auto term_size =
general_query->leaf->term_query->field_value.size() * (sizeof(int8_t)) / sizeof(int64_t);
term_value.resize(term_size);
memcpy(term_value.data(), general_query->leaf->term_query->field_value.data(),
term_size * sizeof(int64_t));
for (uint64_t i = 0; i < data.size(); ++i) {
bool value_in_term = false;
for (auto term_value : general_query->leaf->term_query->field_value) {
int64_t query_value = atoi(term_value.c_str());
for (auto query_value : term_value) {
if (data[i] == query_value) {
value_in_term = true;
break;
......@@ -894,12 +921,17 @@ ExecutionEngineImpl::ExecBinaryQuery(milvus::query::GeneralQueryPtr general_quer
std::vector<float> data;
data.resize(size / sizeof(float));
memcpy(data.data(), attr_data_.at(field_name).data(), size);
std::vector<float> term_value;
auto term_size =
general_query->leaf->term_query->field_value.size() * (sizeof(int8_t)) / sizeof(float);
term_value.resize(term_size);
memcpy(term_value.data(), general_query->leaf->term_query->field_value.data(),
term_size * sizeof(int64_t));
for (uint64_t i = 0; i < data.size(); ++i) {
bool value_in_term = false;
for (auto term_value : general_query->leaf->term_query->field_value) {
std::istringstream iss(term_value);
float query_value;
iss >> query_value;
for (auto query_value : term_value) {
if (data[i] == query_value) {
value_in_term = true;
break;
......@@ -917,12 +949,17 @@ ExecutionEngineImpl::ExecBinaryQuery(milvus::query::GeneralQueryPtr general_quer
std::vector<double> data;
data.resize(size / sizeof(double));
memcpy(data.data(), attr_data_.at(field_name).data(), size);
std::vector<double> term_value;
auto term_size =
general_query->leaf->term_query->field_value.size() * (sizeof(int8_t)) / sizeof(double);
term_value.resize(term_size);
memcpy(term_value.data(), general_query->leaf->term_query->field_value.data(),
term_size * sizeof(double));
for (uint64_t i = 0; i < data.size(); ++i) {
bool value_in_term = false;
for (auto term_value : general_query->leaf->term_query->field_value) {
std::istringstream iss(term_value);
double query_value;
iss >> query_value;
for (auto query_value : term_value) {
if (data[i] == query_value) {
value_in_term = true;
break;
......@@ -1157,87 +1194,6 @@ ExecutionEngineImpl::Search(int64_t n, const uint8_t* data, int64_t k, const mil
return Status::OK();
}
Status
ExecutionEngineImpl::Search(int64_t n, const std::vector<int64_t>& ids, int64_t k, const milvus::json& extra_params,
float* distances, int64_t* labels, bool hybrid) {
TimeRecorder rc(LogOut("[%s][%ld] ExecutionEngineImpl::Search vector of ids", "search", 0));
if (index_ == nullptr) {
LOG_ENGINE_ERROR_ << LogOut("[%s][%ld] ExecutionEngineImpl: index is null, failed to search", "search", 0);
return Status(DB_ERROR, "index is null");
}
milvus::json conf = extra_params;
conf[knowhere::meta::TOPK] = k;
auto adapter = knowhere::AdapterMgr::GetInstance().GetAdapter(index_->index_type());
if (!adapter->CheckSearch(conf, index_->index_type(), index_->index_mode())) {
LOG_ENGINE_ERROR_ << LogOut("[%s][%ld] Illegal search params", "search", 0);
throw Exception(DB_ERROR, "Illegal search params");
}
if (hybrid) {
HybridLoad();
}
rc.RecordSection("search prepare");
// std::string segment_dir;
// utils::GetParentPath(location_, segment_dir);
// segment::SegmentReader segment_reader(segment_dir);
// segment::IdBloomFilterPtr id_bloom_filter_ptr;
// segment_reader.LoadBloomFilter(id_bloom_filter_ptr);
// Check if the id is present. If so, find its offset
const std::vector<segment::doc_id_t>& uids = index_->GetUids();
std::vector<int64_t> offsets;
/*
std::vector<segment::doc_id_t> uids;
auto status = segment_reader.LoadUids(uids);
if (!status.ok()) {
return status;
}
*/
// There is only one id in ids
for (auto& id : ids) {
// if (id_bloom_filter_ptr->Check(id)) {
// if (uids.empty()) {
// segment_reader.LoadUids(uids);
// }
// auto found = std::find(uids.begin(), uids.end(), id);
// if (found != uids.end()) {
// auto offset = std::distance(uids.begin(), found);
// offsets.emplace_back(offset);
// }
// }
auto found = std::find(uids.begin(), uids.end(), id);
if (found != uids.end()) {
auto offset = std::distance(uids.begin(), found);
offsets.emplace_back(offset);
}
}
rc.RecordSection("get offset");
if (!offsets.empty()) {
auto dataset = knowhere::GenDatasetWithIds(offsets.size(), index_->Dim(), nullptr, offsets.data());
auto result = index_->QueryById(dataset, conf);
rc.RecordSection("query by id done");
LOG_ENGINE_DEBUG_ << LogOut("[%s][%ld] get %ld uids from index %s", "search", 0, index_->GetUids().size(),
location_.c_str());
MapAndCopyResult(result, uids, offsets.size(), k, distances, labels);
rc.RecordSection("map uids " + std::to_string(offsets.size() * k));
}
if (hybrid) {
HybridUnset();
}
return Status::OK();
}
Status
ExecutionEngineImpl::GetVectorByID(const int64_t& id, float* vector, bool hybrid) {
if (index_ == nullptr) {
......
......@@ -82,10 +82,6 @@ class ExecutionEngineImpl : public ExecutionEngine {
Search(int64_t n, const uint8_t* data, int64_t k, const milvus::json& extra_params, float* distances,
int64_t* labels, bool hybrid = false) override;
Status
Search(int64_t n, const std::vector<int64_t>& ids, int64_t k, const milvus::json& extra_params, float* distances,
int64_t* labels, bool hybrid) override;
ExecutionEnginePtr
BuildIndex(const std::string& location, EngineType engine_type) override;
......
......@@ -9,7 +9,7 @@
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.
#include "db/insert/MemMenagerFactory.h"
#include "db/insert/MemManagerFactory.h"
#include "MemManagerImpl.h"
#include "utils/Exception.h"
#include "utils/Log.h"
......
......@@ -97,6 +97,7 @@ MemManagerImpl::InsertEntities(const std::string& table_id, int64_t length, cons
<< "Insert buffer size exceeds limit. Performing force flush";
auto status = Flush(flushed_tables, false);
if (!status.ok()) {
LOG_ENGINE_DEBUG_ << LogOut("[%s][%ld] ", "insert", 0) << "Flush fail: " << status.message();
return status;
}
}
......
......@@ -35,6 +35,9 @@
#include "utils/StringHelpFunctions.h"
#include "utils/ValidationUtil.h"
#define USING_SQLITE_WARNING LOG_ENGINE_WARNING_ << \
"You are using SQLite as the meta data management, which can't be used in production. Please change it to MySQL!";
namespace milvus {
namespace engine {
namespace meta {
......@@ -242,6 +245,7 @@ SqliteMetaImpl::Initialize() {
Status
SqliteMetaImpl::CreateCollection(CollectionSchema& collection_schema) {
USING_SQLITE_WARNING
try {
server::MetricCollector metric;
......@@ -436,6 +440,7 @@ SqliteMetaImpl::DeleteCollectionFiles(const std::string& collection_id) {
Status
SqliteMetaImpl::CreateCollectionFile(SegmentSchema& file_schema) {
USING_SQLITE_WARNING
if (file_schema.date_ == EmptyDate) {
file_schema.date_ = utils::GetDate();
}
......@@ -899,6 +904,7 @@ SqliteMetaImpl::DropCollectionIndex(const std::string& collection_id) {
Status
SqliteMetaImpl::CreatePartition(const std::string& collection_id, const std::string& partition_name, const std::string& tag,
uint64_t lsn) {
USING_SQLITE_WARNING
server::MetricCollector metric;
CollectionSchema collection_schema;
......@@ -1852,6 +1858,7 @@ SqliteMetaImpl::GetGlobalLastLSN(uint64_t& lsn) {
Status
SqliteMetaImpl::CreateHybridCollection(meta::CollectionSchema& collection_schema,
meta::hybrid::FieldsSchema& fields_schema) {
USING_SQLITE_WARNING
try {
server::MetricCollector metric;
......@@ -1912,7 +1919,6 @@ SqliteMetaImpl::CreateHybridCollection(meta::CollectionSchema& collection_schema
Status
SqliteMetaImpl::DescribeHybridCollection(milvus::engine::meta::CollectionSchema& collection_schema,
milvus::engine::meta::hybrid::FieldsSchema& fields_schema) {
try {
server::MetricCollector metric;
fiu_do_on("SqliteMetaImpl.DescriCollection.throw_exception", throw std::exception());
......@@ -1970,7 +1976,7 @@ SqliteMetaImpl::DescribeHybridCollection(milvus::engine::meta::CollectionSchema&
Status
SqliteMetaImpl::CreateHybridCollectionFile(SegmentSchema& file_schema) {
USING_SQLITE_WARNING
if (file_schema.date_ == EmptyDate) {
file_schema.date_ = utils::GetDate();
}
......
......@@ -108,7 +108,7 @@ message SearchInFilesParam {
message SearchByIDParam {
string collection_name = 1;
repeated string partition_tag_array = 2;
int64 id = 3;
repeated int64 id_array = 3;
int64 topk = 4;
repeated KeyValuePair extra_params = 5;
}
......@@ -180,48 +180,28 @@ message DeleteByIDParam {
repeated int64 id_array = 2;
}
/**
* @brief segment statistics
*/
message SegmentStat {
string segment_name = 1;
int64 row_count = 2;
string index_name = 3;
int64 data_size = 4;
}
/**
* @brief collection statistics
*/
message PartitionStat {
string tag = 1;
int64 total_row_count = 2;
repeated SegmentStat segments_stat = 3;
}
/**
* @brief collection information
*/
message CollectionInfo {
Status status = 1;
int64 total_row_count = 2;
repeated PartitionStat partitions_stat = 3;
string json_info = 2;
}
/**
* @brief vector identity
* @brief vectors identity
*/
message VectorIdentity {
message VectorsIdentity {
string collection_name = 1;
int64 id = 2;
repeated int64 id_array = 2;
}
/**
* @brief vector data
*/
message VectorData {
message VectorsData {
Status status = 1;
RowRecord vector_data = 2;
repeated RowRecord vectors_data = 2;
}
/**
......@@ -307,9 +287,10 @@ message MappingList {
message TermQuery {
string field_name = 1;
repeated string values = 2;
float boost = 3;
repeated KeyValuePair extra_params = 4;
bytes values = 2;
int64 value_num = 3;
float boost = 4;
repeated KeyValuePair extra_params = 5;
}
enum CompareOperator {
......@@ -384,8 +365,9 @@ message HEntity {
Status status = 1;
int64 entity_id = 2;
repeated string field_names = 3;
repeated AttrRecord attr_records = 4;
repeated FieldValue result_values = 5;
bytes attr_records = 4;
int64 row_num = 5;
repeated FieldValue result_values = 6;
}
message HQueryResult {
......@@ -534,6 +516,15 @@ service MilvusService {
*/
rpc CreatePartition(PartitionParam) returns (Status) {}
/**
* @brief This method is used to test partition existence.
*
* @param PartitionParam, target partition.
*
* @return BoolReply
*/
rpc HasPartition(PartitionParam) returns (BoolReply) {}
/**
* @brief This method is used to show partition information
*
......@@ -562,13 +553,13 @@ service MilvusService {
rpc Insert(InsertParam) returns (VectorIds) {}
/**
* @brief This method is used to get vector data by id.
* @brief This method is used to get vectors data by id array.
*
* @param VectorIdentity, target vector id.
* @param VectorsIdentity, target vector id array.
*
* @return VectorData
* @return VectorsData
*/
rpc GetVectorByID(VectorIdentity) returns (VectorData) {}
rpc GetVectorsByID(VectorsIdentity) returns (VectorsData) {}
/**
* @brief This method is used to get vector ids from a segment
......
......@@ -242,7 +242,7 @@ NSGConfAdapter::CheckSearch(Config& oricfg, const IndexType type, const IndexMod
bool
HNSWConfAdapter::CheckTrain(Config& oricfg, const IndexMode mode) {
static int64_t MIN_EFCONSTRUCTION = 100;
static int64_t MAX_EFCONSTRUCTION = 500;
static int64_t MAX_EFCONSTRUCTION = 800;
static int64_t MIN_M = 5;
static int64_t MAX_M = 48;
......
......@@ -85,6 +85,7 @@ void
IndexAnnoy::BuildAll(const DatasetPtr& dataset_ptr, const Config& config) {
if (index_) {
// it is builded all
KNOWHERE_LOG_DEBUG << "IndexAnnoy::BuildAll: index_ has been built!";
return;
}
......
......@@ -68,7 +68,7 @@ IndexHNSW::Load(const BinarySet& index_binary) {
index_ = std::make_shared<hnswlib::HierarchicalNSW<float>>(space);
index_->loadIndex(reader);
normalize = index_->metric_type_ == 1 ? true : false; // 1 == InnerProduct
normalize = index_->metric_type_ == 1; // 1 == InnerProduct
} catch (std::exception& e) {
KNOWHERE_THROW_MSG(e.what());
}
......@@ -158,7 +158,7 @@ IndexHNSW::Query(const DatasetPtr& dataset_ptr, const Config& config) {
ret = index_->searchKnn((float*)single_query, k, compare, blacklist);
while (ret.size() < k) {
ret.push_back(std::make_pair(-1, -1));
ret.emplace_back(std::make_pair(-1, -1));
}
std::vector<float> dist;
std::vector<int64_t> ids;
......
......@@ -121,11 +121,6 @@ CPUSPTAGRNG::Train(const DatasetPtr& origin, const Config& train_config) {
DatasetPtr dataset = origin;
// if (index_ptr_->GetDistCalcMethod() == SPTAG::DistCalcMethod::Cosine
// && preprocessor_) {
// preprocessor_->Preprocess(dataset);
//}
auto vectorset = ConvertToVectorSet(dataset);
auto metaset = ConvertToMetadataSet(dataset);
index_ptr_->BuildIndex(vectorset, metaset);
......
......@@ -55,6 +55,23 @@ static std::unordered_map<std::string, int32_t> str_old_index_type_map = {
{IndexEnum::INDEX_FAISS_BIN_IVFFLAT, (int32_t)OldIndexType::FAISS_BIN_IVFLAT_CPU},
};
/* used in 0.8.0 */
namespace IndexEnum {
const char* INVALID = "";
const char* INDEX_FAISS_IDMAP = "IDMAP";
const char* INDEX_FAISS_IVFFLAT = "IVF_FLAT";
const char* INDEX_FAISS_IVFPQ = "IVF_PQ";
const char* INDEX_FAISS_IVFSQ8 = "IVF_SQ8";
const char* INDEX_FAISS_IVFSQ8H = "IVF_SQ8_HYBRID";
const char* INDEX_FAISS_BIN_IDMAP = "BIN_IDMAP";
const char* INDEX_FAISS_BIN_IVFFLAT = "BIN_IVF_FLAT";
const char* INDEX_NSG = "NSG";
const char* INDEX_SPTAG_KDT_RNT = "SPTAG_KDT_RNT";
const char* INDEX_SPTAG_BKT_RNT = "SPTAG_BKT_RNT";
const char* INDEX_HNSW = "HNSW";
const char* INDEX_ANNOY = "ANNOY";
} // namespace IndexEnum
std::string
OldIndexTypeToStr(const int32_t type) {
try {
......
......@@ -43,19 +43,19 @@ using IndexType = std::string;
/* used in 0.8.0 */
namespace IndexEnum {
constexpr const char* INVALID = "";
constexpr const char* INDEX_FAISS_IDMAP = "IDMAP";
constexpr const char* INDEX_FAISS_IVFFLAT = "IVF_FLAT";
constexpr const char* INDEX_FAISS_IVFPQ = "IVF_PQ";
constexpr const char* INDEX_FAISS_IVFSQ8 = "IVF_SQ8";
constexpr const char* INDEX_FAISS_IVFSQ8H = "IVF_SQ8_HYBRID";
constexpr const char* INDEX_FAISS_BIN_IDMAP = "BIN_IDMAP";
constexpr const char* INDEX_FAISS_BIN_IVFFLAT = "BIN_IVF_FLAT";
constexpr const char* INDEX_NSG = "NSG";
constexpr const char* INDEX_SPTAG_KDT_RNT = "SPTAG_KDT_RNT";
constexpr const char* INDEX_SPTAG_BKT_RNT = "SPTAG_BKT_RNT";
constexpr const char* INDEX_HNSW = "HNSW";
constexpr const char* INDEX_ANNOY = "ANNOY";
extern const char* INVALID;
extern const char* INDEX_FAISS_IDMAP;
extern const char* INDEX_FAISS_IVFFLAT;
extern const char* INDEX_FAISS_IVFPQ;
extern const char* INDEX_FAISS_IVFSQ8;
extern const char* INDEX_FAISS_IVFSQ8H;
extern const char* INDEX_FAISS_BIN_IDMAP;
extern const char* INDEX_FAISS_BIN_IVFFLAT;
extern const char* INDEX_NSG;
extern const char* INDEX_SPTAG_KDT_RNT;
extern const char* INDEX_SPTAG_BKT_RNT;
extern const char* INDEX_HNSW;
extern const char* INDEX_ANNOY;
} // namespace IndexEnum
enum class IndexMode { MODE_CPU = 0, MODE_GPU = 1 };
......
......@@ -63,13 +63,13 @@ namespace SPTAG
static double GetVector(char* cstr, const char* sep, std::vector<float>& arr, DimensionType& NumDim) {
char* current;
char* context = NULL;
char* context = nullptr;
DimensionType i = 0;
double sum = 0;
arr.clear();
current = strtok_s(cstr, sep, &context);
while (current != NULL && (i < NumDim || NumDim < 0)) {
while (current != nullptr && (i < NumDim || NumDim < 0)) {
try {
float val = (float)atof(current);
arr.push_back(val);
......@@ -80,7 +80,7 @@ namespace SPTAG
}
sum += arr[i] * arr[i];
current = strtok_s(NULL, sep, &context);
current = strtok_s(nullptr, sep, &context);
i++;
}
......
......@@ -44,10 +44,10 @@ class AnnoyIndex {
ptr->get_nns_by_vector(w, n, search_k, result, distances);
};
void getNnsByItem(int item, int n, int search_k, vector<int32_t>* result) {
ptr->get_nns_by_item(item, n, search_k, result, NULL);
ptr->get_nns_by_item(item, n, search_k, result, nullptr);
};
void getNnsByVector(const float* w, int n, int search_k, vector<int32_t>* result) {
ptr->get_nns_by_vector(w, n, search_k, result, NULL);
ptr->get_nns_by_vector(w, n, search_k, result, nullptr);
};
int getNItems() {
......
......@@ -824,19 +824,19 @@ struct Manhattan : Minkowski {
template<typename S, typename T>
class AnnoyIndexInterface {
public:
// Note that the methods with an **error argument will allocate memory and write the pointer to that string if error is non-NULL
// Note that the methods with an **error argument will allocate memory and write the pointer to that string if error is non-nullptr
virtual ~AnnoyIndexInterface() {};
virtual bool add_item(S item, const T* w, char** error=NULL) = 0;
virtual bool build(int q, char** error=NULL) = 0;
virtual bool unbuild(char** error=NULL) = 0;
virtual bool save(const char* filename, bool prefault=false, char** error=NULL) = 0;
virtual bool add_item(S item, const T* w, char** error=nullptr) = 0;
virtual bool build(int q, char** error=nullptr) = 0;
virtual bool unbuild(char** error=nullptr) = 0;
virtual bool save(const char* filename, bool prefault=false, char** error=nullptr) = 0;
virtual void unload() = 0;
virtual bool load(const char* filename, bool prefault=false, char** error=NULL) = 0;
virtual bool load_index(void* index_data, const int64_t& index_size, char** error = NULL) = 0;
virtual bool load(const char* filename, bool prefault=false, char** error=nullptr) = 0;
virtual bool load_index(void* index_data, const int64_t& index_size, char** error = nullptr) = 0;
virtual T get_distance(S i, S j) const = 0;
virtual void get_nns_by_item(S item, size_t n, int search_k, vector<S>* result, vector<T>* distances,
virtual void get_nns_by_item(S item, size_t n, int64_t search_k, vector<S>* result, vector<T>* distances,
faiss::ConcurrentBitsetPtr& bitset = nullptr) const = 0;
virtual void get_nns_by_vector(const T* w, size_t n, int search_k, vector<S>* result, vector<T>* distances,
virtual void get_nns_by_vector(const T* w, size_t n, int64_t search_k, vector<S>* result, vector<T>* distances,
faiss::ConcurrentBitsetPtr& bitset = nullptr) const = 0;
virtual S get_n_items() const = 0;
virtual S get_dim() const = 0;
......@@ -846,7 +846,7 @@ class AnnoyIndexInterface {
virtual void verbose(bool v) = 0;
virtual void get_item(S item, T* v) const = 0;
virtual void set_seed(int q) = 0;
virtual bool on_disk_build(const char* filename, char** error=NULL) = 0;
virtual bool on_disk_build(const char* filename, char** error=nullptr) = 0;
};
template<typename S, typename T, typename Distance, typename Random>
......@@ -894,12 +894,12 @@ public:
return _f;
}
bool add_item(S item, const T* w, char** error=NULL) {
bool add_item(S item, const T* w, char** error=nullptr) {
return add_item_impl(item, w, error);
}
template<typename W>
bool add_item_impl(S item, const W& w, char** error=NULL) {
bool add_item_impl(S item, const W& w, char** error=nullptr) {
if (_loaded) {
set_error_from_string(error, "You can't add an item to a loaded index");
return false;
......@@ -924,7 +924,7 @@ public:
return true;
}
bool on_disk_build(const char* file, char** error=NULL) {
bool on_disk_build(const char* file, char** error=nullptr) {
_on_disk = true;
_fd = open(file, O_RDWR | O_CREAT | O_TRUNC, (int) 0600);
if (_fd == -1) {
......@@ -945,7 +945,7 @@ public:
return true;
}
bool build(int q, char** error=NULL) {
bool build(int q, char** error=nullptr) {
if (_loaded) {
set_error_from_string(error, "You can't build a loaded index");
return false;
......@@ -997,7 +997,7 @@ public:
return true;
}
bool unbuild(char** error=NULL) {
bool unbuild(char** error=nullptr) {
if (_loaded) {
set_error_from_string(error, "You can't unbuild a loaded index");
return false;
......@@ -1010,7 +1010,7 @@ public:
return true;
}
bool save(const char* filename, bool prefault=false, char** error=NULL) {
bool save(const char* filename, bool prefault=false, char** error=nullptr) {
if (!_built) {
set_error_from_string(error, "You can't save an index that hasn't been built");
return false;
......@@ -1022,7 +1022,7 @@ public:
unlink(filename);
FILE *f = fopen(filename, "wb");
if (f == NULL) {
if (f == nullptr) {
set_error_from_errno(error, "Unable to open");
return false;
}
......@@ -1044,7 +1044,7 @@ public:
void reinitialize() {
_fd = 0;
_nodes = NULL;
_nodes = nullptr;
_loaded = false;
_n_items = 0;
_n_nodes = 0;
......@@ -1071,7 +1071,7 @@ public:
if (_verbose) showUpdate("unloaded\n");
}
bool load(const char* filename, bool prefault=false, char** error=NULL) {
bool load(const char* filename, bool prefault=false, char** error=nullptr) {
_fd = open(filename, O_RDONLY, (int)0400);
if (_fd == -1) {
set_error_from_errno(error, "Unable to open");
......@@ -1172,14 +1172,14 @@ public:
return D::normalized_distance(D::distance(_get(i), _get(j), _f));
}
void get_nns_by_item(S item, size_t n, int search_k, vector<S>* result, vector<T>* distances,
void get_nns_by_item(S item, size_t n, int64_t search_k, vector<S>* result, vector<T>* distances,
faiss::ConcurrentBitsetPtr& bitset) const {
// TODO: handle OOB
const Node* m = _get(item);
_get_all_nns(m->v, n, search_k, result, distances, bitset);
}
void get_nns_by_vector(const T* w, size_t n, int search_k, vector<S>* result, vector<T>* distances,
void get_nns_by_vector(const T* w, size_t n, int64_t search_k, vector<S>* result, vector<T>* distances,
faiss::ConcurrentBitsetPtr& bitset) const {
_get_all_nns(w, n, search_k, result, distances, bitset);
}
......@@ -1327,7 +1327,7 @@ protected:
return item;
}
void _get_all_nns(const T* v, size_t n, int search_k, vector<S>* result, vector<T>* distances,
void _get_all_nns(const T* v, size_t n, int64_t search_k, vector<S>* result, vector<T>* distances,
faiss::ConcurrentBitsetPtr& bitset) const {
Node* v_node = (Node *)alloca(_s);
D::template zero_value<Node>(v_node);
......@@ -1337,7 +1337,7 @@ protected:
std::priority_queue<pair<T, S> > q;
if (search_k <= 0) {
search_k = std::max(n * _roots.size(), (size_t )_n_items * 5 / 100);
search_k = std::max(int64_t(n * _roots.size()), int64_t(_n_items * 5 / 100));
}
for (size_t i = 0; i < _roots.size(); i++) {
......
......@@ -21,7 +21,7 @@
#include "kissrandom.h"
#if LUA_VERSION_NUM == 501
#define compat_setfuncs(L, funcs) luaL_register(L, NULL, funcs)
#define compat_setfuncs(L, funcs) luaL_register(L, nullptr, funcs)
#define compat_rawlen lua_objlen
#else
#define compat_setfuncs(L, funcs) luaL_setfuncs(L, funcs, 0)
......@@ -203,7 +203,7 @@ public:
Searcher s(L);
int item = getItemIndex(L, 2, s.self->get_n_items());
s.self->get_nns_by_item(item, s.n, s.search_k, &s.result,
s.include_distances ? &s.distances : NULL);
s.include_distances ? &s.distances : nullptr);
return s.pushResults(L);
}
......@@ -213,7 +213,7 @@ public:
AnnoyT* vec = &(_vec[0]);
toVector(L, 2, s.self->get_f(), vec);
s.self->get_nns_by_vector(vec, s.n, s.search_k, &s.result,
s.include_distances ? &s.distances : NULL);
s.include_distances ? &s.distances : nullptr);
return s.pushResults(L);
}
......@@ -246,7 +246,7 @@ public:
static const luaL_Reg funcs[] = {
{"__gc", &ThisClass::gc},
{"__tostring", &ThisClass::tostring},
{NULL, NULL},
{nullptr, nullptr},
};
return funcs;
}
......@@ -264,7 +264,7 @@ public:
{"get_distance", &ThisClass::get_distance},
{"get_n_items", &ThisClass::get_n_items},
{"on_disk_build", &ThisClass::on_disk_build},
{NULL, NULL},
{nullptr, nullptr},
};
return funcs;
}
......@@ -304,7 +304,7 @@ static int lua_an_make(lua_State* L) {
static const luaL_Reg LUA_ANNOY_FUNCS[] = {
{"AnnoyIndex", lua_an_make},
{NULL, NULL},
{nullptr, nullptr},
};
extern "C" {
......
......@@ -96,7 +96,7 @@ public:
_index.get_nns_by_item(item, n, search_k, result, &distances_internal);
distances->insert(distances->begin(), distances_internal.begin(), distances_internal.end());
} else {
_index.get_nns_by_item(item, n, search_k, result, NULL);
_index.get_nns_by_item(item, n, search_k, result, nullptr);
}
};
void get_nns_by_vector(const float* w, size_t n, int search_k, vector<int32_t>* result, vector<float>* distances) const {
......@@ -107,7 +107,7 @@ public:
_index.get_nns_by_vector(&w_internal[0], n, search_k, result, &distances_internal);
distances->insert(distances->begin(), distances_internal.begin(), distances_internal.end());
} else {
_index.get_nns_by_vector(&w_internal[0], n, search_k, result, NULL);
_index.get_nns_by_vector(&w_internal[0], n, search_k, result, nullptr);
}
};
int32_t get_n_items() const { return _index.get_n_items(); };
......@@ -133,14 +133,14 @@ typedef struct {
static PyObject *
py_an_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) {
py_annoy *self = (py_annoy *)type->tp_alloc(type, 0);
if (self == NULL) {
return NULL;
if (self == nullptr) {
return nullptr;
}
const char *metric = NULL;
const char *metric = nullptr;
static char const * kwlist[] = {"f", "metric", NULL};
static char const * kwlist[] = {"f", "metric", nullptr};
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|s", (char**)kwlist, &self->f, &metric))
return NULL;
return nullptr;
if (!metric) {
// This keeps coming up, see #368 etc
PyErr_WarnEx(PyExc_FutureWarning, "The default argument for metric will be removed "
......@@ -158,7 +158,7 @@ py_an_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) {
self->ptr = new AnnoyIndex<int32_t, float, DotProduct, Kiss64Random>(self->f);
} else {
PyErr_SetString(PyExc_ValueError, "No such metric");
return NULL;
return nullptr;
}
return (PyObject *)self;
......@@ -168,11 +168,11 @@ py_an_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) {
static int
py_an_init(py_annoy *self, PyObject *args, PyObject *kwargs) {
// Seems to be needed for Python 3
const char *metric = NULL;
const char *metric = nullptr;
int f;
static char const * kwlist[] = {"f", "metric", NULL};
static char const * kwlist[] = {"f", "metric", nullptr};
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|s", (char**)kwlist, &f, &metric))
return (int) NULL;
return (int) nullptr;
return 0;
}
......@@ -187,7 +187,7 @@ py_an_dealloc(py_annoy* self) {
static PyMemberDef py_annoy_members[] = {
{(char*)"f", T_INT, offsetof(py_annoy, f), 0,
(char*)""},
{NULL} /* Sentinel */
{nullptr} /* Sentinel */
};
......@@ -196,15 +196,15 @@ py_an_load(py_annoy *self, PyObject *args, PyObject *kwargs) {
char *filename, *error;
bool prefault = false;
if (!self->ptr)
return NULL;
static char const * kwlist[] = {"fn", "prefault", NULL};
return nullptr;
static char const * kwlist[] = {"fn", "prefault", nullptr};
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|b", (char**)kwlist, &filename, &prefault))
return NULL;
return nullptr;
if (!self->ptr->load(filename, prefault, &error)) {
PyErr_SetString(PyExc_IOError, error);
free(error);
return NULL;
return nullptr;
}
Py_RETURN_TRUE;
}
......@@ -215,15 +215,15 @@ py_an_save(py_annoy *self, PyObject *args, PyObject *kwargs) {
char *filename, *error;
bool prefault = false;
if (!self->ptr)
return NULL;
static char const * kwlist[] = {"fn", "prefault", NULL};
return nullptr;
static char const * kwlist[] = {"fn", "prefault", nullptr};
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|b", (char**)kwlist, &filename, &prefault))
return NULL;
return nullptr;
if (!self->ptr->save(filename, prefault, &error)) {
PyErr_SetString(PyExc_IOError, error);
free(error);
return NULL;
return nullptr;
}
Py_RETURN_TRUE;
}
......@@ -265,21 +265,21 @@ static PyObject*
py_an_get_nns_by_item(py_annoy *self, PyObject *args, PyObject *kwargs) {
int32_t item, n, search_k=-1, include_distances=0;
if (!self->ptr)
return NULL;
return nullptr;
static char const * kwlist[] = {"i", "n", "search_k", "include_distances", NULL};
static char const * kwlist[] = {"i", "n", "search_k", "include_distances", nullptr};
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "ii|ii", (char**)kwlist, &item, &n, &search_k, &include_distances))
return NULL;
return nullptr;
if (!check_constraints(self, item, false)) {
return NULL;
return nullptr;
}
vector<int32_t> result;
vector<float> distances;
Py_BEGIN_ALLOW_THREADS;
self->ptr->get_nns_by_item(item, n, search_k, &result, include_distances ? &distances : NULL);
self->ptr->get_nns_by_item(item, n, search_k, &result, include_distances ? &distances : nullptr);
Py_END_ALLOW_THREADS;
return get_nns_to_python(result, distances, include_distances);
......@@ -315,22 +315,22 @@ py_an_get_nns_by_vector(py_annoy *self, PyObject *args, PyObject *kwargs) {
PyObject* v;
int32_t n, search_k=-1, include_distances=0;
if (!self->ptr)
return NULL;
return nullptr;
static char const * kwlist[] = {"vector", "n", "search_k", "include_distances", NULL};
static char const * kwlist[] = {"vector", "n", "search_k", "include_distances", nullptr};
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "Oi|ii", (char**)kwlist, &v, &n, &search_k, &include_distances))
return NULL;
return nullptr;
vector<float> w(self->f);
if (!convert_list_to_vector(v, self->f, &w)) {
return NULL;
return nullptr;
}
vector<int32_t> result;
vector<float> distances;
Py_BEGIN_ALLOW_THREADS;
self->ptr->get_nns_by_vector(&w[0], n, search_k, &result, include_distances ? &distances : NULL);
self->ptr->get_nns_by_vector(&w[0], n, search_k, &result, include_distances ? &distances : nullptr);
Py_END_ALLOW_THREADS;
return get_nns_to_python(result, distances, include_distances);
......@@ -341,12 +341,12 @@ static PyObject*
py_an_get_item_vector(py_annoy *self, PyObject *args) {
int32_t item;
if (!self->ptr)
return NULL;
return nullptr;
if (!PyArg_ParseTuple(args, "i", &item))
return NULL;
return nullptr;
if (!check_constraints(self, item, false)) {
return NULL;
return nullptr;
}
vector<float> v(self->f);
......@@ -365,24 +365,24 @@ py_an_add_item(py_annoy *self, PyObject *args, PyObject* kwargs) {
PyObject* v;
int32_t item;
if (!self->ptr)
return NULL;
static char const * kwlist[] = {"i", "vector", NULL};
return nullptr;
static char const * kwlist[] = {"i", "vector", nullptr};
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "iO", (char**)kwlist, &item, &v))
return NULL;
return nullptr;
if (!check_constraints(self, item, true)) {
return NULL;
return nullptr;
}
vector<float> w(self->f);
if (!convert_list_to_vector(v, self->f, &w)) {
return NULL;
return nullptr;
}
char* error;
if (!self->ptr->add_item(item, &w[0], &error)) {
PyErr_SetString(PyExc_Exception, error);
free(error);
return NULL;
return nullptr;
}
Py_RETURN_NONE;
......@@ -392,15 +392,15 @@ static PyObject *
py_an_on_disk_build(py_annoy *self, PyObject *args, PyObject *kwargs) {
char *filename, *error;
if (!self->ptr)
return NULL;
static char const * kwlist[] = {"fn", NULL};
return nullptr;
static char const * kwlist[] = {"fn", nullptr};
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s", (char**)kwlist, &filename))
return NULL;
return nullptr;
if (!self->ptr->on_disk_build(filename, &error)) {
PyErr_SetString(PyExc_IOError, error);
free(error);
return NULL;
return nullptr;
}
Py_RETURN_TRUE;
}
......@@ -409,10 +409,10 @@ static PyObject *
py_an_build(py_annoy *self, PyObject *args, PyObject *kwargs) {
int q;
if (!self->ptr)
return NULL;
static char const * kwlist[] = {"n_trees", NULL};
return nullptr;
static char const * kwlist[] = {"n_trees", nullptr};
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i", (char**)kwlist, &q))
return NULL;
return nullptr;
bool res;
char* error;
......@@ -422,7 +422,7 @@ py_an_build(py_annoy *self, PyObject *args, PyObject *kwargs) {
if (!res) {
PyErr_SetString(PyExc_Exception, error);
free(error);
return NULL;
return nullptr;
}
Py_RETURN_TRUE;
......@@ -432,13 +432,13 @@ py_an_build(py_annoy *self, PyObject *args, PyObject *kwargs) {
static PyObject *
py_an_unbuild(py_annoy *self) {
if (!self->ptr)
return NULL;
return nullptr;
char* error;
if (!self->ptr->unbuild(&error)) {
PyErr_SetString(PyExc_Exception, error);
free(error);
return NULL;
return nullptr;
}
Py_RETURN_TRUE;
......@@ -448,7 +448,7 @@ py_an_unbuild(py_annoy *self) {
static PyObject *
py_an_unload(py_annoy *self) {
if (!self->ptr)
return NULL;
return nullptr;
self->ptr->unload();
......@@ -460,12 +460,12 @@ static PyObject *
py_an_get_distance(py_annoy *self, PyObject *args) {
int32_t i, j;
if (!self->ptr)
return NULL;
return nullptr;
if (!PyArg_ParseTuple(args, "ii", &i, &j))
return NULL;
return nullptr;
if (!check_constraints(self, i, false) || !check_constraints(self, j, false)) {
return NULL;
return nullptr;
}
double d = self->ptr->get_distance(i,j);
......@@ -476,7 +476,7 @@ py_an_get_distance(py_annoy *self, PyObject *args) {
static PyObject *
py_an_get_n_items(py_annoy *self) {
if (!self->ptr)
return NULL;
return nullptr;
int32_t n = self->ptr->get_n_items();
return PyInt_FromLong(n);
......@@ -485,7 +485,7 @@ py_an_get_n_items(py_annoy *self) {
static PyObject *
py_an_get_n_trees(py_annoy *self) {
if (!self->ptr)
return NULL;
return nullptr;
int32_t n = self->ptr->get_n_trees();
return PyInt_FromLong(n);
......@@ -495,9 +495,9 @@ static PyObject *
py_an_verbose(py_annoy *self, PyObject *args) {
int verbose;
if (!self->ptr)
return NULL;
return nullptr;
if (!PyArg_ParseTuple(args, "i", &verbose))
return NULL;
return nullptr;
self->ptr->verbose((bool)verbose);
......@@ -509,9 +509,9 @@ static PyObject *
py_an_set_seed(py_annoy *self, PyObject *args) {
int q;
if (!self->ptr)
return NULL;
return nullptr;
if (!PyArg_ParseTuple(args, "i", &q))
return NULL;
return nullptr;
self->ptr->set_seed(q);
......@@ -535,12 +535,12 @@ static PyMethodDef AnnoyMethods[] = {
{"get_n_trees",(PyCFunction)py_an_get_n_trees, METH_NOARGS, "Returns the number of trees in the index."},
{"verbose",(PyCFunction)py_an_verbose, METH_VARARGS, ""},
{"set_seed",(PyCFunction)py_an_set_seed, METH_VARARGS, "Sets the seed of Annoy's random number generator."},
{NULL, NULL, 0, NULL} /* Sentinel */
{nullptr, nullptr, 0, nullptr} /* Sentinel */
};
static PyTypeObject PyAnnoyType = {
PyVarObject_HEAD_INIT(NULL, 0)
PyVarObject_HEAD_INIT(nullptr, 0)
"annoy.Annoy", /*tp_name*/
sizeof(py_annoy), /*tp_basicsize*/
0, /*tp_itemsize*/
......@@ -581,7 +581,7 @@ static PyTypeObject PyAnnoyType = {
};
static PyMethodDef module_methods[] = {
{NULL} /* Sentinel */
{nullptr} /* Sentinel */
};
#if PY_MAJOR_VERSION >= 3
......@@ -591,10 +591,10 @@ static PyMethodDef module_methods[] = {
ANNOY_DOC, /* m_doc */
-1, /* m_size */
module_methods, /* m_methods */
NULL, /* m_reload */
NULL, /* m_traverse */
NULL, /* m_clear */
NULL, /* m_free */
nullptr, /* m_reload */
nullptr, /* m_traverse */
nullptr, /* m_clear */
nullptr, /* m_free */
};
#endif
......@@ -602,7 +602,7 @@ PyObject *create_module(void) {
PyObject *m;
if (PyType_Ready(&PyAnnoyType) < 0)
return NULL;
return nullptr;
#if PY_MAJOR_VERSION >= 3
m = PyModule_Create(&moduledef);
......@@ -610,8 +610,8 @@ PyObject *create_module(void) {
m = Py_InitModule("annoylib", module_methods);
#endif
if (m == NULL)
return NULL;
if (m == nullptr)
return nullptr;
Py_INCREF(&PyAnnoyType);
PyModule_AddObject(m, "Annoy", (PyObject *)&PyAnnoyType);
......
......@@ -219,8 +219,8 @@ main() {
}
printf("gen xb and ids done! \n");
// srand((unsigned)time(NULL));
auto random_seed = (unsigned)time(NULL);
// srand((unsigned)time(nullptr));
auto random_seed = (unsigned)time(nullptr);
printf("delete ids: \n");
for (int i = 0; i < nq; i++) {
auto tmp = rand_r(&random_seed) % nb;
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册