提交 8963031c 编写于 作者: Y Yu Kun

Merge remote-tracking branch 'upstream/branch-0.4.0' into branch-0.4.0


Former-commit-id: e235849c69b0311b6953833019d4edf8c1dcc2c3

要显示的变更太多。

To preserve performance only 1000 of 1000+ files are displayed.
......@@ -224,97 +224,97 @@ spec:
}
}
stage("Cluster") {
agent {
kubernetes {
label 'dev-test'
defaultContainer 'jnlp'
yaml """
apiVersion: v1
kind: Pod
metadata:
labels:
app: milvus
componet: test
spec:
containers:
- name: milvus-testframework
image: registry.zilliz.com/milvus/milvus-test:v0.2
command:
- cat
tty: true
volumeMounts:
- name: kubeconf
mountPath: /root/.kube/
readOnly: true
volumes:
- name: kubeconf
secret:
secretName: test-cluster-config
"""
}
}
stages {
stage("Deploy to Dev") {
steps {
gitlabCommitStatus(name: 'Deloy to Dev') {
container('milvus-testframework') {
script {
load "${env.WORKSPACE}/ci/jenkinsfile/cluster_deploy2dev.groovy"
}
}
}
}
}
stage("Dev Test") {
steps {
gitlabCommitStatus(name: 'Deloy Test') {
container('milvus-testframework') {
script {
load "${env.WORKSPACE}/ci/jenkinsfile/cluster_dev_test.groovy"
load "${env.WORKSPACE}/ci/jenkinsfile/upload_dev_cluster_test_out.groovy"
}
}
}
}
}
stage ("Cleanup Dev") {
steps {
gitlabCommitStatus(name: 'Cleanup Dev') {
container('milvus-testframework') {
script {
load "${env.WORKSPACE}/ci/jenkinsfile/cluster_cleanup_dev.groovy"
}
}
}
}
}
}
post {
always {
container('milvus-testframework') {
script {
load "${env.WORKSPACE}/ci/jenkinsfile/cluster_cleanup_dev.groovy"
}
}
}
success {
script {
echo "Milvus Cluster CI/CD success !"
}
}
aborted {
script {
echo "Milvus Cluster CI/CD aborted !"
}
}
failure {
script {
echo "Milvus Cluster CI/CD failure !"
}
}
}
}
// stage("Cluster") {
// agent {
// kubernetes {
// label 'dev-test'
// defaultContainer 'jnlp'
// yaml """
// apiVersion: v1
// kind: Pod
// metadata:
// labels:
// app: milvus
// componet: test
// spec:
// containers:
// - name: milvus-testframework
// image: registry.zilliz.com/milvus/milvus-test:v0.2
// command:
// - cat
// tty: true
// volumeMounts:
// - name: kubeconf
// mountPath: /root/.kube/
// readOnly: true
// volumes:
// - name: kubeconf
// secret:
// secretName: test-cluster-config
// """
// }
// }
// stages {
// stage("Deploy to Dev") {
// steps {
// gitlabCommitStatus(name: 'Deloy to Dev') {
// container('milvus-testframework') {
// script {
// load "${env.WORKSPACE}/ci/jenkinsfile/cluster_deploy2dev.groovy"
// }
// }
// }
// }
// }
// stage("Dev Test") {
// steps {
// gitlabCommitStatus(name: 'Deloy Test') {
// container('milvus-testframework') {
// script {
// load "${env.WORKSPACE}/ci/jenkinsfile/cluster_dev_test.groovy"
// load "${env.WORKSPACE}/ci/jenkinsfile/upload_dev_cluster_test_out.groovy"
// }
// }
// }
// }
// }
// stage ("Cleanup Dev") {
// steps {
// gitlabCommitStatus(name: 'Cleanup Dev') {
// container('milvus-testframework') {
// script {
// load "${env.WORKSPACE}/ci/jenkinsfile/cluster_cleanup_dev.groovy"
// }
// }
// }
// }
// }
// }
// post {
// always {
// container('milvus-testframework') {
// script {
// load "${env.WORKSPACE}/ci/jenkinsfile/cluster_cleanup_dev.groovy"
// }
// }
// }
// success {
// script {
// echo "Milvus Cluster CI/CD success !"
// }
// }
// aborted {
// script {
// echo "Milvus Cluster CI/CD aborted !"
// }
// }
// failure {
// script {
// echo "Milvus Cluster CI/CD failure !"
// }
// }
// }
// }
}
}
}
......
......@@ -224,97 +224,97 @@ spec:
}
}
stage("Cluster") {
agent {
kubernetes {
label 'dev-test'
defaultContainer 'jnlp'
yaml """
apiVersion: v1
kind: Pod
metadata:
labels:
app: milvus
componet: test
spec:
containers:
- name: milvus-testframework
image: registry.zilliz.com/milvus/milvus-test:v0.2
command:
- cat
tty: true
volumeMounts:
- name: kubeconf
mountPath: /root/.kube/
readOnly: true
volumes:
- name: kubeconf
secret:
secretName: test-cluster-config
"""
}
}
stages {
stage("Deploy to Dev") {
steps {
gitlabCommitStatus(name: 'Deloy to Dev') {
container('milvus-testframework') {
script {
load "${env.WORKSPACE}/ci/jenkinsfile/cluster_deploy2dev.groovy"
}
}
}
}
}
stage("Dev Test") {
steps {
gitlabCommitStatus(name: 'Deloy Test') {
container('milvus-testframework') {
script {
load "${env.WORKSPACE}/ci/jenkinsfile/cluster_dev_test.groovy"
load "${env.WORKSPACE}/ci/jenkinsfile/upload_dev_cluster_test_out.groovy"
}
}
}
}
}
stage ("Cleanup Dev") {
steps {
gitlabCommitStatus(name: 'Cleanup Dev') {
container('milvus-testframework') {
script {
load "${env.WORKSPACE}/ci/jenkinsfile/cluster_cleanup_dev.groovy"
}
}
}
}
}
}
post {
always {
container('milvus-testframework') {
script {
load "${env.WORKSPACE}/ci/jenkinsfile/cluster_cleanup_dev.groovy"
}
}
}
success {
script {
echo "Milvus Cluster CI/CD success !"
}
}
aborted {
script {
echo "Milvus Cluster CI/CD aborted !"
}
}
failure {
script {
echo "Milvus Cluster CI/CD failure !"
}
}
}
}
// stage("Cluster") {
// agent {
// kubernetes {
// label 'dev-test'
// defaultContainer 'jnlp'
// yaml """
// apiVersion: v1
// kind: Pod
// metadata:
// labels:
// app: milvus
// componet: test
// spec:
// containers:
// - name: milvus-testframework
// image: registry.zilliz.com/milvus/milvus-test:v0.2
// command:
// - cat
// tty: true
// volumeMounts:
// - name: kubeconf
// mountPath: /root/.kube/
// readOnly: true
// volumes:
// - name: kubeconf
// secret:
// secretName: test-cluster-config
// """
// }
// }
// stages {
// stage("Deploy to Dev") {
// steps {
// gitlabCommitStatus(name: 'Deloy to Dev') {
// container('milvus-testframework') {
// script {
// load "${env.WORKSPACE}/ci/jenkinsfile/cluster_deploy2dev.groovy"
// }
// }
// }
// }
// }
// stage("Dev Test") {
// steps {
// gitlabCommitStatus(name: 'Deloy Test') {
// container('milvus-testframework') {
// script {
// load "${env.WORKSPACE}/ci/jenkinsfile/cluster_dev_test.groovy"
// load "${env.WORKSPACE}/ci/jenkinsfile/upload_dev_cluster_test_out.groovy"
// }
// }
// }
// }
// }
// stage ("Cleanup Dev") {
// steps {
// gitlabCommitStatus(name: 'Cleanup Dev') {
// container('milvus-testframework') {
// script {
// load "${env.WORKSPACE}/ci/jenkinsfile/cluster_cleanup_dev.groovy"
// }
// }
// }
// }
// }
// }
// post {
// always {
// container('milvus-testframework') {
// script {
// load "${env.WORKSPACE}/ci/jenkinsfile/cluster_cleanup_dev.groovy"
// }
// }
// }
// success {
// script {
// echo "Milvus Cluster CI/CD success !"
// }
// }
// aborted {
// script {
// echo "Milvus Cluster CI/CD aborted !"
// }
// }
// failure {
// script {
// echo "Milvus Cluster CI/CD failure !"
// }
// }
// }
// }
}
}
}
......
......@@ -224,97 +224,97 @@ spec:
}
}
stage("Cluster") {
agent {
kubernetes {
label 'dev-test'
defaultContainer 'jnlp'
yaml """
apiVersion: v1
kind: Pod
metadata:
labels:
app: milvus
componet: test
spec:
containers:
- name: milvus-testframework
image: registry.zilliz.com/milvus/milvus-test:v0.2
command:
- cat
tty: true
volumeMounts:
- name: kubeconf
mountPath: /root/.kube/
readOnly: true
volumes:
- name: kubeconf
secret:
secretName: test-cluster-config
"""
}
}
stages {
stage("Deploy to Dev") {
steps {
gitlabCommitStatus(name: 'Deloy to Dev') {
container('milvus-testframework') {
script {
load "${env.WORKSPACE}/ci/jenkinsfile/cluster_deploy2dev.groovy"
}
}
}
}
}
stage("Dev Test") {
steps {
gitlabCommitStatus(name: 'Deloy Test') {
container('milvus-testframework') {
script {
load "${env.WORKSPACE}/ci/jenkinsfile/cluster_dev_test.groovy"
load "${env.WORKSPACE}/ci/jenkinsfile/upload_dev_cluster_test_out.groovy"
}
}
}
}
}
stage ("Cleanup Dev") {
steps {
gitlabCommitStatus(name: 'Cleanup Dev') {
container('milvus-testframework') {
script {
load "${env.WORKSPACE}/ci/jenkinsfile/cluster_cleanup_dev.groovy"
}
}
}
}
}
}
post {
always {
container('milvus-testframework') {
script {
load "${env.WORKSPACE}/ci/jenkinsfile/cluster_cleanup_dev.groovy"
}
}
}
success {
script {
echo "Milvus Deploy to Dev Cluster CI/CD success !"
}
}
aborted {
script {
echo "Milvus Deploy to Dev Cluster CI/CD aborted !"
}
}
failure {
script {
echo "Milvus Deploy to Dev Cluster CI/CD failure !"
}
}
}
}
// stage("Cluster") {
// agent {
// kubernetes {
// label 'dev-test'
// defaultContainer 'jnlp'
// yaml """
// apiVersion: v1
// kind: Pod
// metadata:
// labels:
// app: milvus
// componet: test
// spec:
// containers:
// - name: milvus-testframework
// image: registry.zilliz.com/milvus/milvus-test:v0.2
// command:
// - cat
// tty: true
// volumeMounts:
// - name: kubeconf
// mountPath: /root/.kube/
// readOnly: true
// volumes:
// - name: kubeconf
// secret:
// secretName: test-cluster-config
// """
// }
// }
// stages {
// stage("Deploy to Dev") {
// steps {
// gitlabCommitStatus(name: 'Deloy to Dev') {
// container('milvus-testframework') {
// script {
// load "${env.WORKSPACE}/ci/jenkinsfile/cluster_deploy2dev.groovy"
// }
// }
// }
// }
// }
// stage("Dev Test") {
// steps {
// gitlabCommitStatus(name: 'Deloy Test') {
// container('milvus-testframework') {
// script {
// load "${env.WORKSPACE}/ci/jenkinsfile/cluster_dev_test.groovy"
// load "${env.WORKSPACE}/ci/jenkinsfile/upload_dev_cluster_test_out.groovy"
// }
// }
// }
// }
// }
// stage ("Cleanup Dev") {
// steps {
// gitlabCommitStatus(name: 'Cleanup Dev') {
// container('milvus-testframework') {
// script {
// load "${env.WORKSPACE}/ci/jenkinsfile/cluster_cleanup_dev.groovy"
// }
// }
// }
// }
// }
// }
// post {
// always {
// container('milvus-testframework') {
// script {
// load "${env.WORKSPACE}/ci/jenkinsfile/cluster_cleanup_dev.groovy"
// }
// }
// }
// success {
// script {
// echo "Milvus Deploy to Dev Cluster CI/CD success !"
// }
// }
// aborted {
// script {
// echo "Milvus Deploy to Dev Cluster CI/CD aborted !"
// }
// }
// failure {
// script {
// echo "Milvus Deploy to Dev Cluster CI/CD failure !"
// }
// }
// }
// }
}
}
......
......@@ -14,6 +14,8 @@ Please mark all change in change log and use the ticket from JIRA.
- MS-432 - Search vectors params nprobe need to check max number
- MS-431 - Search vectors params nprobe: 0/-1, expected result: raise exception
- MS-331 - Create Table : when table exists, error code is META_FAILED(code=15) rather than ILLEGAL TABLE NAME(code=9)
- MS-430 - Search no result if index created with FLAT
- MS-443 - Create index hang again
## Improvement
- MS-327 - Clean code for milvus
......@@ -66,6 +68,8 @@ Please mark all change in change log and use the ticket from JIRA.
- MS-422 - Support DeleteTask in Multi-GpuResource case
- MS-428 - Add PushTaskByDataLocality in scheduler
- MS-440 - Add DumpTaskTables in sdk
- MS-442 - Merge Knowhere
- MS-445 - Rename CopyCompleted to LoadCompleted
## New Feature
- MS-343 - Implement ResourceMgr
......
......@@ -10,7 +10,7 @@ DB_PATH="/opt/milvus"
PROFILING="OFF"
BUILD_FAISS_WITH_MKL="OFF"
USE_JFROG_CACHE="OFF"
KNOWHERE_BUILD_DIR="`pwd`/thirdparty/knowhere/cmake_build"
KNOWHERE_BUILD_DIR="`pwd`/src/core/cmake_build"
while getopts "p:d:t:k:uhlrcgmj" arg
do
......
cmake_build
\ No newline at end of file
#-------------------------------------------------------------------------------
# Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
# Unauthorized copying of this file, via any medium is strictly prohibited.
# Proprietary and confidential.
#-------------------------------------------------------------------------------
# Top-level CMake configuration for the Knowhere vector-index library.
cmake_minimum_required(VERSION 3.14)
message(STATUS "Building using CMake version: ${CMAKE_VERSION}")
# Full version string; the numeric x.y.z prefix is extracted to seed project().
set(KNOWHERE_VERSION "0.1.0")
string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" KNOWHERE_BASE_VERSION "${KNOWHERE_VERSION}")
project(knowhere VERSION "${KNOWHERE_BASE_VERSION}" LANGUAGES CUDA C CXX)
set(CMAKE_CXX_STANDARD 14)
# project() populates knowhere_VERSION_*; re-export under the canonical prefix.
set(KNOWHERE_VERSION_MAJOR "${knowhere_VERSION_MAJOR}")
set(KNOWHERE_VERSION_MINOR "${knowhere_VERSION_MINOR}")
set(KNOWHERE_VERSION_PATCH "${knowhere_VERSION_PATCH}")
if(KNOWHERE_VERSION_MAJOR STREQUAL ""
OR KNOWHERE_VERSION_MINOR STREQUAL ""
OR KNOWHERE_VERSION_PATCH STREQUAL "")
message(FATAL_ERROR "Failed to determine Knowhere version from '${KNOWHERE_VERSION}'")
endif()
message(STATUS "Knowhere version: "
"${KNOWHERE_VERSION_MAJOR}.${KNOWHERE_VERSION_MINOR}.${KNOWHERE_VERSION_PATCH} "
"(full: '${KNOWHERE_VERSION}')")
# If no build type is specified, default to release builds.
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif(NOT CMAKE_BUILD_TYPE)
# -fPIC: objects may be linked into shared libraries; -fopenmp is required by
# the bundled faiss kernels -- TODO confirm which dependency needs OpenMP.
if(CMAKE_BUILD_TYPE STREQUAL "Release")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -fPIC -fopenmp")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -g -fPIC -fopenmp")
endif()
MESSAGE(STATUS "CMAKE_CXX_FLAGS" ${CMAKE_CXX_FLAGS})
find_package(CUDA)
#set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -Xcompiler -fPIC -std=c++11 -D_FORCE_INLINES --expt-extended-lambda")
#set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -O0 -g")
# Map the host CPU to the arch label consumed by third-party build scripts.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)")
message("building milvus_engine on x86 architecture")
set(KNOWHERE_BUILD_ARCH x86_64)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "(ppc)")
message("building milvus_engine on ppc architecture")
set(KNOWHERE_BUILD_ARCH ppc64le)
else()
message("unknown processor type")
message("CMAKE_SYSTEM_PROCESSOR=${CMAKE_SYSTEM_PROCESSOR}")
set(KNOWHERE_BUILD_ARCH unknown)
endif()
# Lower-case build-type label (release/debug) used by dependency scripts.
if(CMAKE_BUILD_TYPE STREQUAL "Release")
set(BUILD_TYPE "release")
else()
set(BUILD_TYPE "debug")
endif()
message(STATUS "Build type = ${BUILD_TYPE}")
# Project-local CMake modules (DefineOptions, BuildUtils, ThirdPartyPackages).
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
include(ExternalProject)
include(DefineOptions)
include(BuildUtils)
include(ThirdPartyPackages)
add_subdirectory(src)
if(BUILD_UNIT_TEST STREQUAL "ON")
add_subdirectory(test)
endif()
# Print the option summary defined in DefineOptions.cmake.
config_summary()
#!/bin/bash
# Build script for the Knowhere library: configures (when the build tree is
# fresh or wiped with -r), builds, and installs into INSTALL_PREFIX.

BUILD_TYPE="Debug"
BUILD_UNITTEST="OFF"
INSTALL_PREFIX=$(pwd)/cmake_build
MAKE_CLEAN="OFF"
PROFILING="OFF"
BUILD_FAISS_WITH_MKL="OFF"
USE_JFROG_CACHE="OFF"

# NOTE(review): the option string also accepts -d (with argument), -l and -c,
# which have no handlers and are silently ignored; kept for compatibility.
while getopts "p:d:t:uhrcgmj" arg
do
        case $arg in
             t)
                BUILD_TYPE=$OPTARG # CMake build type (Debug/Release)
                ;;
             u)
                echo "Build and run unittest cases" ;
                BUILD_UNITTEST="ON";
                ;;
             p)
                INSTALL_PREFIX=$OPTARG
                ;;
             r)
                # Wipe any previous build tree and force a clean configure.
                if [[ -d cmake_build ]]; then
                    rm ./cmake_build -r
                    MAKE_CLEAN="ON"
                fi
                ;;
             g)
                PROFILING="ON"
                ;;
             m)
                BUILD_FAISS_WITH_MKL="ON"
                ;;
             j)
                USE_JFROG_CACHE="ON"
                ;;
             h) # help
                # BUG FIX: help previously claimed the default install prefix
                # was $(pwd)/knowhere (actual default is cmake_build) and did
                # not document -j.
                echo "
parameter:
-t: build type(default: Debug)
-u: building unit test options(default: OFF)
-p: install prefix(default: $(pwd)/cmake_build)
-r: remove previous build directory(default: OFF)
-g: profiling(default: OFF)
-m: build faiss with MKL(default: OFF)
-j: use JFrog artifact cache(default: OFF)

usage:
./build.sh -t \${BUILD_TYPE} [-u] [-h] [-g] [-r] [-c] [-m] [-j]
                "
                exit 0
                ;;
             ?)
                echo "unknown argument"
                exit 1
                ;;
        esac
done

if [[ ! -d cmake_build ]]; then
    mkdir cmake_build
    MAKE_CLEAN="ON"
fi

cd cmake_build

CUDA_COMPILER=/usr/local/cuda/bin/nvcc

# Re-run CMake only on a fresh (or just-wiped) build tree.
if [[ ${MAKE_CLEAN} == "ON" ]]; then
    # BUG FIX: the INSTALL_PREFIX line previously lacked the trailing '\'
    # line-continuation, leaving an embedded newline in the command string.
    CMAKE_CMD="cmake -DBUILD_UNIT_TEST=${BUILD_UNITTEST} \
-DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
-DCMAKE_CUDA_COMPILER=${CUDA_COMPILER} \
-DMILVUS_ENABLE_PROFILING=${PROFILING} \
-DBUILD_FAISS_WITH_MKL=${BUILD_FAISS_WITH_MKL} \
-DUSE_JFROG_CACHE=${USE_JFROG_CACHE} \
../"
    echo ${CMAKE_CMD}
    ${CMAKE_CMD}
    make clean
fi

make -j 8 || exit 1
make install || exit 1
# Check_Last_Modify(<cache_check_lists_file_path> <working_dir> <out_var>)
#
# Walks `git log` in <working_dir> and stores, in the caller's <out_var>, the
# hash of the most recent non-merge commit whose touched files match any grep
# pattern listed in <cache_check_lists_file_path> (one pattern per line; lines
# starting with '#' are comments). If the list file does not exist, the most
# recent non-merge commit hash is returned unconditionally.
function(Check_Last_Modify cache_check_lists_file_path working_dir last_modified_commit_id)
if(EXISTS "${working_dir}")
if(EXISTS "${cache_check_lists_file_path}")
# Number of commits inspected (and skipped) so far.
set(GIT_LOG_SKIP_NUM 0)
set(_MATCH_ALL ON CACHE BOOL "Match all")
set(_LOOP_STATUS ON CACHE BOOL "Whether out of loop")
file(STRINGS ${cache_check_lists_file_path} CACHE_IGNORE_TXT)
while(_LOOP_STATUS)
foreach(_IGNORE_ENTRY ${CACHE_IGNORE_TXT})
# Skip comment lines (anything not starting with a non-'#' character).
if(NOT _IGNORE_ENTRY MATCHES "^[^#]+")
continue()
endif()
# At least one real pattern exists, so the "no patterns" fallback is off.
set(_MATCH_ALL OFF)
# List the files touched by the commit at offset GIT_LOG_SKIP_NUM.
execute_process(COMMAND git log --no-merges -1 --skip=${GIT_LOG_SKIP_NUM} --name-status --pretty= WORKING_DIRECTORY ${working_dir} OUTPUT_VARIABLE CHANGE_FILES)
if(NOT CHANGE_FILES STREQUAL "")
string(REPLACE "\n" ";" _CHANGE_FILES ${CHANGE_FILES})
foreach(_FILE_ENTRY ${_CHANGE_FILES})
# Last whitespace-separated token is the path (drops the status letter).
string(REGEX MATCH "[^ \t]+$" _FILE_NAME ${_FILE_ENTRY})
execute_process(COMMAND sh -c "echo ${_FILE_NAME} | grep ${_IGNORE_ENTRY}" RESULT_VARIABLE return_code)
if (return_code EQUAL 0)
# Match: report this commit's hash and terminate the outer loop.
execute_process(COMMAND git log --no-merges -1 --skip=${GIT_LOG_SKIP_NUM} --pretty=%H WORKING_DIRECTORY ${working_dir} OUTPUT_VARIABLE LAST_MODIFIED_COMMIT_ID)
set (${last_modified_commit_id} ${LAST_MODIFIED_COMMIT_ID} PARENT_SCOPE)
set(_LOOP_STATUS OFF)
endif()
endforeach()
else()
# Ran out of history without a match -- stop looping.
set(_LOOP_STATUS OFF)
endif()
endforeach()
# List file held no usable pattern: fall back to the latest commit hash.
if(_MATCH_ALL)
execute_process(COMMAND git log --no-merges -1 --skip=${GIT_LOG_SKIP_NUM} --pretty=%H WORKING_DIRECTORY ${working_dir} OUTPUT_VARIABLE LAST_MODIFIED_COMMIT_ID)
set (${last_modified_commit_id} ${LAST_MODIFIED_COMMIT_ID} PARENT_SCOPE)
set(_LOOP_STATUS OFF)
endif()
math(EXPR GIT_LOG_SKIP_NUM "${GIT_LOG_SKIP_NUM} + 1")
endwhile(_LOOP_STATUS)
else()
# No check list file: report the most recent non-merge commit.
# NOTE(review): GIT_LOG_SKIP_NUM is unset on this path and expands empty,
# producing "--skip=" -- git treats that as 0, but worth confirming.
execute_process(COMMAND git log --no-merges -1 --skip=${GIT_LOG_SKIP_NUM} --pretty=%H WORKING_DIRECTORY ${working_dir} OUTPUT_VARIABLE LAST_MODIFIED_COMMIT_ID)
set (${last_modified_commit_id} ${LAST_MODIFIED_COMMIT_ID} PARENT_SCOPE)
endif()
else()
message(FATAL_ERROR "The directory ${working_dir} does not exist")
endif()
endfunction()
# ExternalProject_Use_Cache(<project_name> <package_file> <install_path>)
#
# Registers <project_name> as an ExternalProject whose download / configure /
# build / install steps are echo-only no-ops; an always-run `extract` step
# untars the cached <package_file> in the top-level binary directory so the
# contents land at <install_path> for dependent projects.
function(ExternalProject_Use_Cache project_name package_file install_path)
message(STATUS "Will use cached package file: ${package_file}")
ExternalProject_Add(${project_name}
DOWNLOAD_COMMAND ${CMAKE_COMMAND} -E echo
"No download step needed (using cached package)"
CONFIGURE_COMMAND ${CMAKE_COMMAND} -E echo
"No configure step needed (using cached package)"
BUILD_COMMAND ${CMAKE_COMMAND} -E echo
"No build step needed (using cached package)"
INSTALL_COMMAND ${CMAKE_COMMAND} -E echo
"No install step needed (using cached package)"
)
# We want our tar files to contain the Install/<package> prefix (not for any
# very special reason, only for consistency and so that we can identify them
# in the extraction logs) which means that we must extract them in the
# binary (top-level build) directory to have them installed in the right
# place for subsequent ExternalProjects to pick them up. It seems that the
# only way to control the working directory is with Add_Step!
ExternalProject_Add_Step(${project_name} extract
ALWAYS 1
COMMAND
${CMAKE_COMMAND} -E echo
"Extracting ${package_file} to ${install_path}"
COMMAND
${CMAKE_COMMAND} -E tar xzvf ${package_file} ${install_path}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
)
ExternalProject_Add_StepTargets(${project_name} extract)
endfunction()
# ExternalProject_Create_Cache(<project_name> <package_file> <install_path>
#                              <cache_username> <cache_password> <cache_path>)
#
# Adds a post-install `package` step to <project_name> that tars <install_path>
# into <package_file> and uploads it via curl (HTTP PUT) to the artifact cache
# at <cache_path>, removing any stale local package first.
function(ExternalProject_Create_Cache project_name package_file install_path cache_username cache_password cache_path)
if(EXISTS ${package_file})
message(STATUS "Removing existing package file: ${package_file}")
file(REMOVE ${package_file})
endif()
message(STATUS "Will create cached package file: ${package_file}")
ExternalProject_Add_Step(${project_name} package
DEPENDEES install
BYPRODUCTS ${package_file}
COMMAND ${CMAKE_COMMAND} -E echo "Updating cached package file: ${package_file}"
COMMAND ${CMAKE_COMMAND} -E tar czvf ${package_file} ${install_path}
COMMAND ${CMAKE_COMMAND} -E echo "Uploading package file ${package_file} to ${cache_path}"
COMMAND curl -u${cache_username}:${cache_password} -T ${package_file} ${cache_path}
)
ExternalProject_Add_StepTargets(${project_name} package)
endfunction()
# ADD_THIRDPARTY_LIB(<lib_name>
#   [STATIC_LIB <path>] [SHARED_LIB <path>]
#   [DEPS <targets...>] [INCLUDE_DIRECTORIES <dirs...>])
#
# Declares IMPORTED targets <lib_name>_static and/or <lib_name>_shared for a
# pre-built third-party library, wiring up transitive link dependencies (DEPS)
# and interface include directories. At least one of STATIC_LIB / SHARED_LIB
# must be given; on Windows the SHARED_LIB path is treated as the import .lib.
function(ADD_THIRDPARTY_LIB LIB_NAME)
  set(options)
  set(one_value_args SHARED_LIB STATIC_LIB)
  set(multi_value_args DEPS INCLUDE_DIRECTORIES)
  cmake_parse_arguments(ARG
    "${options}"
    "${one_value_args}"
    "${multi_value_args}"
    ${ARGN})
  if(ARG_UNPARSED_ARGUMENTS)
    message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}")
  endif()

  if(ARG_STATIC_LIB AND ARG_SHARED_LIB)
    # Both variants supplied: register each one.
    # BUG FIX: an `if(NOT ARG_STATIC_LIB) message(FATAL_ERROR ...)` check that
    # previously sat here was unreachable (ARG_STATIC_LIB is known true inside
    # this branch) and has been removed.
    set(AUG_LIB_NAME "${LIB_NAME}_static")
    add_library(${AUG_LIB_NAME} STATIC IMPORTED)
    set_target_properties(${AUG_LIB_NAME}
      PROPERTIES IMPORTED_LOCATION "${ARG_STATIC_LIB}")
    if(ARG_DEPS)
      set_target_properties(${AUG_LIB_NAME}
        PROPERTIES INTERFACE_LINK_LIBRARIES "${ARG_DEPS}")
    endif()
    message(STATUS "Added static library dependency ${AUG_LIB_NAME}: ${ARG_STATIC_LIB}")
    if(ARG_INCLUDE_DIRECTORIES)
      set_target_properties(${AUG_LIB_NAME}
        PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
        "${ARG_INCLUDE_DIRECTORIES}")
    endif()

    set(AUG_LIB_NAME "${LIB_NAME}_shared")
    add_library(${AUG_LIB_NAME} SHARED IMPORTED)
    if(WIN32)
      # Mark the ".lib" location as part of a Windows DLL
      set_target_properties(${AUG_LIB_NAME}
        PROPERTIES IMPORTED_IMPLIB "${ARG_SHARED_LIB}")
    else()
      set_target_properties(${AUG_LIB_NAME}
        PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}")
    endif()
    if(ARG_DEPS)
      set_target_properties(${AUG_LIB_NAME}
        PROPERTIES INTERFACE_LINK_LIBRARIES "${ARG_DEPS}")
    endif()
    message(STATUS "Added shared library dependency ${AUG_LIB_NAME}: ${ARG_SHARED_LIB}")
    if(ARG_INCLUDE_DIRECTORIES)
      set_target_properties(${AUG_LIB_NAME}
        PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
        "${ARG_INCLUDE_DIRECTORIES}")
    endif()
  elseif(ARG_STATIC_LIB)
    set(AUG_LIB_NAME "${LIB_NAME}_static")
    add_library(${AUG_LIB_NAME} STATIC IMPORTED)
    set_target_properties(${AUG_LIB_NAME}
      PROPERTIES IMPORTED_LOCATION "${ARG_STATIC_LIB}")
    if(ARG_DEPS)
      set_target_properties(${AUG_LIB_NAME}
        PROPERTIES INTERFACE_LINK_LIBRARIES "${ARG_DEPS}")
    endif()
    message(STATUS "Added static library dependency ${AUG_LIB_NAME}: ${ARG_STATIC_LIB}")
    if(ARG_INCLUDE_DIRECTORIES)
      set_target_properties(${AUG_LIB_NAME}
        PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
        "${ARG_INCLUDE_DIRECTORIES}")
    endif()
  elseif(ARG_SHARED_LIB)
    set(AUG_LIB_NAME "${LIB_NAME}_shared")
    add_library(${AUG_LIB_NAME} SHARED IMPORTED)
    if(WIN32)
      # Mark the ".lib" location as part of a Windows DLL
      set_target_properties(${AUG_LIB_NAME}
        PROPERTIES IMPORTED_IMPLIB "${ARG_SHARED_LIB}")
    else()
      set_target_properties(${AUG_LIB_NAME}
        PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}")
    endif()
    message(STATUS "Added shared library dependency ${AUG_LIB_NAME}: ${ARG_SHARED_LIB}")
    if(ARG_DEPS)
      set_target_properties(${AUG_LIB_NAME}
        PROPERTIES INTERFACE_LINK_LIBRARIES "${ARG_DEPS}")
    endif()
    if(ARG_INCLUDE_DIRECTORIES)
      set_target_properties(${AUG_LIB_NAME}
        PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
        "${ARG_INCLUDE_DIRECTORIES}")
    endif()
  else()
    message(FATAL_ERROR "No static or shared library provided for ${LIB_NAME}")
  endif()
endfunction()
# Registers `name` as the current option category; subsequent define_option*
# calls are grouped under it (categories are later printed by config_summary).
macro(set_option_category name)
set(KNOWHERE_OPTION_CATEGORY ${name})
list(APPEND "KNOWHERE_OPTION_CATEGORIES" ${name})
endmacro()
# Declares a boolean cache option and records its metadata (description,
# default, type) for the config_summary report.
macro(define_option name description default)
option(${name} ${description} ${default})
list(APPEND "KNOWHERE_${KNOWHERE_OPTION_CATEGORY}_OPTION_NAMES" ${name})
set("${name}_OPTION_DESCRIPTION" ${description})
set("${name}_OPTION_DEFAULT" ${default})
set("${name}_OPTION_TYPE" "bool")
endmacro()
# Joins the elements of the list variable named `lst` with `glue`, storing the
# result in the variable named `out` (empty list yields an empty string).
function(list_join lst glue out)
if("${${lst}}" STREQUAL "")
set(${out} "" PARENT_SCOPE)
return()
endif()
list(GET ${lst} 0 joined)
list(REMOVE_AT ${lst} 0)
foreach(item ${${lst}})
set(joined "${joined}${glue}${item}")
endforeach()
set(${out} ${joined} PARENT_SCOPE)
endfunction()
# Declares a string cache option; any extra arguments are treated as the set
# of allowed values (exposed via the STRINGS cache property for cmake-gui).
macro(define_option_string name description default)
set(${name} ${default} CACHE STRING ${description})
list(APPEND "KNOWHERE_${KNOWHERE_OPTION_CATEGORY}_OPTION_NAMES" ${name})
set("${name}_OPTION_DESCRIPTION" ${description})
set("${name}_OPTION_DEFAULT" "\"${default}\"")
set("${name}_OPTION_TYPE" "string")
set("${name}_OPTION_ENUM" ${ARGN})
list_join("${name}_OPTION_ENUM" "|" "${name}_OPTION_ENUM")
if(NOT ("${${name}_OPTION_ENUM}" STREQUAL ""))
set_property(CACHE ${name} PROPERTY STRINGS ${ARGN})
endif()
endmacro()
#----------------------------------------------------------------------
set_option_category("Thirdparty")
# Dependency acquisition mode; allowed values: AUTO, BUNDLED, SYSTEM
# (exact semantics live in ThirdPartyPackages.cmake -- not visible here).
set(KNOWHERE_DEPENDENCY_SOURCE_DEFAULT "AUTO")
define_option_string(KNOWHERE_DEPENDENCY_SOURCE
"Method to use for acquiring KNOWHERE's build dependencies"
"${KNOWHERE_DEPENDENCY_SOURCE_DEFAULT}"
"AUTO"
"BUNDLED"
"SYSTEM")
define_option(KNOWHERE_VERBOSE_THIRDPARTY_BUILD
"Show output from ExternalProjects rather than just logging to files" ON)
define_option(KNOWHERE_BOOST_USE_SHARED "Rely on boost shared libraries where relevant" OFF)
define_option(KNOWHERE_BOOST_VENDORED "Use vendored Boost instead of existing Boost. \
Note that this requires linking Boost statically" OFF)
define_option(KNOWHERE_BOOST_HEADER_ONLY "Use only BOOST headers" OFF)
define_option(KNOWHERE_WITH_ARROW "Build with ARROW" ON)
define_option(KNOWHERE_WITH_LAPACK "Build with LAPACK library" ON)
define_option(KNOWHERE_WITH_FAISS "Build with FAISS library" ON)
define_option(KNOWHERE_WITH_FAISS_GPU_VERSION "Build with FAISS GPU version" ON)
define_option(KNOWHERE_WITH_OPENBLAS "Build with OpenBLAS library" ON)
#----------------------------------------------------------------------
if(MSVC)
set_option_category("MSVC")
define_option(MSVC_LINK_VERBOSE
"Pass verbose linking options when linking libraries and executables"
OFF)
define_option(KNOWHERE_USE_STATIC_CRT "Build KNOWHERE with statically linked CRT" OFF)
endif()
#----------------------------------------------------------------------
set_option_category("Test and benchmark")
# Mirror the externally supplied BUILD_UNIT_TEST switch into a summarized
# option so it shows up in config_summary.
if (BUILD_UNIT_TEST)
define_option(KNOWHERE_BUILD_TESTS "Build the KNOWHERE googletest unit tests" ON)
else()
define_option(KNOWHERE_BUILD_TESTS "Build the KNOWHERE googletest unit tests" OFF)
endif(BUILD_UNIT_TEST)
# Prints a formatted summary of every option registered through the
# define_option* macros, grouped by category; values still equal to their
# recorded default are tagged "[default]", and string enums list their
# allowed values.
macro(config_summary)
message(STATUS "---------------------------------------------------------------------")
message(STATUS "KNOWHERE version: ${KNOWHERE_VERSION}")
message(STATUS)
message(STATUS "Build configuration summary:")
message(STATUS " Generator: ${CMAKE_GENERATOR}")
message(STATUS " Build type: ${CMAKE_BUILD_TYPE}")
message(STATUS " Source directory: ${CMAKE_CURRENT_SOURCE_DIR}")
if(${CMAKE_EXPORT_COMPILE_COMMANDS})
message(
STATUS " Compile commands: ${CMAKE_CURRENT_BINARY_DIR}/compile_commands.json")
endif()
foreach(category ${KNOWHERE_OPTION_CATEGORIES})
message(STATUS)
message(STATUS "${category} options:")
set(option_names ${KNOWHERE_${category}_OPTION_NAMES})
# First pass: width of the widest (quoted) value, for column alignment.
set(max_value_length 0)
foreach(name ${option_names})
string(LENGTH "\"${${name}}\"" value_length)
if(${max_value_length} LESS ${value_length})
set(max_value_length ${value_length})
endif()
endforeach()
# Second pass: print each option padded to the computed width.
foreach(name ${option_names})
# String options are echoed quoted; booleans as-is.
if("${${name}_OPTION_TYPE}" STREQUAL "string")
set(value "\"${${name}}\"")
else()
set(value "${${name}}")
endif()
set(default ${${name}_OPTION_DEFAULT})
set(description ${${name}_OPTION_DESCRIPTION})
string(LENGTH ${description} description_length)
if(${description_length} LESS 70)
string(
SUBSTRING
" "
${description_length} -1 description_padding)
else()
set(description_padding "
")
endif()
set(comment "[${name}]")
if("${value}" STREQUAL "${default}")
set(comment "[default] ${comment}")
endif()
if(NOT ("${${name}_OPTION_ENUM}" STREQUAL ""))
set(comment "${comment} [${${name}_OPTION_ENUM}]")
endif()
# Pad (or truncate) the value to the common column width.
string(
SUBSTRING "${value} "
0 ${max_value_length} value)
message(STATUS " ${description} ${description_padding} ${value} ${comment}")
endforeach()
endforeach()
endmacro()
此差异已折叠。
#pragma once
#include <memory>
#include <knowhere/common/array.h>
namespace zilliz {
namespace knowhere {
// Returns a copy of `origin`.
// NOTE(review): copy depth (deep vs. structural) is not visible from this
// header -- confirm against the implementation before relying on it.
ArrayPtr
CopyArray(const ArrayPtr &origin);
// Returns a copy of the schema `origin` (same caveat as CopyArray).
SchemaPtr
CopySchema(const SchemaPtr &origin);
} // namespace knowhere
} // namespace zilliz
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#pragma once

namespace zilliz {
namespace knowhere {

// Introduces the locals `tensor`, `p_data`, `dim` and `rows` into the caller's
// scope from the first tensor of `dataset`; the shape is read as [rows, dim].
// Deliberately NOT wrapped in do { } while(0): the declared locals must remain
// visible after expansion. Assumes dataset->tensor() is non-empty and the
// tensor is 2-D -- callers must guarantee this.
//
// BUG FIX: the last macro line previously ended with a stray '\' continuation,
// which pulled the following '}' into the macro body and left the zilliz
// namespace unbalanced in every translation unit including this header. The
// macro parameter is now also parenthesized at its point of use.
#define GETTENSOR(dataset) \
auto tensor = (dataset)->tensor()[0]; \
auto p_data = tensor->raw_data(); \
auto dim = tensor->shape()[1]; \
auto rows = tensor->shape()[0];

} // namespace knowhere
} // namespace zilliz
\ No newline at end of file
#pragma once
#include <memory>
#include <knowhere/common/dataset.h>
#include <SPTAG/AnnService/inc/Core/VectorIndex.h>
namespace zilliz {
namespace knowhere {
// Conversion helpers between knowhere Dataset and SPTAG's native containers.
// Builds an SPTAG VectorSet from the dataset's vectors.
std::shared_ptr<SPTAG::VectorSet>
ConvertToVectorSet(const DatasetPtr &dataset);
// Builds an SPTAG MetadataSet from the dataset.
std::shared_ptr<SPTAG::MetadataSet>
ConvertToMetadataSet(const DatasetPtr &dataset);
// Prepares SPTAG QueryResult objects for the dataset's query vectors,
// presumably sized from `config` -- confirm against the implementation.
std::vector<SPTAG::QueryResult>
ConvertToQueryResult(const DatasetPtr &dataset, const Config &config);
// Converts SPTAG query results back into a knowhere Dataset.
DatasetPtr
ConvertToDataset(std::vector<SPTAG::QueryResult> query_results);
} // namespace knowhere
} // namespace zilliz
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#pragma once
#include <memory>
#include <knowhere/common/dataset.h>
namespace zilliz {
namespace knowhere {
// Factory helpers for building arrays / tensors / schema fields from raw
// buffers. Naming suggests the "*Smart" variants take ownership of `data`
// while the plain variants do not -- ownership is not visible from this
// header; confirm against the implementations before relying on it.
extern ArrayPtr
ConstructInt64ArraySmart(uint8_t *data, int64_t size);
extern ArrayPtr
ConstructFloatArraySmart(uint8_t *data, int64_t size);
extern TensorPtr
ConstructFloatTensorSmart(uint8_t *data, int64_t size, std::vector<int64_t> shape);
extern ArrayPtr
ConstructInt64Array(uint8_t *data, int64_t size);
extern ArrayPtr
ConstructFloatArray(uint8_t *data, int64_t size);
extern TensorPtr
ConstructFloatTensor(uint8_t *data, int64_t size, std::vector<int64_t> shape);
// Schema field constructors (field name only; type is fixed by the function).
extern FieldPtr
ConstructInt64Field(const std::string &name);
extern FieldPtr
ConstructFloatField(const std::string &name);
} // namespace knowhere
} // namespace zilliz
#pragma once
#include "arrow/array.h"
#include "knowhere/common/schema.h"
namespace zilliz {
namespace knowhere {
// Thin aliases over Apache Arrow array types so the rest of knowhere does not
// have to spell out the arrow namespace.
using ArrayData = arrow::ArrayData;
using ArrayDataPtr = std::shared_ptr<ArrayData>;
using Array = arrow::Array;
using ArrayPtr = std::shared_ptr<Array>;
using BooleanArray = arrow::BooleanArray;
using BooleanArrayPtr = std::shared_ptr<arrow::BooleanArray>;
// Typed numeric arrays, parameterized by an arrow type (e.g. arrow::Int64Type).
template<typename DType>
using NumericArray = arrow::NumericArray<DType>;
template<typename DType>
using NumericArrayPtr = std::shared_ptr<arrow::NumericArray<DType>>;
using BinaryArray = arrow::BinaryArray;
using BinaryArrayPtr = std::shared_ptr<arrow::BinaryArray>;
using FixedSizeBinaryArray = arrow::FixedSizeBinaryArray;
using FixedSizeBinaryArrayPtr = std::shared_ptr<arrow::FixedSizeBinaryArray>;
using Decimal128Array = arrow::Decimal128Array;
using Decimal128ArrayPtr = std::shared_ptr<arrow::Decimal128Array>;
} // namespace knowhere
} // namespace zilliz
#pragma once

#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "knowhere/common/id.h"

namespace zilliz {
namespace knowhere {

// A named blob: one serialized component (id + shared byte buffer + length).
struct Binary {
    ID id;
    std::shared_ptr<uint8_t> data;
    int64_t size = 0;
};
using BinaryPtr = std::shared_ptr<Binary>;

// String-keyed collection of Binary blobs (e.g. the pieces of a serialized
// index). Later Append calls with the same name replace the earlier entry.
class BinarySet {
 public:
    // Returns the blob registered under `name`.
    // Throws std::out_of_range (from std::map::at) if no such entry exists.
    BinaryPtr
    GetByName(const std::string &name) const {
        return binary_map_.at(name);
    }

    // Registers `binary` under `name`, replacing any previous entry.
    void
    Append(const std::string &name, BinaryPtr binary) {
        binary_map_[name] = std::move(binary);
    }

    // Convenience overload: wraps (data, size) in a fresh Binary whose `id`
    // is left default-constructed.
    void
    Append(const std::string &name, std::shared_ptr<uint8_t> data, int64_t size) {
        auto binary = std::make_shared<Binary>();
        // Moved instead of copied: avoids an unnecessary atomic refcount
        // round-trip on the shared_ptr (was a plain copy before).
        binary->data = std::move(data);
        binary->size = size;
        binary_map_[name] = std::move(binary);
    }

    //void
    //Append(const std::string &name, void *data, int64_t size, ID id) {
    //    Binary binary;
    //    binary.data = data;
    //    binary.size = size;
    //    binary.id = id;
    //    binary_map_[name] = binary;
    //}

    // Removes all entries.
    void clear() {
        binary_map_.clear();
    }

 public:
    std::map<std::string, BinaryPtr> binary_map_;
};

} // namespace knowhere
} // namespace zilliz
#pragma once

#include <cstdint>
#include <cstdlib>
#include <memory>

#include "arrow/buffer.h"

namespace zilliz {
namespace knowhere {

using Buffer = arrow::Buffer;
using BufferPtr = std::shared_ptr<Buffer>;
using MutableBuffer = arrow::MutableBuffer;
using MutableBufferPtr = std::shared_ptr<MutableBuffer>;

namespace internal {

// Deleter used by the "*Smart" factories below, whose buffers OWN their
// malloc'd storage: releases the storage and the Buffer object itself.
struct BufferDeleter {
    void operator()(Buffer *buffer) {
        free((void *) buffer->data());
        // BUG FIX: the Buffer object itself was previously leaked -- a
        // shared_ptr custom deleter replaces `delete`, so it must perform it.
        // (arrow::Buffer has a virtual destructor, so deleting a
        // MutableBuffer through Buffer* here is safe.)
        delete buffer;
    }
};

} // namespace internal

// Wraps malloc'd memory; the returned buffer takes ownership of `data` and
// frees it when the last reference dies.
inline BufferPtr
MakeBufferSmart(uint8_t *data, const int64_t size) {
    return BufferPtr(new Buffer(data, size), internal::BufferDeleter());
}

inline MutableBufferPtr
MakeMutableBufferSmart(uint8_t *data, const int64_t size) {
    return MutableBufferPtr(new MutableBuffer(data, size), internal::BufferDeleter());
}

// Non-owning views: `data` must outlive the returned buffer.
inline BufferPtr
MakeBuffer(uint8_t *data, const int64_t size) {
    return std::make_shared<Buffer>(data, size);
}

inline MutableBufferPtr
MakeMutableBuffer(uint8_t *data, const int64_t size) {
    return std::make_shared<MutableBuffer>(data, size);
}

} // namespace knowhere
} // namespace zilliz
#pragma once
#include <jsoncons/json.hpp>
namespace zilliz {
namespace knowhere {
using Config = jsoncons::json;
} // namespace knowhere
} // namespace zilliz
#pragma once
#include <vector>
#include <memory>
#include "knowhere/common/array.h"
#include "knowhere/common/buffer.h"
#include "knowhere/common/tensor.h"
#include "knowhere/common/schema.h"
#include "knowhere/common/config.h"
#include "knowhere/adapter/arrow.h"
namespace zilliz {
namespace knowhere {
class Dataset;
using DatasetPtr = std::shared_ptr<Dataset>;
// Owning container pairing arrow Arrays (id/attribute columns) with arrow
// Tensors (dense vector data), each with an optional Schema.
// Copying is only possible through Clone() (deep copy); copy ctor/assignment
// are deleted.
class Dataset {
 public:
    Dataset() = default;

    // Full constructor: arrays + tensors with their schemas.
    Dataset(std::vector<ArrayPtr> &&array, SchemaPtr array_schema,
            std::vector<TensorPtr> &&tensor, SchemaPtr tensor_schema)
        : array_(std::move(array)),
          array_schema_(std::move(array_schema)),
          tensor_(std::move(tensor)),
          tensor_schema_(std::move(tensor_schema)) {}

    // Arrays-only dataset.
    Dataset(std::vector<ArrayPtr> array, SchemaPtr array_schema)
        : array_(std::move(array)), array_schema_(std::move(array_schema)) {}

    // Tensors-only dataset.
    Dataset(std::vector<TensorPtr> tensor, SchemaPtr tensor_schema)
        : tensor_(std::move(tensor)), tensor_schema_(std::move(tensor_schema)) {}

    Dataset(const Dataset &) = delete;
    Dataset &operator=(const Dataset &) = delete;

    // Deep-copy arrays, tensors and schemas into a fresh Dataset.
    DatasetPtr
    Clone() {
        auto dataset = std::make_shared<Dataset>();

        // Arrays are copied via the adapter helper (knowhere/adapter/arrow.h).
        std::vector<ArrayPtr> clone_array;
        for (auto &array : array_) {
            clone_array.emplace_back(CopyArray(array));
        }
        dataset->set_array(clone_array);

        // Tensors are copied by duplicating the backing buffer.
        std::vector<TensorPtr> clone_tensor;
        for (auto &tensor : tensor_) {
            auto buffer = tensor->data();
            std::shared_ptr<Buffer> copy_buffer;
            // TODO: checkout copy success;
            // NOTE(review): Buffer::Copy returns a status that is ignored here —
            // on failure copy_buffer may stay null; confirm and handle.
            buffer->Copy(0, buffer->size(), &copy_buffer);
            auto copy = std::make_shared<Tensor>(tensor->type(), copy_buffer, tensor->shape());
            clone_tensor.emplace_back(copy);
        }
        dataset->set_tensor(clone_tensor);

        if (array_schema_)
            dataset->set_array_schema(CopySchema(array_schema_));
        if (tensor_schema_)
            dataset->set_tensor_schema(CopySchema(tensor_schema_));

        return dataset;
    }

 public:
    const std::vector<ArrayPtr> &
    array() const { return array_; }

    void
    set_array(std::vector<ArrayPtr> array) {
        array_ = std::move(array);
    }

    const std::vector<TensorPtr> &
    tensor() const { return tensor_; }

    void
    set_tensor(std::vector<TensorPtr> tensor) {
        tensor_ = std::move(tensor);
    }

    SchemaConstPtr
    array_schema() const { return array_schema_; }

    void
    set_array_schema(SchemaPtr array_schema) {
        array_schema_ = std::move(array_schema);
    }

    SchemaConstPtr
    tensor_schema() const { return tensor_schema_; }

    void
    set_tensor_schema(SchemaPtr tensor_schema) {
        tensor_schema_ = std::move(tensor_schema);
    }

    //const Config &
    //meta() const { return meta_; }
    //void
    //set_meta(Config meta) {
    //    meta_ = std::move(meta);
    //}

 private:
    SchemaPtr array_schema_;
    SchemaPtr tensor_schema_;
    std::vector<ArrayPtr> array_;
    std::vector<TensorPtr> tensor_;
    //Config meta_;
};
using DatasetPtr = std::shared_ptr<Dataset>;
} // namespace knowhere
} // namespace zilliz
#pragma once
namespace zilliz {
namespace sched {
namespace master {
} // namespace master
} // namespace sched
} // namespace zilliz
#pragma once
#include <cstdint>
#include "zlibrary/error/error.h"
namespace zilliz {
namespace knowhere {
using Error = zilliz::lib::ErrorCode;
constexpr Error STORE_SUCCESS = zilliz::lib::SUCCESS_CODE;
constexpr Error ERROR_CODE_BASE = 0x36000;
constexpr Error ERROR_CODE_END = 0x37000;
constexpr Error
ToGlobalErrorCode(const Error error_code) {
return zilliz::lib::ToGlobalErrorCode(error_code, ERROR_CODE_BASE);
}
class Exception : public zilliz::lib::Exception {
public:
Exception(const Error error_code,
const std::string &message = nullptr)
: zilliz::lib::Exception(error_code, "KNOWHERE", message) {}
};
constexpr Error UNEXPECTED = ToGlobalErrorCode(0x001);
constexpr Error UNSUPPORTED = ToGlobalErrorCode(0x002);
constexpr Error NULL_POINTER = ToGlobalErrorCode(0x003);
constexpr Error OVERFLOW = ToGlobalErrorCode(0x004);
constexpr Error INVALID_ARGUMENT = ToGlobalErrorCode(0x005);
constexpr Error UNSUPPORTED_TYPE = ToGlobalErrorCode(0x006);
} // namespace store
} // namespace zilliz
using Error = zilliz::store::Error;
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#pragma once
#include <exception>
#include <string>
namespace zilliz {
namespace knowhere {
// Exception type carrying a message and, optionally, the throw site
// (function, file, line). Implementation lives in exception.cpp.
class KnowhereException : public std::exception {
 public:
    explicit KnowhereException(const std::string &msg);

    KnowhereException(const std::string &msg, const char *funName,
                      const char *file, int line);

    // std::exception interface; presumably returns `msg` — implementation not
    // visible here, confirm in exception.cpp.
    const char *what() const noexcept override;

    std::string msg;  // stored (possibly site-decorated) message
};
// Throw a KnowhereException recording the message and the throw site.
#define KNOWHERE_THROW_MSG(MSG)\
do {\
    throw KnowhereException(MSG, __PRETTY_FUNCTION__, __FILE__, __LINE__);\
} while (false)

// printf-style variant. The name keeps the historical "KNOHERE" typo so
// existing call sites continue to compile.
#define KNOHERE_THROW_FORMAT(FMT, ...)\
do { \
    std::string __s;\
    int __size = snprintf(nullptr, 0, FMT, __VA_ARGS__);\
    __s.resize(__size); /* fix: was __size + 1, which embedded a stray NUL in the message */ \
    snprintf(&__s[0], __size + 1, FMT, __VA_ARGS__);\
    throw KnowhereException(__s, __PRETTY_FUNCTION__, __FILE__, __LINE__); /* fix: threw faiss::FaissException, which this header does not provide */ \
} while (false)
}
}
\ No newline at end of file
#pragma once
//#include "zcommon/id/id.h"
//using ID = zilliz::common::ID;
#include <stdint.h>
#include <string>
namespace zilliz {
namespace knowhere {
// Fixed-size 20-byte identifier stored as five int32 words.
class ID {
 public:
    constexpr static int64_t kIDSize = 20;  // bytes: 5 * sizeof(int32_t)

 public:
    const int32_t *
    data() const { return content_; }

    int32_t *
    mutable_data() { return content_; }

    // True when the ID carries meaningful content — criterion defined in the .cpp.
    bool
    IsValid() const;

    std::string
    ToString() const;

    bool
    operator==(const ID &that) const;

    // Strict weak ordering, so IDs can key ordered containers.
    bool
    operator<(const ID &that) const;

 protected:
    int32_t content_[5] = {};  // zero-initialized payload
};
} // namespace knowhere
} // namespace zilliz
#pragma once
#include <memory>
#include "arrow/type.h"
namespace zilliz {
namespace knowhere {
using DataType = arrow::DataType;
using Field = arrow::Field;
using FieldPtr = std::shared_ptr<arrow::Field>;
using Schema = arrow::Schema;
using SchemaPtr = std::shared_ptr<Schema>;
using SchemaConstPtr = std::shared_ptr<const Schema>;
} // namespace knowhere
} // namespace zilliz
#pragma once
#include <memory>
#include "arrow/tensor.h"
namespace zilliz {
namespace knowhere {
using Tensor = arrow::Tensor;
using TensorPtr = std::shared_ptr<Tensor>;
} // namespace knowhere
} // namespace zilliz
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#pragma once
#include <string>
#include <chrono>
namespace zilliz {
namespace knowhere {
// Wall-clock stopwatch that logs elapsed spans under `header`.
// log_level legend: trace = 0, debug = 1, info = 2, warn = 3, error = 4, critical = 5
class TimeRecorder {
    using stdclock = std::chrono::high_resolution_clock;

 public:
    TimeRecorder(const std::string &header,
                 int64_t log_level = 0);

    ~TimeRecorder();

    // Elapsed time since the last Record/construction; resets the section timer.
    double RecordSection(const std::string &msg);

    // Elapsed time since construction; does not reset anything.
    double ElapseFromBegin(const std::string &msg);

    // Render a span (presumably microseconds — confirm in timer.cpp) as text.
    static std::string GetTimeSpanStr(double span);

 private:
    void PrintTimeRecord(const std::string &msg, double span);

 private:
    std::string header_;           // prefix for every log line
    stdclock::time_point start_;   // construction time
    stdclock::time_point last_;    // last section boundary
    int64_t log_level_;
};
}
}
#pragma once
#include <memory>
#include "knowhere/common/binary_set.h"
#include "knowhere/common/dataset.h"
#include "knowhere/index/index_type.h"
#include "knowhere/index/index_model.h"
#include "knowhere/index/preprocessor/preprocessor.h"
namespace zilliz {
namespace knowhere {
// Abstract base of every knowhere index: serializable, loadable, searchable.
class Index {
 public:
    // fix: indexes are deleted through IndexPtr (shared_ptr of base); a
    // polymorphic base needs a virtual destructor.
    virtual ~Index() = default;

    virtual BinarySet
    Serialize() = 0;

    virtual void
    Load(const BinarySet &index_binary) = 0;

    // @throw
    virtual DatasetPtr
    Search(const DatasetPtr &dataset, const Config &config) = 0;

 public:
    IndexType
    idx_type() const { return idx_type_; }

    void
    set_idx_type(IndexType idx_type) { idx_type_ = idx_type; }

    // Optional hooks; the default implementations ignore their arguments.
    virtual void
    set_preprocessor(PreprocessorPtr preprocessor) {}

    virtual void
    set_index_model(IndexModelPtr model) {}

 private:
    IndexType idx_type_ = IndexType::kUnknown;  // fix: was read uninitialized if never set
};

using IndexPtr = std::shared_ptr<Index>;
} // namespace knowhere
} // namespace zilliz
#pragma once
#include <memory>
#include "knowhere/common/binary_set.h"
namespace zilliz {
namespace knowhere {
// Trained parameters of an index, (de)serializable independently of the index.
class IndexModel {
 public:
    // fix: models are held via IndexModelPtr; give the polymorphic base a
    // virtual destructor (defensive — shared_ptr often erases the deleter,
    // but raw deletion through the base would be UB without this).
    virtual ~IndexModel() = default;

    virtual BinarySet
    Serialize() = 0;

    virtual void
    Load(const BinarySet &binary) = 0;
};

using IndexModelPtr = std::shared_ptr<IndexModel>;
} // namespace knowhere
} // namespace zilliz
#pragma once
namespace zilliz {
namespace knowhere {
// Identifier persisted with each index; vector-index kinds live in
// [kVecIdxBegin, kVecIdxEnd).
enum class IndexType {
    kUnknown = 0,
    kVecIdxBegin = 100,
    kVecIVFFlat = kVecIdxBegin,
    kVecIdxEnd,  // sentinel: one past the last vector index kind
};
} // namespace knowhere
} // namespace zilliz
#pragma once
#include <memory>
#include "preprocessor.h"
namespace zilliz {
namespace knowhere {
// Preprocessor that normalizes input vectors (see normalize.cpp for the exact
// norm used).
class NormalizePreprocessor : public Preprocessor {
 public:
    DatasetPtr
    Preprocess(const DatasetPtr &input) override;

 private:
    // Normalize one `dimension`-length vector in place.
    void
    Normalize(float *arr, int64_t dimension);
};

using NormalizePreprocessorPtr = std::shared_ptr<NormalizePreprocessor>;
} // namespace knowhere
} // namespace zilliz
#pragma once
#include <memory>
#include "knowhere/common/dataset.h"
namespace zilliz {
namespace knowhere {
// Transformation applied to a Dataset before indexing (e.g. normalization).
class Preprocessor {
 public:
    // fix: implementations are held via PreprocessorPtr; the polymorphic base
    // needs a virtual destructor.
    virtual ~Preprocessor() = default;

    virtual DatasetPtr
    Preprocess(const DatasetPtr &input) = 0;
};

using PreprocessorPtr = std::shared_ptr<Preprocessor>;
} // namespace knowhere
} // namespace zilliz
/*******************************************************************************
* Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#pragma once
#include "vector_index.h"
namespace zilliz {
namespace knowhere {
// TODO(linxj): rename CopyToGpu
// Clone `index` onto GPU `device_id`. // TODO(linxj): rename CopyToGpu
extern VectorIndexPtr
CopyCpuToGpu(const VectorIndexPtr &index, const int64_t &device_id, const Config &config);

// Clone a GPU-resident index back to host memory.
extern VectorIndexPtr
CopyGpuToCpu(const VectorIndexPtr &index, const Config &config);
}
}
\ No newline at end of file
#pragma once
#include <cstdint>
#include <memory>
#include "knowhere/index/vector_index/vector_index.h"
#include "knowhere/index/index_model.h"
#include <SPTAG/AnnService/inc/Core/VectorIndex.h>
namespace zilliz {
namespace knowhere {
// KD-tree + relative-neighborhood-graph index backed by Microsoft SPTAG,
// configured for float vectors with L2 distance.
class CPUKDTRNG : public VectorIndex {
 public:
    CPUKDTRNG() {
        index_ptr_ = SPTAG::VectorIndex::CreateInstance(SPTAG::IndexAlgoType::KDT,
                                                        SPTAG::VectorValueType::Float);
        index_ptr_->SetParameter("DistCalcMethod", "L2");
    }

 public:
    BinarySet
    Serialize() override;

    VectorIndexPtr Clone() override;

    void
    Load(const BinarySet &index_array) override;

 public:
    PreprocessorPtr
    BuildPreprocessor(const DatasetPtr &dataset, const Config &config) override;

    int64_t Count() override;

    int64_t Dimension() override;

    IndexModelPtr
    Train(const DatasetPtr &dataset, const Config &config) override;

    void
    Add(const DatasetPtr &dataset, const Config &config) override;

    DatasetPtr
    Search(const DatasetPtr &dataset, const Config &config) override;

    void Seal() override;

 private:
    // Push config entries into the SPTAG index (see cpu_kdt_rng.cpp).
    void
    SetParameters(const Config &config);

 private:
    PreprocessorPtr preprocessor_;
    std::shared_ptr<SPTAG::VectorIndex> index_ptr_;  // the wrapped SPTAG index
};

using CPUKDTRNGPtr = std::shared_ptr<CPUKDTRNG>;
// Serializable trained state for CPUKDTRNG.
class CPUKDTRNGIndexModel : public IndexModel {
 public:
    BinarySet
    Serialize() override;

    void
    Load(const BinarySet &binary) override;

 private:
    std::shared_ptr<SPTAG::VectorIndex> index_;
};

using CPUKDTRNGIndexModelPtr = std::shared_ptr<CPUKDTRNGIndexModel>;
} // namespace knowhere
} // namespace zilliz
#pragma once
#include <string>
namespace zilliz {
namespace knowhere {
// JSON config key names shared by the index implementations.
#define META_ROWS ("rows")
#define META_DIM ("dimension")
#define META_K ("k")
} // namespace knowhere
} // namespace zilliz
#pragma once
#include <faiss/gpu/StandardGpuResources.h>
#include "ivf.h"
namespace zilliz {
namespace knowhere {
// Singleton pool of faiss StandardGpuResources, partitioned per GPU device.
// Resources cycle between an idle list and an in-use list (see ResScope).
class FaissGpuResourceMgr {
 public:
    // Per-device pool configuration.
    struct DeviceParams {
        int64_t temp_mem_size = 0;    // faiss temporary-memory budget, bytes
        int64_t pinned_mem_size = 0;  // pinned host memory, bytes
        int64_t resource_num = 2;     // number of resource objects per device
    };

 public:
    using ResPtr = std::shared_ptr<faiss::gpu::StandardGpuResources>;
    using ResWPtr = std::weak_ptr<faiss::gpu::StandardGpuResources>;

    static FaissGpuResourceMgr &
    GetInstance();

    // Reserve `size` bytes of temp memory on `device_id` for `res`.
    void
    AllocateTempMem(ResPtr &res, const int64_t& device_id, const int64_t& size);

    // Register a device and its pool parameters; call before InitResource().
    void
    InitDevice(int64_t device_id,
               int64_t pin_mem_size = 0,
               int64_t temp_mem_size = 0,
               int64_t res_num = 2);

    void InitResource();

    // Obtain a resource for `device_id`, optionally sized for `alloc_size`.
    ResPtr GetRes(const int64_t &device_id, const int64_t& alloc_size = 0);

    void MoveToInuse(const int64_t &device_id, const ResPtr& res);
    void MoveToIdle(const int64_t &device_id, const ResPtr& res);

 protected:
    bool is_init = false;

    std::mutex mutex_;  // guards the maps below — confirm exact scope in gpu_ivf.cpp
    std::map<int64_t, DeviceParams> devices_params_;
    std::map<int64_t, std::vector<ResPtr>> in_use_;
    std::map<int64_t, std::vector<ResPtr>> idle_;
};
// RAII guard: marks `res` as in-use for `device_id` on construction and
// returns it to the idle pool on destruction. The destructor also calls
// noTempMemory() — NOTE(review): confirm that disabling temp memory on
// release (rather than on acquire) is intended.
class ResScope {
 public:
    ResScope(const int64_t device_id,std::shared_ptr<faiss::gpu::StandardGpuResources> &res) : resource(res), device_id(device_id) {
        FaissGpuResourceMgr::GetInstance().MoveToInuse(device_id, resource);
    }

    ~ResScope() {
        resource->noTempMemory();
        FaissGpuResourceMgr::GetInstance().MoveToIdle(device_id, resource);
    }

 private:
    std::shared_ptr<faiss::gpu::StandardGpuResources> resource;
    int64_t device_id;
};
// Mixin for GPU-resident indexes: tracks the device id and provides
// GPU<->CPU / GPU<->GPU cloning hooks.
class GPUIndex {
 public:
    explicit GPUIndex(const int &device_id) : gpu_id_(device_id) {};

    virtual VectorIndexPtr CopyGpuToCpu(const Config &config) = 0;
    virtual VectorIndexPtr CopyGpuToGpu(const int64_t &device_id, const Config &config) = 0;

    void SetGpuDevice(const int &gpu_id);
    const int64_t &GetGpuDevice();

 protected:
    int64_t gpu_id_;  // CUDA device ordinal this index lives on
};
// IVF index executed on a GPU via faiss GPU resources.
class GPUIVF : public IVF, public GPUIndex {
 public:
    explicit GPUIVF(const int &device_id) : IVF(), GPUIndex(device_id) {}

    // Wrap an already-built faiss index residing on `device_id`.
    explicit GPUIVF(std::shared_ptr<faiss::Index> index, const int64_t &device_id)
        : IVF(std::move(index)), GPUIndex(device_id) {};

    IndexModelPtr Train(const DatasetPtr &dataset, const Config &config) override;

    void set_index_model(IndexModelPtr model) override;

    //DatasetPtr Search(const DatasetPtr &dataset, const Config &config) override;
    VectorIndexPtr CopyGpuToCpu(const Config &config) override;
    VectorIndexPtr CopyGpuToGpu(const int64_t &device_id, const Config &config) override;

    VectorIndexPtr Clone() final;

    // TODO(linxj): Deprecated
    virtual IVFIndexPtr Copy_index_gpu_to_cpu();

 protected:
    void search_impl(int64_t n,
                     const float *data,
                     int64_t k,
                     float *distances,
                     int64_t *labels,
                     const Config &cfg) override;

    BinarySet SerializeImpl() override;

    void LoadImpl(const BinarySet &index_binary) override;
};
// GPU IVF with scalar quantization.
class GPUIVFSQ : public GPUIVF {
 public:
    explicit GPUIVFSQ(const int &device_id) : GPUIVF(device_id) {}

    explicit GPUIVFSQ(std::shared_ptr<faiss::Index> index, const int64_t& device_id) : GPUIVF(std::move(index),device_id) {};

    IndexModelPtr Train(const DatasetPtr &dataset, const Config &config) override;

 public:
    VectorIndexPtr CopyGpuToCpu(const Config &config) override;
};
// GPU IVF with product quantization.
class GPUIVFPQ : public GPUIVF {
 public:
    explicit GPUIVFPQ(const int &device_id) : GPUIVF(device_id) {}

    IndexModelPtr Train(const DatasetPtr &dataset, const Config &config) override;

 public:
    VectorIndexPtr CopyGpuToCpu(const Config &config) override;

 protected:
    // TODO(linxj): remove GenParams.
    std::shared_ptr<faiss::IVFSearchParameters> GenParams(const Config &config) override;
};
} // namespace knowhere
} // namespace zilliz
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "ivf.h"
#include "gpu_ivf.h"
namespace zilliz {
namespace knowhere {
// Flat (brute-force) faiss index: stores raw vectors and ids, exact search.
class IDMAP : public VectorIndex, public BasicIndex {
 public:
    IDMAP() : BasicIndex(nullptr) {};
    explicit IDMAP(std::shared_ptr<faiss::Index> index) : BasicIndex(std::move(index)) {};

    BinarySet Serialize() override;
    void Load(const BinarySet &index_binary) override;

    // Non-virtual: initializes the underlying flat index from `config`
    // (see idmap.cpp for the exact parameters consumed).
    void Train(const Config &config);

    DatasetPtr Search(const DatasetPtr &dataset, const Config &config) override;
    int64_t Count() override;
    VectorIndexPtr Clone() override;
    int64_t Dimension() override;
    void Add(const DatasetPtr &dataset, const Config &config) override;

    // Clone this index onto GPU `device_id`.
    VectorIndexPtr CopyCpuToGpu(const int64_t &device_id, const Config &config);

    void Seal() override;

    // Direct pointers into index storage — NOTE(review): returned pointers are
    // only valid while the index is alive and unmodified; confirm callers obey.
    virtual float *GetRawVectors();
    virtual int64_t *GetRawIds();

 protected:
    std::mutex mutex_;  // serializes mutating operations (usage in idmap.cpp)
};

using IDMAPPtr = std::shared_ptr<IDMAP>;
// GPU-resident variant of IDMAP.
class GPUIDMAP : public IDMAP, public GPUIndex {
 public:
    explicit GPUIDMAP(std::shared_ptr<faiss::Index> index, const int64_t &device_id)
        : IDMAP(std::move(index)), GPUIndex(device_id) {}

    VectorIndexPtr CopyGpuToCpu(const Config &config) override;

    float *GetRawVectors() override;
    int64_t *GetRawIds() override;

    VectorIndexPtr Clone() override;
    VectorIndexPtr CopyGpuToGpu(const int64_t &device_id, const Config &config) override;

 protected:
    BinarySet SerializeImpl() override;
    void LoadImpl(const BinarySet &index_binary) override;
};

using GPUIDMAPPtr = std::shared_ptr<GPUIDMAP>;
}
}
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#pragma once
#include <memory>
#include <mutex>
#include <faiss/IndexIVF.h>
#include <faiss/AuxIndexStructures.h>
#include <faiss/Index.h>
#include "knowhere/index/vector_index/vector_index.h"
namespace zilliz {
namespace knowhere {
// Thin ownership wrapper around a faiss::Index with shared (de)serialization
// hooks; meant to be inherited, never instantiated directly.
class BasicIndex {
 protected:
    explicit BasicIndex(std::shared_ptr<faiss::Index> index);

    virtual BinarySet SerializeImpl();
    virtual void LoadImpl(const BinarySet &index_binary);

    virtual void
    SealImpl();

 protected:
    std::shared_ptr<faiss::Index> index_ = nullptr;  // the wrapped faiss index
};
using Graph = std::vector<std::vector<int64_t>>;
// CPU inverted-file (IVF) index built on faiss.
class IVF : public VectorIndex, protected BasicIndex {
 public:
    IVF() : BasicIndex(nullptr) {};
    explicit IVF(std::shared_ptr<faiss::Index> index) : BasicIndex(std::move(index)) {}

    VectorIndexPtr Clone() override;  // fix: stray double semicolon removed

    IndexModelPtr Train(const DatasetPtr &dataset, const Config &config) override;

    void set_index_model(IndexModelPtr model) override;

    void Add(const DatasetPtr &dataset, const Config &config) override;

    // Add vectors letting faiss assign sequential ids.
    void AddWithoutIds(const DatasetPtr &dataset, const Config &config);

    DatasetPtr Search(const DatasetPtr &dataset, const Config &config) override;

    // Build a k-NN graph over `dataset` using this index (used by NSG).
    void GenGraph(const int64_t &k, Graph &graph, const DatasetPtr &dataset, const Config &config);

    BinarySet Serialize() override;

    void Load(const BinarySet &index_binary) override;

    int64_t Count() override;

    int64_t Dimension() override;

    void
    Seal() override;

    // Clone this index onto GPU `device_id`.
    virtual VectorIndexPtr CopyCpuToGpu(const int64_t &device_id, const Config &config);

 protected:
    virtual std::shared_ptr<faiss::IVFSearchParameters> GenParams(const Config &config);

    virtual VectorIndexPtr Clone_impl(const std::shared_ptr<faiss::Index> &index);

    virtual void search_impl(int64_t n,
                             const float *data,
                             int64_t k,
                             float *distances,
                             int64_t *labels,
                             const Config &cfg);

 protected:
    std::mutex mutex_;  // serializes mutating operations (usage in ivf.cpp)
};

using IVFIndexPtr = std::shared_ptr<IVF>;
// IVF with scalar quantization (CPU).
class IVFSQ : public IVF {
 public:
    explicit IVFSQ(std::shared_ptr<faiss::Index> index) : IVF(std::move(index)) {}
    IVFSQ() = default;

    IndexModelPtr Train(const DatasetPtr &dataset, const Config &config) override;

    VectorIndexPtr CopyCpuToGpu(const int64_t &device_id, const Config &config) override;

 protected:
    VectorIndexPtr Clone_impl(const std::shared_ptr<faiss::Index> &index) override;
};
// IVF with product quantization (CPU).
class IVFPQ : public IVF {
 public:
    explicit IVFPQ(std::shared_ptr<faiss::Index> index) : IVF(std::move(index)) {}
    IVFPQ() = default;

    IndexModelPtr Train(const DatasetPtr &dataset, const Config &config) override;

 protected:
    std::shared_ptr<faiss::IVFSearchParameters> GenParams(const Config &config) override;

    VectorIndexPtr Clone_impl(const std::shared_ptr<faiss::Index> &index) override;
};
//class OPQIVFPQ : public IVFPQ {
// public:
// PreprocessorPtr BuildPreprocessor(const Dataset &dataset, const Config &config) override;
//};
class GPUIVF;
// faiss IOWriter backed by a growable in-memory buffer.
struct MemoryIOWriter : public faiss::IOWriter {
    uint8_t *data_ = nullptr;  // backing buffer
    size_t total = 0;          // bytes of capacity — confirm semantics in ivf.cpp
    size_t rp = 0;             // write position

    size_t operator()(const void *ptr, size_t size, size_t nitems) override;
};
// faiss IOReader backed by an in-memory buffer.
struct MemoryIOReader : public faiss::IOReader {
    uint8_t *data_ = nullptr;  // fix: was uninitialized (MemoryIOWriter initializes its pointer)
    size_t rp = 0;             // read position
    size_t total = 0;          // total readable bytes

    size_t operator()(void *ptr, size_t size, size_t nitems) override;
};
// Serializable trained state (quantizer etc.) shared by IVF and GPUIVF.
class IVFIndexModel : public IndexModel, public BasicIndex {
    friend IVF;
    friend GPUIVF;

 public:
    explicit IVFIndexModel(std::shared_ptr<faiss::Index> index);

    IVFIndexModel() : BasicIndex(nullptr) {};

    BinarySet Serialize() override;

 protected:
    void SealImpl() override;

 public:
    void Load(const BinarySet &binary) override;

 protected:
    std::mutex mutex_;  // serializes serialize/load (usage in ivf.cpp)
};

using IVFIndexModelPtr = std::shared_ptr<IVFIndexModel>;
}
}
\ No newline at end of file
#pragma once
#include <string>
#include <vector>
namespace zilliz {
namespace knowhere {
// (name, value) pair passed through to SPTAG's SetParameter interface.
using KDTParameter = std::pair<std::string, std::string>;

// Singleton holding the default KDT parameter set (built once in the private
// constructor; see kdt_parameters.cpp).
class KDTParameterManagement {
 public:
    const std::vector<KDTParameter> &
    GetKDTParameters();

 public:
    // Meyers singleton: thread-safe initialization since C++11.
    static KDTParameterManagement &
    GetInstance() {
        static KDTParameterManagement instance;
        return instance;
    }

    KDTParameterManagement(const KDTParameterManagement &) = delete;
    KDTParameterManagement &operator=(const KDTParameterManagement &) = delete;

 private:
    KDTParameterManagement();

 private:
    std::vector<KDTParameter> kdt_parameters_;
};
} // namespace knowhere
} // namespace zilliz
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#pragma once
#include <mutex>
namespace zilliz {
namespace knowhere {
namespace algo {
using node_t = int64_t;

// TODO: search use simple neighbor
// One candidate vertex during NSG graph search, ordered by ascending distance.
struct Neighbor {
    node_t id = -1;             // offset of node in origin data; fix: was uninitialized
    float distance = 0.0f;      // fix: was uninitialized on default construction
    bool has_explored = false;  // fix: was uninitialized on default construction

    Neighbor() = default;
    explicit Neighbor(node_t id, float distance, bool f) : id{id}, distance{distance}, has_explored(f) {}
    explicit Neighbor(node_t id, float distance) : id{id}, distance{distance}, has_explored(false) {}

    inline bool operator<(const Neighbor &other) const {
        return distance < other.distance;
    }
};
//struct SimpleNeighbor {
// node_t id; // offset of node in origin data
// float distance;
//
// SimpleNeighbor() = default;
// explicit SimpleNeighbor(node_t id, float distance) : id{id}, distance{distance}{}
//
// inline bool operator<(const Neighbor &other) const {
// return distance < other.distance;
// }
//};
typedef std::lock_guard<std::mutex> LockGuard;
}
}
}
\ No newline at end of file
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#pragma once
#include <cstddef>
#include <vector>
#include <mutex>
#include <boost/dynamic_bitset.hpp>
#include "neighbor.h"
namespace zilliz {
namespace knowhere {
namespace algo {
using node_t = int64_t;
// Distance metric used by NsgIndex.
enum class MetricType {
    METRIC_INNER_PRODUCT = 0,
    METRIC_L2 = 1,
};

// Parameters consumed by NsgIndex::Build_with_ids.
struct BuildParams {
    size_t search_length;        // greedy-search path length during build
    size_t out_degree;           // max edges kept per node
    size_t candidate_pool_size;  // candidate set size during pruning
};

// Parameters consumed by NsgIndex::Search.
struct SearchParams {
    size_t search_length;
};
using Graph = std::vector<std::vector<node_t>>;
// Navigating Spreading-out Graph (NSG) approximate-nearest-neighbor index.
// Built from a precomputed kNN graph (SetKnnGraph) and searched greedily
// starting at `navigation_point`.
class NsgIndex {
 public:
    size_t dimension;
    size_t ntotal;           // total number of indexed vectors
    MetricType metric_type;  // L2 | IP

    float *ori_data_;  // raw vectors — NOTE(review): raw owning pointer? confirm dtor frees it
    long *ids_;        // TODO: support different type

    Graph nsg;    // final graph
    Graph knng;   // reset after build

    node_t navigation_point;  // offset of node in origin data

    bool is_trained = false;

    /*
     * build and search parameter
     */
    size_t search_length;
    size_t candidate_pool_size;  // search depth in fullset
    size_t out_degree;

 public:
    explicit NsgIndex(const size_t &dimension,
                      const size_t &n,
                      MetricType metric = MetricType::METRIC_L2);

    NsgIndex() = default;

    virtual ~NsgIndex();

    // Supply the kNN graph consumed by the build (knng is "reset after build").
    void SetKnnGraph(Graph &knng);

    virtual void Build_with_ids(size_t nb,
                                const float *data,
                                const long *ids,
                                const BuildParams &parameters);

    // Batched k-NN search: fills dist/ids with nq * k results.
    void Search(const float *query,
                const unsigned &nq,
                const unsigned &dim,
                const unsigned &k,
                float *dist,
                long *ids,
                SearchParams &params);

    // Not support yet.
    //virtual void Add() = 0;
    //virtual void Add_with_ids() = 0;
    //virtual void Delete() = 0;
    //virtual void Delete_with_ids() = 0;
    //virtual void Rebuild(size_t nb,
    //                     const float *data,
    //                     const long *ids,
    //                     const Parameters &parameters) = 0;
    //virtual void Build(size_t nb,
    //                   const float *data,
    //                   const BuildParam &parameters);

 protected:
    virtual void InitNavigationPoint();

    // link specify
    void GetNeighbors(const float *query,
                      std::vector<Neighbor> &resset,
                      std::vector<Neighbor> &fullset,
                      boost::dynamic_bitset<> &has_calculated_dist);

    // FindUnconnectedNode
    void GetNeighbors(const float *query,
                      std::vector<Neighbor> &resset,
                      std::vector<Neighbor> &fullset);

    // search and navigation-point
    void GetNeighbors(const float *query,
                      std::vector<Neighbor> &resset,
                      Graph &graph,
                      SearchParams *param = nullptr);

    void Link();

    void SyncPrune(size_t q,
                   std::vector<Neighbor> &pool,
                   boost::dynamic_bitset<> &has_calculated,
                   float *cut_graph_dist
    );

    void SelectEdge(unsigned &cursor,
                    std::vector<Neighbor> &sort_pool,
                    std::vector<Neighbor> &result,
                    bool limit = false);

    void InterInsert(unsigned n, std::vector<std::mutex> &mutex_vec, float *dist);

    // Connectivity repair after pruning.
    void CheckConnectivity();

    void DFS(size_t root, boost::dynamic_bitset<> &flags, int64_t &count);

    void FindUnconnectedNode(boost::dynamic_bitset<> &flags, int64_t &root);

 private:
    void GetKnnGraphFromFile();
};
}
}
}
// SWIG interface for the Python "nsg" module.
// NOTE(review): the wrapper compiles against nsg.h but exposes declarations
// parsed from index.h — confirm that mismatch is intentional.
%module nsg

%{
#define SWIG_FILE_WITH_INIT
#include <numpy/arrayobject.h>
/* Include the header in the wrapper code */
#include "nsg.h"
%}

/* Parse the header file */
%include "index.h"
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "nsg.h"
#include "knowhere/index/vector_index/ivf.h"
namespace zilliz {
namespace knowhere {
namespace algo {
// Serialize an NSG graph into / out of an in-memory faiss-style IO buffer.
extern void write_index(NsgIndex* index, MemoryIOWriter& writer);
extern NsgIndex* read_index(MemoryIOReader& reader);
}
}
}
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "knowhere/index/vector_index/vector_index.h"
namespace zilliz {
namespace knowhere {
namespace algo {
class NsgIndex;
}
// VectorIndex facade over the NSG graph algorithm (algo::NsgIndex).
class NSG : public VectorIndex {
 public:
    explicit NSG(const int64_t& gpu_num):gpu_(gpu_num){}
    NSG() = default;

    IndexModelPtr Train(const DatasetPtr &dataset, const Config &config) override;
    DatasetPtr Search(const DatasetPtr &dataset, const Config &config) override;
    void Add(const DatasetPtr &dataset, const Config &config) override;
    BinarySet Serialize() override;
    void Load(const BinarySet &index_binary) override;
    int64_t Count() override;
    int64_t Dimension() override;
    VectorIndexPtr Clone() override;
    void Seal() override;

 private:
    std::shared_ptr<algo::NsgIndex> index_;
    int64_t gpu_;  // GPU used during training (see nsg_index.cpp)
};

// fix: was `std::shared_ptr<NSG>()` — the trailing "()" made this an alias
// for a *function type* returning shared_ptr<NSG>, not a pointer alias.
using NSGIndexPtr = std::shared_ptr<NSG>;
}
}
#pragma once
#include <memory>
#include "knowhere/common/config.h"
#include "knowhere/common/dataset.h"
#include "knowhere/index/index.h"
#include "knowhere/index/preprocessor/preprocessor.h"
namespace zilliz {
namespace knowhere {
class VectorIndex;
using VectorIndexPtr = std::shared_ptr<VectorIndex>;
// Index specialization for dense vector data: adds training, insertion,
// cloning and basic statistics to the base Index contract.
class VectorIndex : public Index {
 public:
    // fix: implementations are deleted through VectorIndexPtr; make the
    // destructor virtual (defensive, in case the base lacks one).
    virtual ~VectorIndex() = default;

    // Optional hook; the default builds no preprocessor.
    virtual PreprocessorPtr
    BuildPreprocessor(const DatasetPtr &dataset, const Config &config) { return nullptr; }

    // Optional hook; the default trains nothing and returns no model.
    virtual IndexModelPtr
    Train(const DatasetPtr &dataset, const Config &config) { return nullptr; }

    virtual void
    Add(const DatasetPtr &dataset, const Config &config) = 0;

    virtual void
    Seal() = 0;

    virtual VectorIndexPtr
    Clone() = 0;

    virtual int64_t
    Count() = 0;

    virtual int64_t
    Dimension() = 0;
};
} // namespace knowhere
} // namespace zilliz
# --- Third-party: Intel TBB (prebuilt shared library shipped in thirdparty/) ---
set(TBB_DIR ${CMAKE_SOURCE_DIR}/thirdparty/tbb)
set(TBB_LIBRARIES ${TBB_DIR}/libtbb.so)
include_directories(${TBB_DIR}/include)

# CUDA headers and libraries from the active toolkit.
include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
link_directories(${CUDA_TOOLKIT_ROOT_DIR}/lib64)

# Project and vendored include paths.
include_directories(${CMAKE_SOURCE_DIR}/include)
include_directories(${CMAKE_SOURCE_DIR}/thirdparty)
include_directories(${CMAKE_SOURCE_DIR}/thirdparty/SPTAG/AnnService)
include_directories(${CMAKE_SOURCE_DIR}/thirdparty/jsoncons-0.126.0/include)

# --- SPTAG is compiled from vendored sources into a static library ---
set(SPTAG_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../thirdparty/SPTAG)
file(GLOB HDR_FILES
        ${SPTAG_SOURCE_DIR}/AnnService/inc/Core/*.h
        ${SPTAG_SOURCE_DIR}/AnnService/inc/Core/Common/*.h
        ${SPTAG_SOURCE_DIR}/AnnService/inc/Core/BKT/*.h
        ${SPTAG_SOURCE_DIR}/AnnService/inc/Core/KDT/*.h
        ${SPTAG_SOURCE_DIR}/AnnService/inc/Helper/*.h)
file(GLOB SRC_FILES
        ${SPTAG_SOURCE_DIR}/AnnService/src/Core/*.cpp
        ${SPTAG_SOURCE_DIR}/AnnService/src/Core/Common/*.cpp
        ${SPTAG_SOURCE_DIR}/AnnService/src/Core/BKT/*.cpp
        ${SPTAG_SOURCE_DIR}/AnnService/src/Core/KDT/*.cpp
        ${SPTAG_SOURCE_DIR}/AnnService/src/Helper/*.cpp)

#add_library(SPTAGLib SHARED ${SRC_FILES} ${HDR_FILES})
#target_link_libraries(SPTAGLib ${TBB_LIBRARIES})
add_library(SPTAGLibStatic STATIC ${SRC_FILES} ${HDR_FILES})
# Adapter / common sources shared by every index implementation.
set(external_srcs
        knowhere/adapter/sptag.cpp
        knowhere/adapter/structure.cpp
        knowhere/adapter/arrow.cpp
        knowhere/common/exception.cpp
        knowhere/common/timer.cpp
        )

# Index implementations (CPU, GPU, NSG graph).
set(index_srcs
        knowhere/index/preprocessor/normalize.cpp
        knowhere/index/vector_index/cpu_kdt_rng.cpp
        knowhere/index/vector_index/idmap.cpp
        knowhere/index/vector_index/ivf.cpp
        knowhere/index/vector_index/gpu_ivf.cpp
        knowhere/index/vector_index/kdt_parameters.cpp
        knowhere/index/vector_index/nsg_index.cpp
        knowhere/index/vector_index/nsg/nsg.cpp
        knowhere/index/vector_index/nsg/nsg_io.cpp
        knowhere/index/vector_index/nsg/utils.cpp
        knowhere/index/vector_index/cloner.cpp
        )

# Link dependencies: SPTAG + faiss math stack + arrow + CUDA runtime.
set(depend_libs
        # libtcmalloc.a
        SPTAGLibStatic
        ${TBB_LIBRARIES}
        faiss
        openblas
        lapack
        arrow
        jemalloc_pic
        cudart
        cublas
        gomp
        gfortran
        pthread
        )

add_library(
        knowhere STATIC
        ${external_srcs}
        ${index_srcs}
)

#target_compile_options(knowhere PUBLIC "-fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free")

target_link_libraries(
        knowhere
        ${depend_libs}
)

# Shared-library build kept for reference, currently disabled.
#add_library(
#        knowhereS SHARED
#        ${external_srcs}
#        ${index_srcs}
#)
#
#target_link_libraries(
#        knowhereS
##        ${TBB_LIBRARIES}
#        ${depend_libs}
#)
# --- Install: static libs, bundled third-party libs, and public headers ---
INSTALL(TARGETS
        knowhere
        SPTAGLibStatic
        DESTINATION
        lib)

INSTALL(FILES
        ${ARROW_STATIC_LIB}
#        ${PARQUET_STATIC_LIB}
        ${ARROW_PREFIX}/lib/libjemalloc_pic.a
        ${FAISS_STATIC_LIB}
        ${LAPACK_STATIC_LIB}
        ${BLAS_STATIC_LIB}
        DESTINATION
        lib
        )

# OpenBLAS is renamed to the canonical libopenblas.a on install.
INSTALL(FILES ${OPENBLAS_REAL_STATIC_LIB}
        RENAME "libopenblas.a"
        DESTINATION lib
        )

INSTALL(FILES ${CMAKE_SOURCE_DIR}/thirdparty/tbb/libtbb.so.2
#        RENAME "libtbb.so.2"
        DESTINATION lib
        )

INSTALL(FILES ${CMAKE_SOURCE_DIR}/thirdparty/tbb/libtbb.so
#        RENAME "libtbb.so"
        DESTINATION lib
        )

# Headers for knowhere and all vendored dependencies consumers compile against.
INSTALL(DIRECTORY
        ${CMAKE_SOURCE_DIR}/include/knowhere
        ${CMAKE_SOURCE_DIR}/thirdparty/jsoncons-0.126.0/include/jsoncons
        ${CMAKE_SOURCE_DIR}/thirdparty/jsoncons-0.126.0/include/jsoncons_ext
        ${ARROW_INCLUDE_DIR}/arrow
#        ${ARROW_INCLUDE_DIR}/parquet
        ${FAISS_PREFIX}/include/faiss
        ${OPENBLAS_INCLUDE_DIR}/
        ${CMAKE_SOURCE_DIR}/thirdparty/tbb/include/tbb
        DESTINATION
        include)

INSTALL(DIRECTORY
        ${SPTAG_SOURCE_DIR}/AnnService/inc/
        DESTINATION
        include/SPTAG/AnnService/inc)
#include "knowhere/adapter/arrow.h"
namespace zilliz {
namespace knowhere {
// Deep-copy an arrow array: the underlying ArrayData is copied and wrapped in
// a fresh typed array object. Unsupported type ids return nullptr.
ArrayPtr
CopyArray(const ArrayPtr &origin) {
    ArrayPtr copy = nullptr;
    auto copy_data = origin->data()->Copy();
    switch (origin->type_id()) {
        // Bug fix: the original macro body had no `break`, so every matched
        // case fell through to the following ones and `copy` ended up as an
        // array of the LAST type in the list, not the matching one.
#define DEFINE_TYPE(type, clazz)                                  \
        case arrow::Type::type: {                                 \
            copy = std::make_shared<arrow::clazz>(copy_data);     \
            break;                                                \
        }
        DEFINE_TYPE(BOOL, BooleanArray)
        DEFINE_TYPE(BINARY, BinaryArray)
        DEFINE_TYPE(FIXED_SIZE_BINARY, FixedSizeBinaryArray)
        DEFINE_TYPE(DECIMAL, Decimal128Array)
        DEFINE_TYPE(FLOAT, NumericArray<arrow::FloatType>)
        DEFINE_TYPE(INT64, NumericArray<arrow::Int64Type>)
#undef DEFINE_TYPE
        default:break;
    }
    return copy;
}
// Deep-copy a schema: every field is recreated with the same name, type and
// nullability (field metadata is dropped, as in the original).
SchemaPtr
CopySchema(const SchemaPtr &origin) {
    std::vector<std::shared_ptr<Field>> copied_fields;
    copied_fields.reserve(origin->fields().size());
    for (const auto &src : origin->fields()) {
        copied_fields.emplace_back(
            std::make_shared<Field>(src->name(), src->type(), src->nullable(), nullptr));
    }
    return std::make_shared<Schema>(std::move(copied_fields));
}
} // namespace knowhere
} // namespace zilliz
#include "knowhere/index/vector_index/definitions.h"
#include "knowhere/adapter/sptag.h"
#include "knowhere/adapter/structure.h"
namespace zilliz {
namespace knowhere {
// Wrap the dataset's id column as SPTAG metadata: each vector's metadata is
// its 8-byte int64 id, described by an offsets table.
std::shared_ptr<SPTAG::MetadataSet>
ConvertToMetadataSet(const DatasetPtr &dataset) {
    auto array = dataset->array()[0];
    auto elems = array->length();
    auto p_data = array->data()->GetValues<int64_t>(1, 0);
    // Bug fix: the offsets table needs elems + 1 entries (start of each entry
    // plus the end of the last one) and the original loop `i <= elems` wrote
    // one element past a buffer malloc'd for only `elems` — a heap overflow.
    auto p_offset = (int64_t *) malloc(sizeof(int64_t) * (elems + 1));
    for (int64_t i = 0; i <= elems; ++i)
        p_offset[i] = i * 8;  // every metadata record is one int64 (8 bytes)
    std::shared_ptr<SPTAG::MetadataSet> metaset(new SPTAG::MemMetadataSet(
        // ids are borrowed from the dataset (no ownership transfer) …
        SPTAG::ByteArray((std::uint8_t *) p_data, elems * sizeof(int64_t), false),
        // … but the offsets buffer is owned by SPTAG (true).
        SPTAG::ByteArray((std::uint8_t *) p_offset, (elems + 1) * sizeof(int64_t), true),
        elems));
    return metaset;
}
std::shared_ptr<SPTAG::VectorSet>
ConvertToVectorSet(const DatasetPtr &dataset) {
auto tensor = dataset->tensor()[0];
auto p_data = tensor->raw_mutable_data();
auto dimension = tensor->shape()[1];
auto rows = tensor->shape()[0];
auto num_bytes = tensor->size() * sizeof(float);
SPTAG::ByteArray byte_array(p_data, num_bytes, false);
auto vectorset = std::make_shared<SPTAG::BasicVectorSet>(byte_array,
SPTAG::VectorValueType::Float,
dimension,
rows);
return vectorset;
}
std::vector<SPTAG::QueryResult>
ConvertToQueryResult(const DatasetPtr &dataset, const Config &config) {
auto tensor = dataset->tensor()[0];
auto p_data = (float *) tensor->raw_mutable_data();
auto dimension = tensor->shape()[1];
auto rows = tensor->shape()[0];
auto k = config[META_K].as<int64_t>();
std::vector<SPTAG::QueryResult> query_results(rows, SPTAG::QueryResult(nullptr, k, true));
for (auto i = 0; i < rows; ++i) {
query_results[i].SetTarget(&p_data[i * dimension]);
}
return query_results;
}
// Convert SPTAG query results into a knowhere Dataset with two columns:
// int64 ids and float distances, row-major with k entries per query.
DatasetPtr
ConvertToDataset(std::vector<SPTAG::QueryResult> query_results) {
    auto k = query_results[0].GetResultNum();
    auto elems = query_results.size() * k;
    auto p_id = (int64_t *) malloc(sizeof(int64_t) * elems);
    auto p_dist = (float *) malloc(sizeof(float) * elems);
    // TODO: throw if malloc failed.
#pragma omp parallel for
    for (auto i = 0; i < query_results.size(); ++i) {
        auto results = query_results[i].GetResults();
        auto num_result = query_results[i].GetResultNum();
        for (auto j = 0; j < num_result; ++j) {
            // p_id[i * k + j] = results[j].VID;
            // The user-visible id is read back from the per-result metadata
            // blob (filled with int64 ids in ConvertToMetadataSet).
            p_id[i * k + j] = *(int64_t *) query_results[i].GetMetadata(j).Data();
            p_dist[i * k + j] = results[j].Dist;
        }
    }
    // Ownership of the raw buffers transfers to the arrow buffers.
    auto id_buf = MakeMutableBufferSmart((uint8_t *) p_id, sizeof(int64_t) * elems);
    auto dist_buf = MakeMutableBufferSmart((uint8_t *) p_dist, sizeof(float) * elems);
    // TODO: magic
    // Arrow array layouts reserve buffer slot 0 for the validity bitmap;
    // these arrays contain no nulls, hence the leading nullptr.
    std::vector<BufferPtr> id_bufs{nullptr, id_buf};
    std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
    auto int64_type = std::make_shared<arrow::Int64Type>();
    auto float_type = std::make_shared<arrow::FloatType>();
    auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs);
    auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs);
    // auto id_array_data = std::make_shared<ArrayData>(int64_type, sizeof(int64_t) * elems, id_bufs);
    // auto dist_array_data = std::make_shared<ArrayData>(float_type, sizeof(float) * elems, dist_bufs);
    // auto ids = ConstructInt64Array((uint8_t*)p_id, sizeof(int64_t) * elems);
    // auto dists = ConstructFloatArray((uint8_t*)p_dist, sizeof(float) * elems);
    auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data);
    auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data);
    std::vector<ArrayPtr> array{ids, dists};
    auto field_id = std::make_shared<Field>("id", std::make_shared<arrow::Int64Type>());
    auto field_dist = std::make_shared<Field>("dist", std::make_shared<arrow::FloatType>());
    std::vector<FieldPtr> fields{field_id, field_dist};
    auto schema = std::make_shared<Schema>(fields);
    return std::make_shared<Dataset>(array, schema);
}
} // namespace knowhere
} // namespace zilliz
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#include "knowhere/adapter/structure.h"
namespace zilliz {
namespace knowhere {
// Wrap a heap buffer (ownership transferred) as an arrow Int64 array.
// Buffer slot 0 is the absent validity bitmap.
ArrayPtr
ConstructInt64ArraySmart(uint8_t *data, int64_t size) {
    // TODO: magic
    std::vector<BufferPtr> buffers{nullptr, MakeMutableBufferSmart(data, size)};
    auto array_data = arrow::ArrayData::Make(std::make_shared<arrow::Int64Type>(),
                                             size / sizeof(int64_t), buffers);
    return std::make_shared<NumericArray<arrow::Int64Type>>(array_data);
}

// Wrap a heap buffer (ownership transferred) as an arrow Float array.
ArrayPtr
ConstructFloatArraySmart(uint8_t *data, int64_t size) {
    // TODO: magic
    std::vector<BufferPtr> buffers{nullptr, MakeMutableBufferSmart(data, size)};
    auto array_data = arrow::ArrayData::Make(std::make_shared<arrow::FloatType>(),
                                             size / sizeof(float), buffers);
    return std::make_shared<NumericArray<arrow::FloatType>>(array_data);
}

// Wrap a heap buffer (ownership transferred) as a float tensor of `shape`.
TensorPtr
ConstructFloatTensorSmart(uint8_t *data, int64_t size, std::vector<int64_t> shape) {
    return std::make_shared<Tensor>(std::make_shared<arrow::FloatType>(),
                                    MakeMutableBufferSmart(data, size), shape);
}

// Borrowing variant: the caller keeps ownership of `data`.
ArrayPtr
ConstructInt64Array(uint8_t *data, int64_t size) {
    // TODO: magic
    std::vector<BufferPtr> buffers{nullptr, MakeMutableBuffer(data, size)};
    auto array_data = arrow::ArrayData::Make(std::make_shared<arrow::Int64Type>(),
                                             size / sizeof(int64_t), buffers);
    return std::make_shared<NumericArray<arrow::Int64Type>>(array_data);
}

// Borrowing variant: the caller keeps ownership of `data`.
ArrayPtr
ConstructFloatArray(uint8_t *data, int64_t size) {
    // TODO: magic
    std::vector<BufferPtr> buffers{nullptr, MakeMutableBuffer(data, size)};
    auto array_data = arrow::ArrayData::Make(std::make_shared<arrow::FloatType>(),
                                             size / sizeof(float), buffers);
    return std::make_shared<NumericArray<arrow::FloatType>>(array_data);
}

// Borrowing variant of the float-tensor constructor.
TensorPtr
ConstructFloatTensor(uint8_t *data, int64_t size, std::vector<int64_t> shape) {
    return std::make_shared<Tensor>(std::make_shared<arrow::FloatType>(),
                                    MakeMutableBuffer(data, size), shape);
}

// Schema helper: a nullable int64 field named `name`.
FieldPtr
ConstructInt64Field(const std::string &name) {
    return std::make_shared<Field>(name, std::make_shared<arrow::Int64Type>());
}

// Schema helper: a nullable float field named `name`.
FieldPtr
ConstructFloatField(const std::string &name) {
    return std::make_shared<Field>(name, std::make_shared<arrow::FloatType>());
}
}
}
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#include "knowhere/common/exception.h"
#include <cstdio>
namespace zilliz {
namespace knowhere {
// Construct from an already-formatted message.
KnowhereException::KnowhereException(const std::string &msg):msg(msg) {}
// Construct with source-location context: "Error in <func> at <file>:<line>: <msg>".
KnowhereException::KnowhereException(const std::string &m, const char *funcName, const char *file, int line) {
    // First pass measures the formatted length, second pass writes in place.
    int size = snprintf(nullptr, 0, "Error in %s at %s:%d: %s",
                        funcName, file, line, m.c_str());
    msg.resize(size + 1);
    snprintf(&msg[0], msg.size(), "Error in %s at %s:%d: %s",
             funcName, file, line, m.c_str());
    // Bug fix: the original kept size+1 characters, so msg contained an
    // embedded trailing '\0' (wrong length(), broken concatenation).
    msg.resize(size);
}
// Return the formatted message; the pointer is valid for the lifetime of
// the exception object.
const char *KnowhereException::what() const noexcept {
    return msg.c_str();
}
}
}
\ No newline at end of file
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#include <iostream> // TODO(linxj): using Log instead
#include "knowhere/common/timer.h"
namespace zilliz {
namespace knowhere {
// Start a named timing scope; both the overall clock (start_) and the
// per-section clock (last_) begin now.
TimeRecorder::TimeRecorder(const std::string &header,
                           int64_t log_level) :
    header_(header),
    log_level_(log_level) {
    start_ = stdclock::now();
    last_ = start_;
}

TimeRecorder::~TimeRecorder() {
}

// Render a duration given in microseconds as "<sec> second(s) [<ms> ms]".
std::string
TimeRecorder::GetTimeSpanStr(double span) {
    const std::string seconds_part =
        std::to_string(span * 0.000001) + ((span > 1000000) ? " seconds" : " second");
    const std::string millis_part = std::to_string(span * 0.001) + " ms";
    return seconds_part + " [" + millis_part + "]";
}
// Format "<header>: <msg> (<span>)" and emit it according to log_level_.
// Only level 0 (stdout) is active; the SERVER_LOG_* sinks are disabled
// because this library does not link the server's logger.
void
TimeRecorder::PrintTimeRecord(const std::string &msg, double span) {
    std::string str_log;
    if (!header_.empty()) str_log += header_ + ": ";
    str_log += msg;
    str_log += " (";
    str_log += TimeRecorder::GetTimeSpanStr(span);
    str_log += ")";
    switch (log_level_) {
        case 0: {
            std::cout << str_log << std::endl;
            break;
        }
        //case 1: {
        //    SERVER_LOG_DEBUG << str_log;
        //    break;
        //}
        //case 2: {
        //    SERVER_LOG_INFO << str_log;
        //    break;
        //}
        //case 3: {
        //    SERVER_LOG_WARNING << str_log;
        //    break;
        //}
        //case 4: {
        //    SERVER_LOG_ERROR << str_log;
        //    break;
        //}
        //case 5: {
        //    SERVER_LOG_FATAL << str_log;
        //    break;
        //}
        //default: {
        //    SERVER_LOG_INFO << str_log;
        //    break;
        //}
    }
}
// Log and return the microseconds since the previous section mark (or
// construction), then restart the section clock.
double
TimeRecorder::RecordSection(const std::string &msg) {
    auto now = stdclock::now();
    double micros = std::chrono::duration<double, std::micro>(now - last_).count();
    last_ = now;
    PrintTimeRecord(msg, micros);
    return micros;
}

// Log and return the microseconds since construction; the per-section clock
// is left untouched.
double
TimeRecorder::ElapseFromBegin(const std::string &msg) {
    auto now = stdclock::now();
    double micros = std::chrono::duration<double, std::micro>(now - start_).count();
    PrintTimeRecord(msg, micros);
    return micros;
}
}
}
\ No newline at end of file
#include "knowhere/index/vector_index/definitions.h"
#include "knowhere/common/config.h"
#include "knowhere/index/preprocessor/normalize.h"
namespace zilliz {
namespace knowhere {
// Normalize every row vector of the dataset's first tensor in place and
// return the (mutated) dataset.
DatasetPtr
NormalizePreprocessor::Preprocess(const DatasetPtr &dataset) {
    // TODO: wrap dataset->tensor
    auto tensor = dataset->tensor()[0];
    auto p_data = (float *)tensor->raw_mutable_data();
    auto dimension = tensor->shape()[1];
    auto rows = tensor->shape()[0];
#pragma omp parallel for
    for (int64_t i = 0; i < rows; ++i) {
        Normalize(&(p_data[i * dimension]), dimension);
    }
    // Bug fix: the original had no return statement in a function declared
    // to return DatasetPtr — undefined behavior. The data is normalized in
    // place, so the input dataset is the natural result.
    return dataset;
}
// L2-normalize `arr` in place. Vectors with near-zero norm (< 1e-6) are
// replaced by the uniform unit vector to avoid dividing by ~0.
void
NormalizePreprocessor::Normalize(float *arr, int64_t dimension) {
    double norm_sq = 0;
    for (int64_t j = 0; j < dimension; ++j) {
        const double component = arr[j];
        norm_sq += component * component;
    }
    const double norm = std::sqrt(norm_sq);
    if (norm < 1e-6) {
        const auto uniform = (float) (1.0 / std::sqrt((double) dimension));
        for (int64_t j = 0; j < dimension; ++j) arr[j] = uniform;
    } else {
        for (int64_t j = 0; j < dimension; ++j) arr[j] = (float) (arr[j] / norm);
    }
}
} // namespace knowhere
} // namespace zilliz
/*******************************************************************************
* Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#include "knowhere/common/exception.h"
#include "knowhere/index/vector_index/cloner.h"
#include "knowhere/index/vector_index/ivf.h"
#include "knowhere/index/vector_index/gpu_ivf.h"
#include "knowhere/index/vector_index/idmap.h"
namespace zilliz {
namespace knowhere {
// Copy a GPU-resident index back to host memory. Throws if `index` is not a
// GPU index.
VectorIndexPtr CopyGpuToCpu(const VectorIndexPtr &index, const Config &config) {
    if (auto device_index = std::dynamic_pointer_cast<GPUIndex>(index)) {
        return device_index->CopyGpuToCpu(config);
    } else {
        KNOWHERE_THROW_MSG("index type is not gpuindex");
    }
}

// Copy an index onto GPU `device_id`. An already-GPU index is transferred
// device-to-device; CPU indexes are dispatched by concrete type.
// NOTE(review): the cast order looks deliberate — IVFSQ/IVFPQ are tested
// before plain IVF, presumably because a generic IVF cast would also match
// them. Confirm against the class hierarchy before reordering.
VectorIndexPtr CopyCpuToGpu(const VectorIndexPtr &index, const int64_t &device_id, const Config &config) {
    if (auto device_index = std::dynamic_pointer_cast<GPUIndex>(index)) {
        return device_index->CopyGpuToGpu(device_id, config);
    }
    if (auto cpu_index = std::dynamic_pointer_cast<IVFSQ>(index)) {
        return cpu_index->CopyCpuToGpu(device_id, config);
        //KNOWHERE_THROW_MSG("IVFSQ not support tranfer to gpu");
    } else if (auto cpu_index = std::dynamic_pointer_cast<IVFPQ>(index)) {
        KNOWHERE_THROW_MSG("IVFPQ not support tranfer to gpu");
    } else if (auto cpu_index = std::dynamic_pointer_cast<IVF>(index)) {
        return cpu_index->CopyCpuToGpu(device_id, config);
    } else if (auto cpu_index = std::dynamic_pointer_cast<IDMAP>(index)) {
        return cpu_index->CopyCpuToGpu(device_id, config);
    } else {
        KNOWHERE_THROW_MSG("this index type not support tranfer to gpu");
    }
}
}
}
#include <sstream>
#include <SPTAG/AnnService/inc/Server/QueryParser.h>
#include <SPTAG/AnnService/inc/Core/VectorSet.h>
#include <SPTAG/AnnService/inc/Core/Common.h>
#undef mkdir
#include "knowhere/index/vector_index/cpu_kdt_rng.h"
#include "knowhere/index/vector_index/definitions.h"
#include "knowhere/index/preprocessor/normalize.h"
#include "knowhere/index/vector_index/kdt_parameters.h"
#include "knowhere/adapter/sptag.h"
#include "knowhere/common/exception.h"
namespace zilliz {
namespace knowhere {
// Dump the SPTAG index into a BinarySet with four named blobs:
// samples / tree / graph / metadata.
BinarySet
CPUKDTRNG::Serialize() {
    std::vector<void *> index_blobs;
    std::vector<int64_t> index_len;
    index_ptr_->SaveIndexToMemory(index_blobs, index_len);
    BinarySet binary_set;
    // NOTE(review): each shared_ptr below is created with make_shared<uint8_t>
    // and immediately reset() to a blob pointer produced by SPTAG. The default
    // deleter (`delete` on a single uint8_t) must match however
    // SaveIndexToMemory allocated these buffers — confirm; otherwise release
    // is undefined behavior.
    auto sample = std::make_shared<uint8_t>();
    sample.reset(static_cast<uint8_t *>(index_blobs[0]));
    auto tree = std::make_shared<uint8_t>();
    tree.reset(static_cast<uint8_t *>(index_blobs[1]));
    auto graph = std::make_shared<uint8_t>();
    graph.reset(static_cast<uint8_t *>(index_blobs[2]));
    auto metadata = std::make_shared<uint8_t>();
    metadata.reset(static_cast<uint8_t *>(index_blobs[3]));
    binary_set.Append("samples", sample, index_len[0]);
    binary_set.Append("tree", tree, index_len[1]);
    binary_set.Append("graph", graph, index_len[2]);
    binary_set.Append("metadata", metadata, index_len[3]);
    return binary_set;
}

// Rebuild the SPTAG index from the four blobs produced by Serialize().
// The blob order must match Serialize(): samples, tree, graph, metadata.
void
CPUKDTRNG::Load(const BinarySet &binary_set) {
    std::vector<void *> index_blobs;
    auto samples = binary_set.GetByName("samples");
    index_blobs.push_back(samples->data.get());
    auto tree = binary_set.GetByName("tree");
    index_blobs.push_back(tree->data.get());
    auto graph = binary_set.GetByName("graph");
    index_blobs.push_back(graph->data.get());
    auto metadata = binary_set.GetByName("metadata");
    index_blobs.push_back(metadata->data.get());
    index_ptr_->LoadIndexFromMemory(index_blobs);
}
PreprocessorPtr
CPUKDTRNG::BuildPreprocessor(const DatasetPtr &dataset, const Config &config) {
return std::make_shared<NormalizePreprocessor>();
}
IndexModelPtr
CPUKDTRNG::Train(const DatasetPtr &origin, const Config &train_config) {
SetParameters(train_config);
DatasetPtr dataset = origin->Clone();
if (index_ptr_->GetDistCalcMethod() == SPTAG::DistCalcMethod::Cosine
&& preprocessor_) {
preprocessor_->Preprocess(dataset);
}
auto vectorset = ConvertToVectorSet(dataset);
auto metaset = ConvertToMetadataSet(dataset);
index_ptr_->BuildIndex(vectorset, metaset);
// TODO: return IndexModelPtr
return nullptr;
}
void
CPUKDTRNG::Add(const DatasetPtr &origin, const Config &add_config) {
SetParameters(add_config);
DatasetPtr dataset = origin->Clone();
if (index_ptr_->GetDistCalcMethod() == SPTAG::DistCalcMethod::Cosine
&& preprocessor_) {
preprocessor_->Preprocess(dataset);
}
auto vectorset = ConvertToVectorSet(dataset);
auto metaset = ConvertToMetadataSet(dataset);
index_ptr_->AddIndex(vectorset, metaset);
}
void
CPUKDTRNG::SetParameters(const Config &config) {
for (auto &para : KDTParameterManagement::GetInstance().GetKDTParameters()) {
auto value = config.get_with_default(para.first, para.second);
index_ptr_->SetParameter(para.first, value);
}
}
// kNN search: convert the query tensor into SPTAG query results, run the
// index on each row, and convert the hits back into a Dataset.
DatasetPtr
CPUKDTRNG::Search(const DatasetPtr &dataset, const Config &config) {
    SetParameters(config);
    // Bug fix: removed leftover debug code that dumped a 10x10 slice of the
    // query tensor and every query target to stdout on each search call.
    std::vector<SPTAG::QueryResult> query_results = ConvertToQueryResult(dataset, config);
#pragma omp parallel for
    for (int64_t i = 0; i < (int64_t) query_results.size(); ++i) {
        index_ptr_->SearchIndex(query_results[i]);
    }
    return ConvertToDataset(query_results);
}
// Number of vectors stored in the index.
int64_t CPUKDTRNG::Count() {
    // Bug fix: the original dropped the value and fell off the end of a
    // non-void function — undefined behavior.
    return index_ptr_->GetNumSamples();
}

// Dimensionality of the indexed vectors.
int64_t CPUKDTRNG::Dimension() {
    return index_ptr_->GetFeatureDim();  // bug fix: `return` was missing
}
// Cloning a KDT index is not supported; always throws.
VectorIndexPtr CPUKDTRNG::Clone() {
    KNOWHERE_THROW_MSG("not support");
}

// Sealing is a no-op for this index type.
void CPUKDTRNG::Seal() {
    // do nothing
}
// TODO(linxj):
// Model serialization for KDT is not implemented yet.
BinarySet
CPUKDTRNGIndexModel::Serialize() {
    // Bug fix: the original body was empty in a function returning BinarySet
    // (falling off the end is undefined behavior). Fail loudly instead.
    KNOWHERE_THROW_MSG("not support");
}

void
CPUKDTRNGIndexModel::Load(const BinarySet &binary) {
    // Not implemented (intentionally a no-op until model support lands).
}
} // namespace knowhere
} // namespace zilliz
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#include <faiss/gpu/GpuIndexFlat.h>
#include <faiss/gpu/GpuIndexIVF.h>
#include <faiss/gpu/GpuIndexIVFFlat.h>
#include <faiss/gpu/GpuIndexIVFPQ.h>
#include <faiss/gpu/GpuAutoTune.h>
#include <faiss/IndexIVFPQ.h>
#include <faiss/index_io.h>
#include "knowhere/common/exception.h"
#include "knowhere/index/vector_index/cloner.h"
#include "knowhere/adapter/faiss_adopt.h"
#include "knowhere/index/vector_index/gpu_ivf.h"
namespace zilliz {
namespace knowhere {
// Train an IVF-Flat index on the GPU, then bring the trained structure back
// to the host and wrap it as an index model.
IndexModelPtr GPUIVF::Train(const DatasetPtr &dataset, const Config &config) {
    auto nlist = config["nlist"].as<size_t>();
    auto gpu_device = config.get_with_default("gpu_id", gpu_id_);
    auto metric_type = config["metric_type"].as_string() == "L2" ?
                       faiss::METRIC_L2 : faiss::METRIC_INNER_PRODUCT;
    // GETTENSOR presumably introduces p_data/rows/dim from the dataset's
    // first tensor (macro from faiss_adopt.h) — TODO confirm.
    GETTENSOR(dataset)
    // TODO(linxj): use device_id
    // NOTE(review): res is not null-checked before res.get(); GetRes can
    // fail for an unregistered device — verify.
    auto res = FaissGpuResourceMgr::GetInstance().GetRes(gpu_device);
    ResScope rs(gpu_device, res);
    faiss::gpu::GpuIndexIVFFlat device_index(res.get(), dim, nlist, metric_type);
    device_index.train(rows, (float *) p_data);
    std::shared_ptr<faiss::Index> host_index = nullptr;
    host_index.reset(faiss::gpu::index_gpu_to_cpu(&device_index));
    return std::make_shared<IVFIndexModel>(host_index);
}

// Load a trained host-side model onto this object's GPU.
void GPUIVF::set_index_model(IndexModelPtr model) {
    std::lock_guard<std::mutex> lk(mutex_);
    auto host_index = std::static_pointer_cast<IVFIndexModel>(model);
    if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id_)) {
        ResScope rs(gpu_id_, res);
        auto device_index = faiss::gpu::index_cpu_to_gpu(res.get(), gpu_id_, host_index->index_.get());
        index_.reset(device_index);
    } else {
        KNOWHERE_THROW_MSG("load index model error, can't get gpu_resource");
    }
}

// Serialize by staging the GPU index through a host copy; SealImpl() runs
// before the bytes are written.
BinarySet GPUIVF::SerializeImpl() {
    if (!index_ || !index_->is_trained) {
        KNOWHERE_THROW_MSG("index not initialize or trained");
    }
    try {
        MemoryIOWriter writer;
        {
            faiss::Index *index = index_.get();
            faiss::Index *host_index = faiss::gpu::index_gpu_to_cpu(index);
            SealImpl();
            faiss::write_index(host_index, &writer);
            delete host_index;
        }
        // NOTE(review): shared_ptr built via make_shared<uint8_t> then
        // reset() to writer.data_ — the default `delete` deleter must match
        // MemoryIOWriter's allocation; confirm.
        auto data = std::make_shared<uint8_t>();
        data.reset(writer.data_);
        BinarySet res_set;
        res_set.Append("IVF", data, writer.rp);
        return res_set;
    } catch (std::exception &e) {
        KNOWHERE_THROW_MSG(e.what());
    }
}

// Deserialize the "IVF" blob on the host and transfer it onto this GPU.
void GPUIVF::LoadImpl(const BinarySet &index_binary) {
    auto binary = index_binary.GetByName("IVF");
    MemoryIOReader reader;
    {
        reader.total = binary->size;
        reader.data_ = binary->data.get();
        faiss::Index *index = faiss::read_index(&reader);
        if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id_)) {
            ResScope rs(gpu_id_, res);
            auto device_index = faiss::gpu::index_cpu_to_gpu(res.get(), gpu_id_, index);
            index_.reset(device_index);
        } else {
            KNOWHERE_THROW_MSG("Load error, can't get gpu resource");
        }
        // The host staging copy is no longer needed once it is on the GPU.
        delete index;
    }
}
// Materialize the device index on the host as an IVF index (mutex-guarded).
IVFIndexPtr GPUIVF::Copy_index_gpu_to_cpu() {
    std::lock_guard<std::mutex> lk(mutex_);
    faiss::Index *device_index = index_.get();
    faiss::Index *host_index = faiss::gpu::index_gpu_to_cpu(device_index);
    std::shared_ptr<faiss::Index> new_index;
    new_index.reset(host_index);
    return std::make_shared<IVF>(new_index);
}

// Device-side kNN search; nprobe (default 1) is applied under the mutex.
void GPUIVF::search_impl(int64_t n,
                         const float *data,
                         int64_t k,
                         float *distances,
                         int64_t *labels,
                         const Config &cfg) {
    // NOTE(review): static_pointer_cast never yields null for a non-null
    // index_, so this condition only guards against an unset index.
    if (auto device_index = std::static_pointer_cast<faiss::gpu::GpuIndexIVF>(index_)) {
        // todo: allocate search memory
        auto nprobe = cfg.get_with_default("nprobe", size_t(1));
        std::lock_guard<std::mutex> lk(mutex_);
        device_index->setNumProbes(nprobe);
        device_index->search(n, (float *) data, k, distances, labels);
    }
}

// Public GPU→CPU copy; `config` is currently unused.
VectorIndexPtr GPUIVF::CopyGpuToCpu(const Config &config) {
    std::lock_guard<std::mutex> lk(mutex_);
    faiss::Index *device_index = index_.get();
    faiss::Index *host_index = faiss::gpu::index_gpu_to_cpu(device_index);
    std::shared_ptr<faiss::Index> new_index;
    new_index.reset(host_index);
    return std::make_shared<IVF>(new_index);
}

// Clone = round-trip through host memory back onto the same device.
VectorIndexPtr GPUIVF::Clone() {
    auto cpu_idx = CopyGpuToCpu(Config());
    return ::zilliz::knowhere::CopyCpuToGpu(cpu_idx, gpu_id_, Config());
}

// Device-to-device copy, staged through host memory.
VectorIndexPtr GPUIVF::CopyGpuToGpu(const int64_t &device_id, const Config &config) {
    auto host_index = CopyGpuToCpu(config);
    return std::static_pointer_cast<IVF>(host_index)->CopyCpuToGpu(device_id, config);
}
// Train an IVF-PQ index on the GPU and return the trained structure as a
// host-side model. M = number of subquantizers, nbits = bits per code.
IndexModelPtr GPUIVFPQ::Train(const DatasetPtr &dataset, const Config &config) {
    auto nlist = config["nlist"].as<size_t>();
    auto M = config["M"].as<size_t>(); // number of subquantizers(subvectors)
    auto nbits = config["nbits"].as<size_t>();// number of bit per subvector index
    // NOTE(review): gpu_num is read but never used (see TODO below).
    auto gpu_num = config.get_with_default("gpu_id", gpu_id_);
    // Both ternary arms are METRIC_L2 on purpose: inner product is not
    // supported by this path.
    auto metric_type = config["metric_type"].as_string() == "L2" ?
                       faiss::METRIC_L2 : faiss::METRIC_L2; // IP not support.
    GETTENSOR(dataset)
    // TODO(linxj): set device here.
    faiss::gpu::StandardGpuResources res;
    faiss::gpu::GpuIndexIVFPQ device_index(&res, dim, nlist, M, nbits, metric_type);
    device_index.train(rows, (float *) p_data);
    std::shared_ptr<faiss::Index> host_index = nullptr;
    host_index.reset(faiss::gpu::index_gpu_to_cpu(&device_index));
    return std::make_shared<IVFIndexModel>(host_index);
}

// Search parameters for IVF-PQ: only nprobe is taken from config.
std::shared_ptr<faiss::IVFSearchParameters> GPUIVFPQ::GenParams(const Config &config) {
    auto params = std::make_shared<faiss::IVFPQSearchParameters>();
    params->nprobe = config.get_with_default("nprobe", size_t(1));
    //params->scan_table_threshold = 0;
    //params->polysemous_ht = 0;
    //params->max_codes = 0;
    return params;
}

// GPU→CPU transfer for IVF-PQ is not implemented.
VectorIndexPtr GPUIVFPQ::CopyGpuToCpu(const Config &config) {
    KNOWHERE_THROW_MSG("not support yet");
}

// Train an IVF index with scalar quantization ("IVF<nlist>,SQ<nbits>") on
// the GPU via the faiss index factory and return a host-side model.
IndexModelPtr GPUIVFSQ::Train(const DatasetPtr &dataset, const Config &config) {
    auto nlist = config["nlist"].as<size_t>();
    auto nbits = config["nbits"].as<size_t>(); // TODO(linxj): gpu only support SQ4 SQ8 SQ16
    auto gpu_num = config.get_with_default("gpu_id", gpu_id_);
    auto metric_type = config["metric_type"].as_string() == "L2" ?
                       faiss::METRIC_L2 : faiss::METRIC_INNER_PRODUCT;
    GETTENSOR(dataset)
    std::stringstream index_type;
    index_type << "IVF" << nlist << "," << "SQ" << nbits;
    auto build_index = faiss::index_factory(dim, index_type.str().c_str(), metric_type);
    faiss::gpu::StandardGpuResources res;
    auto device_index = faiss::gpu::index_cpu_to_gpu(&res, gpu_num, build_index);
    device_index->train(rows, (float *) p_data);
    std::shared_ptr<faiss::Index> host_index = nullptr;
    host_index.reset(faiss::gpu::index_gpu_to_cpu(device_index));
    // Both staging indexes are owned locally and released here.
    delete device_index;
    delete build_index;
    return std::make_shared<IVFIndexModel>(host_index);
}

// GPU→CPU copy of the SQ index (mutex-guarded).
VectorIndexPtr GPUIVFSQ::CopyGpuToCpu(const Config &config) {
    std::lock_guard<std::mutex> lk(mutex_);
    faiss::Index *device_index = index_.get();
    faiss::Index *host_index = faiss::gpu::index_gpu_to_cpu(device_index);
    std::shared_ptr<faiss::Index> new_index;
    new_index.reset(host_index);
    return std::make_shared<IVFSQ>(new_index);
}
// Process-wide singleton (function-local static, thread-safe since C++11).
FaissGpuResourceMgr &FaissGpuResourceMgr::GetInstance() {
    static FaissGpuResourceMgr mgr;
    return mgr;
}

// Attach temporary GPU memory to `res`: an explicit `size` wins; otherwise
// the per-device configured default is used if one exists; otherwise faiss
// allocates lazily on first use.
void FaissGpuResourceMgr::AllocateTempMem(std::shared_ptr<faiss::gpu::StandardGpuResources> &res,
                                          const int64_t &device_id,
                                          const int64_t &size) {
    if (size) {
        res->setTempMemory(size);
        return;
    }
    auto it = devices_params_.find(device_id);
    if (it != devices_params_.end()) {
        res->setTempMemory(it->second.temp_mem_size);
    }
    // else do nothing — allocate when used.
}

// Register a device and its pool sizing; takes effect at InitResource() time.
void FaissGpuResourceMgr::InitDevice(int64_t device_id,
                                     int64_t pin_mem_size,
                                     int64_t temp_mem_size,
                                     int64_t res_num) {
    DeviceParams params;
    params.pinned_mem_size = pin_mem_size;
    params.temp_mem_size = temp_mem_size;
    params.resource_num = res_num;
    devices_params_.emplace(device_id, params);
}

// Fill each device's idle pool with `resource_num` fresh gpu resources.
void FaissGpuResourceMgr::InitResource() {
    for (auto &device_entry : devices_params_) {
        auto &pool = idle_[device_entry.first];
        for (int64_t i = 0; i < device_entry.second.resource_num; ++i) {
            auto res = std::make_shared<faiss::gpu::StandardGpuResources>();
            res->noTempMemory();
            pool.push_back(res);
        }
    }
}
std::shared_ptr<faiss::gpu::StandardGpuResources> FaissGpuResourceMgr::GetRes(const int64_t &device_id,
const int64_t &alloc_size) {
std::lock_guard<std::mutex> lk(mutex_);
if (!is_init) {
InitResource();
is_init = true;
}
auto search = idle_.find(device_id);
if (search != idle_.end()) {
auto res = search->second.back();
AllocateTempMem(res, device_id, alloc_size);
search->second.pop_back();
return res;
}
}
void FaissGpuResourceMgr::MoveToInuse(const int64_t &device_id, const std::shared_ptr<faiss::gpu::StandardGpuResources> &res) {
std::lock_guard<std::mutex> lk(mutex_);
in_use_[device_id].push_back(res);
}
void FaissGpuResourceMgr::MoveToIdle(const int64_t &device_id, const std::shared_ptr<faiss::gpu::StandardGpuResources> &res) {
std::lock_guard<std::mutex> lk(mutex_);
idle_[device_id].push_back(res);
}
void GPUIndex::SetGpuDevice(const int &gpu_id) {
gpu_id_ = gpu_id;
}
const int64_t &GPUIndex::GetGpuDevice() {
return gpu_id_;
}
}
}
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#include <faiss/IndexFlat.h>
#include <faiss/AutoTune.h>
#include <faiss/MetaIndexes.h>
#include <faiss/index_io.h>
#include <faiss/gpu/GpuAutoTune.h>
#include "knowhere/common/exception.h"
#include "knowhere/adapter/faiss_adopt.h"
#include "knowhere/index/vector_index/idmap.h"
namespace zilliz {
namespace knowhere {
// Serialize the flat index under the mutex; requires a created index.
BinarySet IDMAP::Serialize() {
    if (!index_) {
        KNOWHERE_THROW_MSG("index not initialize");
    }
    std::lock_guard<std::mutex> lk(mutex_);
    return SerializeImpl();
}

// Restore the index from a BinarySet under the mutex.
void IDMAP::Load(const BinarySet &index_binary) {
    std::lock_guard<std::mutex> lk(mutex_);
    LoadImpl(index_binary);
}
// Brute-force kNN search: returns a Dataset holding rows*k int64 ids and
// float distances. k comes from config["k"].
DatasetPtr IDMAP::Search(const DatasetPtr &dataset, const Config &config) {
    if (!index_) {
        KNOWHERE_THROW_MSG("index not initialize");
    }
    auto k = config["k"].as<size_t>();
    GETTENSOR(dataset)
    // TODO(linxj): handle malloc exception
    auto elems = rows * k;
    auto res_ids = (int64_t *) malloc(sizeof(int64_t) * elems);
    auto res_dis = (float *) malloc(sizeof(float) * elems);
    index_->search(rows, (float *) p_data, k, res_dis, res_ids);
    // Ownership of the raw result buffers transfers to the arrow buffers.
    auto id_buf = MakeMutableBufferSmart((uint8_t *) res_ids, sizeof(int64_t) * elems);
    auto dist_buf = MakeMutableBufferSmart((uint8_t *) res_dis, sizeof(float) * elems);
    // TODO: magic
    // Buffer slot 0 is the (absent) validity bitmap of each arrow array.
    std::vector<BufferPtr> id_bufs{nullptr, id_buf};
    std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
    auto int64_type = std::make_shared<arrow::Int64Type>();
    auto float_type = std::make_shared<arrow::FloatType>();
    auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs);
    auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs);
    auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data);
    auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data);
    std::vector<ArrayPtr> array{ids, dists};
    return std::make_shared<Dataset>(array, nullptr);
}

// Append vectors (dataset tensor 0) with explicit ids (dataset array 0).
void IDMAP::Add(const DatasetPtr &dataset, const Config &config) {
    if (!index_) {
        KNOWHERE_THROW_MSG("index not initialize");
    }
    std::lock_guard<std::mutex> lk(mutex_);
    GETTENSOR(dataset)
    // TODO: magic here.
    auto array = dataset->array()[0];
    // NOTE(review): GetValues<long> — `long` is 32-bit on LLP64 platforms
    // (Windows) while the stored ids are int64; confirm faiss's idx_t width
    // before porting.
    auto p_ids = array->data()->GetValues<long>(1, 0);
    index_->add_with_ids(rows, (float *) p_data, p_ids);
}
// Number of stored vectors.
int64_t IDMAP::Count() {
    return index_->ntotal;
}

// Vector dimensionality.
int64_t IDMAP::Dimension() {
    return index_->d;
}
// TODO(linxj): return const pointer
// Expose the flat index's raw vector storage (host memory).
float *IDMAP::GetRawVectors() {
    try {
        // Bug fix: dynamic_cast on a POINTER returns nullptr on mismatch —
        // it does not throw — so the original dereferenced nullptr
        // (a segfault the catch block could never see). Check explicitly.
        auto file_index = dynamic_cast<faiss::IndexIDMap *>(index_.get());
        if (file_index == nullptr) {
            KNOWHERE_THROW_MSG("index is not a faiss::IndexIDMap");
        }
        auto flat_index = dynamic_cast<faiss::IndexFlat *>(file_index->index);
        if (flat_index == nullptr) {
            KNOWHERE_THROW_MSG("sub-index is not a faiss::IndexFlat");
        }
        return flat_index->xb.data();
    } catch (std::exception &e) {
        KNOWHERE_THROW_MSG(e.what());
    }
}
// TODO(linxj): return const pointer
// Expose the id-mapping table backing the index.
int64_t *IDMAP::GetRawIds() {
    try {
        // Bug fix: dynamic_cast on a pointer returns nullptr on mismatch
        // instead of throwing; the original would segfault on ->id_map.
        auto file_index = dynamic_cast<faiss::IndexIDMap *>(index_.get());
        if (file_index == nullptr) {
            KNOWHERE_THROW_MSG("index is not a faiss::IndexIDMap");
        }
        return file_index->id_map.data();
    } catch (std::exception &e) {
        KNOWHERE_THROW_MSG(e.what());
    }
}
// faiss factory string: brute-force flat storage behind an id mapping.
const char* type = "IDMap,Flat";

// Create an empty "IDMap,Flat" index with the configured dim and metric.
void IDMAP::Train(const Config &config) {
    auto metric_type = config["metric_type"].as_string() == "L2" ?
                       faiss::METRIC_L2 : faiss::METRIC_INNER_PRODUCT;
    auto dim = config["dim"].as<size_t>();
    auto index = faiss::index_factory(dim, type, metric_type);
    index_.reset(index);
}

// Deep copy via faiss::clone_index, taken under the index mutex.
VectorIndexPtr IDMAP::Clone() {
    std::lock_guard<std::mutex> lk(mutex_);
    auto clone_index = faiss::clone_index(index_.get());
    std::shared_ptr<faiss::Index> new_index;
    new_index.reset(clone_index);
    return std::make_shared<IDMAP>(new_index);
}

// Transfer this CPU index onto GPU `device_id`; throws when no gpu resource
// is available for that device.
VectorIndexPtr IDMAP::CopyCpuToGpu(const int64_t &device_id, const Config &config) {
    if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)){
        ResScope rs(device_id, res);
        auto gpu_index = faiss::gpu::index_cpu_to_gpu(res.get(), device_id, index_.get());
        std::shared_ptr<faiss::Index> device_index;
        device_index.reset(gpu_index);
        return std::make_shared<GPUIDMAP>(device_index, device_id);
    } else {
        KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource");
    }
}

// Brute-force index: nothing to seal.
void IDMAP::Seal() {
    // do nothing
}
// GPU→CPU copy of the id-mapped flat index (mutex-guarded).
VectorIndexPtr GPUIDMAP::CopyGpuToCpu(const Config &config) {
    std::lock_guard<std::mutex> lk(mutex_);
    faiss::Index *device_index = index_.get();
    faiss::Index *host_index = faiss::gpu::index_gpu_to_cpu(device_index);
    std::shared_ptr<faiss::Index> new_index;
    new_index.reset(host_index);
    return std::make_shared<IDMAP>(new_index);
}

// Clone = round-trip through host memory back onto this index's GPU.
VectorIndexPtr GPUIDMAP::Clone() {
    auto cpu_idx = CopyGpuToCpu(Config());
    if (auto idmap = std::dynamic_pointer_cast<IDMAP>(cpu_idx)){
        return idmap->CopyCpuToGpu(gpu_id_, Config());
    }
    else {
        KNOWHERE_THROW_MSG("IndexType not Support GpuClone");
    }
}
// Serialize by staging through a host copy. The blob is stored under the
// key "IVF" (historical name — LoadImpl reads the same key).
BinarySet GPUIDMAP::SerializeImpl() {
    try {
        MemoryIOWriter writer;
        {
            faiss::Index *index = index_.get();
            faiss::Index *host_index = faiss::gpu::index_gpu_to_cpu(index);
            faiss::write_index(host_index, &writer);
            delete host_index;
        }
        // NOTE(review): shared_ptr built via make_shared<uint8_t> then
        // reset() to writer.data_ — the default `delete` deleter must match
        // MemoryIOWriter's allocation; confirm.
        auto data = std::make_shared<uint8_t>();
        data.reset(writer.data_);
        BinarySet res_set;
        res_set.Append("IVF", data, writer.rp);
        return res_set;
    } catch (std::exception &e) {
        KNOWHERE_THROW_MSG(e.what());
    }
}

// Deserialize the "IVF" blob on the host and transfer it onto this GPU.
void GPUIDMAP::LoadImpl(const BinarySet &index_binary) {
    auto binary = index_binary.GetByName("IVF");
    MemoryIOReader reader;
    {
        reader.total = binary->size;
        reader.data_ = binary->data.get();
        faiss::Index *index = faiss::read_index(&reader);
        if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id_) ){
            ResScope rs(gpu_id_, res);
            auto device_index = faiss::gpu::index_cpu_to_gpu(res.get(), gpu_id_, index);
            index_.reset(device_index);
        } else {
            KNOWHERE_THROW_MSG("Load error, can't get gpu resource");
        }
        // The host staging copy is released after the GPU transfer.
        delete index;
    }
}
VectorIndexPtr GPUIDMAP::CopyGpuToGpu(const int64_t &device_id, const Config &config) {
    // Device-to-device transfer is routed through host memory:
    // GPU -> CPU copy, then CPU -> target GPU.
    auto host_copy = std::static_pointer_cast<IDMAP>(CopyGpuToCpu(config));
    return host_copy->CopyCpuToGpu(device_id, config);
}
float *GPUIDMAP::GetRawVectors() {
    // Raw-vector access is not supported for the GPU-resident IDMAP;
    // this always throws (the macro never returns).
    KNOWHERE_THROW_MSG("Not support");
}
int64_t *GPUIDMAP::GetRawIds() {
    // Raw-id access is not supported for the GPU-resident IDMAP;
    // this always throws (the macro never returns).
    KNOWHERE_THROW_MSG("Not support");
}
}
}
此差异已折叠。
#include <mutex>
#include "knowhere/index/vector_index/kdt_parameters.h"
namespace zilliz {
namespace knowhere {
const std::vector<KDTParameter> &
KDTParameterManagement::GetKDTParameters() {
    // Returns the default KDT parameter list built once in the constructor.
    return kdt_parameters_;
}
KDTParameterManagement::KDTParameterManagement() {
    // Populate the default SPTAG KDT parameter set (name -> value, both
    // kept as strings). Values are unchanged from the original defaults.
    kdt_parameters_ = {
        // tree construction
        {"KDTNumber", "1"},
        {"NumTopDimensionKDTSplit", "5"},
        {"NumSamplesKDTSplitConsideration", "100"},
        {"TPTNumber", "32"},
        {"TPTLeafSize", "2000"},
        {"NumTopDimensionTPTSplit", "5"},
        // graph construction / refinement
        {"NeighborhoodSize", "32"},
        {"GraphNeighborhoodScale", "2"},
        {"GraphCEFScale", "2"},
        {"RefineIterations", "0"},
        {"CEF", "1000"},
        {"MaxCheckForRefineGraph", "10000"},
        // runtime / search
        {"NumberOfThreads", "1"},
        {"MaxCheck", "8192"},
        {"ThresholdOfNumberOfContinuousNoBetterPropagation", "3"},
        {"NumberOfInitialDynamicPivots", "50"},
        {"NumberOfOtherDynamicPivots", "4"},
    };
}
} // namespace knowhere
} // namespace zilliz
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册